2022-04-22 20:27:17 +00:00
|
|
|
// Local alias for DistributionTypes.DistributionOperation.genericFunctionCallInfo,
// the request type that `run` takes as its positional argument.
type functionCallInfo = DistributionTypes.DistributionOperation.genericFunctionCallInfo
|
2022-04-11 18:00:56 +00:00
|
|
|
// Local alias for DistributionTypes.genericDist, the distribution value carried by `Dist(...)` outputs.
type genericDist = DistributionTypes.genericDist
|
|
|
|
// Local alias for DistributionTypes.error, the error value carried by `GenDistError(...)` outputs.
type error = DistributionTypes.error
|
2022-03-27 20:59:46 +00:00
|
|
|
|
|
|
|
// TODO: Consider caching (memoizing) some calculations. Better analytics/tracking could also go a long way.
|
2022-03-27 18:22:26 +00:00
|
|
|
|
2022-03-31 18:07:39 +00:00
|
|
|
// Evaluation settings passed to `run` as `~env`.
type env = {
  // Forwarded as the sample count when `run` converts to a sample set, builds a
  // sparkline, or builds a point set.
  sampleCount: int,
  // Forwarded as `~xyPointLength` when `run` builds a point set.
  xyPointLength: int,
}
|
|
|
|
|
2022-04-29 22:51:00 +00:00
|
|
|
// Default evaluation settings: 10000 for both the sample count and the xy point length.
let defaultEnv: env = {sampleCount: 10000, xyPointLength: 10000}
|
|
|
|
|
2022-03-31 17:26:29 +00:00
|
|
|
// Every kind of value `run` can produce. Failures are represented by the
// `GenDistError` case rather than by raising.
type outputType =
  // A distribution result.
  | Dist(genericDist)
  // A numeric result.
  | Float(float)
  // A textual result.
  | String(string)
  // A boolean result.
  | Bool(bool)
  // An operation failed; carries the error.
  | GenDistError(error)
|
2022-03-27 18:22:26 +00:00
|
|
|
|
2022-03-31 17:26:29 +00:00
|
|
|
/*
|
|
|
|
We're going to add another function to this module later, so first define a
|
|
|
|
local version, which is not exported.
|
|
|
|
*/
|
|
|
|
module OutputLocal = {
  type t = outputType

  // Some(error) when the output is a `GenDistError`, None for every other case.
  let toError = (t: outputType) =>
    switch t {
    | GenDistError(err) => Some(err)
    | _ => None
    }

  // Extracts the error from an output, handing `Unreachable` to E.O2.default as
  // the fallback for outputs that are not `GenDistError`.
  let toErrorOrUnreachable = (t: t): error => E.O2.default(toError(t), (Unreachable: error))

  // Ok(dist) for a `Dist` output; any other output becomes Error(toErrorOrUnreachable(...)).
  let toDistR = (t: t): result<genericDist, error> =>
    switch t {
    | Dist(dist) => Ok(dist)
    | other => Error(toErrorOrUnreachable(other))
    }

  // Some(dist) for a `Dist` output, None otherwise.
  let toDist = (t: t) =>
    switch t {
    | Dist(dist) => Some(dist)
    | _ => None
    }

  // Some(float) for a `Float` output, None otherwise.
  let toFloat = (t: t) =>
    switch t {
    | Float(value) => Some(value)
    | _ => None
    }

  // Ok(float) for a `Float` output; any other output becomes Error(toErrorOrUnreachable(...)).
  let toFloatR = (t: t): result<float, error> =>
    switch t {
    | Float(value) => Ok(value)
    | other => Error(toErrorOrUnreachable(other))
    }

  // Some(string) for a `String` output, None otherwise.
  let toString = (t: t) =>
    switch t {
    | String(text) => Some(text)
    | _ => None
    }

  // Ok(string) for a `String` output; any other output becomes Error(toErrorOrUnreachable(...)).
  let toStringR = (t: t): result<string, error> =>
    switch t {
    | String(text) => Ok(text)
    | other => Error(toErrorOrUnreachable(other))
    }

  // Some(bool) for a `Bool` output, None otherwise.
  let toBool = (t: t) =>
    switch t {
    | Bool(flag) => Some(flag)
    | _ => None
    }

  // Ok(bool) for a `Bool` output; any other output becomes Error(toErrorOrUnreachable(...)).
  let toBoolR = (t: t): result<bool, error> =>
    switch t {
    | Bool(flag) => Ok(flag)
    | other => Error(toErrorOrUnreachable(other))
    }

  // Collapses a result into an output: Ok values pass through unchanged and
  // errors are wrapped in `GenDistError`. Used to catch errors coming out of
  // other switch statements.
  let fromResult = (r: result<t, error>): outputType =>
    switch r {
    | Error(err) => GenDistError(err)
    | Ok(output) => output
    }
}
|
|
|
|
|
2022-03-31 18:07:39 +00:00
|
|
|
// Evaluates one distribution operation request and returns the result as an `outputType`.
//
// ~env: supplies `sampleCount` and `xyPointLength`, which are forwarded to the GenericDist helpers.
// functionCallInfo: the request to evaluate (`FromDist`, `FromFloat`, `Mixture` or `FromSamples`).
//
// Helper results that are `Error` are returned as `GenDistError` (via OutputLocal.fromResult).
let rec run = (~env, functionCallInfo: functionCallInfo): outputType => {
  let {sampleCount, xyPointLength} = env

  // Re-enters `run` with the same env to apply `operation` to `dist`.
  let runOnDist = (
    operation: DistributionTypes.DistributionOperation.fromDist,
    dist: genericDist,
  ) => run(~env, FromDist(operation, dist))

  // Wrap a helper's result in the matching output constructor, then collapse it
  // with OutputLocal.fromResult so that errors become `GenDistError`.
  let distOutput = res => res->E.R2.fmap(d => Dist(d))->OutputLocal.fromResult
  let floatOutput = res => res->E.R2.fmap(x => Float(x))->OutputLocal.fromResult
  let stringOutput = res => res->E.R2.fmap(s => String(s))->OutputLocal.fromResult

  // Conversion callback handed to GenericDist helpers: asks `run` for a point
  // set and yields it, or the error extracted from whatever else came back.
  let toPointSetFn = dist =>
    switch runOnDist(ToDist(ToPointSet), dist) {
    | Dist(PointSet(pointSet)) => Ok(pointSet)
    | other => Error(OutputLocal.toErrorOrUnreachable(other))
    }

  // Same idea as toPointSetFn, but requests a sample set of `sampleCount` samples.
  let toSampleSetFn = dist =>
    switch runOnDist(ToDist(ToSampleSet(sampleCount)), dist) {
    | Dist(SampleSet(sampleSet)) => Ok(sampleSet)
    | other => Error(OutputLocal.toErrorOrUnreachable(other))
    }

  // Callbacks for GenericDist.mixture: pointwise-multiply a dist by a float
  // weight, and pointwise-add two dists, both evaluated through `run`.
  let scaleMultiply = (dist, weight) =>
    runOnDist(ToDistCombination(Pointwise, #Multiply, #Float(weight)), dist)->OutputLocal.toDistR

  let pointwiseAdd = (dist1, dist2) =>
    runOnDist(ToDistCombination(Pointwise, #Add, #Dist(dist2)), dist1)->OutputLocal.toDistR

  // Dispatches a single `fromDist` operation on `dist` to the matching GenericDist helper.
  let evalFromDist = (
    subFnName: DistributionTypes.DistributionOperation.fromDist,
    dist: genericDist,
  ) =>
    switch subFnName {
    // Operations producing a float.
    | ToFloat(distToFloatOperation) =>
      GenericDist.toFloatOperation(dist, ~toPointSetFn, ~distToFloatOperation)->floatOutput
    | ToScore(KLDivergence(t2)) => GenericDist.klDivergence(dist, t2, ~toPointSetFn)->floatOutput

    // Operations producing a bool.
    | ToBool(IsNormalized) => Bool(GenericDist.isNormalized(dist))

    // Operations producing a string.
    | ToString(ToString) => String(GenericDist.toString(dist))
    | ToString(ToSparkline(bucketCount)) =>
      GenericDist.toSparkline(dist, ~sampleCount, ~bucketCount, ())->stringOutput

    // Operations producing a distribution.
    | ToDist(Inspect) => {
        // Side effect only: logs the dist, then returns it unchanged.
        Js.log2("Console log requested: ", dist)
        Dist(dist)
      }
    | ToDist(Normalize) => Dist(GenericDist.normalize(dist))
    | ToDist(Truncate(leftCutoff, rightCutoff)) =>
      GenericDist.truncate(~toPointSetFn, ~leftCutoff, ~rightCutoff, dist, ())->distOutput
    | ToDist(ToSampleSet(n)) =>
      GenericDist.toSampleSetDist(dist, n)
      ->E.R2.fmap(sampleSet => Dist(SampleSet(sampleSet)))
      ->OutputLocal.fromResult
    | ToDist(ToPointSet) =>
      GenericDist.toPointSet(dist, ~xyPointLength, ~sampleCount, ())
      ->E.R2.fmap(pointSet => Dist(PointSet(pointSet)))
      ->OutputLocal.fromResult
    | ToDist(Scale(#LogarithmWithThreshold(eps), f)) =>
      GenericDist.pointwiseCombinationFloat(
        dist,
        ~toPointSetFn,
        ~algebraicCombination=#LogarithmWithThreshold(eps),
        ~f,
      )->distOutput
    | ToDist(Scale(#Logarithm, f)) =>
      GenericDist.pointwiseCombinationFloat(
        dist,
        ~toPointSetFn,
        ~algebraicCombination=#Logarithm,
        ~f,
      )->distOutput
    | ToDist(Scale(#Power, f)) =>
      GenericDist.pointwiseCombinationFloat(
        dist,
        ~toPointSetFn,
        ~algebraicCombination=#Power,
        ~f,
      )->distOutput

    // Combinations with a second operand.
    | ToDistCombination(Algebraic(_), _, #Float(_)) => GenDistError(NotYetImplemented)
    | ToDistCombination(Algebraic(strategy), arithmeticOperation, #Dist(t2)) =>
      GenericDist.algebraicCombination(
        dist,
        ~strategy,
        ~toPointSetFn,
        ~toSampleSetFn,
        ~arithmeticOperation,
        ~t2,
      )->distOutput
    | ToDistCombination(Pointwise, algebraicCombination, #Dist(t2)) =>
      GenericDist.pointwiseCombination(dist, ~toPointSetFn, ~algebraicCombination, ~t2)->distOutput
    | ToDistCombination(Pointwise, algebraicCombination, #Float(f)) =>
      GenericDist.pointwiseCombinationFloat(
        dist,
        ~toPointSetFn,
        ~algebraicCombination,
        ~f,
      )->distOutput
    }

  switch functionCallInfo {
  | FromDist(subFnName, dist) => evalFromDist(subFnName, dist)
  // A float input is turned into a dist with GenericDist.fromFloat and re-run as FromDist.
  | FromFloat(subFnName, float) => runOnDist(subFnName, GenericDist.fromFloat(float))
  | Mixture(dists) =>
    GenericDist.mixture(
      dists,
      ~scaleMultiplyFn=scaleMultiply,
      ~pointwiseAddFn=pointwiseAdd,
    )->distOutput
  | FromSamples(xs) =>
    SampleSetDist.make(xs)
    ->E.R2.errMap(sampleSetError => DistributionTypes.SampleSetError(sampleSetError))
    ->E.R2.fmap(sampleSet => Dist(DistributionTypes.SampleSet(sampleSet)))
    ->OutputLocal.fromResult
  }
}
|
2022-03-28 19:14:39 +00:00
|
|
|
|
2022-03-31 18:51:42 +00:00
|
|
|
// Convenience wrapper: packages the operation and `dist` as a `FromDist` request and evaluates it with `run`.
let runFromDist = (~env, ~functionCallInfo, dist) => FromDist(functionCallInfo, dist)->run(~env)
|
|
|
|
// Convenience wrapper: packages the operation and `float` as a `FromFloat` request and evaluates it with `run`.
let runFromFloat = (~env, ~functionCallInfo, float) => FromFloat(functionCallInfo, float)->run(~env)
|
2022-03-28 19:14:39 +00:00
|
|
|
|
2022-03-31 17:26:29 +00:00
|
|
|
module Output = {
  include OutputLocal

  // Applies a single-parameter operation to a previous output by building the
  // corresponding request and evaluating it with `run`.
  //
  // ~env: evaluation settings forwarded to `run`.
  // input: the earlier output to use as the operation's argument.
  // functionCallInfo: the operation, still missing its dist/float argument.
  //
  // An input that is already a `GenDistError` is passed along as that error. An
  // input of the wrong kind for the operation yields an `OtherError` message.
  let fmap = (
    ~env,
    input: outputType,
    functionCallInfo: DistributionTypes.DistributionOperation.singleParamaterFunction,
  ): outputType => {
    let pendingCall: result<functionCallInfo, error> = switch (functionCallInfo, input) {
    | (_, GenDistError(err)) => Error(err)
    | (FromDist(operation), Dist(dist)) => Ok(FromDist(operation, dist))
    | (FromDist(_), _) => Error(OtherError("Expected dist, got something else"))
    | (FromFloat(operation), Float(value)) => Ok(FromFloat(operation, value))
    | (FromFloat(_), _) => Error(OtherError("Expected float, got something else"))
    }
    OutputLocal.fromResult(E.R2.fmap(pendingCall, call => run(~env, call)))
  }
}
|
2022-04-08 18:42:14 +00:00
|
|
|
|
2022-04-22 20:27:17 +00:00
|
|
|
// See comment above DistributionTypes.Constructors to explain the purpose of this module.
|
2022-04-09 02:55:06 +00:00
|
|
|
// I tried having another internal module called UsingDists, similar to how it's done in
|
2022-04-22 20:27:17 +00:00
|
|
|
// DistributionTypes.Constructors. However, this broke GenType for me, so beware.
|
2022-04-08 18:42:14 +00:00
|
|
|
module Constructors = {
  module C = DistributionTypes.Constructors.UsingDists
  open OutputLocal

  // Every function below follows the same recipe: build the request with the
  // matching `C` constructor, evaluate it with `run(~env, ...)`, then narrow the
  // output to a result with the appropriate `to...R` converter.

  // Float-valued queries.
  let mean = (~env, dist) => toFloatR(run(~env, C.mean(dist)))
  let sample = (~env, dist) => toFloatR(run(~env, C.sample(dist)))
  let cdf = (~env, dist, f) => toFloatR(run(~env, C.cdf(dist, f)))
  let inv = (~env, dist, f) => toFloatR(run(~env, C.inv(dist, f)))
  let pdf = (~env, dist, f) => toFloatR(run(~env, C.pdf(dist, f)))

  // Normalization.
  let normalize = (~env, dist) => toDistR(run(~env, C.normalize(dist)))
  let isNormalized = (~env, dist) => toBoolR(run(~env, C.isNormalized(dist)))

  // Scoring.
  let logScore = (~env, dist1, dist2) => toFloatR(run(~env, C.logScore(dist1, dist2)))

  // Conversions and construction.
  let toPointSet = (~env, dist) => toDistR(run(~env, C.toPointSet(dist)))
  let toSampleSet = (~env, dist, n) => toDistR(run(~env, C.toSampleSet(dist, n)))
  let fromSamples = (~env, xs) => toDistR(run(~env, C.fromSamples(xs)))
  let truncate = (~env, dist, leftCutoff, rightCutoff) =>
    toDistR(run(~env, C.truncate(dist, leftCutoff, rightCutoff)))
  let inspect = (~env, dist) => toDistR(run(~env, C.inspect(dist)))

  // String renderings.
  let toString = (~env, dist) => toStringR(run(~env, C.toString(dist)))
  let toSparkline = (~env, dist, bucketCount) =>
    toStringR(run(~env, C.toSparkline(dist, bucketCount)))

  // Algebraic combinations of two dists.
  let algebraicAdd = (~env, dist1, dist2) => toDistR(run(~env, C.algebraicAdd(dist1, dist2)))
  let algebraicMultiply = (~env, dist1, dist2) =>
    toDistR(run(~env, C.algebraicMultiply(dist1, dist2)))
  let algebraicDivide = (~env, dist1, dist2) => toDistR(run(~env, C.algebraicDivide(dist1, dist2)))
  let algebraicSubtract = (~env, dist1, dist2) =>
    toDistR(run(~env, C.algebraicSubtract(dist1, dist2)))
  let algebraicLogarithm = (~env, dist1, dist2) =>
    toDistR(run(~env, C.algebraicLogarithm(dist1, dist2)))
  let algebraicPower = (~env, dist1, dist2) => toDistR(run(~env, C.algebraicPower(dist1, dist2)))

  // Scaling a dist by a number.
  let scalePower = (~env, dist, n) => toDistR(run(~env, C.scalePower(dist, n)))
  let scaleLogarithm = (~env, dist, n) => toDistR(run(~env, C.scaleLogarithm(dist, n)))

  // Pointwise combinations of two dists.
  let pointwiseAdd = (~env, dist1, dist2) => toDistR(run(~env, C.pointwiseAdd(dist1, dist2)))
  let pointwiseMultiply = (~env, dist1, dist2) =>
    toDistR(run(~env, C.pointwiseMultiply(dist1, dist2)))
  let pointwiseDivide = (~env, dist1, dist2) => toDistR(run(~env, C.pointwiseDivide(dist1, dist2)))
  let pointwiseSubtract = (~env, dist1, dist2) =>
    toDistR(run(~env, C.pointwiseSubtract(dist1, dist2)))
  let pointwiseLogarithm = (~env, dist1, dist2) =>
    toDistR(run(~env, C.pointwiseLogarithm(dist1, dist2)))
  let pointwisePower = (~env, dist1, dist2) => toDistR(run(~env, C.pointwisePower(dist1, dist2)))
}
|