squiggle/packages/squiggle-lang/src/rescript/ReducerInterface/ReducerInterface_GenericDistribution.res

375 lines
14 KiB
Plaintext
Raw Normal View History

// Short alias for the reducer's internal expression-value module.
module ExpressionValue = ReducerInterface_InternalExpressionValue
// Local name for the expression-value type, used in the signatures below.
type expressionValue = ExpressionValue.expressionValue
// Helpers used by the distribution dispatcher: argument parsing, thin wrappers
// around DistributionOperation.run, and mixture construction.
module Helpers = {
  // Maps an operation name (plain or "dot"-prefixed) to its arithmetic tag.
  // NOTE(review): any unrecognised name silently falls back to #Multiply, and
  // "log" has no "dotLog" counterpart here; confirm both are intended.
  let arithmeticMap = r =>
    switch r {
    | "add" => #Add
    | "dotAdd" => #Add
    | "subtract" => #Subtract
    | "dotSubtract" => #Subtract
    | "divide" => #Divide
    | "log" => #Logarithm
    | "dotDivide" => #Divide
    | "pow" => #Power
    | "dotPow" => #Power
    | "multiply" => #Multiply
    | "dotMultiply" => #Multiply
    | _ => #Multiply
    }

  // Accepts exactly two arguments of which at least one is a distribution;
  // a number on either side is lifted with GenericDist.fromFloat.
  // Any other shape (including two plain numbers) yields None.
  let catchAndConvertTwoArgsToDists = (args: array<expressionValue>): option<(
    DistributionTypes.genericDist,
    DistributionTypes.genericDist,
  )> =>
    switch args {
    | [IEvDistribution(a), IEvDistribution(b)] => Some((a, b))
    | [IEvNumber(a), IEvDistribution(b)] => Some((GenericDist.fromFloat(a), b))
    | [IEvDistribution(a), IEvNumber(b)] => Some((a, GenericDist.fromFloat(b)))
    | _ => None
    }

  // Runs a ToFloat operation on `dist`; the run output is always wrapped in Some.
  let toFloatFn = (
    fnCall: DistributionTypes.DistributionOperation.toFloat,
    dist: DistributionTypes.genericDist,
    ~env: DistributionOperation.env,
  ) => {
    FromDist(DistributionTypes.DistributionOperation.ToFloat(fnCall), dist)
    ->DistributionOperation.run(~env)
    ->Some
  }

  // Runs a ToString operation on `dist`; the run output is always wrapped in Some.
  let toStringFn = (
    fnCall: DistributionTypes.DistributionOperation.toString,
    dist: DistributionTypes.genericDist,
    ~env: DistributionOperation.env,
  ) => {
    FromDist(DistributionTypes.DistributionOperation.ToString(fnCall), dist)
    ->DistributionOperation.run(~env)
    ->Some
  }

  // Runs a ToBool operation on `dist`; the run output is always wrapped in Some.
  let toBoolFn = (
    fnCall: DistributionTypes.DistributionOperation.toBool,
    dist: DistributionTypes.genericDist,
    ~env: DistributionOperation.env,
  ) => {
    FromDist(DistributionTypes.DistributionOperation.ToBool(fnCall), dist)
    ->DistributionOperation.run(~env)
    ->Some
  }

  // Runs a ToDist operation on `dist`; the run output is always wrapped in Some.
  let toDistFn = (
    fnCall: DistributionTypes.DistributionOperation.toDist,
    dist,
    ~env: DistributionOperation.env,
  ) => {
    FromDist(DistributionTypes.DistributionOperation.ToDist(fnCall), dist)
    ->DistributionOperation.run(~env)
    ->Some
  }

  // Combines `dist1` with `dist2` using the operation named by `arithmetic`
  // (resolved through arithmeticMap) in the given `direction`.
  // Unlike the to*Fn helpers above, the result is NOT wrapped in Some.
  let twoDiststoDistFn = (direction, arithmetic, dist1, dist2, ~env: DistributionOperation.env) => {
    FromDist(
      DistributionTypes.DistributionOperation.ToDistCombination(
        direction,
        arithmeticMap(arithmetic),
        #Dist(dist2),
      ),
      dist1,
    )->DistributionOperation.run(~env)
  }

  // Extracts the float from an IEvNumber; anything else is "Not a number".
  let parseNumber = (args: expressionValue): Belt.Result.t<float, string> =>
    switch args {
    | IEvNumber(x) => Ok(x)
    | _ => Error("Not a number")
    }

  // Applies parseNumber to every element and collapses the per-element results
  // with E.A.R.firstErrorOrOpen.
  let parseNumberArray = (args: array<expressionValue>): Belt.Result.t<array<float>, string> =>
    E.A.fmap(parseNumber, args) |> E.A.R.firstErrorOrOpen

  // Accepts a distribution as-is and lifts a number with GenericDist.fromFloat;
  // anything else is "Not a distribution".
  let parseDist = (args: expressionValue): Belt.Result.t<DistributionTypes.genericDist, string> =>
    switch args {
    | IEvDistribution(x) => Ok(x)
    | IEvNumber(x) => Ok(GenericDist.fromFloat(x))
    | _ => Error("Not a distribution")
    }

  // Applies parseDist to every element and collapses the per-element results
  // with E.A.R.firstErrorOrOpen.
  let parseDistributionArray = (ags: array<expressionValue>): Belt.Result.t<
    array<DistributionTypes.genericDist>,
    string,
  > => E.A.fmap(parseDist, ags) |> E.A.R.firstErrorOrOpen

  // Builds a mixture from parallel arrays of distributions and weights.
  // Mismatched lengths produce an ArgumentError instead of running the mixture.
  let mixtureWithGivenWeights = (
    distributions: array<DistributionTypes.genericDist>,
    weights: array<float>,
    ~env: DistributionOperation.env,
  ): DistributionOperation.outputType =>
    E.A.length(distributions) == E.A.length(weights)
      ? Mixture(Belt.Array.zip(distributions, weights))->DistributionOperation.run(~env)
      : GenDistError(
          ArgumentError("Error, mixture call has different number of distributions and weights"),
        )

  // Builds a mixture where every distribution gets the same weight, 1 / count.
  let mixtureWithDefaultWeights = (
    distributions: array<DistributionTypes.genericDist>,
    ~env: DistributionOperation.env,
  ): DistributionOperation.outputType => {
    let length = E.A.length(distributions)
    let weights = Belt.Array.make(length, 1.0 /. Belt.Int.toFloat(length))
    mixtureWithGivenWeights(distributions, weights, ~env)
  }

  // Entry point for "mx"/"mixture". Supported argument shapes:
  //   [array of dists]                    -> equal weights
  //   [array of dists, array of weights]  -> given weights
  //   [d1, ..., dn, array of weights]     -> given weights
  //   [d1, ..., dn]                       -> equal weights
  // Numbers are accepted wherever a distribution is expected (see parseDist).
  // Parse failures and unsupported shapes are returned as ArgumentError.
  let mixture = (
    args: array<expressionValue>,
    ~env: DistributionOperation.env,
  ): DistributionOperation.outputType => {
    let error = (err: string): DistributionOperation.outputType =>
      err->DistributionTypes.ArgumentError->GenDistError
    switch args {
    | [IEvArray(distributions)] =>
      switch parseDistributionArray(distributions) {
      | Ok(distrs) => mixtureWithDefaultWeights(distrs, ~env)
      | Error(err) => error(err)
      }
    | [IEvArray(distributions), IEvArray(weights)] =>
      switch (parseDistributionArray(distributions), parseNumberArray(weights)) {
      | (Ok(distrs), Ok(wghts)) => mixtureWithGivenWeights(distrs, wghts, ~env)
      | (Error(err), Ok(_)) => error(err)
      | (Ok(_), Error(err)) => error(err)
      | (Error(err1), Error(err2)) => error(`${err1}|${err2}`)
      }
    | _ =>
      // Variadic form: the type of the last argument decides how to read the rest.
      switch E.A.last(args) {
      | Some(IEvArray(b)) => {
          let weights = parseNumberArray(b)
          // Everything before the trailing weights array is a distribution.
          let distributions = parseDistributionArray(
            Belt.Array.slice(args, ~offset=0, ~len=E.A.length(args) - 1),
          )
          switch E.R.merge(distributions, weights) {
          | Ok(d, w) => mixtureWithGivenWeights(d, w, ~env)
          | Error(err) => error(err)
          }
        }
      | Some(IEvNumber(_))
      | Some(IEvDistribution(_)) =>
        switch parseDistributionArray(args) {
        | Ok(distributions) => mixtureWithDefaultWeights(distributions, ~env)
        | Error(err) => error(err)
        }
      | _ => error("Last argument of mx must be array or distribution")
      }
    }
  }

  // Computes klDivergence(prediction, answer) - klDivergence(prior, answer).
  // A failure in either term is returned as Some(GenDistError(_)) rather than
  // None, so the caller reports the real error instead of treating the call as
  // unhandled.
  // NOTE(review): assumes Constructors.klDivergence fails with the same error
  // type that GenDistError carries; confirm against DistributionOperation.
  let klDivergenceWithPrior = (
    prediction: DistributionTypes.genericDist,
    answer: DistributionTypes.genericDist,
    prior: DistributionTypes.genericDist,
    env: DistributionOperation.env,
  ) => {
    let term1 = DistributionOperation.Constructors.klDivergence(~env, prediction, answer)
    let term2 = DistributionOperation.Constructors.klDivergence(~env, prior, answer)
    switch E.R.merge(term1, term2)->E.R2.fmap(((a, b)) => a -. b) {
    | Ok(x) => x->DistributionOperation.Float->Some
    | Error(err) => err->DistributionOperation.GenDistError->Some
    }
  }
}
// Lookup and output-wrapping helpers for symbolic distribution constructors.
module SymbolicConstructors = {
  // Resolves the constructor for a three-float symbolic distribution by name.
  // Only "triangular" is known; every other name is an Error.
  let threeFloat = name =>
    if name == "triangular" {
      Ok(SymbolicDist.Triangular.make)
    } else {
      Error("Unreachable state")
    }

  // Wraps a symbolic construction result as dispatcher output. The result is
  // always Some: success becomes a symbolic Dist, failure an OtherError.
  let symbolicResultToOutput = (
    symbolicResult: result<SymbolicDistTypes.symbolicDist, string>,
  ): option<DistributionOperation.outputType> =>
    Some(
      switch symbolicResult {
      | Error(message) => GenDistError(OtherError(message))
      | Ok(symbolic) => Dist(Symbolic(symbolic))
      },
    )
}
// Routes a (function name, arguments) call to the matching distribution
// operation. Returns None when no case matches both the name and the shape of
// the arguments; otherwise returns Some of the operation output (which may
// itself be a GenDistError).
let dispatchToGenericOutput = (
  call: ExpressionValue.functionCall,
  env: DistributionOperation.env,
): option<DistributionOperation.outputType> => {
  // Shorthand for the default algebraic combination of two distributions.
  let algebraic = (operation, left, right) =>
    Helpers.twoDiststoDistFn(Algebraic(AsDefault), operation, left, right, ~env)

  switch call {
  // Symbolic constructors
  | ("triangular" as name, [IEvNumber(a), IEvNumber(b), IEvNumber(c)]) =>
    SymbolicConstructors.symbolicResultToOutput(
      E.R.bind(SymbolicConstructors.threeFloat(name), make => make(a, b, c)),
    )

  // Sampling
  | ("sample", [IEvDistribution(d)]) => Helpers.toFloatFn(#Sample, d, ~env)
  | ("sampleN", [IEvDistribution(d), IEvNumber(count)]) =>
    GenericDist.sampleN(d, Belt.Int.fromFloat(count))->FloatArray->Some

  // Summary statistics
  | ("mean", [IEvDistribution(d)]) => Helpers.toFloatFn(#Mean, d, ~env)
  | ("stdev", [IEvDistribution(d)]) => Helpers.toFloatFn(#Stdev, d, ~env)
  | ("variance", [IEvDistribution(d)]) => Helpers.toFloatFn(#Variance, d, ~env)
  | ("min", [IEvDistribution(d)]) => Helpers.toFloatFn(#Min, d, ~env)
  | ("max", [IEvDistribution(d)]) => Helpers.toFloatFn(#Max, d, ~env)
  | ("mode", [IEvDistribution(d)]) => Helpers.toFloatFn(#Mode, d, ~env)
  | ("integralSum", [IEvDistribution(d)]) => Helpers.toFloatFn(#IntegralSum, d, ~env)

  // String renderings
  | ("toString", [IEvDistribution(d)]) => Helpers.toStringFn(ToString, d, ~env)
  | ("sparkline", [IEvDistribution(d)]) =>
    Helpers.toStringFn(ToSparkline(MagicNumbers.Environment.sparklineLength), d, ~env)
  | ("sparkline", [IEvDistribution(d), IEvNumber(width)]) =>
    Helpers.toStringFn(ToSparkline(Belt.Float.toInt(width)), d, ~env)

  // exp(a) is expressed as e raised to the power a.
  // https://mathjs.org/docs/reference/functions/exp.html
  | ("exp", [IEvDistribution(exponent)]) =>
    Some(algebraic("pow", GenericDist.fromFloat(MagicNumbers.Math.e), exponent))

  | ("normalize", [IEvDistribution(d)]) => Helpers.toDistFn(Normalize, d, ~env)

  // Scoring
  | ("klDivergence", [IEvDistribution(prediction), IEvDistribution(answer)]) =>
    FromDist(ToScore(KLDivergence(answer)), prediction)->DistributionOperation.run(~env)->Some
  | (
      "klDivergence",
      [IEvDistribution(prediction), IEvDistribution(answer), IEvDistribution(prior)],
    ) =>
    Helpers.klDivergenceWithPrior(prediction, answer, prior, env)
  // The point answer may arrive as a number or as a symbolic float distribution.
  | (
      "logScoreWithPointAnswer",
      [
        IEvDistribution(prediction),
        IEvNumber(answer) | IEvDistribution(Symbolic(#Float(answer))),
        IEvDistribution(prior),
      ],
    ) =>
    Some(
      DistributionOperation.run(
        FromDist(ToScore(LogScore(answer, Some(prior))), prediction),
        ~env,
      ),
    )
  | (
      "logScoreWithPointAnswer",
      [IEvDistribution(prediction), IEvNumber(answer) | IEvDistribution(Symbolic(#Float(answer)))],
    ) =>
    Some(DistributionOperation.run(FromDist(ToScore(LogScore(answer, None)), prediction), ~env))

  | ("isNormalized", [IEvDistribution(d)]) => Helpers.toBoolFn(IsNormalized, d, ~env)
  | ("toPointSet", [IEvDistribution(d)]) => Helpers.toDistFn(ToPointSet, d, ~env)

  // Scaling
  | ("scaleLog", [IEvDistribution(d)]) =>
    Helpers.toDistFn(Scale(#Logarithm, MagicNumbers.Math.e), d, ~env)
  | ("scaleLog10", [IEvDistribution(d)]) => Helpers.toDistFn(Scale(#Logarithm, 10.0), d, ~env)
  | ("scaleLog", [IEvDistribution(d), IEvNumber(base)]) =>
    Helpers.toDistFn(Scale(#Logarithm, base), d, ~env)
  | ("scaleLogWithThreshold", [IEvDistribution(d), IEvNumber(base), IEvNumber(eps)]) =>
    Helpers.toDistFn(Scale(#LogarithmWithThreshold(eps), base), d, ~env)
  | ("scaleMultiply", [IEvDistribution(d), IEvNumber(factor)]) =>
    Helpers.toDistFn(Scale(#Multiply, factor), d, ~env)
  | ("scalePow", [IEvDistribution(d), IEvNumber(power)]) =>
    Helpers.toDistFn(Scale(#Power, power), d, ~env)
  // NOTE(review): scaleExp uses the same #Power scaling as scalePow with e as
  // the numeric argument; confirm that is the intended meaning of "exp" here.
  | ("scaleExp", [IEvDistribution(d)]) =>
    Helpers.toDistFn(Scale(#Power, MagicNumbers.Math.e), d, ~env)

  // Point queries ("quantile" is an alias of "inv")
  | ("cdf", [IEvDistribution(d), IEvNumber(x)]) => Helpers.toFloatFn(#Cdf(x), d, ~env)
  | ("pdf", [IEvDistribution(d), IEvNumber(x)]) => Helpers.toFloatFn(#Pdf(x), d, ~env)
  | ("inv" | "quantile", [IEvDistribution(d), IEvNumber(p)]) =>
    Helpers.toFloatFn(#Inv(p), d, ~env)

  // Sample sets
  | ("toSampleSet", [IEvDistribution(d), IEvNumber(count)]) =>
    Helpers.toDistFn(ToSampleSet(Belt.Int.fromFloat(count)), d, ~env)
  | ("toSampleSet", [IEvDistribution(d)]) => Helpers.toDistFn(ToSampleSet(env.sampleCount), d, ~env)
  | ("toList", [IEvDistribution(SampleSet(samples))]) =>
    Some(FloatArray(SampleSetDist.T.get(samples)))
  | ("fromSamples", [IEvArray(values)]) =>
    // Non-numeric entries are reported as a SampleSetError.
    switch Helpers.parseNumberArray(values)->E.R2.errMap(msg =>
      SampleSetDist.NonNumericInput(msg)
    ) {
    | Ok(samples) => Some(DistributionOperation.run(FromSamples(samples), ~env))
    | Error(e) => Some(GenDistError(SampleSetError(e)))
    }

  | ("inspect", [IEvDistribution(d)]) => Helpers.toDistFn(Inspect, d, ~env)

  // Truncation
  | ("truncateLeft", [IEvDistribution(d), IEvNumber(lower)]) =>
    Helpers.toDistFn(Truncate(Some(lower), None), d, ~env)
  | ("truncateRight", [IEvDistribution(d), IEvNumber(upper)]) =>
    Helpers.toDistFn(Truncate(None, Some(upper)), d, ~env)
  | ("truncate", [IEvDistribution(d), IEvNumber(lower), IEvNumber(upper)]) =>
    Helpers.toDistFn(Truncate(Some(lower), Some(upper)), d, ~env)

  | ("mx" | "mixture", args) => Some(Helpers.mixture(args, ~env))

  // Unary forms, expressed as binary operations against a constant
  | ("log", [IEvDistribution(d)]) =>
    Some(algebraic("log", d, GenericDist.fromFloat(MagicNumbers.Math.e)))
  | ("log10", [IEvDistribution(d)]) => Some(algebraic("log", d, GenericDist.fromFloat(10.0)))
  | ("unaryMinus", [IEvDistribution(d)]) =>
    Some(algebraic("multiply", d, GenericDist.fromFloat(-1.0)))

  // Binary arithmetic: None when the two arguments cannot be read as dists.
  | (("add" | "multiply" | "subtract" | "divide" | "pow" | "log") as operation, [_, _] as args) =>
    Helpers.catchAndConvertTwoArgsToDists(args)->E.O2.fmap(((left, right)) =>
      algebraic(operation, left, right)
    )
  | (
      ("dotAdd" | "dotMultiply" | "dotSubtract" | "dotDivide" | "dotPow") as operation,
      [_, _] as args,
    ) =>
    Helpers.catchAndConvertTwoArgsToDists(args)->E.O2.fmap(((left, right)) =>
      Helpers.twoDiststoDistFn(Pointwise, operation, left, right, ~env)
    )
  | ("dotExp", [IEvDistribution(exponent)]) =>
    Some(
      Helpers.twoDiststoDistFn(
        Pointwise,
        "dotPow",
        GenericDist.fromFloat(MagicNumbers.Math.e),
        exponent,
        ~env,
      ),
    )

  | _ => None
  }
}
// Converts a distribution-operation output into a reducer result. Each value
// variant becomes Ok of the corresponding expression value; GenDistError
// becomes Error(REDistributionError(_)).
let genericOutputToReducerValue = (o: DistributionOperation.outputType): result<
  expressionValue,
  Reducer_ErrorValue.errorValue,
> =>
  switch o {
  | GenDistError(err) => Error(REDistributionError(err))
  | Dist(dist) => Ok(ExpressionValue.IEvDistribution(dist))
  | Float(number) => Ok(IEvNumber(number))
  | String(text) => Ok(IEvString(text))
  | Bool(flag) => Ok(IEvBool(flag))
  | FloatArray(numbers) =>
    // Each float is wrapped individually as a number value.
    Ok(IEvArray(E.A.fmap(number => ExpressionValue.IEvNumber(number), numbers)))
  }
2022-06-06 21:42:49 +00:00
// Dispatches `call` to a distribution operation and converts the output to a
// reducer result. The option from dispatchToGenericOutput is passed through
// E.O2.fmap, so an unmatched call stays None.
let dispatch = (call: ExpressionValue.functionCall, environment) =>
  E.O2.fmap(dispatchToGenericOutput(call, environment), genericOutputToReducerValue)