Add a `logScore` operation for distributions.

What the patch does
  * DistributionTypes: new `toScore = LogScore(genericDist)` type, new
    `ToScore(toScore)` case of `fromDist`, its string form `logScore`, and a
    `logScore(dist1, dist2)` constructor that builds
    `FromDist(ToScore(LogScore(dist2)), dist1)`.
  * DistributionOperation: `run` dispatches `ToScore(LogScore(t2))` to
    `GenericDist.logScore` and wraps the float result; `Constructors.logScore`
    is exposed in the .res and the .resi.
  * GenericDist.logScore: converts both arguments with `toPointSetFn`, merges
    the two results, calls `PointSetDist.T.logScore`, and maps its error into
    `DistributionTypes.OperationError`.
  * PointSetDist.T.logScore: Continuous/Continuous, Discrete/Discrete and
    Mixed/Mixed pairs go to the matching implementation; any other pair is
    converted with `toMixed` on both sides and scored by `Mixed.T.logScore`.
  * PointSetDist_Scoring.LogScoring.logScore: the pointwise function,
    log2(|a / b|), always returned as `Ok`.
  * Continuous.T.logScore removes points whose y value is not finite
    (new `XYShape.T.filterYValues`) before taking `integralEndY`.
    NOTE(review): Discrete.T.logScore and Mixed.T.logScore do not filter; a
    zero on either side yields a non-finite point there. Confirm whether that
    difference is intended.
  * Discrete.combinePointwise gains `~fn` (default: addition) and no longer
    passes `~integralSumCache=combinedIntegralSum` to `make`.
  * Mixed.combinePointwise is moved above `module T` with an identical body,
    presumably so that `T.logScore` can refer to it.
  * The reducer maps `logScore(distA, distB)` to
    `FromDist(ToScore(LogScore(b)), a)`.

Changes made during review (relative to the patch as received)
  * Dropped the final hunk of PointSetDist.res: its only effect was to remove
    the newline at the end of the file.
  * PointSetDist_Scoring.res is now created with a trailing newline.
  * `LogScoring.logScore` calls the module's `logFn` instead of repeating
    `Js.Math.log2` (same function, same result).
  * Spacing normalised: `(t,t)` -> `(t, t)`, `((_,y))` -> `((_, y))`.

Caveats for whoever applies this
  * NOTE(review): `result` and `Belt.Result.t` appear without type arguments
    in context and added lines alike. This looks like extraction damage; the
    text is kept exactly as received rather than guessed. Restore the type
    arguments from the original commit before applying.
  * Leading indentation was not recoverable and has been re-created with the
    usual 2-space ReScript layout; context lines may need whitespace-tolerant
    application (`git apply --ignore-whitespace`).
  * `index` lines are kept as received; the post-image hashes of
    PointSetDist.res, PointSetDist_Scoring.res, Distributions.res and
    XYShape.res no longer match the content after the review changes.

diff --git a/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation/DistributionOperation.res b/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation/DistributionOperation.res
index 18ee2d6a..23389ddc 100644
--- a/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation/DistributionOperation.res
+++ b/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation/DistributionOperation.res
@@ -139,6 +139,10 @@ let rec run = (~env, functionCallInfo: functionCallInfo): outputType => {
         Dist(dist)
       }
     | ToDist(Normalize) => dist->GenericDist.normalize->Dist
+    | ToScore(LogScore(t2)) =>
+      GenericDist.logScore(dist, t2, ~toPointSetFn)
+      ->E.R2.fmap(r => Float(r))
+      ->OutputLocal.fromResult
     | ToBool(IsNormalized) => dist->GenericDist.isNormalized->Bool
     | ToDist(Truncate(leftCutoff, rightCutoff)) =>
       GenericDist.truncate(~toPointSetFn, ~leftCutoff, ~rightCutoff, dist, ())
@@ -227,6 +231,7 @@ module Constructors = {
   let pdf = (~env, dist, f) => C.pdf(dist, f)->run(~env)->toFloatR
   let normalize = (~env, dist) => C.normalize(dist)->run(~env)->toDistR
   let isNormalized = (~env, dist) => C.isNormalized(dist)->run(~env)->toBoolR
+  let logScore = (~env, dist1, dist2) => C.logScore(dist1, dist2)->run(~env)->toFloatR
   let toPointSet = (~env, dist) => C.toPointSet(dist)->run(~env)->toDistR
   let toSampleSet = (~env, dist, n) => C.toSampleSet(dist, n)->run(~env)->toDistR
   let truncate = (~env, dist, leftCutoff, rightCutoff) =>
diff --git a/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation/DistributionOperation.resi b/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation/DistributionOperation.resi
index 5ad34354..6ba16557 100644
--- a/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation/DistributionOperation.resi
+++ b/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation/DistributionOperation.resi
@@ -57,6 +57,8 @@ module Constructors: {
   @genType
   let isNormalized: (~env: env, genericDist) => result
   @genType
+  let logScore: (~env: env, genericDist, genericDist) => result
+  @genType
   let toPointSet: (~env: env, genericDist) => result
   @genType
   let toSampleSet: (~env: env, genericDist, int) => result
diff --git a/packages/squiggle-lang/src/rescript/Distributions/DistributionTypes.res b/packages/squiggle-lang/src/rescript/Distributions/DistributionTypes.res
index 93f86798..41f19e3a 100644
--- a/packages/squiggle-lang/src/rescript/Distributions/DistributionTypes.res
+++ b/packages/squiggle-lang/src/rescript/Distributions/DistributionTypes.res
@@ -85,9 +85,12 @@ module DistributionOperation = {
     | ToString
     | ToSparkline(int)
 
+  type toScore = LogScore(genericDist)
+
   type fromDist =
     | ToFloat(toFloat)
     | ToDist(toDist)
+    | ToScore(toScore)
     | ToDistCombination(direction, Operation.Algebraic.t, [#Dist(genericDist) | #Float(float)])
     | ToString(toString)
     | ToBool(toBool)
@@ -108,6 +111,7 @@ module DistributionOperation = {
     | ToFloat(#Mean) => `mean`
     | ToFloat(#Pdf(r)) => `pdf(${E.Float.toFixed(r)})`
     | ToFloat(#Sample) => `sample`
+    | ToScore(LogScore(_)) => `logScore`
     | ToDist(Normalize) => `normalize`
     | ToDist(ToPointSet) => `toPointSet`
     | ToDist(ToSampleSet(r)) => `toSampleSet(${E.I.toString(r)})`
@@ -142,6 +146,7 @@ module Constructors = {
     let toSampleSet = (dist, r): t => FromDist(ToDist(ToSampleSet(r)), dist)
     let truncate = (dist, left, right): t => FromDist(ToDist(Truncate(left, right)), dist)
     let inspect = (dist): t => FromDist(ToDist(Inspect), dist)
+    let logScore = (dist1, dist2): t => FromDist(ToScore(LogScore(dist2)), dist1)
     let toString = (dist): t => FromDist(ToString(ToString), dist)
     let toSparkline = (dist, n): t => FromDist(ToString(ToSparkline(n)), dist)
     let algebraicAdd = (dist1, dist2: genericDist): t => FromDist(
diff --git a/packages/squiggle-lang/src/rescript/Distributions/GenericDist/GenericDist.res b/packages/squiggle-lang/src/rescript/Distributions/GenericDist/GenericDist.res
index c19bdf7f..0b851ec7 100644
--- a/packages/squiggle-lang/src/rescript/Distributions/GenericDist/GenericDist.res
+++ b/packages/squiggle-lang/src/rescript/Distributions/GenericDist/GenericDist.res
@@ -59,6 +59,13 @@ let integralEndY = (t: t): float =>
 
 let isNormalized = (t: t): bool => Js.Math.abs_float(integralEndY(t) -. 1.0) < 1e-7
 
+let logScore = (t1, t2, ~toPointSetFn: toPointSetFn): result => {
+  let pointSets = E.R.merge(toPointSetFn(t1), toPointSetFn(t2))
+  pointSets |> E.R2.bind(((a, b)) =>
+    PointSetDist.T.logScore(a, b)->E.R2.errMap(x => DistributionTypes.OperationError(x))
+  )
+}
+
 let toFloatOperation = (
   t,
   ~toPointSetFn: toPointSetFn,
diff --git a/packages/squiggle-lang/src/rescript/Distributions/GenericDist/GenericDist.resi b/packages/squiggle-lang/src/rescript/Distributions/GenericDist/GenericDist.resi
index e91803e2..34f8ef7d 100644
--- a/packages/squiggle-lang/src/rescript/Distributions/GenericDist/GenericDist.resi
+++ b/packages/squiggle-lang/src/rescript/Distributions/GenericDist/GenericDist.resi
@@ -23,6 +23,8 @@ let toFloatOperation: (
   ~distToFloatOperation: Operation.distToFloatOperation,
 ) => result
 
+let logScore: (t, t, ~toPointSetFn: toPointSetFn) => result
+
 @genType
 let toPointSet: (
   t,
diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res
index d4286387..4397093d 100644
--- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res
+++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res
@@ -267,6 +267,12 @@ module T = Dist({
   }
   let variance = (t: t): float =>
     XYShape.Analysis.getVarianceDangerously(t, mean, Analysis.getMeanOfSquares)
+
+  let logScore = (base: t, reference: t) => {
+    combinePointwise(PointSetDist_Scoring.LogScoring.logScore, base, reference)
+    |> E.R.fmap(shapeMap(XYShape.T.filterYValues(Js.Float.isFinite)))
+    |> E.R.fmap(integralEndY)
+  }
 })
 
 let isNormalized = (t: t): bool => {
diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Discrete.res b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Discrete.res
index fdc921c6..8cdac723 100644
--- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Discrete.res
+++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Discrete.res
@@ -34,6 +34,7 @@ let lastY = (t: t) => t |> getShape |> XYShape.T.lastY
 
 let combinePointwise = (
   ~integralSumCachesFn=(_, _) => None,
+  ~fn=(a, b) => Ok(a +. b),
   t1: PointSetTypes.discreteShape,
   t2: PointSetTypes.discreteShape,
 ): PointSetTypes.discreteShape => {
@@ -47,9 +48,8 @@ let combinePointwise = (
   // It could be done for pointwise additions, but is that ever needed?
 
   make(
-    ~integralSumCache=combinedIntegralSum,
     XYShape.PointwiseCombination.combine(
-      (a, b) => Ok(a +. b),
+      fn,
       XYShape.XtoY.discreteInterpolator,
       t1.xyShape,
       t2.xyShape,
@@ -221,4 +221,10 @@ module T = Dist({
     let getMeanOfSquares = t => t |> shapeMap(XYShape.T.square) |> mean
     XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares)
   }
+
+  let logScore = (base: t, reference: t) => {
+    combinePointwise(~fn=PointSetDist_Scoring.LogScoring.logScore, base, reference)
+    |> integralEndY
+    |> (r => Ok(r))
+  }
 })
diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Distributions.res b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Distributions.res
index 407eae85..e1b3173c 100644
--- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Distributions.res
+++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Distributions.res
@@ -33,6 +33,7 @@ module type dist = {
 
   let mean: t => float
   let variance: t => float
+  let logScore: (t, t) => result
 }
 
 module Dist = (T: dist) => {
@@ -55,6 +56,7 @@ module Dist = (T: dist) => {
   let mean = T.mean
   let variance = T.variance
   let integralEndY = T.integralEndY
+  let logScore = T.logScore
 
   let updateIntegralCache = T.updateIntegralCache
 
diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Mixed.res b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Mixed.res
index 4ce2bdd6..3bbd2ea5 100644
--- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Mixed.res
+++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Mixed.res
@@ -36,6 +36,43 @@ let updateIntegralCache = (integralCache, t: t): t => {
   integralCache: integralCache,
 }
 
+let combinePointwise = (
+  ~integralSumCachesFn=(_, _) => None,
+  ~integralCachesFn=(_, _) => None,
+  fn: (float, float) => result,
+  t1: t,
+  t2: t,
+): result => {
+  let reducedDiscrete =
+    [t1, t2] |> E.A.fmap(toDiscrete) |> E.A.O.concatSomes |> Discrete.reduce(~integralSumCachesFn)
+
+  let reducedContinuous =
+    [t1, t2]
+    |> E.A.fmap(toContinuous)
+    |> E.A.O.concatSomes
+    |> Continuous.reduce(~integralSumCachesFn, fn)
+
+  let combinedIntegralSum = Common.combineIntegralSums(
+    integralSumCachesFn,
+    t1.integralSumCache,
+    t2.integralSumCache,
+  )
+
+  let combinedIntegral = Common.combineIntegrals(
+    integralCachesFn,
+    t1.integralCache,
+    t2.integralCache,
+  )
+  reducedContinuous->E.R2.fmap(continuous =>
+    make(
+      ~integralSumCache=combinedIntegralSum,
+      ~integralCache=combinedIntegral,
+      ~discrete=reducedDiscrete,
+      ~continuous,
+    )
+  )
+}
+
 module T = Dist({
   type t = PointSetTypes.mixedShape
   type integral = PointSetTypes.continuousShape
@@ -259,6 +296,12 @@ module T = Dist({
     | _ => XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares)
     }
   }
+
+  let logScore = (base: t, reference: t) => {
+    combinePointwise(PointSetDist_Scoring.LogScoring.logScore, base, reference) |> E.R.fmap(
+      integralEndY,
+    )
+  }
 })
 
 let combineAlgebraically = (op: Operation.convolutionOperation, t1: t, t2: t): t => {
@@ -307,40 +350,3 @@ let combineAlgebraically = (op: Operation.convolutionOperation, t1: t, t2: t): t
     integralCache: None,
   }
 }
-
-let combinePointwise = (
-  ~integralSumCachesFn=(_, _) => None,
-  ~integralCachesFn=(_, _) => None,
-  fn: (float, float) => result,
-  t1: t,
-  t2: t,
-): result => {
-  let reducedDiscrete =
-    [t1, t2] |> E.A.fmap(toDiscrete) |> E.A.O.concatSomes |> Discrete.reduce(~integralSumCachesFn)
-
-  let reducedContinuous =
-    [t1, t2]
-    |> E.A.fmap(toContinuous)
-    |> E.A.O.concatSomes
-    |> Continuous.reduce(~integralSumCachesFn, fn)
-
-  let combinedIntegralSum = Common.combineIntegralSums(
-    integralSumCachesFn,
-    t1.integralSumCache,
-    t2.integralSumCache,
-  )
-
-  let combinedIntegral = Common.combineIntegrals(
-    integralCachesFn,
-    t1.integralCache,
-    t2.integralCache,
-  )
-  reducedContinuous->E.R2.fmap(continuous =>
-    make(
-      ~integralSumCache=combinedIntegralSum,
-      ~integralCache=combinedIntegral,
-      ~discrete=reducedDiscrete,
-      ~continuous,
-    )
-  )
-}
diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res
index 12aa5477..6073d64e 100644
--- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res
+++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res
@@ -190,6 +190,18 @@ module T = Dist({
     | Discrete(m) => Discrete.T.variance(m)
     | Continuous(m) => Continuous.T.variance(m)
     }
+
+  let logScore = (t1: t, t2: t) =>
+    switch (t1, t2) {
+    | (Continuous(t1), Continuous(t2)) => Continuous.T.logScore(t1, t2)
+    | (Discrete(t1), Discrete(t2)) => Discrete.T.logScore(t1, t2)
+    | (Mixed(t1), Mixed(t2)) => Mixed.T.logScore(t1, t2)
+    | _ => {
+        let t1 = toMixed(t1)
+        let t2 = toMixed(t2)
+        Mixed.T.logScore(t1, t2)
+      }
+    }
 })
 
 let pdf = (f: float, t: t) => {
diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist_Scoring.res b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist_Scoring.res
new file mode 100644
index 00000000..b55d2677
--- /dev/null
+++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist_Scoring.res
@@ -0,0 +1,7 @@
+module LogScoring = {
+  let logFn = Js.Math.log2
+  let subtraction = (a, b) => Ok(a -. b)
+  let logScore = (a: float, b: float): result => Ok(
+    logFn(Js.Math.abs_float(a /. b)),
+  )
+}
diff --git a/packages/squiggle-lang/src/rescript/ReducerInterface/ReducerInterface_GenericDistribution.res b/packages/squiggle-lang/src/rescript/ReducerInterface/ReducerInterface_GenericDistribution.res
index ab76f469..45b1f15f 100644
--- a/packages/squiggle-lang/src/rescript/ReducerInterface/ReducerInterface_GenericDistribution.res
+++ b/packages/squiggle-lang/src/rescript/ReducerInterface/ReducerInterface_GenericDistribution.res
@@ -80,6 +80,7 @@ module Helpers = {
       dist1,
     )->runGenericOperation
   }
+
   let parseNumber = (args: expressionValue): Belt.Result.t =>
     switch args {
     | EvNumber(x) => Ok(x)
@@ -209,6 +210,9 @@ let dispatchToGenericOutput = (call: ExpressionValue.functionCall): option<
       a,
     )->Some
   | ("normalize", [EvDistribution(dist)]) => Helpers.toDistFn(Normalize, dist)
+  | ("logScore", [EvDistribution(a), EvDistribution(b)]) => Some(
+      runGenericOperation(FromDist(ToScore(LogScore(b)), a)),
+    )
   | ("isNormalized", [EvDistribution(dist)]) => Helpers.toBoolFn(IsNormalized, dist)
   | ("toPointSet", [EvDistribution(dist)]) => Helpers.toDistFn(ToPointSet, dist)
   | ("cdf", [EvDistribution(dist), EvNumber(float)]) => Helpers.toFloatFn(#Cdf(float), dist)
diff --git a/packages/squiggle-lang/src/rescript/Utility/XYShape.res b/packages/squiggle-lang/src/rescript/Utility/XYShape.res
index 1f1e87ca..55b7717c 100644
--- a/packages/squiggle-lang/src/rescript/Utility/XYShape.res
+++ b/packages/squiggle-lang/src/rescript/Utility/XYShape.res
@@ -96,6 +96,7 @@ module T = {
   let fromZippedArray = (pairs: array<(float, float)>): t => pairs |> Belt.Array.unzip |> fromArray
   let equallyDividedXs = (t: t, newLength) => E.A.Floats.range(minX(t), maxX(t), newLength)
   let toJs = (t: t) => {"xs": t.xs, "ys": t.ys}
+  let filterYValues = (fn, t: t): t => t |> zip |> E.A.filter(((_, y)) => fn(y)) |> fromZippedArray
 
   module Validator = {
     let fnName = "XYShape validate"