From 2845bd3e39d621c964017c88d6010e7f5d6bfd73 Mon Sep 17 00:00:00 2001 From: Sam Nolan Date: Fri, 22 Apr 2022 12:43:18 -0400 Subject: [PATCH] Use a more conservative convolution policy --- .prettierignore | 2 ++ .../ReducerInterface_Distribution_test.res | 16 ++++----- .../Distributions/GenericDist/GenericDist.res | 36 +++++++++++-------- .../AlgebraicShapeCombination.res | 24 +++---------- .../Distributions/PointSetDist/Continuous.res | 7 ++-- .../Distributions/PointSetDist/Discrete.res | 4 +-- .../Distributions/PointSetDist/Mixed.res | 2 +- .../PointSetDist/PointSetDist.res | 2 +- .../src/rescript/Utility/Operation.res | 17 +++++++++ 9 files changed, 61 insertions(+), 49 deletions(-) diff --git a/.prettierignore b/.prettierignore index 175742ce..68c24941 100644 --- a/.prettierignore +++ b/.prettierignore @@ -7,3 +7,5 @@ node_modules packages/*/node_modules packages/website/.docusaurus packages/squiggle-lang/lib +packages/squiggle-lang/.nyc_output/ +packages/squiggle-lang/coverage/ diff --git a/packages/squiggle-lang/__tests__/ReducerInterface/ReducerInterface_Distribution_test.res b/packages/squiggle-lang/__tests__/ReducerInterface/ReducerInterface_Distribution_test.res index 1b5f8bfd..d3defb40 100644 --- a/packages/squiggle-lang/__tests__/ReducerInterface/ReducerInterface_Distribution_test.res +++ b/packages/squiggle-lang/__tests__/ReducerInterface/ReducerInterface_Distribution_test.res @@ -67,7 +67,7 @@ describe("eval on distribution functions", () => { testEval("lognormal(10,2) / lognormal(5,2)", "Ok(Lognormal(5,2.8284271247461903))") testEval("lognormal(5, 2) / 2", "Ok(Lognormal(4.306852819440055,2))") testEval("2 / lognormal(5, 2)", "Ok(Lognormal(-4.306852819440055,2))") - testEval("2 / normal(10, 2)", "Ok(Point Set Distribution)") + testEval("2 / normal(10, 2)", "Ok(Sample Set Distribution)") testEval("normal(10, 2) / 2", "Ok(Normal(5,1))") }) describe("truncate", () => { @@ -77,21 +77,21 @@ describe("eval on distribution functions", () => { }) describe("exp", () => { - testEval("exp(normal(5,2))", "Ok(Point Set Distribution)") + testEval("exp(normal(5,2))", "Ok(Sample Set Distribution)") }) describe("pow", () => { - testEval("pow(3, uniform(5,8))", "Ok(Point Set Distribution)") - testEval("pow(uniform(5,8), 3)", "Ok(Point Set Distribution)") + testEval("pow(3, uniform(5,8))", "Ok(Sample Set Distribution)") + testEval("pow(uniform(5,8), 3)", "Ok(Sample Set Distribution)") testEval("pow(uniform(5,8), uniform(9, 10))", "Ok(Sample Set Distribution)") }) describe("log", () => { - testEval("log(2, uniform(5,8))", "Ok(Point Set Distribution)") - testEval("log(normal(5,2), 3)", "Ok(Point Set Distribution)") + testEval("log(2, uniform(5,8))", "Ok(Sample Set Distribution)") + testEval("log(normal(5,2), 3)", "Ok(Sample Set Distribution)") testEval("log(normal(5,2), normal(10,1))", "Ok(Sample Set Distribution)") - testEval("log(uniform(5,8))", "Ok(Point Set Distribution)") - testEval("log10(uniform(5,8))", "Ok(Point Set Distribution)") + testEval("log(uniform(5,8))", "Ok(Sample Set Distribution)") + testEval("log10(uniform(5,8))", "Ok(Sample Set Distribution)") }) describe("dotLog", () => { diff --git a/packages/squiggle-lang/src/rescript/Distributions/GenericDist/GenericDist.res b/packages/squiggle-lang/src/rescript/Distributions/GenericDist/GenericDist.res index e39d01b7..5e62de53 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/GenericDist/GenericDist.res +++ b/packages/squiggle-lang/src/rescript/Distributions/GenericDist/GenericDist.res @@ -164,7 +164,7 @@ module AlgebraicCombination = { let runConvolution = ( toPointSet: toPointSetFn, - arithmeticOperation: GenericDist_Types.Operation.arithmeticOperation, + arithmeticOperation: Operation.convolutionOperation, t1: t, t2: t, ) => @@ -197,10 +197,23 @@ module AlgebraicCombination = { | _ => 1000 } - let chooseConvolutionOrMonteCarlo = (t2: t, t1: t) => - expectedConvolutionCost(t1) * expectedConvolutionCost(t2) > 10000 - ? #CalculateWithMonteCarlo - : #CalculateWithConvolution + type calculationMethod = MonteCarlo | Convolution(Operation.convolutionOperation) + + let chooseConvolutionOrMonteCarlo = ( + op: Operation.algebraicOperation, + t2: t, + t1: t, + ): calculationMethod => + switch op { + | #Divide + | #Power + | #Logarithm => + MonteCarlo + | (#Add | #Subtract | #Multiply) as convOp => + expectedConvolutionCost(t1) * expectedConvolutionCost(t2) > 10000 + ? MonteCarlo + : Convolution(convOp) + } let run = ( t1: t, @@ -213,15 +226,10 @@ module AlgebraicCombination = { | Some(Ok(symbolicDist)) => Ok(Symbolic(symbolicDist)) | Some(Error(e)) => Error(Other(e)) | None => - switch chooseConvolutionOrMonteCarlo(t1, t2) { - | #CalculateWithMonteCarlo => runMonteCarlo(toSampleSetFn, arithmeticOperation, t1, t2) - | #CalculateWithConvolution => - runConvolution( - toPointSetFn, - arithmeticOperation, - t1, - t2, - )->E.R2.fmap(r => DistributionTypes.PointSet(r)) + switch chooseConvolutionOrMonteCarlo(arithmeticOperation, t1, t2) { + | MonteCarlo => runMonteCarlo(toSampleSetFn, arithmeticOperation, t1, t2) + | Convolution(convOp) => + runConvolution(toPointSetFn, convOp, t1, t2)->E.R2.fmap(r => DistributionTypes.PointSet(r)) } } } diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/AlgebraicShapeCombination.res b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/AlgebraicShapeCombination.res index f8740de9..908d100b 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/AlgebraicShapeCombination.res +++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/AlgebraicShapeCombination.res @@ -96,36 +96,25 @@ let toDiscretePointMassesFromTriangulars = ( } let combineShapesContinuousContinuous = ( - op: Operation.algebraicOperation, + op: Operation.convolutionOperation, s1: PointSetTypes.xyShape, s2: PointSetTypes.xyShape, ): PointSetTypes.xyShape => { // if we add the two distributions, we should probably use normal filters. // if we multiply the two distributions, we should probably use lognormal filters. let t1m = toDiscretePointMassesFromTriangulars(s1) - let t2m = switch op { - | #Divide => toDiscretePointMassesFromTriangulars(~inverse=true, s2) - | _ => toDiscretePointMassesFromTriangulars(~inverse=false, s2) - } + let t2m = toDiscretePointMassesFromTriangulars(~inverse=false, s2) let combineMeansFn = switch op { | #Add => (m1, m2) => m1 +. m2 | #Subtract => (m1, m2) => m1 -. m2 | #Multiply => (m1, m2) => m1 *. m2 - | #Divide => (m1, mInv2) => m1 *. mInv2 - | #Power => (m1, mInv2) => m1 ** mInv2 - | #Logarithm => (m1, m2) => log(m1) /. log(m2) } // note: here, mInv2 = mean(1 / t2) ~= 1 / mean(t2) - // TODO: Variances are for exponentatiation or logarithms are almost totally made up and very likely very wrong. - // converts the variances and means of the two inputs into the variance of the output let combineVariancesFn = switch op { | #Add => (v1, v2, _, _) => v1 +. v2 | #Subtract => (v1, v2, _, _) => v1 +. v2 | #Multiply => (v1, v2, m1, m2) => v1 *. v2 +. v1 *. m2 ** 2. +. v2 *. m1 ** 2. - | #Power => (v1, v2, m1, m2) => v1 *. v2 +. v1 *. m2 ** 2. +. v2 *. m1 ** 2. - | #Logarithm => (v1, v2, m1, m2) => v1 *. v2 +. v1 *. m2 ** 2. +. v2 *. m1 ** 2. - | #Divide => (v1, vInv2, m1, mInv2) => v1 *. vInv2 +. v1 *. mInv2 ** 2. +. vInv2 *. m1 ** 2. } // TODO: If operating on two positive-domain distributions, we should take that into account @@ -199,7 +188,7 @@ let toDiscretePointMassesFromDiscrete = (s: PointSetTypes.xyShape): pointMassesW } let combineShapesContinuousDiscrete = ( - op: Operation.algebraicOperation, + op: Operation.convolutionOperation, continuousShape: PointSetTypes.xyShape, discreteShape: PointSetTypes.xyShape, ): PointSetTypes.xyShape => { @@ -207,7 +196,7 @@ let combineShapesContinuousDiscrete = ( let t2n = discreteShape |> XYShape.T.length // each x pair is added/subtracted - let fn = Operation.Algebraic.toFn(op) + let fn = Operation.Convolution.toFn(op) let outXYShapes: array> = Belt.Array.makeUninitializedUnsafe(t2n) @@ -231,10 +220,7 @@ let combineShapesContinuousDiscrete = ( Belt.Array.set(outXYShapes, j, dxyShape) |> ignore () } - | #Multiply - | #Power - | #Logarithm - | #Divide => + | #Multiply => for j in 0 to t2n - 1 { // creates a new continuous shape for each one of the discrete points, and collects them in outXYShapes. let dxyShape: array<(float, float)> = Belt.Array.makeUninitializedUnsafe(t1n) diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res index 746d1d30..6c9cb3fd 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res +++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res @@ -241,7 +241,7 @@ let downsampleEquallyOverX = (length, t): t => /* This simply creates multiple copies of the continuous distribution, scaled and shifted according to each discrete data point, and then adds them all together. */ let combineAlgebraicallyWithDiscrete = ( - op: Operation.algebraicOperation, + op: Operation.convolutionOperation, t1: t, t2: PointSetTypes.discreteShape, ) => { @@ -263,8 +263,7 @@ let combineAlgebraicallyWithDiscrete = ( ) let combinedIntegralSum = switch op { - | #Multiply - | #Divide => + | #Multiply => Common.combineIntegralSums((a, b) => Some(a *. b), t1.integralSumCache, t2.integralSumCache) | _ => None } @@ -274,7 +273,7 @@ let combineAlgebraicallyWithDiscrete = ( } } -let combineAlgebraically = (op: Operation.algebraicOperation, t1: t, t2: t) => { +let combineAlgebraically = (op: Operation.convolutionOperation, t1: t, t2: t) => { let s1 = t1 |> getShape let s2 = t2 |> getShape let t1n = s1 |> XYShape.T.length diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Discrete.res b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Discrete.res index 4ff2df7b..4037338d 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Discrete.res +++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Discrete.res @@ -72,7 +72,7 @@ let updateIntegralCache = (integralCache, t: t): t => { /* This multiples all of the data points together and creates a new discrete distribution from the results. Data points at the same xs get added together. It may be a good idea to downsample t1 and t2 before and/or the result after. */ -let combineAlgebraically = (op: Operation.algebraicOperation, t1: t, t2: t): t => { +let combineAlgebraically = (op: Operation.convolutionOperation, t1: t, t2: t): t => { let t1s = t1 |> getShape let t2s = t2 |> getShape let t1n = t1s |> XYShape.T.length @@ -84,7 +84,7 @@ let combineAlgebraically = (op: Operation.algebraicOperation, t1: t, t2: t): t = t2.integralSumCache, ) - let fn = Operation.Algebraic.toFn(op) + let fn = Operation.Convolution.toFn(op) let xToYMap = E.FloatFloatMap.empty() for i in 0 to t1n - 1 { diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Mixed.res b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Mixed.res index 012bb3e5..223c55e2 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Mixed.res +++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Mixed.res @@ -221,7 +221,7 @@ module T = Dist({ } }) -let combineAlgebraically = (op: Operation.algebraicOperation, t1: t, t2: t): t => { +let combineAlgebraically = (op: Operation.convolutionOperation, t1: t, t2: t): t => { // Discrete convolution can cause a huge increase in the number of samples, // so we'll first downsample. diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res index 787af9d8..0035d646 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res +++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res @@ -35,7 +35,7 @@ let toMixed = mapToAll(( )) //TODO WARNING: The combineAlgebraicallyWithDiscrete will break for subtraction and division, like, discrete - continous -let combineAlgebraically = (op: Operation.algebraicOperation, t1: t, t2: t): t => +let combineAlgebraically = (op: Operation.convolutionOperation, t1: t, t2: t): t => switch (t1, t2) { | (Continuous(m1), Continuous(m2)) => Continuous.combineAlgebraically(op, m1, m2) |> Continuous.T.toPointSetDist diff --git a/packages/squiggle-lang/src/rescript/Utility/Operation.res b/packages/squiggle-lang/src/rescript/Utility/Operation.res index 6fb3b24b..f78a432c 100644 --- a/packages/squiggle-lang/src/rescript/Utility/Operation.res +++ b/packages/squiggle-lang/src/rescript/Utility/Operation.res @@ -9,6 +9,13 @@ type algebraicOperation = [ | #Power | #Logarithm ] + +type convolutionOperation = [ + | #Add + | #Multiply + | #Subtract +] + @genType type pointwiseOperation = [#Add | #Multiply | #Power] type scaleOperation = [#Multiply | #Power | #Logarithm | #Divide] @@ -20,6 +27,16 @@ type distToFloatOperation = [ | #Sample ] +module Convolution = { + type t = convolutionOperation + let toFn: (t, float, float) => float = x => + switch x { + | #Add => \"+." + | #Subtract => \"-." + | #Multiply => \"*." + } +} + module Algebraic = { type t = algebraicOperation let toFn: (t, float, float) => float = x =>