Use a more conservative convolution policy

2022-04-22 12:43:18 -04:00 · 2022-04-22 12:43:18 -04:00 · 2845bd3e39
commit 2845bd3e39
parent 998128033f
9 changed files with 61 additions and 49 deletions
--- a/.prettierignore
+++ b/.prettierignore
@@ -7,3 +7,5 @@ node_modules
 packages/*/node_modules
 packages/website/.docusaurus
 packages/squiggle-lang/lib
+packages/squiggle-lang/.nyc_output/
+packages/squiggle-lang/coverage/
--- a/packages/squiggle-lang/tests/ReducerInterface/ReducerInterface_Distribution_test.res
+++ b/packages/squiggle-lang/tests/ReducerInterface/ReducerInterface_Distribution_test.res
@@ -67,7 +67,7 @@ describe("eval on distribution functions", () => {
    testEval("lognormal(10,2) / lognormal(5,2)", "Ok(Lognormal(5,2.8284271247461903))")
    testEval("lognormal(5, 2) / 2", "Ok(Lognormal(4.306852819440055,2))")
    testEval("2 / lognormal(5, 2)", "Ok(Lognormal(-4.306852819440055,2))")
-    testEval("2 / normal(10, 2)", "Ok(Point Set Distribution)")
+    testEval("2 / normal(10, 2)", "Ok(Sample Set Distribution)")
    testEval("normal(10, 2) / 2", "Ok(Normal(5,1))")
  })
  describe("truncate", () => {
@@ -77,21 +77,21 @@ describe("eval on distribution functions", () => {
  })

  describe("exp", () => {
-    testEval("exp(normal(5,2))", "Ok(Point Set Distribution)")
+    testEval("exp(normal(5,2))", "Ok(Sample Set Distribution)")
  })

  describe("pow", () => {
-    testEval("pow(3, uniform(5,8))", "Ok(Point Set Distribution)")
-    testEval("pow(uniform(5,8), 3)", "Ok(Point Set Distribution)")
+    testEval("pow(3, uniform(5,8))", "Ok(Sample Set Distribution)")
+    testEval("pow(uniform(5,8), 3)", "Ok(Sample Set Distribution)")
    testEval("pow(uniform(5,8), uniform(9, 10))", "Ok(Sample Set Distribution)")
  })

  describe("log", () => {
-    testEval("log(2, uniform(5,8))", "Ok(Point Set Distribution)")
-    testEval("log(normal(5,2), 3)", "Ok(Point Set Distribution)")
+    testEval("log(2, uniform(5,8))", "Ok(Sample Set Distribution)")
+    testEval("log(normal(5,2), 3)", "Ok(Sample Set Distribution)")
    testEval("log(normal(5,2), normal(10,1))", "Ok(Sample Set Distribution)")
-    testEval("log(uniform(5,8))", "Ok(Point Set Distribution)")
-    testEval("log10(uniform(5,8))", "Ok(Point Set Distribution)")
+    testEval("log(uniform(5,8))", "Ok(Sample Set Distribution)")
+    testEval("log10(uniform(5,8))", "Ok(Sample Set Distribution)")
  })

  describe("dotLog", () => {
--- a/packages/squiggle-lang/src/rescript/Distributions/GenericDist/GenericDist.res
+++ b/packages/squiggle-lang/src/rescript/Distributions/GenericDist/GenericDist.res
@@ -164,7 +164,7 @@ module AlgebraicCombination = {

  let runConvolution = (
    toPointSet: toPointSetFn,
-    arithmeticOperation: GenericDist_Types.Operation.arithmeticOperation,
+    arithmeticOperation: Operation.convolutionOperation,
    t1: t,
    t2: t,
  ) =>
@@ -197,10 +197,23 @@ module AlgebraicCombination = {
    | _ => 1000
    }

-  let chooseConvolutionOrMonteCarlo = (t2: t, t1: t) =>
-    expectedConvolutionCost(t1) * expectedConvolutionCost(t2) > 10000
-      ? #CalculateWithMonteCarlo
-      : #CalculateWithConvolution
+  type calculationMethod = MonteCarlo | Convolution(Operation.convolutionOperation) // how a combination is computed; Convolution carries the narrowed operation
+
+  let chooseConvolutionOrMonteCarlo = ( // picks the calculation method for an operation and two distributions
+    op: Operation.algebraicOperation,
+    t2: t, // NOTE(review): the visible call site passes (op, t1, t2), so these two names are swapped relative to it;
+    t1: t, // the result is unaffected because the cost product below is symmetric in t1 and t2
+  ): calculationMethod =>
+    switch op {
+    | #Divide
+    | #Power
+    | #Logarithm =>
+      MonteCarlo // these three operations always use Monte Carlo, regardless of cost
+    | (#Add | #Subtract | #Multiply) as convOp =>
+      expectedConvolutionCost(t1) * expectedConvolutionCost(t2) > 10000 // convolve only when the product of expected costs is at most 10000
+        ? MonteCarlo
+        : Convolution(convOp)
+    }

  let run = (
    t1: t,
@@ -213,15 +226,10 @@ module AlgebraicCombination = {
    | Some(Ok(symbolicDist)) => Ok(Symbolic(symbolicDist))
    | Some(Error(e)) => Error(Other(e))
    | None =>
-      switch chooseConvolutionOrMonteCarlo(t1, t2) {
-      | #CalculateWithMonteCarlo => runMonteCarlo(toSampleSetFn, arithmeticOperation, t1, t2)
-      | #CalculateWithConvolution =>
-        runConvolution(
-          toPointSetFn,
-          arithmeticOperation,
-          t1,
-          t2,
-        )->E.R2.fmap(r => DistributionTypes.PointSet(r))
+      switch chooseConvolutionOrMonteCarlo(arithmeticOperation, t1, t2) {
+      | MonteCarlo => runMonteCarlo(toSampleSetFn, arithmeticOperation, t1, t2)
+      | Convolution(convOp) =>
+        runConvolution(toPointSetFn, convOp, t1, t2)->E.R2.fmap(r => DistributionTypes.PointSet(r))
      }
    }
  }
--- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/AlgebraicShapeCombination.res
+++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/AlgebraicShapeCombination.res
@@ -96,36 +96,25 @@ let toDiscretePointMassesFromTriangulars = (
 }

 let combineShapesContinuousContinuous = (
-  op: Operation.algebraicOperation,
+  op: Operation.convolutionOperation,
  s1: PointSetTypes.xyShape,
  s2: PointSetTypes.xyShape,
 ): PointSetTypes.xyShape => {
  // if we add the two distributions, we should probably use normal filters.
  // if we multiply the two distributions, we should probably use lognormal filters.
  let t1m = toDiscretePointMassesFromTriangulars(s1)
-  let t2m = switch op {
-  | #Divide => toDiscretePointMassesFromTriangulars(~inverse=true, s2)
-  | _ => toDiscretePointMassesFromTriangulars(~inverse=false, s2)
-  }
+  let t2m = toDiscretePointMassesFromTriangulars(~inverse=false, s2)

  let combineMeansFn = switch op {
  | #Add => (m1, m2) => m1 +. m2
  | #Subtract => (m1, m2) => m1 -. m2
  | #Multiply => (m1, m2) => m1 *. m2
-  | #Divide => (m1, mInv2) => m1 *. mInv2
-  | #Power => (m1, mInv2) => m1 ** mInv2
-  | #Logarithm => (m1, m2) => log(m1) /. log(m2)
   } // note: the removed #Divide case used mInv2 = mean(1 / t2) ~= 1 / mean(t2); no remaining case uses mInv2

-  // TODO: Variances are for exponentatiation or logarithms are almost totally made up and very likely very wrong.
-  // converts the variances and means of the two inputs into the variance of the output
  let combineVariancesFn = switch op {
  | #Add => (v1, v2, _, _) => v1 +. v2
  | #Subtract => (v1, v2, _, _) => v1 +. v2
  | #Multiply => (v1, v2, m1, m2) => v1 *. v2 +. v1 *. m2 ** 2. +. v2 *. m1 ** 2.
-  | #Power => (v1, v2, m1, m2) => v1 *. v2 +. v1 *. m2 ** 2. +. v2 *. m1 ** 2.
-  | #Logarithm => (v1, v2, m1, m2) => v1 *. v2 +. v1 *. m2 ** 2. +. v2 *. m1 ** 2.
-  | #Divide => (v1, vInv2, m1, mInv2) => v1 *. vInv2 +. v1 *. mInv2 ** 2. +. vInv2 *. m1 ** 2.
  }

  // TODO: If operating on two positive-domain distributions, we should take that into account
@@ -199,7 +188,7 @@ let toDiscretePointMassesFromDiscrete = (s: PointSetTypes.xyShape): pointMassesW
 }

 let combineShapesContinuousDiscrete = (
-  op: Operation.algebraicOperation,
+  op: Operation.convolutionOperation,
  continuousShape: PointSetTypes.xyShape,
  discreteShape: PointSetTypes.xyShape,
 ): PointSetTypes.xyShape => {
@@ -207,7 +196,7 @@ let combineShapesContinuousDiscrete = (
  let t2n = discreteShape |> XYShape.T.length

  // each x pair is added/subtracted
-  let fn = Operation.Algebraic.toFn(op)
+  let fn = Operation.Convolution.toFn(op)

  let outXYShapes: array<array<(float, float)>> = Belt.Array.makeUninitializedUnsafe(t2n)

@@ -231,10 +220,7 @@ let combineShapesContinuousDiscrete = (
      Belt.Array.set(outXYShapes, j, dxyShape) |> ignore
      ()
    }
-  | #Multiply
-  | #Power
-  | #Logarithm
-  | #Divide =>
+  | #Multiply =>
    for j in 0 to t2n - 1 {
      // creates a new continuous shape for each one of the discrete points, and collects them in outXYShapes.
      let dxyShape: array<(float, float)> = Belt.Array.makeUninitializedUnsafe(t1n)
--- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res
+++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res
@@ -241,7 +241,7 @@ let downsampleEquallyOverX = (length, t): t =>
 /* This simply creates multiple copies of the continuous distribution, scaled and shifted according to
 each discrete data point, and then adds them all together. */
 let combineAlgebraicallyWithDiscrete = (
-  op: Operation.algebraicOperation,
+  op: Operation.convolutionOperation,
  t1: t,
  t2: PointSetTypes.discreteShape,
 ) => {
@@ -263,8 +263,7 @@ let combineAlgebraicallyWithDiscrete = (
    )

    let combinedIntegralSum = switch op {
-    | #Multiply
-    | #Divide =>
+    | #Multiply =>
      Common.combineIntegralSums((a, b) => Some(a *. b), t1.integralSumCache, t2.integralSumCache)
    | _ => None
    }
@@ -274,7 +273,7 @@ let combineAlgebraicallyWithDiscrete = (
  }
 }

-let combineAlgebraically = (op: Operation.algebraicOperation, t1: t, t2: t) => {
+let combineAlgebraically = (op: Operation.convolutionOperation, t1: t, t2: t) => {
  let s1 = t1 |> getShape
  let s2 = t2 |> getShape
  let t1n = s1 |> XYShape.T.length
--- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Discrete.res
+++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Discrete.res
@@ -72,7 +72,7 @@ let updateIntegralCache = (integralCache, t: t): t => {

 /* This multiplies all of the data points together and creates a new discrete distribution from the results.
 Data points at the same xs get added together. It may be a good idea to downsample t1 and t2 before and/or the result after. */
-let combineAlgebraically = (op: Operation.algebraicOperation, t1: t, t2: t): t => {
+let combineAlgebraically = (op: Operation.convolutionOperation, t1: t, t2: t): t => {
  let t1s = t1 |> getShape
  let t2s = t2 |> getShape
  let t1n = t1s |> XYShape.T.length
@@ -84,7 +84,7 @@ let combineAlgebraically = (op: Operation.algebraicOperation, t1: t, t2: t): t =
    t2.integralSumCache,
  )

-  let fn = Operation.Algebraic.toFn(op)
+  let fn = Operation.Convolution.toFn(op)
  let xToYMap = E.FloatFloatMap.empty()

  for i in 0 to t1n - 1 {
--- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Mixed.res
+++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Mixed.res
@@ -221,7 +221,7 @@ module T = Dist({
  }
 })

-let combineAlgebraically = (op: Operation.algebraicOperation, t1: t, t2: t): t => {
+let combineAlgebraically = (op: Operation.convolutionOperation, t1: t, t2: t): t => {
  // Discrete convolution can cause a huge increase in the number of samples,
  // so we'll first downsample.

--- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res
+++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res
@@ -35,7 +35,7 @@ let toMixed = mapToAll((
 ))

 //TODO WARNING: combineAlgebraicallyWithDiscrete will break for subtraction and division, e.g. discrete - continuous
-let combineAlgebraically = (op: Operation.algebraicOperation, t1: t, t2: t): t =>
+let combineAlgebraically = (op: Operation.convolutionOperation, t1: t, t2: t): t =>
  switch (t1, t2) {
  | (Continuous(m1), Continuous(m2)) =>
    Continuous.combineAlgebraically(op, m1, m2) |> Continuous.T.toPointSetDist
--- a/packages/squiggle-lang/src/rescript/Utility/Operation.res
+++ b/packages/squiggle-lang/src/rescript/Utility/Operation.res
@@ -9,6 +9,13 @@ type algebraicOperation = [
  | #Power
  | #Logarithm
 ]
+
+type convolutionOperation = [ // operations that may be computed by convolution; each is also a member of algebraicOperation
+  | #Add
+  | #Multiply
+  | #Subtract
+]
+
 @genType
 type pointwiseOperation = [#Add | #Multiply | #Power]
 type scaleOperation = [#Multiply | #Power | #Logarithm | #Divide]
@@ -20,6 +27,16 @@ type distToFloatOperation = [
  | #Sample
 ]

+module Convolution = { // helpers for convolutionOperation
+  type t = convolutionOperation
+  let toFn: (t, float, float) => float = x => // maps an operation to the matching float binary operator
+    switch x {
+    | #Add => \"+."
+    | #Subtract => \"-."
+    | #Multiply => \"*."
+    }
+}
+
 module Algebraic = {
  type t = algebraicOperation
  let toFn: (t, float, float) => float = x =>