Address one CR comment; chasing klDivergence bug on mixed distributions

Value: [1e-10 to 1e-3]
2022-05-25 13:10:31 -04:00 · 2022-05-25 13:10:31 -04:00 · cbaf4c150d
commit cbaf4c150d
parent 249f58d3d8
8 changed files with 107 additions and 113 deletions
--- a/packages/squiggle-lang/tests/Distributions/Scoring/KlDivergence_test.res
+++ b/packages/squiggle-lang/tests/Distributions/Scoring/KlDivergence_test.res
@@ -3,7 +3,7 @@ open Expect
 open TestHelpers
 open GenericDist_Fixtures

-let klDivergence = DistributionOperation.Constructors.logScore_DistEstimateDistAnswer(~env)
+let klDivergence = DistributionOperation.Constructors.LogScore.distEstimateDistAnswer(~env)
 // integral from low to high of (1 / (high - low)) * log((1 / (high - low)) / normal(mean, stdev)(x)) dx
 let klNormalUniform = (mean, stdev, low, high): float =>
  -.Js.Math.log((high -. low) /. Js.Math.sqrt(2.0 *. MagicNumbers.Math.pi *. stdev ** 2.0)) +.
@@ -194,7 +194,7 @@ describe("combineAlongSupportOfSecondArgument0", () => {

    let result = switch (answerWrapped, predictionWrapped) {
    | (Ok(Dist(PointSet(Continuous(a)))), Ok(Dist(PointSet(Continuous(b))))) =>
-      Some(combineAlongSupportOfSecondArgument(integrand, interpolator, a.xyShape, b.xyShape))
+      Some(combineAlongSupportOfSecondArgument(interpolator, integrand, a.xyShape, b.xyShape))
    | _ => None
    }
    result
--- a/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation.res
+++ b/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation.res
@@ -262,22 +262,24 @@ module Constructors = {
  let pdf = (~env, dist, f) => C.pdf(dist, f)->run(~env)->toFloatR
  let normalize = (~env, dist) => C.normalize(dist)->run(~env)->toDistR
  let isNormalized = (~env, dist) => C.isNormalized(dist)->run(~env)->toBoolR
-  let logScore_DistEstimateDistAnswer = (~env, estimate, answer) =>
-    C.logScore_DistEstimateDistAnswer(estimate, answer)->run(~env)->toFloatR
-  let logScore_DistEstimateDistAnswerWithPrior = (~env, estimate, answer, prior) =>
-    C.logScore_DistEstimateDistAnswerWithPrior(estimate, answer, prior)->run(~env)->toFloatR
-  let logScore_DistEstimateScalarAnswer = (~env, estimate, answer) =>
-    C.logScore_DistEstimateScalarAnswer(estimate, answer)->run(~env)->toFloatR
-  let logScore_DistEstimateScalarAnswerWithPrior = (~env, estimate, answer, prior) =>
-    C.logScore_DistEstimateScalarAnswerWithPrior(estimate, answer, prior)->run(~env)->toFloatR
-  let logScore_ScalarEstimateDistAnswer = (~env, estimate, answer) =>
-    C.logScore_ScalarEstimateDistAnswer(estimate, answer)->run(~env)->toFloatR
-  let logScore_ScalarEstimateDistAnswerWithPrior = (~env, estimate, answer, prior) =>
-    C.logScore_ScalarEstimateDistAnswerWithPrior(estimate, answer, prior)->run(~env)->toFloatR
-  let logScore_ScalarEstimateScalarAnswer = (~env, estimate, answer) =>
-    C.logScore_ScalarEstimateScalarAnswer(estimate, answer)->run(~env)->toFloatR
-  let logScore_ScalarEstimateScalarAnswerWithPrior = (~env, estimate, answer, prior) =>
-    C.logScore_ScalarEstimateScalarAnswerWithPrior(estimate, answer, prior)->run(~env)->toFloatR
+  module LogScore = {
+    let distEstimateDistAnswer = (~env, estimate, answer) =>
+      C.LogScore.distEstimateDistAnswer(estimate, answer)->run(~env)->toFloatR
+    let distEstimateDistAnswerWithPrior = (~env, estimate, answer, prior) =>
+      C.LogScore.distEstimateDistAnswerWithPrior(estimate, answer, prior)->run(~env)->toFloatR
+    let distEstimateScalarAnswer = (~env, estimate, answer) =>
+      C.LogScore.distEstimateScalarAnswer(estimate, answer)->run(~env)->toFloatR
+    let distEstimateScalarAnswerWithPrior = (~env, estimate, answer, prior) =>
+      C.LogScore.distEstimateScalarAnswerWithPrior(estimate, answer, prior)->run(~env)->toFloatR
+    let scalarEstimateDistAnswer = (~env, estimate, answer) =>
+      C.LogScore.scalarEstimateDistAnswer(estimate, answer)->run(~env)->toFloatR
+    let scalarEstimateDistAnswerWithPrior = (~env, estimate, answer, prior) =>
+      C.LogScore.scalarEstimateDistAnswerWithPrior(estimate, answer, prior)->run(~env)->toFloatR
+    let scalarEstimateScalarAnswer = (~env, estimate, answer) =>
+      C.LogScore.scalarEstimateScalarAnswer(estimate, answer)->run(~env)->toFloatR
+    let scalarEstimateScalarAnswerWithPrior = (~env, estimate, answer, prior) =>
+      C.LogScore.scalarEstimateScalarAnswerWithPrior(estimate, answer, prior)->run(~env)->toFloatR
+  }
  let toPointSet = (~env, dist) => C.toPointSet(dist)->run(~env)->toDistR
  let toSampleSet = (~env, dist, n) => C.toSampleSet(dist, n)->run(~env)->toDistR
  let fromSamples = (~env, xs) => C.fromSamples(xs)->run(~env)->toDistR
--- a/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation.resi
+++ b/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation.resi
@@ -60,42 +60,44 @@ module Constructors: {
  let normalize: (~env: env, genericDist) => result<genericDist, error>
  @genType
  let isNormalized: (~env: env, genericDist) => result<bool, error>
-  @genType
-  let logScore_DistEstimateDistAnswer: (~env: env, genericDist, genericDist) => result<float, error>
-  @genType
-  let logScore_DistEstimateDistAnswerWithPrior: (
-    ~env: env,
-    genericDist,
-    genericDist,
-    DistributionTypes.DistributionOperation.scoreDistOrScalar,
-  ) => result<float, error>
-  @genType
-  let logScore_DistEstimateScalarAnswer: (~env: env, genericDist, float) => result<float, error>
-  @genType
-  let logScore_DistEstimateScalarAnswerWithPrior: (
-    ~env: env,
-    genericDist,
-    float,
-    DistributionTypes.DistributionOperation.scoreDistOrScalar,
-  ) => result<float, error>
-  @genType
-  let logScore_ScalarEstimateDistAnswer: (~env: env, float, genericDist) => result<float, error>
-  @genType
-  let logScore_ScalarEstimateDistAnswerWithPrior: (
-    ~env: env,
-    float,
-    genericDist,
-    DistributionTypes.DistributionOperation.scoreDistOrScalar,
-  ) => result<float, error>
-  @genType
-  let logScore_ScalarEstimateScalarAnswer: (~env: env, float, float) => result<float, error>
-  @genType
-  let logScore_ScalarEstimateScalarAnswerWithPrior: (
-    ~env: env,
-    float,
-    float,
-    DistributionTypes.DistributionOperation.scoreDistOrScalar,
-  ) => result<float, error>
+  module LogScore: {
+    @genType
+    let distEstimateDistAnswer: (~env: env, genericDist, genericDist) => result<float, error>
+    @genType
+    let distEstimateDistAnswerWithPrior: (
+      ~env: env,
+      genericDist,
+      genericDist,
+      DistributionTypes.DistributionOperation.scoreDistOrScalar,
+    ) => result<float, error>
+    @genType
+    let distEstimateScalarAnswer: (~env: env, genericDist, float) => result<float, error>
+    @genType
+    let distEstimateScalarAnswerWithPrior: (
+      ~env: env,
+      genericDist,
+      float,
+      DistributionTypes.DistributionOperation.scoreDistOrScalar,
+    ) => result<float, error>
+    @genType
+    let scalarEstimateDistAnswer: (~env: env, float, genericDist) => result<float, error>
+    @genType
+    let scalarEstimateDistAnswerWithPrior: (
+      ~env: env,
+      float,
+      genericDist,
+      DistributionTypes.DistributionOperation.scoreDistOrScalar,
+    ) => result<float, error>
+    @genType
+    let scalarEstimateScalarAnswer: (~env: env, float, float) => result<float, error>
+    @genType
+    let scalarEstimateScalarAnswerWithPrior: (
+      ~env: env,
+      float,
+      float,
+      DistributionTypes.DistributionOperation.scoreDistOrScalar,
+    ) => result<float, error>
+  }
  @genType
  let toPointSet: (~env: env, genericDist) => result<genericDist, error>
  @genType
--- a/packages/squiggle-lang/src/rescript/Distributions/DistributionTypes.res
+++ b/packages/squiggle-lang/src/rescript/Distributions/DistributionTypes.res
@@ -163,38 +163,40 @@ module Constructors = {
    let fromSamples = (xs): t => FromSamples(xs)
    let truncate = (dist, left, right): t => FromDist(ToDist(Truncate(left, right)), dist)
    let inspect = (dist): t => FromDist(ToDist(Inspect), dist)
-    let logScore_DistEstimateDistAnswer = (estimate, answer): t => FromDist(
-      ToScore(LogScore(Score_Dist(answer), None)),
-      estimate,
-    )
-    let logScore_DistEstimateDistAnswerWithPrior = (estimate, answer, prior): t => FromDist(
-      ToScore(LogScore(Score_Dist(answer), Some(prior))),
-      estimate,
-    )
-    let logScore_DistEstimateScalarAnswer = (estimate, answer): t => FromDist(
-      ToScore(LogScore(Score_Scalar(answer), None)),
-      estimate,
-    )
-    let logScore_DistEstimateScalarAnswerWithPrior = (estimate, answer, prior): t => FromDist(
-      ToScore(LogScore(Score_Scalar(answer), Some(prior))),
-      estimate,
-    )
-    let logScore_ScalarEstimateDistAnswer = (estimate, answer): t => FromFloat(
-      ToScore(LogScore(Score_Dist(answer), None)),
-      estimate,
-    )
-    let logScore_ScalarEstimateDistAnswerWithPrior = (estimate, answer, prior): t => FromFloat(
-      ToScore(LogScore(Score_Dist(answer), Some(prior))),
-      estimate,
-    )
-    let logScore_ScalarEstimateScalarAnswer = (estimate, answer): t => FromFloat(
-      ToScore(LogScore(Score_Scalar(answer), None)),
-      estimate,
-    )
-    let logScore_ScalarEstimateScalarAnswerWithPrior = (estimate, answer, prior): t => FromFloat(
-      ToScore(LogScore(Score_Scalar(answer), Some(prior))),
-      estimate,
-    )
+    module LogScore = {
+      let distEstimateDistAnswer = (estimate, answer): t => FromDist(
+        ToScore(LogScore(Score_Dist(answer), None)),
+        estimate,
+      )
+      let distEstimateDistAnswerWithPrior = (estimate, answer, prior): t => FromDist(
+        ToScore(LogScore(Score_Dist(answer), Some(prior))),
+        estimate,
+      )
+      let distEstimateScalarAnswer = (estimate, answer): t => FromDist(
+        ToScore(LogScore(Score_Scalar(answer), None)),
+        estimate,
+      )
+      let distEstimateScalarAnswerWithPrior = (estimate, answer, prior): t => FromDist(
+        ToScore(LogScore(Score_Scalar(answer), Some(prior))),
+        estimate,
+      )
+      let scalarEstimateDistAnswer = (estimate, answer): t => FromFloat(
+        ToScore(LogScore(Score_Dist(answer), None)),
+        estimate,
+      )
+      let scalarEstimateDistAnswerWithPrior = (estimate, answer, prior): t => FromFloat(
+        ToScore(LogScore(Score_Dist(answer), Some(prior))),
+        estimate,
+      )
+      let scalarEstimateScalarAnswer = (estimate, answer): t => FromFloat(
+        ToScore(LogScore(Score_Scalar(answer), None)),
+        estimate,
+      )
+      let scalarEstimateScalarAnswerWithPrior = (estimate, answer, prior): t => FromFloat(
+        ToScore(LogScore(Score_Scalar(answer), Some(prior))),
+        estimate,
+      )
+    }
    let scalePower = (dist, n): t => FromDist(ToDist(Scale(#Power, n)), dist)
    let scaleLogarithm = (dist, n): t => FromDist(ToDist(Scale(#Logarithm, n)), dist)
    let scaleLogarithmWithThreshold = (dist, n, eps): t => FromDist(
--- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res
+++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res
@@ -120,7 +120,7 @@ let combinePointwise = (

  let interpolator = XYShape.XtoY.continuousInterpolator(t1.interpolation, extrapolation)

-  combiner(fn, interpolator, t1.xyShape, t2.xyShape)->E.R2.fmap(x =>
+  combiner(interpolator, fn, t1.xyShape, t2.xyShape)->E.R2.fmap(x =>
    make(~integralSumCache=combinedIntegralSum, x)
  )
 }
--- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Discrete.res
+++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Discrete.res
@@ -48,7 +48,7 @@ let combinePointwise = (
  // TODO: does it ever make sense to pointwise combine the integrals here?
  // It could be done for pointwise additions, but is that ever needed?

-  combiner(fn, XYShape.XtoY.discreteInterpolator, t1.xyShape, t2.xyShape)->E.R2.fmap(make)
+  combiner(XYShape.XtoY.discreteInterpolator, fn, t1.xyShape, t2.xyShape)->E.R2.fmap(make)
 }

 let reduce = (
@@ -221,15 +221,4 @@ module T = Dist({
    let getMeanOfSquares = t => t |> shapeMap(XYShape.T.square) |> mean
    XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares)
  }
-
-  //  let klDivergence = (prediction: t, answer: t) => {
-  //    combinePointwise(
-  //      ~fn=PointSetDist_Scoring.KLDivergence.integrand,
-  //      prediction,
-  //      answer,
-  //    )->E.R2.fmap(integralEndY)
-  //  }
-  //  let logScoreWithPointResolution = (~prediction: t, ~answer: float, ~prior: option<t>) => {
-  //    Error(Operation.NotYetImplemented)
-  //  }
 })
--- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res
+++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res
@@ -66,6 +66,7 @@ let combineAlgebraically = (op: Operation.convolutionOperation, t1: t, t2: t): t
  }

 let combinePointwise = (
+  ~combiner=XYShape.PointwiseCombination.combine,
  ~integralSumCachesFn: (float, float) => option<float>=(_, _) => None,
  ~integralCachesFn: (
    PointSetTypes.continuousShape,
@@ -78,6 +79,7 @@ let combinePointwise = (
  switch (t1, t2) {
  | (Continuous(m1), Continuous(m2)) =>
    Continuous.combinePointwise(
+      ~combiner,
      ~integralSumCachesFn,
      fn,
      m1,
@@ -85,6 +87,7 @@ let combinePointwise = (
    )->E.R2.fmap(x => PointSetTypes.Continuous(x))
  | (Discrete(m1), Discrete(m2)) =>
    Discrete.combinePointwise(
+      ~combiner,
      ~integralSumCachesFn,
      ~fn,
      m1,
--- a/packages/squiggle-lang/src/rescript/Utility/XYShape.res
+++ b/packages/squiggle-lang/src/rescript/Utility/XYShape.res
@@ -322,8 +322,8 @@ module Zipped = {
 module PointwiseCombination = {
  // interpolator is a function from XYShape.XtoY, e.g. linearBetweenPointsExtrapolateFlat.
  let combine: (
-    (float, float) => result<float, Operation.Error.t>,
    interpolator,
+    (float, float) => result<float, Operation.Error.t>,
    T.t,
    T.t,
  ) => result<T.t, Operation.Error.t> = %raw(`
@@ -332,7 +332,7 @@ module PointwiseCombination = {
      // and interpolates the value on the other side, thus accumulating xs and ys.
      // This is written in raw JS because this can still be a bottleneck, and using refs for the i and j indices is quite painful.

-      function(fn, interpolator, t1, t2) {
+      function(interpolator, fn, t1, t2) {
        let t1n = t1.xs.length;
        let t2n = t2.xs.length;
        let outX = [];
@@ -394,11 +394,11 @@ module PointwiseCombination = {
    This is from an approach to kl divergence that was ultimately rejected. Leaving it in for now because it may help us factor `combine` out of raw javascript soon.
 */
  let combineAlongSupportOfSecondArgument0: (
-    (float, float) => result<float, Operation.Error.t>,
    interpolator,
+    (float, float) => result<float, Operation.Error.t>,
    T.t,
    T.t,
-  ) => result<T.t, Operation.Error.t> = (fn, interpolator, t1, t2) => {
+  ) => result<T.t, Operation.Error.t> = (interpolator, fn, t1, t2) => {
    let newYs = []
    let newXs = []
    let (l1, l2) = (E.A.length(t1.xs), E.A.length(t2.xs))
@@ -493,27 +493,23 @@ module PointwiseCombination = {
  }
  // This function is used for klDivergence
  let combineAlongSupportOfSecondArgument: (
+    interpolator,
    (float, float) => result<float, Operation.Error.t>,
    T.t,
    T.t,
-  ) => result<T.t, Operation.Error.t> = (fn, prediction, answer) => {
+  ) => result<T.t, Operation.Error.t> = (interpolator, fn, prediction, answer) => {
    let combineWithFn = (answerX: float, i: int) => {
      let answerY = answer.ys[i]
-      let predictionY = XtoY.linear(answerX, prediction)
+      // Previously: let predictionY = XtoY.linear(answerX, prediction)
+      let predictionY = interpolator(prediction, i, answerX)
      fn(predictionY, answerY)
    }
    let newYsWithError = Js.Array.mapi((x, i) => combineWithFn(x, i), answer.xs)
-    let newYsOrError = E.A.R.firstErrorOrOpen(newYsWithError)
-    let result = switch newYsOrError {
-    | Ok(a) => Ok({xs: answer.xs, ys: a})
-    | Error(b) => Error(b)
-    }
-
-    result
+    E.A.R.firstErrorOrOpen(newYsWithError)->E.R2.fmap(ys => {xs: answer.xs, ys: ys})
  }

  let addCombine = (interpolator: interpolator, t1: T.t, t2: T.t): T.t =>
-    combine((a, b) => Ok(a +. b), interpolator, t1, t2)->E.R.toExn(
+    combine(interpolator, (a, b) => Ok(a +. b), t1, t2)->E.R.toExn(
      "Add operation should never fail",
      _,
    )