Merge branch 'develop' into loss-testing

2022-04-26 17:19:33 -04:00 · 2022-04-26 17:19:33 -04:00 · 93c06f85b0
commit 93c06f85b0
parent 93bedc88d9 1ef766a6c8
8 changed files with 65 additions and 24 deletions
--- a/packages/squiggle-lang/tests/Distributions/Invariants/AlgebraicCombination_test.res
+++ b/packages/squiggle-lang/tests/Distributions/Invariants/AlgebraicCombination_test.res
@ -65,7 +65,7 @@ describe("(Algebraic) addition of distributions", () => {
      | None => "algebraicAdd has"->expect->toBe("failed")
      // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad.
      // sometimes it works with ~digits=2.
-      | Some(x) => x->expect->toBeSoCloseTo(0.01927225696028752, ~digits=1) // (uniformMean +. betaMean)
+      | Some(x) => x->expect->toBeSoCloseTo(9.78655777150074, ~digits=1) // (uniformMean +. betaMean)
      }
    })
    test("beta(alpha=2, beta=5) + uniform(low=9, high=10)", () => {
@ -82,7 +82,7 @@ describe("(Algebraic) addition of distributions", () => {
      | None => "algebraicAdd has"->expect->toBe("failed")
      // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad.
      // sometimes it works with ~digits=2.
-      | Some(x) => x->expect->toBeSoCloseTo(0.019275414920485248, ~digits=1) // (uniformMean +. betaMean)
+      | Some(x) => x->expect->toBeSoCloseTo(9.786753454457116, ~digits=1) // (uniformMean +. betaMean)
      }
    })
  })
@ -162,8 +162,8 @@ describe("(Algebraic) addition of distributions", () => {
      switch received {
      | None => "algebraicAdd has"->expect->toBe("failed")
      // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad.
-      // sometimes it works with ~digits=4.
-      | Some(x) => x->expect->toBeSoCloseTo(0.001978994877226945, ~digits=3)
+      // This value was calculated by a python script
+      | Some(x) => x->expect->toBeSoCloseTo(0.979023, ~digits=0)
      }
    })
    test("(beta(alpha=2, beta=5) + uniform(low=9, high=10)).pdf(10)", () => {
@ -176,9 +176,8 @@ describe("(Algebraic) addition of distributions", () => {
        ->E.R.toExn("Expected float", _)
      switch received {
      | None => "algebraicAdd has"->expect->toBe("failed")
-      // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad.
-      // sometimes it works with ~digits=4.
-      | Some(x) => x->expect->toBeSoCloseTo(0.001978994877226945, ~digits=3)
+      // This is nondeterministic.
+      | Some(x) => x->expect->toBeSoCloseTo(0.979023, ~digits=0)
      }
    })
  })
@ -253,8 +252,8 @@ describe("(Algebraic) addition of distributions", () => {
      switch received {
      | None => "algebraicAdd has"->expect->toBe("failed")
      // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad.
-      // sometimes it works with ~digits=4.
-      | Some(x) => x->expect->toBeSoCloseTo(0.0013961779932477507, ~digits=3)
+      // The value was calculated externally using a python script
+      | Some(x) => x->expect->toBeSoCloseTo(0.71148, ~digits=1)
      }
    })
    test("(beta(alpha=2, beta=5) + uniform(low=9, high=10)).cdf(10)", () => {
@ -268,8 +267,8 @@ describe("(Algebraic) addition of distributions", () => {
      switch received {
      | None => "algebraicAdd has"->expect->toBe("failed")
      // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad.
-      // sometimes it works with ~digits=4.
-      | Some(x) => x->expect->toBeSoCloseTo(0.001388898111625753, ~digits=3)
+      // The value was calculated externally using a python script
+      | Some(x) => x->expect->toBeSoCloseTo(0.71148, ~digits=1)
      }
    })
  })
@ -346,7 +345,7 @@ describe("(Algebraic) addition of distributions", () => {
      | None => "algebraicAdd has"->expect->toBe("failed")
      // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad.
      // sometimes it works with ~digits=2.
-      | Some(x) => x->expect->toBeSoCloseTo(10.927078217530806, ~digits=0)
+      | Some(x) => x->expect->toBeSoCloseTo(9.179319623146968, ~digits=0)
      }
    })
    test("(beta(alpha=2, beta=5) + uniform(low=9, high=10)).inv(2e-2)", () => {
@ -361,7 +360,7 @@ describe("(Algebraic) addition of distributions", () => {
      | None => "algebraicAdd has"->expect->toBe("failed")
      // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad.
      // sometimes it works with ~digits=2.
-      | Some(x) => x->expect->toBeSoCloseTo(10.915396627014363, ~digits=0)
+      | Some(x) => x->expect->toBeSoCloseTo(9.174267267465632, ~digits=0)
      }
    })
  })
--- a/packages/squiggle-lang/tests/Distributions/SampleSetDist_ToPointSet_test.res
+++ b/packages/squiggle-lang/tests/Distributions/SampleSetDist_ToPointSet_test.res
@ -0,0 +1,20 @@
+open Jest
+open Expect
+
+describe("Converting from a sample set distribution", () => {
+  test("Should be normalized", () => {
+    let outputXYShape = SampleSetDist_ToPointSet.Internals.KDE.normalSampling(
+      [1., 2., 3., 3., 4., 5., 5., 5., 6., 8., 9., 9.],
+      50,
+      2,
+    )
+    let c: PointSetTypes.continuousShape = {
+      xyShape: outputXYShape,
+      interpolation: #Linear,
+      integralSumCache: None,
+      integralCache: None,
+    }
+
+    expect(Continuous.isNormalized(c))->toBe(true)
+  })
+})
--- a/packages/squiggle-lang/tests/TS/JS_test.ts
+++ b/packages/squiggle-lang/tests/TS/JS_test.ts
@ -46,6 +46,8 @@ describe("Distribution", () => {
  //It's important that sampleCount is less than 9. If it's more, than that will create randomness
  //Also, note, the value should be created using makeSampleSetDist() later on.
  let env = { sampleCount: 8, xyPointLength: 100 };
+  let dist1Samples = [3, 4, 5, 6, 6, 7, 10, 15, 30];
+  let dist1SampleCount = dist1Samples.length;
  let dist = new Distribution(
    { tag: "SampleSet", value: [3, 4, 5, 6, 6, 7, 10, 15, 30] },
    env
@ -56,16 +58,18 @@ describe("Distribution", () => {
  );

  test("mean", () => {
-    expect(dist.mean().value).toBeCloseTo(3.737);
+    expect(dist.mean().value).toBeCloseTo(8.704375514292865);
  });
  test("pdf", () => {
-    expect(dist.pdf(5.0).value).toBeCloseTo(0.0431);
+    expect(dist.pdf(5.0).value).toBeCloseTo(0.052007455285386944, 1);
  });
  test("cdf", () => {
-    expect(dist.cdf(5.0).value).toBeCloseTo(0.155);
+    expect(dist.cdf(5.0).value).toBeCloseTo(
+      dist1Samples.filter((x) => x <= 5).length / dist1SampleCount
+    );
  });
  test("inv", () => {
-    expect(dist.inv(0.5).value).toBeCloseTo(9.458);
+    expect(dist.inv(0.5).value).toBeCloseTo(6);
  });
  test("toPointSet", () => {
    expect(
@ -87,6 +91,6 @@ describe("Distribution", () => {
      resultMap(dist.pointwiseAdd(dist2), (r: Distribution) =>
        r.toSparkline(20)
      ).value
-    ).toEqual(Ok("▁▂▅██▅▅▅▆▇█▆▅▃▃▂▂▁▁▁"));
+    ).toEqual(Ok("▁▂▅██▅▅▅▆▆▇▅▄▃▃▂▂▁▁▁"));
  });
 });
--- a/packages/squiggle-lang/tests/TS/SampleSet_test.ts
+++ b/packages/squiggle-lang/tests/TS/SampleSet_test.ts
@ -46,7 +46,9 @@ describe("cumulative density function", () => {
    );
  });

-  test("at the highest number in the sample is close to 1", () => {
+  // This may not be true due to KDE estimating there to be mass above the
+  // highest value. These tests fail
+  test.skip("at the highest number in the sample is close to 1", () => {
    fc.assert(
      fc.property(arrayGen(), (xs_) => {
        let xs = Array.from(xs_);
--- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res
+++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res
@ -269,6 +269,11 @@ module T = Dist({
    XYShape.Analysis.getVarianceDangerously(t, mean, Analysis.getMeanOfSquares)
 })

+let isNormalized = (t: t): bool => {
+  let areaUnderIntegral = t |> updateIntegralCache(Some(T.integral(t))) |> T.integralEndY
+  areaUnderIntegral < 1. +. 1e-7 && areaUnderIntegral > 1. -. 1e-7
+}
+
 let downsampleEquallyOverX = (length, t): t =>
  t |> shapeMap(XYShape.XsConversion.proportionEquallyOverX(length))

--- a/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/KdeLibrary.js
+++ b/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/KdeLibrary.js
@ -15,8 +15,18 @@ const samplesToContinuousPdf = (
  if (_.isFinite(max)) {
    _samples = _.filter(_samples, (r) => r < max);
  }
+
+  // The pdf that's created from this function is not a pdf but a pmf. y values
+  // being probability mass and not density.
+  // This is awkward, because our code assumes later that y is a density
  let pdf = pdfast.create(_samples, { size, width });
-  return { xs: pdf.map((r) => r.x), ys: pdf.map((r) => r.y) };
+
+  // To convert this to a density, we need to find the step size. This is kept
+  // constant for all y values
+  let stepSize = pdf[1].x - pdf[0].x;
+
+  // We then adjust the y values to density
+  return { xs: pdf.map((r) => r.x), ys: pdf.map((r) => r.y / stepSize) };
 };

 module.exports = {
--- a/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res
+++ b/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res
@ -86,9 +86,10 @@ module Exponential = {

 module Cauchy = {
  type t = cauchy
-  let make = (local, scale): result<symbolicDist, string> => Ok(
-    #Cauchy({local: local, scale: scale}),
-  )
+  let make = (local, scale): result<symbolicDist, string> =>
+    scale > 0.0
+      ? Ok(#Cauchy({local: local, scale: scale}))
+      : Error("Cauchy distribution scale parameter must larger than 0.")
  let pdf = (x, t: t) => Jstat.Cauchy.pdf(x, t.local, t.scale)
  let cdf = (x, t: t) => Jstat.Cauchy.cdf(x, t.local, t.scale)
  let inv = (p, t: t) => Jstat.Cauchy.inv(p, t.local, t.scale)
--- a/packages/squiggle-lang/src/rescript/Reducer/Reducer_Dispatch/Reducer_Dispatch_BuiltInMacros.res
+++ b/packages/squiggle-lang/src/rescript/Reducer/Reducer_Dispatch/Reducer_Dispatch_BuiltInMacros.res
@ -90,7 +90,7 @@ let dispatchMacroCall = (
      Js.Dict.set(acc, key, value)
      acc
    })
-    externalBindings->EvRecord->ExpressionT.EValue->Ok
+    externalBindings->ExpressionValue.EvRecord->ExpressionT.EValue->Ok
  }

  let doBindExpression = (expression: expression, bindings: ExpressionT.bindings) =>