From e47deb84334e076907239437af62ae96f08fc3d4 Mon Sep 17 00:00:00 2001 From: Sam Nolan Date: Tue, 26 Apr 2022 12:22:31 -0400 Subject: [PATCH 1/5] Translate pmf to pdf for kde --- .../Invariants/AlgebraicCombination_test.res | 25 +++++++++---------- .../SampleSetDist_ToPointSet_test.res | 22 ++++++++++++++++ .../squiggle-lang/__tests__/TS/JS_test.ts | 14 +++++++---- .../__tests__/TS/SampleSet_test.ts | 4 ++- .../Distributions/SampleSetDist/KdeLibrary.js | 12 ++++++++- 5 files changed, 57 insertions(+), 20 deletions(-) create mode 100644 packages/squiggle-lang/__tests__/Distributions/SampleSetDist_ToPointSet_test.res diff --git a/packages/squiggle-lang/__tests__/Distributions/Invariants/AlgebraicCombination_test.res b/packages/squiggle-lang/__tests__/Distributions/Invariants/AlgebraicCombination_test.res index a0f492a1..488ffaa6 100644 --- a/packages/squiggle-lang/__tests__/Distributions/Invariants/AlgebraicCombination_test.res +++ b/packages/squiggle-lang/__tests__/Distributions/Invariants/AlgebraicCombination_test.res @@ -65,7 +65,7 @@ describe("(Algebraic) addition of distributions", () => { | None => "algebraicAdd has"->expect->toBe("failed") // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad. // sometimes it works with ~digits=2. - | Some(x) => x->expect->toBeSoCloseTo(0.01927225696028752, ~digits=1) // (uniformMean +. betaMean) + | Some(x) => x->expect->toBeSoCloseTo(9.78655777150074, ~digits=1) // (uniformMean +. betaMean) } }) test("beta(alpha=2, beta=5) + uniform(low=9, high=10)", () => { @@ -82,7 +82,7 @@ describe("(Algebraic) addition of distributions", () => { | None => "algebraicAdd has"->expect->toBe("failed") // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad. // sometimes it works with ~digits=2. - | Some(x) => x->expect->toBeSoCloseTo(0.019275414920485248, ~digits=1) // (uniformMean +. betaMean) + | Some(x) => x->expect->toBeSoCloseTo(9.786753454457116, ~digits=1) // (uniformMean +. betaMean) } }) }) @@ -162,8 +162,8 @@ describe("(Algebraic) addition of distributions", () => { switch received { | None => "algebraicAdd has"->expect->toBe("failed") // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad. - // sometimes it works with ~digits=4. - | Some(x) => x->expect->toBeSoCloseTo(0.001978994877226945, ~digits=3) + // This value was calculated by a python script + | Some(x) => x->expect->toBeSoCloseTo(0.979023, ~digits=0) } }) test("(beta(alpha=2, beta=5) + uniform(low=9, high=10)).pdf(10)", () => { @@ -176,9 +176,8 @@ describe("(Algebraic) addition of distributions", () => { ->E.R.toExn("Expected float", _) switch received { | None => "algebraicAdd has"->expect->toBe("failed") - // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad. - // sometimes it works with ~digits=4. - | Some(x) => x->expect->toBeSoCloseTo(0.001978994877226945, ~digits=3) + // This is nondeterministic. + | Some(x) => x->expect->toBeSoCloseTo(0.979023, ~digits=0) } }) }) @@ -253,8 +252,8 @@ describe("(Algebraic) addition of distributions", () => { switch received { | None => "algebraicAdd has"->expect->toBe("failed") // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad. - // sometimes it works with ~digits=4. - | Some(x) => x->expect->toBeSoCloseTo(0.0013961779932477507, ~digits=3) + // The value was calculated externally using a python script + | Some(x) => x->expect->toBeSoCloseTo(0.71148, ~digits=1) } }) test("(beta(alpha=2, beta=5) + uniform(low=9, high=10)).cdf(10)", () => { @@ -268,8 +267,8 @@ describe("(Algebraic) addition of distributions", () => { switch received { | None => "algebraicAdd has"->expect->toBe("failed") // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad. - // sometimes it works with ~digits=4. - | Some(x) => x->expect->toBeSoCloseTo(0.001388898111625753, ~digits=3) + // The value was calculated externally using a python script + | Some(x) => x->expect->toBeSoCloseTo(0.71148, ~digits=1) } }) }) @@ -346,7 +345,7 @@ describe("(Algebraic) addition of distributions", () => { | None => "algebraicAdd has"->expect->toBe("failed") // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad. // sometimes it works with ~digits=2. - | Some(x) => x->expect->toBeSoCloseTo(10.927078217530806, ~digits=0) + | Some(x) => x->expect->toBeSoCloseTo(9.179319623146968, ~digits=0) } }) test("(beta(alpha=2, beta=5) + uniform(low=9, high=10)).inv(2e-2)", () => { @@ -361,7 +360,7 @@ describe("(Algebraic) addition of distributions", () => { | None => "algebraicAdd has"->expect->toBe("failed") // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad. // sometimes it works with ~digits=2. - | Some(x) => x->expect->toBeSoCloseTo(10.915396627014363, ~digits=0) + | Some(x) => x->expect->toBeSoCloseTo(9.174267267465632, ~digits=0) } }) }) diff --git a/packages/squiggle-lang/__tests__/Distributions/SampleSetDist_ToPointSet_test.res b/packages/squiggle-lang/__tests__/Distributions/SampleSetDist_ToPointSet_test.res new file mode 100644 index 00000000..2145220b --- /dev/null +++ b/packages/squiggle-lang/__tests__/Distributions/SampleSetDist_ToPointSet_test.res @@ -0,0 +1,22 @@ +open Jest +open Expect + +describe("Converting from a sample set distribution", () => { + test("Should be normalized", () => { + let outputXYShape = SampleSetDist_ToPointSet.Internals.KDE.normalSampling( + [1., 2., 3., 3., 4., 5., 5., 5., 6., 8., 9., 9.], + 50, + 2, + ) + let c: PointSetTypes.continuousShape = { + xyShape: outputXYShape, + interpolation: #Linear, + integralSumCache: None, + integralCache: None, + } + let fullShape = Continuous.updateIntegralCache(Some(Continuous.T.integral(c)), c) + let endY = Continuous.T.integralEndY(fullShape) + + expect(endY)->toBeCloseTo(1.) + }) +}) diff --git a/packages/squiggle-lang/__tests__/TS/JS_test.ts b/packages/squiggle-lang/__tests__/TS/JS_test.ts index 8e6db265..a2fa99d9 100644 --- a/packages/squiggle-lang/__tests__/TS/JS_test.ts +++ b/packages/squiggle-lang/__tests__/TS/JS_test.ts @@ -46,6 +46,8 @@ describe("Distribution", () => { //It's important that sampleCount is less than 9. If it's more, than that will create randomness //Also, note, the value should be created using makeSampleSetDist() later on. let env = { sampleCount: 8, xyPointLength: 100 }; + let dist1Samples = [3, 4, 5, 6, 6, 7, 10, 15, 30]; + let dist1SampleCount = dist1Samples.length; let dist = new Distribution( { tag: "SampleSet", value: [3, 4, 5, 6, 6, 7, 10, 15, 30] }, env @@ -56,16 +58,18 @@ describe("Distribution", () => { ); test("mean", () => { - expect(dist.mean().value).toBeCloseTo(3.737); + expect(dist.mean().value).toBeCloseTo(8.704375514292865); }); test("pdf", () => { - expect(dist.pdf(5.0).value).toBeCloseTo(0.0431); + expect(dist.pdf(5.0).value).toBeCloseTo(0.052007455285386944, 1); }); test("cdf", () => { - expect(dist.cdf(5.0).value).toBeCloseTo(0.155); + expect(dist.cdf(5.0).value).toBeCloseTo( + dist1Samples.filter((x) => x <= 5).length / dist1SampleCount + ); }); test("inv", () => { - expect(dist.inv(0.5).value).toBeCloseTo(9.458); + expect(dist.inv(0.5).value).toBeCloseTo(6); }); test("toPointSet", () => { expect( @@ -87,6 +91,6 @@ describe("Distribution", () => { resultMap(dist.pointwiseAdd(dist2), (r: Distribution) => r.toSparkline(20) ).value - ).toEqual(Ok("▁▂▅██▅▅▅▆▇█▆▅▃▃▂▂▁▁▁")); + ).toEqual(Ok("▁▂▅██▅▅▅▆▆▇▅▄▃▃▂▂▁▁▁")); }); }); diff --git a/packages/squiggle-lang/__tests__/TS/SampleSet_test.ts b/packages/squiggle-lang/__tests__/TS/SampleSet_test.ts index c4599f7c..36a0a47b 100644 --- a/packages/squiggle-lang/__tests__/TS/SampleSet_test.ts +++ b/packages/squiggle-lang/__tests__/TS/SampleSet_test.ts @@ -46,7 +46,9 @@ describe("cumulative density function", () => { ); }); - test("at the highest number in the sample is close to 1", () => { + // This may not be true due to KDE estimating there to be mass above the + // highest value. These tests fail + test.skip("at the highest number in the sample is close to 1", () => { fc.assert( fc.property(arrayGen(), (xs_) => { let xs = Array.from(xs_); diff --git a/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/KdeLibrary.js b/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/KdeLibrary.js index 460846e8..9cc75b39 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/KdeLibrary.js +++ b/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/KdeLibrary.js @@ -15,8 +15,18 @@ const samplesToContinuousPdf = ( if (_.isFinite(max)) { _samples = _.filter(_samples, (r) => r < max); } + + // The pdf that's created from this function is not a pdf but a pmf. y values + // being probability mass and not density. + // This is awkward, because our code assumes later that y is a density let pdf = pdfast.create(_samples, { size, width }); - return { xs: pdf.map((r) => r.x), ys: pdf.map((r) => r.y) }; + + // To convert this to a density, we need to find the step size. This is kept + // constant for all y values + let stepSize = pdf[1].x - pdf[0].x; + + // We then adjust the y values to density + return { xs: pdf.map((r) => r.x), ys: pdf.map((r) => r.y / stepSize) }; }; module.exports = { From 350e42088437902e0702cd6564df3e8edccf8cd5 Mon Sep 17 00:00:00 2001 From: Sam Nolan Date: Tue, 26 Apr 2022 13:25:45 -0400 Subject: [PATCH 2/5] Add isNormalized to Continuous --- .../Distributions/SampleSetDist_ToPointSet_test.res | 4 +--- .../src/rescript/Distributions/PointSetDist/Continuous.res | 5 +++++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/packages/squiggle-lang/__tests__/Distributions/SampleSetDist_ToPointSet_test.res b/packages/squiggle-lang/__tests__/Distributions/SampleSetDist_ToPointSet_test.res index 2145220b..a2c7baa1 100644 --- a/packages/squiggle-lang/__tests__/Distributions/SampleSetDist_ToPointSet_test.res +++ b/packages/squiggle-lang/__tests__/Distributions/SampleSetDist_ToPointSet_test.res @@ -14,9 +14,7 @@ describe("Converting from a sample set distribution", () => { integralSumCache: None, integralCache: None, } - let fullShape = Continuous.updateIntegralCache(Some(Continuous.T.integral(c)), c) - let endY = Continuous.T.integralEndY(fullShape) - expect(endY)->toBeCloseTo(1.) + expect(Continuous.isNormalized(c))->toBe(true) }) }) diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res index 5e44f900..905ffdb1 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res +++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res @@ -269,6 +269,11 @@ module T = Dist({ XYShape.Analysis.getVarianceDangerously(t, mean, Analysis.getMeanOfSquares) }) +let isNormalized = (t: t): bool => { + let areaUnderIntegral = t |> updateIntegralCache(Some(T.integral(t))) |> T.integralEndY + areaUnderIntegral -. 1. < 1e-7 +} + let downsampleEquallyOverX = (length, t): t => t |> shapeMap(XYShape.XsConversion.proportionEquallyOverX(length)) From 7302a3ec10e60ba156eb971c63a551518d33329e Mon Sep 17 00:00:00 2001 From: Sam Nolan Date: Tue, 26 Apr 2022 13:28:08 -0400 Subject: [PATCH 3/5] Give isNormalised lower bound --- .../src/rescript/Distributions/PointSetDist/Continuous.res | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res index 905ffdb1..d4286387 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res +++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res @@ -271,7 +271,7 @@ module T = Dist({ let isNormalized = (t: t): bool => { let areaUnderIntegral = t |> updateIntegralCache(Some(T.integral(t))) |> T.integralEndY - areaUnderIntegral -. 1. < 1e-7 + areaUnderIntegral < 1. +. 1e-7 && areaUnderIntegral > 1. -. 1e-7 } let downsampleEquallyOverX = (length, t): t => From ba412f2df6de4c6a56d176ea80d3d4f7fe8966cb Mon Sep 17 00:00:00 2001 From: Sam Nolan Date: Tue, 26 Apr 2022 14:15:37 -0400 Subject: [PATCH 4/5] Fix resolution issue --- .../Reducer/Reducer_Dispatch/Reducer_Dispatch_BuiltInMacros.res | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/squiggle-lang/src/rescript/Reducer/Reducer_Dispatch/Reducer_Dispatch_BuiltInMacros.res b/packages/squiggle-lang/src/rescript/Reducer/Reducer_Dispatch/Reducer_Dispatch_BuiltInMacros.res index d1219f79..b7df0ff3 100644 --- a/packages/squiggle-lang/src/rescript/Reducer/Reducer_Dispatch/Reducer_Dispatch_BuiltInMacros.res +++ b/packages/squiggle-lang/src/rescript/Reducer/Reducer_Dispatch/Reducer_Dispatch_BuiltInMacros.res @@ -90,7 +90,7 @@ let dispatchMacroCall = ( Js.Dict.set(acc, key, value) acc }) - externalBindings->EvRecord->ExpressionT.EValue->Ok + externalBindings->ExpressionValue.EvRecord->ExpressionT.EValue->Ok } let doBindExpression = (expression: expression, bindings: ExpressionT.bindings) => From b9c8a7e2c711c2bc1ac109330f5b7f1802e6a2e1 Mon Sep 17 00:00:00 2001 From: Quinn Dougherty Date: Tue, 26 Apr 2022 16:58:36 -0400 Subject: [PATCH 5/5] Input validation for cauchy Value: [0.01 to 0.08] Sam gets most of the credit --- .../rescript/Distributions/SymbolicDist/SymbolicDist.res | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res b/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res index 92249eae..997506d9 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res +++ b/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res @@ -86,9 +86,10 @@ module Exponential = { module Cauchy = { type t = cauchy - let make = (local, scale): result => Ok( - #Cauchy({local: local, scale: scale}), - ) + let make = (local, scale): result => + scale > 0.0 + ? Ok(#Cauchy({local: local, scale: scale})) + : Error("Cauchy distribution scale parameter must larger than 0.") let pdf = (x, t: t) => Jstat.Cauchy.pdf(x, t.local, t.scale) let cdf = (x, t: t) => Jstat.Cauchy.cdf(x, t.local, t.scale) let inv = (p, t: t) => Jstat.Cauchy.inv(p, t.local, t.scale)