diff --git a/packages/squiggle-lang/__tests__/Distributions/Invariants/AlgebraicCombination_test.res b/packages/squiggle-lang/__tests__/Distributions/Invariants/AlgebraicCombination_test.res index 488ffaa6..b7e14f56 100644 --- a/packages/squiggle-lang/__tests__/Distributions/Invariants/AlgebraicCombination_test.res +++ b/packages/squiggle-lang/__tests__/Distributions/Invariants/AlgebraicCombination_test.res @@ -65,7 +65,7 @@ describe("(Algebraic) addition of distributions", () => { | None => "algebraicAdd has"->expect->toBe("failed") // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad. // sometimes it works with ~digits=2. - | Some(x) => x->expect->toBeSoCloseTo(9.78655777150074, ~digits=1) // (uniformMean +. betaMean) + | Some(x) => x->expect->toBeSoCloseTo(9.786831807237022, ~digits=1) // (uniformMean +. betaMean) } }) test("beta(alpha=2, beta=5) + uniform(low=9, high=10)", () => { @@ -82,7 +82,7 @@ describe("(Algebraic) addition of distributions", () => { | None => "algebraicAdd has"->expect->toBe("failed") // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad. // sometimes it works with ~digits=2. - | Some(x) => x->expect->toBeSoCloseTo(9.786753454457116, ~digits=1) // (uniformMean +. betaMean) + | Some(x) => x->expect->toBeSoCloseTo(9.784290207736126, ~digits=1) // (uniformMean +. betaMean) } }) }) @@ -162,6 +162,7 @@ describe("(Algebraic) addition of distributions", () => { switch received { | None => "algebraicAdd has"->expect->toBe("failed") // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad. + // sometimes it works with ~digits=4. // This value was calculated by a python script | Some(x) => x->expect->toBeSoCloseTo(0.979023, ~digits=0) } @@ -360,7 +361,7 @@ describe("(Algebraic) addition of distributions", () => { | None => "algebraicAdd has"->expect->toBe("failed") // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad. // sometimes it works with ~digits=2. - | Some(x) => x->expect->toBeSoCloseTo(9.174267267465632, ~digits=0) + | Some(x) => x->expect->toBeSoCloseTo(9.190872365862756, ~digits=0) } }) }) diff --git a/packages/squiggle-lang/__tests__/Distributions/SampleSetDist_test.res b/packages/squiggle-lang/__tests__/Distributions/SampleSetDist_test.res index ee36d191..1c430f3d 100644 --- a/packages/squiggle-lang/__tests__/Distributions/SampleSetDist_test.res +++ b/packages/squiggle-lang/__tests__/Distributions/SampleSetDist_test.res @@ -37,4 +37,5 @@ describe("Continuous and discrete splits", () => { ) let toArr2 = discrete2 |> E.FloatFloatMap.toArray makeTest("splitMedium at count=500", toArr2 |> Belt.Array.length, 500) + // makeTest("foo", [] |> Belt.Array.length, 500) }) diff --git a/packages/squiggle-lang/__tests__/TS/JS_test.ts b/packages/squiggle-lang/__tests__/TS/JS_test.ts index a2fa99d9..1974dee6 100644 --- a/packages/squiggle-lang/__tests__/TS/JS_test.ts +++ b/packages/squiggle-lang/__tests__/TS/JS_test.ts @@ -58,14 +58,15 @@ describe("Distribution", () => { ); test("mean", () => { - expect(dist.mean().value).toBeCloseTo(8.704375514292865); + expect(dist.mean().value).toBeCloseTo(9.5555555); }); test("pdf", () => { - expect(dist.pdf(5.0).value).toBeCloseTo(0.052007455285386944, 1); + expect(dist.pdf(5.0).value).toBeCloseTo(0.10499097598222966, 1); }); test("cdf", () => { expect(dist.cdf(5.0).value).toBeCloseTo( - dist1Samples.filter((x) => x <= 5).length / dist1SampleCount + dist1Samples.filter((x) => x <= 5).length / dist1SampleCount, + 1 ); }); test("inv", () => { @@ -77,7 +78,7 @@ describe("Distribution", () => { ).toEqual(Ok("Point Set Distribution")); }); test("toSparkline", () => { - expect(dist.toSparkline(20).value).toEqual("▁▁▃▅███▆▄▃▂▁▁▂▂▃▂▁▁▁"); + expect(dist.toSparkline(20).value).toEqual("▁▁▃▇█▇▄▂▂▂▁▁▁▁▁▂▂▁▁▁"); }); test("algebraicAdd", () => { expect( @@ -91,6 +92,6 @@ describe("Distribution", () => { resultMap(dist.pointwiseAdd(dist2), (r: Distribution) => r.toSparkline(20) ).value - ).toEqual(Ok("▁▂▅██▅▅▅▆▆▇▅▄▃▃▂▂▁▁▁")); + ).toEqual(Ok("▁▂██▃▃▃▃▄▅▄▃▃▂▂▂▁▁▁▁")); }); }); diff --git a/packages/squiggle-lang/__tests__/TS/SampleSet_test.ts b/packages/squiggle-lang/__tests__/TS/SampleSet_test.ts index 36a0a47b..a617010b 100644 --- a/packages/squiggle-lang/__tests__/TS/SampleSet_test.ts +++ b/packages/squiggle-lang/__tests__/TS/SampleSet_test.ts @@ -59,13 +59,7 @@ describe("cumulative density function", () => { { sampleCount: n, xyPointLength: 100 } ); let cdfValue = dist.cdf(max).value; - let min = Math.min(...xs); - let epsilon = 5e-3; - if (max - min < epsilon) { - expect(cdfValue).toBeLessThan(1 - epsilon); - } else { - expect(dist.cdf(max).value).toBeGreaterThan(1 - epsilon); - } + expect(cdfValue).toBeCloseTo(1.0, 2); }) ); }); diff --git a/packages/squiggle-lang/src/rescript/Distributions/GenericDist/GenericDist.res b/packages/squiggle-lang/src/rescript/Distributions/GenericDist/GenericDist.res index 3af431f2..a3fc6323 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/GenericDist/GenericDist.res +++ b/packages/squiggle-lang/src/rescript/Distributions/GenericDist/GenericDist.res @@ -46,18 +46,25 @@ let toFloatOperation = ( ~toPointSetFn: toPointSetFn, ~distToFloatOperation: Operation.distToFloatOperation, ) => { - let symbolicSolution = switch (t: t) { - | Symbolic(r) => - switch SymbolicDist.T.operate(distToFloatOperation, r) { - | Ok(f) => Some(f) - | _ => None - } + let trySymbolicSolution = switch (t: t) { + | Symbolic(r) => SymbolicDist.T.operate(distToFloatOperation, r)->E.R.toOption | _ => None } - switch symbolicSolution { + let trySampleSetSolution = switch ((t: t), distToFloatOperation) { + | (SampleSet(sampleSet), #Mean) => SampleSetDist.mean(sampleSet)->Some + | (SampleSet(sampleSet), #Sample) => SampleSetDist.sample(sampleSet)->Some + | (SampleSet(sampleSet), #Inv(r)) => SampleSetDist.percentile(sampleSet, r)->Some + | _ => None + } + + switch trySymbolicSolution { | Some(r) => Ok(r) - | None => toPointSetFn(t)->E.R2.fmap(PointSetDist.operate(distToFloatOperation)) + | None => + switch trySampleSetSolution { + | Some(r) => Ok(r) + | None => toPointSetFn(t)->E.R2.fmap(PointSetDist.operate(distToFloatOperation)) + } } } diff --git a/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist.res b/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist.res index 74dba954..14c66812 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist.res +++ b/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist.res @@ -98,3 +98,13 @@ let map2 = (~fn: (float, float) => result, ~t1: t, ~t2 E.R.toExn("Input of samples should be larger than 5", make(x)) ) } + +let mean = t => T.get(t)->E.A.Floats.mean +let geomean = t => T.get(t)->E.A.Floats.geomean +let mode = t => T.get(t)->E.A.Floats.mode +let sum = t => T.get(t)->E.A.Floats.sum +let min = t => T.get(t)->E.A.Floats.min +let max = t => T.get(t)->E.A.Floats.max +let stdev = t => T.get(t)->E.A.Floats.stdev +let variance = t => T.get(t)->E.A.Floats.variance +let percentile = (t, f) => T.get(t)->E.A.Floats.percentile(f) diff --git a/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist_ToPointSet.res b/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist_ToPointSet.res index 3916e6fe..90537a12 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist_ToPointSet.res +++ b/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist_ToPointSet.res @@ -133,9 +133,17 @@ let toPointSetDist = ( ~discrete=Some(discrete), ) + /* + I'm surprised that this doesn't come out normalized. My guess is that the KDE library + we're using is standardizing on something else. If we ever change that library, we should + check to see if we still need to do this. + */ + + let normalizedPointSet = pointSetDist->E.O2.fmap(PointSetDist.T.normalize) + let samplesParse: Internals.Types.outputs = { continuousParseParams: pdf |> E.O.fmap(snd), - pointSetDist: pointSetDist, + pointSetDist: normalizedPointSet, } samplesParse diff --git a/packages/squiggle-lang/src/rescript/Utility/E.res b/packages/squiggle-lang/src/rescript/Utility/E.res index c3ba48a2..030c2961 100644 --- a/packages/squiggle-lang/src/rescript/Utility/E.res +++ b/packages/squiggle-lang/src/rescript/Utility/E.res @@ -521,14 +521,15 @@ module A = { let split = (sortedArray: array) => { let continuous = [] let discrete = FloatFloatMap.empty() - Belt.Array.forEachWithIndex(sortedArray, (index, element) => { - let maxIndex = (sortedArray |> Array.length) - 1 - let possiblySimilarElements = switch index { - | 0 => [index + 1] - | n if n == maxIndex => [index - 1] - | _ => [index - 1, index + 1] - } |> Belt.Array.map(_, r => sortedArray[r]) - let hasSimilarElement = Belt.Array.some(possiblySimilarElements, r => r == element) + Belt.Array.forEachWithIndex(sortedArray, (_, element) => { + // let maxIndex = (sortedArray |> Array.length) - 1 + // let possiblySimilarElements = switch index { + // | 0 => [index + 1] + // | n if n == maxIndex => [index - 1] + // | _ => [index - 1, index + 1] + // } |> Belt.Array.map(_, r => sortedArray[r]) + // let hasSimilarElement = Belt.Array.some(possiblySimilarElements, r => r == element) + let hasSimilarElement = false hasSimilarElement ? FloatFloatMap.increment(element, discrete) : { @@ -544,10 +545,18 @@ module A = { } module Floats = { - let sum = Belt.Array.reduce(_, 0., (i, j) => i +. j) - let mean = a => sum(a) /. (Array.length(a) |> float_of_int) + let mean = Jstat.mean + let geomean = Jstat.geomean + let mode = Jstat.mode + let variance = Jstat.variance + let stdev = Jstat.stdev + let sum = Jstat.sum let random = Js.Math.random_int + //Passing true for the exclusive parameter excludes both endpoints of the range. + //https://jstat.github.io/all.html + let percentile = (a, b) => Jstat.percentile(a, b, false) + // Gives an array with all the differences between values // diff([1,5,3,7]) = [4,-2,4] let diff = (arr: array): array =>