From 93f4c1e0c238c0ca6ab35befd7d2ba36ed09cada Mon Sep 17 00:00:00 2001 From: Ozzie Gooen Date: Fri, 2 Sep 2022 21:51:42 -0700 Subject: [PATCH 1/2] fix: SampleSet.fromDist works for discrete and mixed --- .../rescript/Distributions/GenericDist.res | 4 +- .../Distributions/PointSetDist/Continuous.res | 19 +++++++++ .../Distributions/PointSetDist/Discrete.res | 4 +- .../PointSetDist/Distributions.res | 3 ++ .../Distributions/PointSetDist/Mixed.res | 39 ++++++++++++------- .../PointSetDist/PointSetDist.res | 13 ++++--- .../SampleSetDist/SampleSetDist.res | 2 +- .../squiggle-lang/src/rescript/Utility/E.res | 1 + 8 files changed, 59 insertions(+), 26 deletions(-) diff --git a/packages/squiggle-lang/src/rescript/Distributions/GenericDist.res b/packages/squiggle-lang/src/rescript/Distributions/GenericDist.res index 0c279a9c..266fc383 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/GenericDist.res +++ b/packages/squiggle-lang/src/rescript/Distributions/GenericDist.res @@ -31,9 +31,9 @@ let isSymbolic = (t: t) => let sampleN = (t: t, n) => switch t { - | PointSet(r) => PointSetDist.sampleNRendered(n, r) - | Symbolic(r) => SymbolicDist.T.sampleN(n, r) + | PointSet(r) => PointSetDist.T.sampleN(r,n) | SampleSet(r) => SampleSetDist.sampleN(r, n) + | Symbolic(r) => SymbolicDist.T.sampleN(n, r) } let sample = (t: t) => sampleN(t, 1)->E.A.first |> E.O.toExn("Should not have happened") diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res index 2297a3e3..69c3c01c 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res +++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res @@ -270,6 +270,25 @@ module T = Dist({ } let variance = (t: t): float => XYShape.Analysis.getVarianceDangerously(t, mean, Analysis.getMeanOfSquares) + +let doN = (n, fn) => { + let items = Belt.Array.make(n, 0.0) + for x in 0 to n - 1 { + let _ = Belt.Array.set(items, x, fn()) + } + items +} + +let sample = (t: t): float => { + let randomItem = Random.float(1.0) + t |> integralYtoX(randomItem) +} + +let sampleN = (dist, n) => { + let integralCache = integral(dist) + let distWithUpdatedIntegralCache = updateIntegralCache(Some(integralCache), dist) + doN(n, () => sample(distWithUpdatedIntegralCache)) +} }) let isNormalized = (t: t): bool => { diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Discrete.res b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Discrete.res index 7142f097..dec30f35 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Discrete.res +++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Discrete.res @@ -223,9 +223,9 @@ module T = Dist({ let getMeanOfSquares = t => t |> shapeMap(XYShape.T.square) |> mean XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares) } -}) let sampleN = (t: t, n): array => { - let normalized = t->T.normalize->getShape + let normalized = t->normalize->getShape Stdlib.Random.sample(normalized.xs, {probs: normalized.ys, size: n}) } +}) diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Distributions.res b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Distributions.res index 3a35d57b..55d51747 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Distributions.res +++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Distributions.res @@ -33,6 +33,7 @@ module type dist = { let mean: t => float let variance: t => float + let sampleN: (t, int) => array } module Dist = (T: dist) => { @@ -64,6 +65,8 @@ module Dist = (T: dist) => { let yToX = T.integralYtoX let sum = T.integralEndY } + + let sampleN = T.sampleN } module Common = { diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Mixed.res b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Mixed.res index c9f31dbf..807a56ef 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Mixed.res +++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Mixed.res @@ -270,38 +270,47 @@ module T = Dist({ }) } - let mean = ({discrete, continuous}: t): float => { + let discreteIntegralSum =({discrete}: t): float => Discrete.T.Integral.sum(discrete) + let continuousIntegralSum =({continuous}: t): float => Continuous.T.Integral.sum(continuous) + let integralSum =(t:t): float => discreteIntegralSum(t) +. continuousIntegralSum(t) + + let mean = ({discrete, continuous} as t: t): float => { let discreteMean = Discrete.T.mean(discrete) let continuousMean = Continuous.T.mean(continuous) - // the combined mean is the weighted sum of the two: - let discreteIntegralSum = Discrete.T.Integral.sum(discrete) - let continuousIntegralSum = Continuous.T.Integral.sum(continuous) - let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum - - (discreteMean *. discreteIntegralSum +. continuousMean *. continuousIntegralSum) /. - totalIntegralSum + (discreteMean *. discreteIntegralSum(t) +. continuousMean *. continuousIntegralSum(t)) /. + integralSum(t) } let variance = ({discrete, continuous} as t: t): float => { // the combined mean is the weighted sum of the two: - let discreteIntegralSum = Discrete.T.Integral.sum(discrete) - let continuousIntegralSum = Continuous.T.Integral.sum(continuous) - let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum + let _discreteIntegralSum = discreteIntegralSum(t) + let _integralSum = integralSum(t) let getMeanOfSquares = ({discrete, continuous}: t) => { let discreteMean = discrete |> Discrete.shapeMap(XYShape.T.square) |> Discrete.T.mean - let continuousMean = continuous |> Continuous.Analysis.getMeanOfSquares - (discreteMean *. discreteIntegralSum +. continuousMean *. continuousIntegralSum) /. - totalIntegralSum + let continuousMean = continuous -> Continuous.Analysis.getMeanOfSquares + (discreteMean *. discreteIntegralSum(t) +. continuousMean *. continuousIntegralSum(t)) /. + integralSum(t) } - switch discreteIntegralSum /. totalIntegralSum { + switch _discreteIntegralSum /. _integralSum { | 1.0 => Discrete.T.variance(discrete) | 0.0 => Continuous.T.variance(continuous) | _ => XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares) } } + + let sampleN = (t: t, n:int): array => { + let discreteIntegralSum = discreteIntegralSum(t); + let integralSum = integralSum(t); + let discreteSampleLength:int = (Js.Int.toFloat(n) *. discreteIntegralSum /. integralSum) -> E.Float.toInt + let continuousSampleLength = n - discreteSampleLength; + let continuousSamples = t.continuous ->Continuous.T.normalize-> Continuous.T.sampleN( continuousSampleLength) + let discreteSamples = t.discrete ->Discrete.T.normalize->Discrete.T.sampleN(discreteSampleLength) + Js.log3("Samples", continuousSamples, discreteSamples); + E.A.concat(discreteSamples, continuousSamples) -> E.A.shuffle + } }) let combineAlgebraically = (op: Operation.convolutionOperation, t1: t, t2: t): t => { diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res index a52ee784..2a1467a5 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res +++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res @@ -198,6 +198,13 @@ module T = Dist({ | Discrete(m) => Discrete.T.variance(m) | Continuous(m) => Continuous.T.variance(m) } + + let sampleN = (t: t, int): array => + switch t { + | Mixed(m) => Mixed.T.sampleN(m,int) + | Discrete(m) => Discrete.T.sampleN(m,int) + | Continuous(m) => Continuous.T.sampleN(m,int) + } }) let logScore = (args: PointSetDist_Scoring.scoreArgs): result => @@ -235,12 +242,6 @@ let isFloat = (t: t) => | _ => false } -let sampleNRendered = (n, dist) => { - let integralCache = T.Integral.get(dist) - let distWithUpdatedIntegralCache = T.updateIntegralCache(Some(integralCache), dist) - doN(n, () => sample(distWithUpdatedIntegralCache)) -} - let operate = (distToFloatOp: Operation.distToFloatOperation, s): float => switch distToFloatOp { | #Pdf(f) => pdf(f, s) diff --git a/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist.res b/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist.res index 41856882..ebdb2598 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist.res +++ b/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist.res @@ -139,7 +139,7 @@ let mixture = (values: array<(t, float)>, intendedLength: int) => { ->Belt.Array.mapWithIndex((i, (_, weight)) => (E.I.toFloat(i), weight /. totalWeight)) ->XYShape.T.fromZippedArray ->Discrete.make - ->Discrete.sampleN(intendedLength) + ->Discrete.T.sampleN(intendedLength) let dists = values->E.A2.fmap(E.Tuple2.first)->E.A2.fmap(T.get) let samples = discreteSamples diff --git a/packages/squiggle-lang/src/rescript/Utility/E.res b/packages/squiggle-lang/src/rescript/Utility/E.res index 60da5737..02940943 100644 --- a/packages/squiggle-lang/src/rescript/Utility/E.res +++ b/packages/squiggle-lang/src/rescript/Utility/E.res @@ -559,6 +559,7 @@ module A = { let isEmpty = r => length(r) < 1 let stableSortBy = Belt.SortArray.stableSortBy let toNoneIfEmpty = r => isEmpty(r) ? None : Some(r) + let shuffle = Belt.Array.shuffle let toRanges = (a: array<'a>) => switch a |> Belt.Array.length { | 0 From b87e952785f07d1bdaf28ff5d7e8c116b8809a53 Mon Sep 17 00:00:00 2001 From: Ozzie Gooen Date: Fri, 2 Sep 2022 21:53:20 -0700 Subject: [PATCH 2/2] Formatter --- ...leLibrary_FunctionRegistryLibrary_test.res | 10 +++++-- .../rescript/Distributions/GenericDist.res | 2 +- .../Distributions/PointSetDist/Continuous.res | 30 +++++++++---------- .../Distributions/PointSetDist/Discrete.res | 8 ++--- .../Distributions/PointSetDist/Mixed.res | 30 ++++++++++--------- .../PointSetDist/PointSetDist.res | 6 ++-- 6 files changed, 47 insertions(+), 39 deletions(-) diff --git a/packages/squiggle-lang/__tests__/SquiggleLibrary/SquiggleLibrary_FunctionRegistryLibrary_test.res b/packages/squiggle-lang/__tests__/SquiggleLibrary/SquiggleLibrary_FunctionRegistryLibrary_test.res index 7ddb57d4..82933838 100644 --- a/packages/squiggle-lang/__tests__/SquiggleLibrary/SquiggleLibrary_FunctionRegistryLibrary_test.res +++ b/packages/squiggle-lang/__tests__/SquiggleLibrary/SquiggleLibrary_FunctionRegistryLibrary_test.res @@ -63,9 +63,15 @@ describe("FunctionRegistry Library", () => { testEvalToBe("SampleSet.fromList([3,5,2,3,5,2,3,5,2,3,3,5])", "Ok(Sample Set Distribution)") testEvalToBe("SampleSet.fromList([3,5,2,3,5,2,3,5,2,3,3,5])", "Ok(Sample Set Distribution)") testEvalToBe("SampleSet.fromFn({|| sample(normal(5,2))})", "Ok(Sample Set Distribution)") - testEvalToBe("SampleSet.min(SampleSet.fromDist(normal(50,2)), 2)", "Ok(Sample Set Distribution)") + testEvalToBe( + "SampleSet.min(SampleSet.fromDist(normal(50,2)), 2)", + "Ok(Sample Set Distribution)", + ) testEvalToBe("mean(SampleSet.min(SampleSet.fromDist(normal(50,2)), 2))", "Ok(2)") - testEvalToBe("SampleSet.max(SampleSet.fromDist(normal(50,2)), 10)", "Ok(Sample Set Distribution)") + testEvalToBe( + "SampleSet.max(SampleSet.fromDist(normal(50,2)), 10)", + "Ok(Sample Set Distribution)", + ) testEvalToBe( "addOne(t)=t+1; SampleSet.toList(SampleSet.map(SampleSet.fromList([1,2,3,4,5,6]), addOne))", "Ok([2,3,4,5,6,7])", diff --git a/packages/squiggle-lang/src/rescript/Distributions/GenericDist.res b/packages/squiggle-lang/src/rescript/Distributions/GenericDist.res index 266fc383..24aea69a 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/GenericDist.res +++ b/packages/squiggle-lang/src/rescript/Distributions/GenericDist.res @@ -31,7 +31,7 @@ let isSymbolic = (t: t) => let sampleN = (t: t, n) => switch t { - | PointSet(r) => PointSetDist.T.sampleN(r,n) + | PointSet(r) => PointSetDist.T.sampleN(r, n) | SampleSet(r) => SampleSetDist.sampleN(r, n) | Symbolic(r) => SymbolicDist.T.sampleN(n, r) } diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res index 69c3c01c..ada7e462 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res +++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res @@ -271,24 +271,24 @@ module T = Dist({ let variance = (t: t): float => XYShape.Analysis.getVarianceDangerously(t, mean, Analysis.getMeanOfSquares) -let doN = (n, fn) => { - let items = Belt.Array.make(n, 0.0) - for x in 0 to n - 1 { - let _ = Belt.Array.set(items, x, fn()) + let doN = (n, fn) => { + let items = Belt.Array.make(n, 0.0) + for x in 0 to n - 1 { + let _ = Belt.Array.set(items, x, fn()) + } + items } - items -} -let sample = (t: t): float => { - let randomItem = Random.float(1.0) - t |> integralYtoX(randomItem) -} + let sample = (t: t): float => { + let randomItem = Random.float(1.0) + t |> integralYtoX(randomItem) + } -let sampleN = (dist, n) => { - let integralCache = integral(dist) - let distWithUpdatedIntegralCache = updateIntegralCache(Some(integralCache), dist) - doN(n, () => sample(distWithUpdatedIntegralCache)) -} + let sampleN = (dist, n) => { + let integralCache = integral(dist) + let distWithUpdatedIntegralCache = updateIntegralCache(Some(integralCache), dist) + doN(n, () => sample(distWithUpdatedIntegralCache)) + } }) let isNormalized = (t: t): bool => { diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Discrete.res b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Discrete.res index dec30f35..e8564daf 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Discrete.res +++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Discrete.res @@ -224,8 +224,8 @@ module T = Dist({ XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares) } -let sampleN = (t: t, n): array => { - let normalized = t->normalize->getShape - Stdlib.Random.sample(normalized.xs, {probs: normalized.ys, size: n}) -} + let sampleN = (t: t, n): array => { + let normalized = t->normalize->getShape + Stdlib.Random.sample(normalized.xs, {probs: normalized.ys, size: n}) + } }) diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Mixed.res b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Mixed.res index 807a56ef..9086f505 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Mixed.res +++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Mixed.res @@ -270,9 +270,9 @@ module T = Dist({ }) } - let discreteIntegralSum =({discrete}: t): float => Discrete.T.Integral.sum(discrete) - let continuousIntegralSum =({continuous}: t): float => Continuous.T.Integral.sum(continuous) - let integralSum =(t:t): float => discreteIntegralSum(t) +. continuousIntegralSum(t) + let discreteIntegralSum = ({discrete}: t): float => Discrete.T.Integral.sum(discrete) + let continuousIntegralSum = ({continuous}: t): float => Continuous.T.Integral.sum(continuous) + let integralSum = (t: t): float => discreteIntegralSum(t) +. continuousIntegralSum(t) let mean = ({discrete, continuous} as t: t): float => { let discreteMean = Discrete.T.mean(discrete) @@ -289,7 +289,7 @@ module T = Dist({ let _integralSum = integralSum(t) let getMeanOfSquares = ({discrete, continuous}: t) => { let discreteMean = discrete |> Discrete.shapeMap(XYShape.T.square) |> Discrete.T.mean - let continuousMean = continuous -> Continuous.Analysis.getMeanOfSquares + let continuousMean = continuous->Continuous.Analysis.getMeanOfSquares (discreteMean *. discreteIntegralSum(t) +. continuousMean *. continuousIntegralSum(t)) /. integralSum(t) } @@ -300,16 +300,18 @@ module T = Dist({ | _ => XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares) } } - - let sampleN = (t: t, n:int): array => { - let discreteIntegralSum = discreteIntegralSum(t); - let integralSum = integralSum(t); - let discreteSampleLength:int = (Js.Int.toFloat(n) *. discreteIntegralSum /. integralSum) -> E.Float.toInt - let continuousSampleLength = n - discreteSampleLength; - let continuousSamples = t.continuous ->Continuous.T.normalize-> Continuous.T.sampleN( continuousSampleLength) - let discreteSamples = t.discrete ->Discrete.T.normalize->Discrete.T.sampleN(discreteSampleLength) - Js.log3("Samples", continuousSamples, discreteSamples); - E.A.concat(discreteSamples, continuousSamples) -> E.A.shuffle + + let sampleN = (t: t, n: int): array => { + let discreteIntegralSum = discreteIntegralSum(t) + let integralSum = integralSum(t) + let discreteSampleLength: int = + (Js.Int.toFloat(n) *. discreteIntegralSum /. integralSum)->E.Float.toInt + let continuousSampleLength = n - discreteSampleLength + let continuousSamples = + t.continuous->Continuous.T.normalize->Continuous.T.sampleN(continuousSampleLength) + let discreteSamples = t.discrete->Discrete.T.normalize->Discrete.T.sampleN(discreteSampleLength) + Js.log3("Samples", continuousSamples, discreteSamples) + E.A.concat(discreteSamples, continuousSamples)->E.A.shuffle } }) diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res index 2a1467a5..ba6174e9 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res +++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res @@ -201,9 +201,9 @@ module T = Dist({ let sampleN = (t: t, int): array => switch t { - | Mixed(m) => Mixed.T.sampleN(m,int) - | Discrete(m) => Discrete.T.sampleN(m,int) - | Continuous(m) => Continuous.T.sampleN(m,int) + | Mixed(m) => Mixed.T.sampleN(m, int) + | Discrete(m) => Discrete.T.sampleN(m, int) + | Continuous(m) => Continuous.T.sampleN(m, int) } })