From 93f4c1e0c238c0ca6ab35befd7d2ba36ed09cada Mon Sep 17 00:00:00 2001 From: Ozzie Gooen Date: Fri, 2 Sep 2022 21:51:42 -0700 Subject: [PATCH] fix: SampleSet.fromDist works for discrete and mixed --- .../rescript/Distributions/GenericDist.res | 4 +- .../Distributions/PointSetDist/Continuous.res | 19 +++++++++ .../Distributions/PointSetDist/Discrete.res | 4 +- .../PointSetDist/Distributions.res | 3 ++ .../Distributions/PointSetDist/Mixed.res | 39 ++++++++++++------- .../PointSetDist/PointSetDist.res | 13 ++++--- .../SampleSetDist/SampleSetDist.res | 2 +- .../squiggle-lang/src/rescript/Utility/E.res | 1 + 8 files changed, 59 insertions(+), 26 deletions(-) diff --git a/packages/squiggle-lang/src/rescript/Distributions/GenericDist.res b/packages/squiggle-lang/src/rescript/Distributions/GenericDist.res index 0c279a9c..266fc383 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/GenericDist.res +++ b/packages/squiggle-lang/src/rescript/Distributions/GenericDist.res @@ -31,9 +31,9 @@ let isSymbolic = (t: t) => let sampleN = (t: t, n) => switch t { - | PointSet(r) => PointSetDist.sampleNRendered(n, r) - | Symbolic(r) => SymbolicDist.T.sampleN(n, r) + | PointSet(r) => PointSetDist.T.sampleN(r,n) | SampleSet(r) => SampleSetDist.sampleN(r, n) + | Symbolic(r) => SymbolicDist.T.sampleN(n, r) } let sample = (t: t) => sampleN(t, 1)->E.A.first |> E.O.toExn("Should not have happened") diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res index 2297a3e3..69c3c01c 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res +++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res @@ -270,6 +270,25 @@ module T = Dist({ } let variance = (t: t): float => XYShape.Analysis.getVarianceDangerously(t, mean, Analysis.getMeanOfSquares) + +let doN = (n, fn) => { + let items = Belt.Array.make(n, 0.0) + for x in 0 to n - 1 { + let _ = Belt.Array.set(items, x, fn()) + } + items +} + +let sample = (t: t): float => { + let randomItem = Random.float(1.0) + t |> integralYtoX(randomItem) +} + +let sampleN = (dist, n) => { + let integralCache = integral(dist) + let distWithUpdatedIntegralCache = updateIntegralCache(Some(integralCache), dist) + doN(n, () => sample(distWithUpdatedIntegralCache)) +} }) let isNormalized = (t: t): bool => { diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Discrete.res b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Discrete.res index 7142f097..dec30f35 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Discrete.res +++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Discrete.res @@ -223,9 +223,9 @@ module T = Dist({ let getMeanOfSquares = t => t |> shapeMap(XYShape.T.square) |> mean XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares) } -}) let sampleN = (t: t, n): array => { - let normalized = t->T.normalize->getShape + let normalized = t->normalize->getShape Stdlib.Random.sample(normalized.xs, {probs: normalized.ys, size: n}) } +}) diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Distributions.res b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Distributions.res index 3a35d57b..55d51747 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Distributions.res +++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Distributions.res @@ -33,6 +33,7 @@ module type dist = { let mean: t => float let variance: t => float + let sampleN: (t, int) => array } module Dist = (T: dist) => { @@ -64,6 +65,8 @@ module Dist = (T: dist) => { let yToX = T.integralYtoX let sum = T.integralEndY } + + let sampleN = T.sampleN } module Common = { diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Mixed.res b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Mixed.res index c9f31dbf..807a56ef 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Mixed.res +++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Mixed.res @@ -270,38 +270,47 @@ module T = Dist({ }) } - let mean = ({discrete, continuous}: t): float => { + let discreteIntegralSum =({discrete}: t): float => Discrete.T.Integral.sum(discrete) + let continuousIntegralSum =({continuous}: t): float => Continuous.T.Integral.sum(continuous) + let integralSum =(t:t): float => discreteIntegralSum(t) +. continuousIntegralSum(t) + + let mean = ({discrete, continuous} as t: t): float => { let discreteMean = Discrete.T.mean(discrete) let continuousMean = Continuous.T.mean(continuous) - // the combined mean is the weighted sum of the two: - let discreteIntegralSum = Discrete.T.Integral.sum(discrete) - let continuousIntegralSum = Continuous.T.Integral.sum(continuous) - let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum - - (discreteMean *. discreteIntegralSum +. continuousMean *. continuousIntegralSum) /. - totalIntegralSum + (discreteMean *. discreteIntegralSum(t) +. continuousMean *. continuousIntegralSum(t)) /. + integralSum(t) } let variance = ({discrete, continuous} as t: t): float => { // the combined mean is the weighted sum of the two: - let discreteIntegralSum = Discrete.T.Integral.sum(discrete) - let continuousIntegralSum = Continuous.T.Integral.sum(continuous) - let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum + let _discreteIntegralSum = discreteIntegralSum(t) + let _integralSum = integralSum(t) let getMeanOfSquares = ({discrete, continuous}: t) => { let discreteMean = discrete |> Discrete.shapeMap(XYShape.T.square) |> Discrete.T.mean - let continuousMean = continuous |> Continuous.Analysis.getMeanOfSquares - (discreteMean *. discreteIntegralSum +. continuousMean *. continuousIntegralSum) /. - totalIntegralSum + let continuousMean = continuous -> Continuous.Analysis.getMeanOfSquares + (discreteMean *. discreteIntegralSum(t) +. continuousMean *. continuousIntegralSum(t)) /. + integralSum(t) } - switch discreteIntegralSum /. totalIntegralSum { + switch _discreteIntegralSum /. _integralSum { | 1.0 => Discrete.T.variance(discrete) | 0.0 => Continuous.T.variance(continuous) | _ => XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares) } } + + let sampleN = (t: t, n:int): array => { + let discreteIntegralSum = discreteIntegralSum(t); + let integralSum = integralSum(t); + let discreteSampleLength:int = (Js.Int.toFloat(n) *. discreteIntegralSum /. integralSum) -> E.Float.toInt + let continuousSampleLength = n - discreteSampleLength; + let continuousSamples = t.continuous ->Continuous.T.normalize-> Continuous.T.sampleN( continuousSampleLength) + let discreteSamples = t.discrete ->Discrete.T.normalize->Discrete.T.sampleN(discreteSampleLength) + Js.log3("Samples", continuousSamples, discreteSamples); + E.A.concat(discreteSamples, continuousSamples) -> E.A.shuffle + } }) let combineAlgebraically = (op: Operation.convolutionOperation, t1: t, t2: t): t => { diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res index a52ee784..2a1467a5 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res +++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res @@ -198,6 +198,13 @@ module T = Dist({ | Discrete(m) => Discrete.T.variance(m) | Continuous(m) => Continuous.T.variance(m) } + + let sampleN = (t: t, int): array => + switch t { + | Mixed(m) => Mixed.T.sampleN(m,int) + | Discrete(m) => Discrete.T.sampleN(m,int) + | Continuous(m) => Continuous.T.sampleN(m,int) + } }) let logScore = (args: PointSetDist_Scoring.scoreArgs): result => @@ -235,12 +242,6 @@ let isFloat = (t: t) => | _ => false } -let sampleNRendered = (n, dist) => { - let integralCache = T.Integral.get(dist) - let distWithUpdatedIntegralCache = T.updateIntegralCache(Some(integralCache), dist) - doN(n, () => sample(distWithUpdatedIntegralCache)) -} - let operate = (distToFloatOp: Operation.distToFloatOperation, s): float => switch distToFloatOp { | #Pdf(f) => pdf(f, s) diff --git a/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist.res b/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist.res index 41856882..ebdb2598 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist.res +++ b/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist.res @@ -139,7 +139,7 @@ let mixture = (values: array<(t, float)>, intendedLength: int) => { ->Belt.Array.mapWithIndex((i, (_, weight)) => (E.I.toFloat(i), weight /. totalWeight)) ->XYShape.T.fromZippedArray ->Discrete.make - ->Discrete.sampleN(intendedLength) + ->Discrete.T.sampleN(intendedLength) let dists = values->E.A2.fmap(E.Tuple2.first)->E.A2.fmap(T.get) let samples = discreteSamples diff --git a/packages/squiggle-lang/src/rescript/Utility/E.res b/packages/squiggle-lang/src/rescript/Utility/E.res index 60da5737..02940943 100644 --- a/packages/squiggle-lang/src/rescript/Utility/E.res +++ b/packages/squiggle-lang/src/rescript/Utility/E.res @@ -559,6 +559,7 @@ module A = { let isEmpty = r => length(r) < 1 let stableSortBy = Belt.SortArray.stableSortBy let toNoneIfEmpty = r => isEmpty(r) ? None : Some(r) + let shuffle = Belt.Array.shuffle let toRanges = (a: array<'a>) => switch a |> Belt.Array.length { | 0