From bfcea1df02315e7f466d7fe141c3f199aec6679f Mon Sep 17 00:00:00 2001 From: Vyacheslav Matyukhin Date: Sat, 24 Sep 2022 19:08:41 +0400 Subject: [PATCH] implement SampleSet.cdf --- .../ReducerInterface_Distribution_test.res | 1 + .../rescript/Distributions/GenericDist.res | 19 ++++++------------- .../SampleSetDist/SampleSetDist.res | 4 ++++ 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/packages/squiggle-lang/__tests__/ReducerInterface/ReducerInterface_Distribution_test.res b/packages/squiggle-lang/__tests__/ReducerInterface/ReducerInterface_Distribution_test.res index c8207131..46ed4c42 100644 --- a/packages/squiggle-lang/__tests__/ReducerInterface/ReducerInterface_Distribution_test.res +++ b/packages/squiggle-lang/__tests__/ReducerInterface/ReducerInterface_Distribution_test.res @@ -98,6 +98,7 @@ describe("eval on distribution functions", () => { "log(normal(5,2), normal(10,1))", "Error(Distribution Math Error: Logarithm of input error: First input must be completely greater than 0)", ) + testEval("log(2, SampleSet.fromDist(0.0001 to 5))", "Ok(Sample Set Distribution)") // log with low values, see https://github.com/quantified-uncertainty/squiggle/issues/1098 testEval("log(uniform(5,8))", "Ok(Sample Set Distribution)") testEval("log10(uniform(5,8))", "Ok(Sample Set Distribution)") }) diff --git a/packages/squiggle-lang/src/rescript/Distributions/GenericDist.res b/packages/squiggle-lang/src/rescript/Distributions/GenericDist.res index f9f5a5d0..231a893c 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/GenericDist.res +++ b/packages/squiggle-lang/src/rescript/Distributions/GenericDist.res @@ -86,6 +86,7 @@ let toFloatOperation = ( | (SampleSet(sampleSet), #Inv(r)) => SampleSetDist.percentile(sampleSet, r)->Some | (SampleSet(sampleSet), #Min) => SampleSetDist.min(sampleSet)->Some | (SampleSet(sampleSet), #Max) => SampleSetDist.max(sampleSet)->Some + | (SampleSet(sampleSet), #Cdf(r)) => SampleSetDist.cdf(sampleSet, r)->Some | _ => None 
} @@ -277,22 +278,14 @@ module AlgebraicCombination = { Right now we don't yet have a way of getting probability mass, so I'll leave this for later. */ let getLogarithmInputError = (t1: t, t2: t, ~toPointSetFn: toPointSetFn): option<error> => { - let firstOperandIsGreaterThanZero = + let isDistGreaterThanZero = t => toFloatOperation( - t1, + t, ~toPointSetFn, ~distToFloatOperation=#Cdf(MagicNumbers.Epsilon.ten), - ) |> E.R.fmap(r => r > 0.) - let secondOperandIsGreaterThanZero = - toFloatOperation( - t2, - ~toPointSetFn, - ~distToFloatOperation=#Cdf(MagicNumbers.Epsilon.ten), - ) |> E.R.fmap(r => r > 0.) - let items = E.A.R.firstErrorOrOpen([ - firstOperandIsGreaterThanZero, - secondOperandIsGreaterThanZero, - ]) + )->E.R2.fmap(r => r > 0.) + + let items = E.A.R.firstErrorOrOpen([isDistGreaterThanZero(t1), isDistGreaterThanZero(t2)]) switch items { | Error(r) => Some(r) | Ok([true, _]) => diff --git a/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist.res b/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist.res index 41856882..17fbe431 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist.res +++ b/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist.res @@ -131,6 +131,10 @@ let max = t => T.get(t)->E.A.Floats.max let stdev = t => T.get(t)->E.A.Floats.stdev let variance = t => T.get(t)->E.A.Floats.variance let percentile = (t, f) => T.get(t)->E.A.Floats.percentile(f) +let cdf = (t: t, f: float) => { + let countBelowF = t->E.A.reduce(0, (acc, x) => acc + (x <= f ? 1 : 0)) + countBelowF->Js.Int.toFloat /. t->length->Js.Int.toFloat +} let mixture = (values: array<(t, float)>, intendedLength: int) => { let totalWeight = values->E.A2.fmap(E.Tuple2.second)->E.A.Floats.sum