From 4bd961a80856a3cdf824803157980087a9143579 Mon Sep 17 00:00:00 2001 From: Vyacheslav Matyukhin Date: Thu, 6 Oct 2022 00:02:10 +0400 Subject: [PATCH] store SampleSet as typed array --- .../__tests__/Bandwidth_test.res | 4 +- .../E/splitContinuousAndDiscrete_test.res | 23 +-- .../squiggle-lang/src/js/SqDistribution.ts | 6 +- .../Distributions/DistributionOperation.res | 2 +- .../Distributions/DistributionOperation.resi | 2 +- .../Distributions/DistributionTypes.res | 1 + .../rescript/Distributions/GenericDist.res | 4 +- .../SampleSetDist/SampleSetDist.res | 188 +++++++++++++----- .../SampleSetDist/SampleSetDist_Bandwidth.res | 15 +- .../SampleSetDist_ToPointSet.res | 22 +- .../src/rescript/FR/FR_Sampleset.res | 10 +- .../src/rescript/MagicNumbers.res | 2 +- .../squiggle-lang/src/rescript/Utility/E.res | 1 + .../src/rescript/Utility/E/E_A.res | 49 ----- .../src/rescript/Utility/E/E_FloatArray.res | 89 +++++++++ .../src/rescript/Utility/Operation.res | 2 + .../src/rescript/Utility/Stdlib.res | 8 + 17 files changed, 286 insertions(+), 142 deletions(-) create mode 100644 packages/squiggle-lang/src/rescript/Utility/E/E_FloatArray.res diff --git a/packages/squiggle-lang/__tests__/Bandwidth_test.res b/packages/squiggle-lang/__tests__/Bandwidth_test.res index df621743..fd24b6d5 100644 --- a/packages/squiggle-lang/__tests__/Bandwidth_test.res +++ b/packages/squiggle-lang/__tests__/Bandwidth_test.res @@ -3,11 +3,11 @@ open Expect describe("Bandwidth", () => { test("nrd0()", () => { - let data = [1., 4., 3., 2.] + let data = [1., 4., 3., 2.]->E.FloatArray.make expect(SampleSetDist_Bandwidth.nrd0(data))->toEqual(0.7625801874014622) }) test("nrd()", () => { - let data = [1., 4., 3., 2.] 
+ let data = [1., 4., 3., 2.]->E.FloatArray.make expect(SampleSetDist_Bandwidth.nrd(data))->toEqual(0.8981499984950554) }) }) diff --git a/packages/squiggle-lang/__tests__/E/splitContinuousAndDiscrete_test.res b/packages/squiggle-lang/__tests__/E/splitContinuousAndDiscrete_test.res index 43b0019c..5012accc 100644 --- a/packages/squiggle-lang/__tests__/E/splitContinuousAndDiscrete_test.res +++ b/packages/squiggle-lang/__tests__/E/splitContinuousAndDiscrete_test.res @@ -2,9 +2,10 @@ open Jest open TestHelpers let prepareInputs = (ar, minWeight) => - E.A.Floats.Sorted.splitContinuousAndDiscreteForMinWeight(ar, ~minDiscreteWeight=minWeight) |> ( - ((c, disc)) => (c, disc |> E.FloatFloatMap.toArray) - ) + E.FloatArray.Sorted.splitContinuousAndDiscreteForMinWeight( + ar->E.FloatArray.make, + ~minDiscreteWeight=minWeight, + ) |> (((c, disc)) => (c->E.FloatArray.toArray, disc->E.FloatFloatMap.toArray)) describe("Continuous and discrete splits", () => { makeTest( @@ -37,18 +38,10 @@ describe("Continuous and discrete splits", () => { E.A.concatMany([sorted, sorted, sorted, sorted]) |> Belt.SortArray.stableSortBy(_, compare) } - let (_, discrete1) = E.A.Floats.Sorted.splitContinuousAndDiscreteForMinWeight( - makeDuplicatedArray(10), - ~minDiscreteWeight=2, - ) - let toArr1 = discrete1 |> E.FloatFloatMap.toArray - makeTest("splitMedium at count=10", toArr1 |> Belt.Array.length, 10) + let (_, toArr1) = prepareInputs(makeDuplicatedArray(10), 2) + makeTest("splitMedium at count=10", toArr1->Belt.Array.length, 10) - let (_c, discrete2) = E.A.Floats.Sorted.splitContinuousAndDiscreteForMinWeight( - makeDuplicatedArray(500), - ~minDiscreteWeight=2, - ) - let toArr2 = discrete2 |> E.FloatFloatMap.toArray - makeTest("splitMedium at count=500", toArr2 |> Belt.Array.length, 500) + let (_, toArr2) = prepareInputs(makeDuplicatedArray(500), 2) + makeTest("splitMedium at count=500", toArr2->Belt.Array.length, 500) // makeTest("foo", [] |> Belt.Array.length, 500) }) diff --git 
a/packages/squiggle-lang/src/js/SqDistribution.ts b/packages/squiggle-lang/src/js/SqDistribution.ts index e05fab95..54bc6a91 100644 --- a/packages/squiggle-lang/src/js/SqDistribution.ts +++ b/packages/squiggle-lang/src/js/SqDistribution.ts @@ -90,9 +90,9 @@ export class SqPointSetDistribution extends SqAbstractDistribution { export class SqSampleSetDistribution extends SqAbstractDistribution { tag = Tag.SampleSet as const; - value(): number[] { - return this.valueMethod(RSDistribution.getSampleSet); - } + // value(): number[] { + // return this.valueMethod(RSDistribution.getSampleSet); + // } } export class SqSymbolicDistribution extends SqAbstractDistribution { diff --git a/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation.res b/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation.res index 9c61211e..a747dc0d 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation.res +++ b/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation.res @@ -221,7 +221,7 @@ let rec run = (~env: env, functionCallInfo: functionCallInfo): outputType => { ->OutputLocal.fromResult | FromSamples(xs) => xs - ->SampleSetDist.make + ->SampleSetDist.makeFromJsArray ->E.R2.errMap(x => DistributionTypes.SampleSetError(x)) ->E.R2.fmap(x => x->DistributionTypes.SampleSet->Dist) ->OutputLocal.fromResult diff --git a/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation.resi b/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation.resi index 68da9534..29f03da1 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation.resi +++ b/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation.resi @@ -98,7 +98,7 @@ module Constructors: { @genType let toSampleSet: (~env: GenericDist.env, genericDist, int) => result @genType - let fromSamples: (~env: GenericDist.env, SampleSetDist.t) => result + let fromSamples: (~env: GenericDist.env, array) => result 
@genType let truncate: ( ~env: GenericDist.env, diff --git a/packages/squiggle-lang/src/rescript/Distributions/DistributionTypes.res b/packages/squiggle-lang/src/rescript/Distributions/DistributionTypes.res index 93f934e1..238cd861 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/DistributionTypes.res +++ b/packages/squiggle-lang/src/rescript/Distributions/DistributionTypes.res @@ -38,6 +38,7 @@ module Error = { | SampleSetError(TooFewSamples) => "Too Few Samples" | SampleSetError(NonNumericInput(err)) => `Found a non-number in input: ${err}` | SampleSetError(OperationError(err)) => Operation.Error.toString(err) + | SampleSetError(UnequalSizes) => "Expected sample sets of equal size" | OperationError(err) => Operation.Error.toString(err) | PointSetConversionError(err) => SampleSetDist.pointsetConversionErrorToString(err) | SparklineError(err) => PointSetTypes.sparklineErrorToString(err) diff --git a/packages/squiggle-lang/src/rescript/Distributions/GenericDist.res b/packages/squiggle-lang/src/rescript/Distributions/GenericDist.res index 231a893c..bf7c79a1 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/GenericDist.res +++ b/packages/squiggle-lang/src/rescript/Distributions/GenericDist.res @@ -40,7 +40,9 @@ let sampleN = (t: t, n) => let sample = (t: t) => sampleN(t, 1)->E.A.first |> E.O.toExn("Should not have happened") let toSampleSetDist = (t: t, n) => - SampleSetDist.make(sampleN(t, n))->E.R2.errMap(DistributionTypes.Error.sampleErrorToDistErr) + SampleSetDist.makeFromJsArray(sampleN(t, n))->E.R2.errMap( + DistributionTypes.Error.sampleErrorToDistErr, + ) let fromFloat = (f: float): t => Symbolic(SymbolicDist.Float.make(f)) diff --git a/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist.res b/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist.res index 17fbe431..ea5c83a9 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist.res +++ 
b/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist.res @@ -2,14 +2,10 @@ module Error = { @genType type sampleSetError = - TooFewSamples | NonNumericInput(string) | OperationError(Operation.operationError) - - let sampleSetErrorToString = (err: sampleSetError): string => - switch err { - | TooFewSamples => "Too few samples when constructing sample set" - | NonNumericInput(err) => `Found a non-number in input: ${err}` - | OperationError(err) => Operation.Error.toString(err) - } + | TooFewSamples + | NonNumericInput(string) + | OperationError(Operation.operationError) + | UnequalSizes @genType type pointsetConversionError = TooFewSamplesForConversionToPointSet @@ -26,6 +22,7 @@ module Error = { | TooFewSamples => "Too few samples when constructing sample set" | NonNumericInput(err) => `Found a non-number in input: ${err}` | OperationError(err) => Operation.Error.toString(err) + | UnequalSizes => "Expected sample sets of equal size" } } } @@ -38,26 +35,29 @@ this constructor. https://stackoverflow.com/questions/66909578/how-to-make-a-type-constructor-private-in-rescript-except-in-current-module */ module T: { - //This really should be hidden (remove the array). The reason it isn't is to act as an escape hatch in JS__Test.ts. - //When we get a good functional library in TS, we could refactor that out. 
- @genType - type t = array - let make: array => result - let get: t => array + @genType.opaque + type t + let makeFromTypedArray: E.FloatArray.t => result + let makeFromJsArray: array => result + let toJsArray: t => array + let get: t => E.FloatArray.t } = { - type t = array - let make = (a: array) => - if E.A.length(a) > 5 { + type t = E.FloatArray.t + let makeFromTypedArray = (a: E.FloatArray.t): result => + if E.FloatArray.length(a) > 5 { Ok(a) } else { Error(TooFewSamples) } - let get = (a: t) => a + let makeFromJsArray = (a: array): result => + E.FloatArray.make(a)->makeFromTypedArray + let toJsArray = (t: t) => t->E.FloatArray.toArray + let get = (t: t) => t } include T -let length = (t: t) => get(t)->E.A.length +let length = (t: T.t) => T.get(t)->E.FloatArray.length /* TODO: Refactor to get a more precise estimate. Also, this code is just fairly messy, could use @@ -68,15 +68,15 @@ let toPointSetDist = (~samples: t, ~samplingInputs: SamplingInputs.samplingInput pointsetConversionError, > => SampleSetDist_ToPointSet.toPointSetDist( - ~samples=get(samples), + ~samples=T.get(samples), ~samplingInputs, (), ).pointSetDist->E.O2.toResult(TooFewSamplesForConversionToPointSet) //Randomly get one sample from the distribution let sample = (t: t): float => { - let i = E.Int.random(~min=0, ~max=E.A.length(get(t)) - 1) - E.A.unsafe_get(get(t), i) + let i = E.Int.random(~min=0, ~max=E.FloatArray.length(get(t)) - 1) + E.FloatArray.unsafe_get(get(t), i) } /* @@ -87,52 +87,144 @@ The former helps in cases where multiple distributions are correlated. However, if n > length(t), then there's no clear right answer, so we just randomly sample everything. 
*/ -let sampleN = (t: t, n) => { - if n <= E.A.length(get(t)) { - E.A.slice(get(t), ~offset=0, ~len=n) +let sampleN = (t: t, n): array => { + if n <= length(t) { + E.FloatArray.slice(~start=0, ~end_=n, get(t))->E.FloatArray.toArray } else { Belt.Array.makeBy(n, _ => sample(t)) } } -let _fromSampleResultArray = (samples: array>) => - E.A.R.firstErrorOrOpen(samples)->E.R2.errMap(Error.fromOperationError) |> E.R2.bind(make) - let samplesMap = (~fn: float => result, t: t): result< t, sampleSetError, -> => T.get(t)->E.A2.fmap(fn)->_fromSampleResultArray +> => { + try { + T.get(t) + ->E.FloatArray.map((. v) => { + switch fn(v) { + | Ok(res) => res + | Error(err) => err->Operation.Error.OperationException->raise + } + }) + ->T.makeFromTypedArray + } catch { + | Operation.Error.OperationException(err) => Error.fromOperationError(err)->Error + } +} -//TODO: Figure out what to do if distributions are different lengths. ``zip`` is kind of inelegant for this. let map2 = (~fn: (float, float) => result, ~t1: t, ~t2: t): result< t, sampleSetError, -> => E.A.zip(get(t1), get(t2))->E.A2.fmap(E.Tuple2.toFnCall(fn))->_fromSampleResultArray +> => { + let length1 = t1->length + let length2 = t2->length + if length1 == length2 { + try { + let res = E.FloatArray.fromLength(length1) + for i in 0 to length1 - 1 { + let v = switch fn( + get(t1)->E.FloatArray.unsafe_get(i), + get(t2)->E.FloatArray.unsafe_get(i), + ) { + | Ok(fnResult) => fnResult + | Error(err) => err->Operation.Error.OperationException->raise + } + res->E.FloatArray.set(i, v) + } + res->T.makeFromTypedArray + } catch { + | Operation.Error.OperationException(err) => Error.fromOperationError(err)->Error + } + } else { + Error.UnequalSizes->Error + } +} let map3 = ( ~fn: (float, float, float) => result, ~t1: t, ~t2: t, ~t3: t, -): result => - E.A.zip3(get(t1), get(t2), get(t3))->E.A2.fmap(E.Tuple3.toFnCall(fn))->_fromSampleResultArray +): result => { + let length1 = t1->length + let length2 = t2->length + let length3 = 
t3->length + if length1 == length2 && length2 == length3 { + try { + let res = E.FloatArray.fromLength(length1) + for i in 0 to length1 - 1 { + let v = switch fn( + get(t1)->E.FloatArray.unsafe_get(i), + get(t2)->E.FloatArray.unsafe_get(i), + get(t3)->E.FloatArray.unsafe_get(i), + ) { + | Ok(fnResult) => fnResult + | Error(err) => err->Operation.Error.OperationException->raise + } + res->E.FloatArray.set(i, v) + } + res->T.makeFromTypedArray + } catch { + | Operation.Error.OperationException(err) => Error.fromOperationError(err)->Error + } + } else { + Error.UnequalSizes->Error + } +} let mapN = (~fn: array => result, ~t1: array): result< t, sampleSetError, -> => E.A.transpose(E.A.fmap(get, t1))->E.A2.fmap(fn)->_fromSampleResultArray +> => { + let lengths = t1->E.A2.fmap(t => t->length) + let l0 = lengths->E.A.get(0)->E.O2.default(0) // empty t1 must yield an Error result, not raise on index 0 + if lengths->E.A.all(l => l == l0, _) { + try { + let res = E.FloatArray.fromLength(l0) + for i in 0 to l0 - 1 { + let v = switch fn(t1->E.A2.fmap(t => get(t)->E.FloatArray.unsafe_get(i))) { + | Ok(fnResult) => fnResult + | Error(err) => err->Operation.Error.OperationException->raise + } + res->E.FloatArray.set(i, v) + } + res->T.makeFromTypedArray + } catch { + | Operation.Error.OperationException(err) => Error.fromOperationError(err)->Error + } + } else { + Error.UnequalSizes->Error + } +} -let mean = t => T.get(t)->E.A.Floats.mean -let geomean = t => T.get(t)->E.A.Floats.geomean -let mode = t => T.get(t)->E.A.Floats.mode -let sum = t => T.get(t)->E.A.Floats.sum -let min = t => T.get(t)->E.A.Floats.min -let max = t => T.get(t)->E.A.Floats.max -let stdev = t => T.get(t)->E.A.Floats.stdev -let variance = t => T.get(t)->E.A.Floats.variance -let percentile = (t, f) => T.get(t)->E.A.Floats.percentile(f) +let makeBy = (n: int, fn: int => result): result => { + let res = E.FloatArray.fromLength(n) + try { + for i in 0 to n - 1 { + let fnResult = fn(i) + switch fnResult { + | Ok(v) => res->E.FloatArray.set(i, v) + | Error(err) => 
err->Operation.Error.OperationException->raise + } + } + res->T.makeFromTypedArray + } catch { + | Operation.Error.OperationException(err) => Error.fromOperationError(err)->Error + } +} + +let mean = t => T.get(t)->E.FloatArray.mean +let geomean = t => T.get(t)->E.FloatArray.geomean +let mode = t => T.get(t)->E.FloatArray.mode +let sum = t => T.get(t)->E.FloatArray.sum +let min = t => T.get(t)->E.FloatArray.min +let max = t => T.get(t)->E.FloatArray.max +let stdev = t => T.get(t)->E.FloatArray.stdev +let variance = t => T.get(t)->E.FloatArray.variance +let percentile = (t, f) => T.get(t)->E.FloatArray.percentile(f) let cdf = (t: t, f: float) => { - let countBelowF = t->E.A.reduce(0, (acc, x) => acc + (x <= f ? 1 : 0)) + let countBelowF = T.get(t)->E.FloatArray.reduce((. acc, x) => acc + (x <= f ? 1 : 0), 0) countBelowF->Js.Int.toFloat /. t->length->Js.Int.toFloat } @@ -149,14 +241,14 @@ let mixture = (values: array<(t, float)>, intendedLength: int) => { discreteSamples ->Belt.Array.mapWithIndex((index, distIndexToChoose) => { let chosenDist = E.A.get(dists, E.Float.toInt(distIndexToChoose)) - chosenDist->E.O.bind(E.A.get(_, index)) + chosenDist->E.O.bind(E.FloatArray.get(_, index)) }) ->E.A.O.openIfAllSome - samples->E.O2.toExn("Mixture unreachable error")->T.make + samples->E.O2.toExn("Mixture unreachable error")->T.makeFromJsArray } -let truncateLeft = (t, f) => T.get(t)->E.A2.filter(x => x >= f)->T.make -let truncateRight = (t, f) => T.get(t)->E.A2.filter(x => x <= f)->T.make +let truncateLeft = (t, f) => T.get(t)->E.FloatArray.filter((. x) => x >= f)->T.makeFromTypedArray +let truncateRight = (t, f) => T.get(t)->E.FloatArray.filter((. 
x) => x <= f)->T.makeFromTypedArray let truncate = (t, ~leftCutoff: option, ~rightCutoff: option) => { let withTruncatedLeft = t => leftCutoff |> E.O.dimap(left => truncateLeft(t, left), _ => Ok(t)) diff --git a/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist_Bandwidth.res b/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist_Bandwidth.res index 29d48ad3..7cbc56c7 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist_Bandwidth.res +++ b/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist_Bandwidth.res @@ -2,16 +2,19 @@ let {iqr_percentile, nrd0_lo_denominator, one, nrd0_coef, nrd_coef, nrd_fractionalPower} = module( MagicNumbers.SampleSetBandwidth ) -let len = x => E.A.length(x) |> float_of_int +let len = x => E.FloatArray.length(x)->float_of_int -let iqr = x => - Jstat.percentile(x, iqr_percentile, true) -. Jstat.percentile(x, 1.0 -. iqr_percentile, true) +let iqr = x => { + let xArr = x->E.FloatArray.toArray + Jstat.percentile(xArr, iqr_percentile, true) -. + Jstat.percentile(xArr, 1.0 -. iqr_percentile, true) +} // Silverman, B. W. (1986) Density Estimation. London: Chapman and Hall. let nrd0 = x => { - let hi = Js_math.sqrt(Jstat.variance(x)) + let hi = Js_math.sqrt(Stdlib.Base.variance(x)) let lo = Js_math.minMany_float([hi, iqr(x) /. nrd0_lo_denominator]) - let e = Js_math.abs_float(x[1]) + let e = Js_math.abs_float(x->E.FloatArray.unsafe_get(1)) let lo' = switch (lo, hi, e) { | (lo, _, _) if !Js.Float.isNaN(lo) => lo | (_, hi, _) if !Js.Float.isNaN(hi) => hi @@ -25,6 +28,6 @@ let nrd0 = x => { let nrd = x => { let h = iqr(x) /. nrd0_lo_denominator nrd_coef *. - Js.Math.min_float(Js.Math.sqrt(Jstat.variance(x)), h) *. + Js.Math.min_float(Js.Math.sqrt(Stdlib.Base.variance(x)), h) *. 
Js.Math.pow_float(~base=len(x), ~exp=nrd_fractionalPower) } diff --git a/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist_ToPointSet.res b/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist_ToPointSet.res index 4cf2b868..acdc4e92 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist_ToPointSet.res +++ b/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist_ToPointSet.res @@ -37,40 +37,40 @@ module Internals = { } module T = { - type t = array + type t = E.FloatArray.t - let xWidthToUnitWidth = (samples, outputXYPoints, xWidth) => { - let xyPointRange = E.A.Sorted.range(samples)->E.O2.default(0.0) + let xWidthToUnitWidth = (samples: t, outputXYPoints, xWidth) => { + let xyPointRange = E.FloatArray.Sorted.range(samples)->E.O2.default(0.0) let xyPointWidth = xyPointRange /. float_of_int(outputXYPoints) xWidth /. xyPointWidth } let formatUnitWidth = w => Jstat.max([w, 1.0])->int_of_float - let suggestedUnitWidth = (samples, outputXYPoints) => { + let suggestedUnitWidth = (samples: t, outputXYPoints) => { let suggestedXWidth = SampleSetDist_Bandwidth.nrd0(samples) xWidthToUnitWidth(samples, outputXYPoints, suggestedXWidth) } - let kde = (~samples, ~outputXYPoints, width) => - KDE.normalSampling(samples, outputXYPoints, width) + let kde = (~samples: t, ~outputXYPoints, width) => + KDE.normalSampling(samples->E.FloatArray.toArray, outputXYPoints, width) } } let toPointSetDist = ( - ~samples: Internals.T.t, + ~samples: E.FloatArray.t, ~samplingInputs: SamplingInputs.samplingInputs, (), ): Internals.Types.outputs => { - let samples = samples->E.A.Floats.sort + let samples = samples->E.FloatArray.sort let minDiscreteToKeep = MagicNumbers.ToPointSet.minDiscreteToKeep(samples) - let (continuousPart, discretePart) = E.A.Floats.Sorted.splitContinuousAndDiscreteForMinWeight( + let (continuousPart, discretePart) = 
E.FloatArray.Sorted.splitContinuousAndDiscreteForMinWeight( samples, ~minDiscreteWeight=minDiscreteToKeep, ) - let length = samples->E.A.length->float_of_int + let length = samples->E.FloatArray.length->float_of_int let discrete: PointSetTypes.discreteShape = discretePart ->E.FloatFloatMap.fmap(r => r /. length, _) @@ -79,7 +79,7 @@ let toPointSetDist = ( ->Discrete.make let pdf = - continuousPart->E.A.length > 5 + continuousPart->E.FloatArray.length > 5 ? { let _suggestedXWidth = SampleSetDist_Bandwidth.nrd0(continuousPart) // todo: This does some recalculating from the last step. diff --git a/packages/squiggle-lang/src/rescript/FR/FR_Sampleset.res b/packages/squiggle-lang/src/rescript/FR/FR_Sampleset.res index 9263bf29..9563e4a5 100644 --- a/packages/squiggle-lang/src/rescript/FR/FR_Sampleset.res +++ b/packages/squiggle-lang/src/rescript/FR/FR_Sampleset.res @@ -28,7 +28,7 @@ module Internal = { let fromFn = (aLambdaValue, environment: Reducer_T.environment, reducer: Reducer_T.reducerFn) => { let sampleCount = environment.sampleCount let fn = r => doLambdaCall(aLambdaValue, [IEvNumber(r)], environment, reducer) - Belt_Array.makeBy(sampleCount, r => fn(r->Js.Int.toFloat))->E.A.R.firstErrorOrOpen + SampleSetDist.makeBy(sampleCount, r => fn(r->Js.Int.toFloat)) } let map1 = (sampleSetDist: t, aLambdaValue, environment: Reducer_T.environment, reducer) => { @@ -116,7 +116,9 @@ let libaryBase = [ ~run=(inputs, _, _) => { let sampleSet = inputs->Prepare.ToTypedArray.numbers - |> E.R2.bind(r => SampleSetDist.make(r)->E.R2.errMap(_ => "AM I HERE? WHYERE AMI??")) + |> E.R2.bind(r => + SampleSetDist.makeFromJsArray(r)->E.R2.errMap(_ => "AM I HERE? 
WHYERE AMI??") + ) sampleSet ->E.R2.fmap(Wrappers.sampleSet) ->E.R2.fmap(Wrappers.evDistribution) @@ -140,7 +142,7 @@ let libaryBase = [ ~run=(inputs, _, _) => switch inputs { | [IEvDistribution(SampleSet(dist))] => - dist->E.A2.fmap(Wrappers.evNumber)->Wrappers.evArray->Ok + dist->SampleSetDist.toJsArray->E.A2.fmap(Wrappers.evNumber)->Wrappers.evArray->Ok | _ => Error(impossibleError) }, (), @@ -163,7 +165,7 @@ let libaryBase = [ | [IEvLambda(lambda)] => switch Internal.fromFn(lambda, environment, reducer) { | Ok(r) => Ok(r->Wrappers.sampleSet->Wrappers.evDistribution) - | Error(e) => e->Reducer_ErrorValue.REOperationError->Error + | Error(e) => e->SampleSetError->Reducer_ErrorValue.REDistributionError->Error } | _ => Error(impossibleError) }, diff --git a/packages/squiggle-lang/src/rescript/MagicNumbers.res b/packages/squiggle-lang/src/rescript/MagicNumbers.res index b859421c..dd363c12 100644 --- a/packages/squiggle-lang/src/rescript/MagicNumbers.res +++ b/packages/squiggle-lang/src/rescript/MagicNumbers.res @@ -35,7 +35,7 @@ module ToPointSet = { it would strike a reasonable trade-off, but I’m really unsure what’s best right now. 
*/ - let minDiscreteToKeep = samples => max(20, E.A.length(samples) / 50) + let minDiscreteToKeep = samples => max(20, E.FloatArray.length(samples) / 50) } module SampleSetBandwidth = { diff --git a/packages/squiggle-lang/src/rescript/Utility/E.res b/packages/squiggle-lang/src/rescript/Utility/E.res index fb0b553f..588e0812 100644 --- a/packages/squiggle-lang/src/rescript/Utility/E.res +++ b/packages/squiggle-lang/src/rescript/Utility/E.res @@ -11,6 +11,7 @@ module B = E_B module Dict = E_Dict module F = E_F module Float = E_Float +module FloatArray = E_FloatArray module FloatFloatMap = E_FloatFloatMap module I = E_I module Int = E_Int diff --git a/packages/squiggle-lang/src/rescript/Utility/E/E_A.res b/packages/squiggle-lang/src/rescript/Utility/E/E_A.res index 6ea01f45..cb33aabd 100644 --- a/packages/squiggle-lang/src/rescript/Utility/E/E_A.res +++ b/packages/squiggle-lang/src/rescript/Utility/E/E_A.res @@ -303,55 +303,6 @@ module Floats = { let makeIncrementalDown = (a, b) => Array.make(a - b + 1, a) |> Array.mapi((i, c) => c - i) |> Belt.Array.map(_, float_of_int) - - /* - This function goes through a sorted array and divides it into two different clusters: - continuous samples and discrete samples. The discrete samples are stored in a mutable map. - Samples are thought to be discrete if they have at least `minDiscreteWight` duplicates. - - If the min discrete weight is 4, that would mean that at least four elements needed from a specific - value for that to be kept as discrete. This is important because in some cases, we can expect that - some common elements will be generated by regular operations. The final continuous array will be sorted. - - This function is performance-critical, don't change it significantly without benchmarking - SampleSet->PointSet conversion performance. 
- */ - let splitContinuousAndDiscreteForMinWeight = ( - sortedArray: array, - ~minDiscreteWeight: int, - ) => { - let continuous: array = [] - let discrete = FloatFloatMap.empty() - - let addData = (count: int, value: float): unit => { - if count >= minDiscreteWeight { - FloatFloatMap.add(value, count->Belt.Int.toFloat, discrete) - } else { - for _ in 1 to count { - continuous->Js.Array2.push(value)->ignore - } - } - } - - let (finalCount, finalValue) = sortedArray->Belt.Array.reduce( - // initial prev value doesn't matter; if it collides with the first element of the array, flush won't do anything - (0, 0.), - ((count, prev), element) => { - if element == prev { - (count + 1, prev) - } else { - // new value, process previous ones - addData(count, prev) - (1, element) - } - }, - ) - - // flush final values - addData(finalCount, finalValue) - - (continuous, discrete) - } } } module Sorted = Floats.Sorted diff --git a/packages/squiggle-lang/src/rescript/Utility/E/E_FloatArray.res b/packages/squiggle-lang/src/rescript/Utility/E/E_FloatArray.res new file mode 100644 index 00000000..eeca0955 --- /dev/null +++ b/packages/squiggle-lang/src/rescript/Utility/E/E_FloatArray.res @@ -0,0 +1,89 @@ +module FloatArray = Js.TypedArray2.Float64Array + +type t = FloatArray.t +let make = FloatArray.make +let fromLength = FloatArray.fromLength +let length = FloatArray.length +let reduce = FloatArray.reduce +let filter = FloatArray.filter +let slice = FloatArray.slice +let sort = (t: t) => t->FloatArray.copy->FloatArray.sortInPlace +let toArray: t => array = %raw(`a => Array.from(a)`) +let map = FloatArray.map + +let unsafe_get = (t: t, i: int) => t->FloatArray.unsafe_get(i) +let get = (t: t, i: int) => i < 0 || i >= t->length ? 
None : Some(t->FloatArray.unsafe_get(i)) +let set = (t: t, i: int) => t->FloatArray.unsafe_set(i) + +let mean = t => t->toArray->Jstat.mean +let geomean = t => t->toArray->Jstat.geomean +let mode = t => t->toArray->Jstat.mode +let variance = t => t->toArray->Jstat.variance +let stdev = t => t->toArray->Jstat.stdev +let sum = t => t->toArray->Jstat.sum +let product = Jstat.product +let random = Js.Math.random_int +let min = t => t->toArray->Js.Math.minMany_float +let max = t => t->toArray->Js.Math.maxMany_float +let percentile = (t, b) => Jstat.percentile(t->toArray, b, false) + +module Sorted = { + let min = (t: t) => t->length > 0 ? Some(FloatArray.unsafe_get(t, 0)) : None + let max = (t: t) => t->length > 0 ? Some(FloatArray.unsafe_get(t, t->length - 1)) : None + let range = (~min=min, ~max=max, a) => + switch (min(a), max(a)) { + | (Some(min), Some(max)) => Some(max -. min) + | _ => None + } + + /* + This function goes through a sorted array and divides it into two different clusters: + continuous samples and discrete samples. The discrete samples are stored in a mutable map. + Samples are thought to be discrete if they have at least `minDiscreteWeight` duplicates. + + If the min discrete weight is 4, that would mean that at least four elements are needed from a specific + value for that to be kept as discrete. This is important because in some cases, we can expect that + some common elements will be generated by regular operations. The final continuous array will be sorted. + + This function is performance-critical, don't change it significantly without benchmarking + SampleSet->PointSet conversion performance. 
+ */ + let splitContinuousAndDiscreteForMinWeight = (sortedArray: t, ~minDiscreteWeight: int): ( + t, + E_FloatFloatMap.t, + ) => { + // TODO - preallocate as typed array, then trim + let continuous: array = [] + let discrete = E_FloatFloatMap.empty() + + let addData = (count: int, value: float): unit => { + if count >= minDiscreteWeight { + E_FloatFloatMap.add(value, count->Belt.Int.toFloat, discrete) + } else { + for _ in 1 to count { + continuous->Js.Array2.push(value)->ignore + } + } + } + + let (finalCount, finalValue) = reduce( + sortedArray, + // initial prev value doesn't matter; if it collides with the first element of the array, flush won't do anything + (. (count, prev), element) => { + if element == prev { + (count + 1, prev) + } else { + // new value, process previous ones + addData(count, prev) + (1, element) + } + }, + (0, 0.), + ) + + // flush final values + addData(finalCount, finalValue) + + (continuous->make, discrete) + } +} diff --git a/packages/squiggle-lang/src/rescript/Utility/Operation.res b/packages/squiggle-lang/src/rescript/Utility/Operation.res index 8e22169c..f5eff8a2 100644 --- a/packages/squiggle-lang/src/rescript/Utility/Operation.res +++ b/packages/squiggle-lang/src/rescript/Utility/Operation.res @@ -78,6 +78,8 @@ module Error = { | NotYetImplemented => "This pathway is not yet implemented" | Other(t) => t } + + exception OperationException(t) } let power = (a: float, b: float): result => diff --git a/packages/squiggle-lang/src/rescript/Utility/Stdlib.res b/packages/squiggle-lang/src/rescript/Utility/Stdlib.res index 249b93d7..69da694f 100644 --- a/packages/squiggle-lang/src/rescript/Utility/Stdlib.res +++ b/packages/squiggle-lang/src/rescript/Utility/Stdlib.res @@ -51,3 +51,11 @@ module Random = { module Math = { @module external factorial: float => float = "@stdlib/math/base/special/factorial" } + +module Base = { + @module + external variance: (int, int, Js.TypedArray2.Float64Array.t, int) => float = + 
"@stdlib/stats/base/variance" + let variance = (t: Js.TypedArray2.Float64Array.t) => + variance(t->Js.TypedArray2.Float64Array.length, 0, t, 1) +}