store SampleSet as typed array

This commit is contained in:
Vyacheslav Matyukhin 2022-10-06 00:02:10 +04:00
parent d60792aa93
commit 4bd961a808
No known key found for this signature in database
GPG Key ID: 3D2A774C5489F96C
17 changed files with 286 additions and 142 deletions

View File

@ -3,11 +3,11 @@ open Expect
describe("Bandwidth", () => { describe("Bandwidth", () => {
test("nrd0()", () => { test("nrd0()", () => {
let data = [1., 4., 3., 2.] let data = [1., 4., 3., 2.]->E.FloatArray.make
expect(SampleSetDist_Bandwidth.nrd0(data))->toEqual(0.7625801874014622) expect(SampleSetDist_Bandwidth.nrd0(data))->toEqual(0.7625801874014622)
}) })
test("nrd()", () => { test("nrd()", () => {
let data = [1., 4., 3., 2.] let data = [1., 4., 3., 2.]->E.FloatArray.make
expect(SampleSetDist_Bandwidth.nrd(data))->toEqual(0.8981499984950554) expect(SampleSetDist_Bandwidth.nrd(data))->toEqual(0.8981499984950554)
}) })
}) })

View File

@ -2,9 +2,10 @@ open Jest
open TestHelpers open TestHelpers
let prepareInputs = (ar, minWeight) => let prepareInputs = (ar, minWeight) =>
E.A.Floats.Sorted.splitContinuousAndDiscreteForMinWeight(ar, ~minDiscreteWeight=minWeight) |> ( E.FloatArray.Sorted.splitContinuousAndDiscreteForMinWeight(
((c, disc)) => (c, disc |> E.FloatFloatMap.toArray) ar->E.FloatArray.make,
) ~minDiscreteWeight=minWeight,
) |> (((c, disc)) => (c->E.FloatArray.toArray, disc->E.FloatFloatMap.toArray))
describe("Continuous and discrete splits", () => { describe("Continuous and discrete splits", () => {
makeTest( makeTest(
@ -37,18 +38,10 @@ describe("Continuous and discrete splits", () => {
E.A.concatMany([sorted, sorted, sorted, sorted]) |> Belt.SortArray.stableSortBy(_, compare) E.A.concatMany([sorted, sorted, sorted, sorted]) |> Belt.SortArray.stableSortBy(_, compare)
} }
let (_, discrete1) = E.A.Floats.Sorted.splitContinuousAndDiscreteForMinWeight( let (_, toArr1) = prepareInputs(makeDuplicatedArray(10), 2)
makeDuplicatedArray(10), makeTest("splitMedium at count=10", toArr1->Belt.Array.length, 10)
~minDiscreteWeight=2,
)
let toArr1 = discrete1 |> E.FloatFloatMap.toArray
makeTest("splitMedium at count=10", toArr1 |> Belt.Array.length, 10)
let (_c, discrete2) = E.A.Floats.Sorted.splitContinuousAndDiscreteForMinWeight( let (_, toArr2) = prepareInputs(makeDuplicatedArray(500), 2)
makeDuplicatedArray(500), makeTest("splitMedium at count=500", toArr2->Belt.Array.length, 500)
~minDiscreteWeight=2,
)
let toArr2 = discrete2 |> E.FloatFloatMap.toArray
makeTest("splitMedium at count=500", toArr2 |> Belt.Array.length, 500)
// makeTest("foo", [] |> Belt.Array.length, 500) // makeTest("foo", [] |> Belt.Array.length, 500)
}) })

View File

@ -90,9 +90,9 @@ export class SqPointSetDistribution extends SqAbstractDistribution {
export class SqSampleSetDistribution extends SqAbstractDistribution { export class SqSampleSetDistribution extends SqAbstractDistribution {
tag = Tag.SampleSet as const; tag = Tag.SampleSet as const;
value(): number[] { // value(): number[] {
return this.valueMethod(RSDistribution.getSampleSet); // return this.valueMethod(RSDistribution.getSampleSet);
} // }
} }
export class SqSymbolicDistribution extends SqAbstractDistribution { export class SqSymbolicDistribution extends SqAbstractDistribution {

View File

@ -221,7 +221,7 @@ let rec run = (~env: env, functionCallInfo: functionCallInfo): outputType => {
->OutputLocal.fromResult ->OutputLocal.fromResult
| FromSamples(xs) => | FromSamples(xs) =>
xs xs
->SampleSetDist.make ->SampleSetDist.makeFromJsArray
->E.R2.errMap(x => DistributionTypes.SampleSetError(x)) ->E.R2.errMap(x => DistributionTypes.SampleSetError(x))
->E.R2.fmap(x => x->DistributionTypes.SampleSet->Dist) ->E.R2.fmap(x => x->DistributionTypes.SampleSet->Dist)
->OutputLocal.fromResult ->OutputLocal.fromResult

View File

@ -98,7 +98,7 @@ module Constructors: {
@genType @genType
let toSampleSet: (~env: GenericDist.env, genericDist, int) => result<genericDist, error> let toSampleSet: (~env: GenericDist.env, genericDist, int) => result<genericDist, error>
@genType @genType
let fromSamples: (~env: GenericDist.env, SampleSetDist.t) => result<genericDist, error> let fromSamples: (~env: GenericDist.env, array<float>) => result<genericDist, error>
@genType @genType
let truncate: ( let truncate: (
~env: GenericDist.env, ~env: GenericDist.env,

View File

@ -38,6 +38,7 @@ module Error = {
| SampleSetError(TooFewSamples) => "Too Few Samples" | SampleSetError(TooFewSamples) => "Too Few Samples"
| SampleSetError(NonNumericInput(err)) => `Found a non-number in input: ${err}` | SampleSetError(NonNumericInput(err)) => `Found a non-number in input: ${err}`
| SampleSetError(OperationError(err)) => Operation.Error.toString(err) | SampleSetError(OperationError(err)) => Operation.Error.toString(err)
| SampleSetError(UnequalSizes) => "Expected sample sets of equal size"
| OperationError(err) => Operation.Error.toString(err) | OperationError(err) => Operation.Error.toString(err)
| PointSetConversionError(err) => SampleSetDist.pointsetConversionErrorToString(err) | PointSetConversionError(err) => SampleSetDist.pointsetConversionErrorToString(err)
| SparklineError(err) => PointSetTypes.sparklineErrorToString(err) | SparklineError(err) => PointSetTypes.sparklineErrorToString(err)

View File

@ -40,7 +40,9 @@ let sampleN = (t: t, n) =>
let sample = (t: t) => sampleN(t, 1)->E.A.first |> E.O.toExn("Should not have happened") let sample = (t: t) => sampleN(t, 1)->E.A.first |> E.O.toExn("Should not have happened")
let toSampleSetDist = (t: t, n) => let toSampleSetDist = (t: t, n) =>
SampleSetDist.make(sampleN(t, n))->E.R2.errMap(DistributionTypes.Error.sampleErrorToDistErr) SampleSetDist.makeFromJsArray(sampleN(t, n))->E.R2.errMap(
DistributionTypes.Error.sampleErrorToDistErr,
)
let fromFloat = (f: float): t => Symbolic(SymbolicDist.Float.make(f)) let fromFloat = (f: float): t => Symbolic(SymbolicDist.Float.make(f))

View File

@ -2,14 +2,10 @@
module Error = { module Error = {
@genType @genType
type sampleSetError = type sampleSetError =
TooFewSamples | NonNumericInput(string) | OperationError(Operation.operationError) | TooFewSamples
| NonNumericInput(string)
let sampleSetErrorToString = (err: sampleSetError): string => | OperationError(Operation.operationError)
switch err { | UnequalSizes
| TooFewSamples => "Too few samples when constructing sample set"
| NonNumericInput(err) => `Found a non-number in input: ${err}`
| OperationError(err) => Operation.Error.toString(err)
}
@genType @genType
type pointsetConversionError = TooFewSamplesForConversionToPointSet type pointsetConversionError = TooFewSamplesForConversionToPointSet
@ -26,6 +22,7 @@ module Error = {
| TooFewSamples => "Too few samples when constructing sample set" | TooFewSamples => "Too few samples when constructing sample set"
| NonNumericInput(err) => `Found a non-number in input: ${err}` | NonNumericInput(err) => `Found a non-number in input: ${err}`
| OperationError(err) => Operation.Error.toString(err) | OperationError(err) => Operation.Error.toString(err)
| UnequalSizes => "Expected sample sets of equal size"
} }
} }
} }
@ -38,26 +35,29 @@ this constructor.
https://stackoverflow.com/questions/66909578/how-to-make-a-type-constructor-private-in-rescript-except-in-current-module https://stackoverflow.com/questions/66909578/how-to-make-a-type-constructor-private-in-rescript-except-in-current-module
*/ */
module T: { module T: {
//This really should be hidden (remove the array<float>). The reason it isn't is to act as an escape hatch in JS__Test.ts. @genType.opaque
//When we get a good functional library in TS, we could refactor that out. type t
@genType let makeFromTypedArray: E.FloatArray.t => result<t, sampleSetError>
type t = array<float> let makeFromJsArray: array<float> => result<t, sampleSetError>
let make: array<float> => result<t, sampleSetError> let toJsArray: t => array<float>
let get: t => array<float> let get: t => E.FloatArray.t
} = { } = {
type t = array<float> type t = E.FloatArray.t
let make = (a: array<float>) => let makeFromTypedArray = (a: E.FloatArray.t): result<t, sampleSetError> =>
if E.A.length(a) > 5 { if E.FloatArray.length(a) > 5 {
Ok(a) Ok(a)
} else { } else {
Error(TooFewSamples) Error(TooFewSamples)
} }
let get = (a: t) => a let makeFromJsArray = (a: array<float>): result<t, sampleSetError> =>
E.FloatArray.make(a)->makeFromTypedArray
let toJsArray = (t: t) => t->E.FloatArray.toArray
let get = (t: t) => t
} }
include T include T
let length = (t: t) => get(t)->E.A.length let length = (t: T.t) => T.get(t)->E.FloatArray.length
/* /*
TODO: Refactor to get a more precise estimate. Also, this code is just fairly messy, could use TODO: Refactor to get a more precise estimate. Also, this code is just fairly messy, could use
@ -68,15 +68,15 @@ let toPointSetDist = (~samples: t, ~samplingInputs: SamplingInputs.samplingInput
pointsetConversionError, pointsetConversionError,
> => > =>
SampleSetDist_ToPointSet.toPointSetDist( SampleSetDist_ToPointSet.toPointSetDist(
~samples=get(samples), ~samples=T.get(samples),
~samplingInputs, ~samplingInputs,
(), (),
).pointSetDist->E.O2.toResult(TooFewSamplesForConversionToPointSet) ).pointSetDist->E.O2.toResult(TooFewSamplesForConversionToPointSet)
//Randomly get one sample from the distribution //Randomly get one sample from the distribution
let sample = (t: t): float => { let sample = (t: t): float => {
let i = E.Int.random(~min=0, ~max=E.A.length(get(t)) - 1) let i = E.Int.random(~min=0, ~max=E.FloatArray.length(get(t)) - 1)
E.A.unsafe_get(get(t), i) E.FloatArray.unsafe_get(get(t), i)
} }
/* /*
@ -87,52 +87,144 @@ The former helps in cases where multiple distributions are correlated.
However, if n > length(t), then there's no clear right answer, so we just randomly However, if n > length(t), then there's no clear right answer, so we just randomly
sample everything. sample everything.
*/ */
let sampleN = (t: t, n) => { let sampleN = (t: t, n): array<float> => {
if n <= E.A.length(get(t)) { if n <= length(t) {
E.A.slice(get(t), ~offset=0, ~len=n) E.FloatArray.slice(~start=0, ~end_=n, get(t))->E.FloatArray.toArray
} else { } else {
Belt.Array.makeBy(n, _ => sample(t)) Belt.Array.makeBy(n, _ => sample(t))
} }
} }
let _fromSampleResultArray = (samples: array<result<float, QuriSquiggleLang.Operation.Error.t>>) =>
E.A.R.firstErrorOrOpen(samples)->E.R2.errMap(Error.fromOperationError) |> E.R2.bind(make)
let samplesMap = (~fn: float => result<float, Operation.Error.t>, t: t): result< let samplesMap = (~fn: float => result<float, Operation.Error.t>, t: t): result<
t, t,
sampleSetError, sampleSetError,
> => T.get(t)->E.A2.fmap(fn)->_fromSampleResultArray > => {
try {
T.get(t)
->E.FloatArray.map((. v) => {
switch fn(v) {
| Ok(res) => res
| Error(err) => err->Operation.Error.OperationException->raise
}
})
->T.makeFromTypedArray
} catch {
| Operation.Error.OperationException(err) => Error.fromOperationError(err)->Error
}
}
//TODO: Figure out what to do if distributions are different lengths. ``zip`` is kind of inelegant for this.
let map2 = (~fn: (float, float) => result<float, Operation.Error.t>, ~t1: t, ~t2: t): result< let map2 = (~fn: (float, float) => result<float, Operation.Error.t>, ~t1: t, ~t2: t): result<
t, t,
sampleSetError, sampleSetError,
> => E.A.zip(get(t1), get(t2))->E.A2.fmap(E.Tuple2.toFnCall(fn))->_fromSampleResultArray > => {
let length1 = t1->length
let length2 = t2->length
if length1 == length2 {
try {
let res = E.FloatArray.fromLength(length1)
for i in 0 to length1 - 1 {
let v = switch fn(
get(t1)->E.FloatArray.unsafe_get(i),
get(t2)->E.FloatArray.unsafe_get(i),
) {
| Ok(fnResult) => fnResult
| Error(err) => err->Operation.Error.OperationException->raise
}
res->E.FloatArray.set(i, v)
}
res->T.makeFromTypedArray
} catch {
| Operation.Error.OperationException(err) => Error.fromOperationError(err)->Error
}
} else {
Error.UnequalSizes->Error
}
}
let map3 = ( let map3 = (
~fn: (float, float, float) => result<float, Operation.Error.t>, ~fn: (float, float, float) => result<float, Operation.Error.t>,
~t1: t, ~t1: t,
~t2: t, ~t2: t,
~t3: t, ~t3: t,
): result<t, sampleSetError> => ): result<t, sampleSetError> => {
E.A.zip3(get(t1), get(t2), get(t3))->E.A2.fmap(E.Tuple3.toFnCall(fn))->_fromSampleResultArray let length1 = t1->length
let length2 = t2->length
let length3 = t3->length
if length1 == length2 && length2 == length3 {
try {
let res = E.FloatArray.fromLength(length1)
for i in 0 to length1 - 1 {
let v = switch fn(
get(t1)->E.FloatArray.unsafe_get(i),
get(t2)->E.FloatArray.unsafe_get(i),
get(t3)->E.FloatArray.unsafe_get(i),
) {
| Ok(fnResult) => fnResult
| Error(err) => err->Operation.Error.OperationException->raise
}
res->E.FloatArray.set(i, v)
}
res->T.makeFromTypedArray
} catch {
| Operation.Error.OperationException(err) => Error.fromOperationError(err)->Error
}
} else {
Error.UnequalSizes->Error
}
}
let mapN = (~fn: array<float> => result<float, Operation.Error.t>, ~t1: array<t>): result< let mapN = (~fn: array<float> => result<float, Operation.Error.t>, ~t1: array<t>): result<
t, t,
sampleSetError, sampleSetError,
> => E.A.transpose(E.A.fmap(get, t1))->E.A2.fmap(fn)->_fromSampleResultArray > => {
let lengths = t1->E.A2.fmap(t => t->length)
let l0 = lengths[0]
if lengths->E.A.all(l => l == l0, _) {
try {
let res = E.FloatArray.fromLength(l0)
for i in 0 to l0 - 1 {
let v = switch fn(t1->E.A2.fmap(t => get(t)->E.FloatArray.unsafe_get(i))) {
| Ok(fnResult) => fnResult
| Error(err) => err->Operation.Error.OperationException->raise
}
res->E.FloatArray.set(i, v)
}
res->T.makeFromTypedArray
} catch {
| Operation.Error.OperationException(err) => Error.fromOperationError(err)->Error
}
} else {
Error.UnequalSizes->Error
}
}
let mean = t => T.get(t)->E.A.Floats.mean let makeBy = (n: int, fn: int => result<float, Operation.Error.t>): result<t, sampleSetError> => {
let geomean = t => T.get(t)->E.A.Floats.geomean let res = E.FloatArray.fromLength(n)
let mode = t => T.get(t)->E.A.Floats.mode try {
let sum = t => T.get(t)->E.A.Floats.sum for i in 0 to n - 1 {
let min = t => T.get(t)->E.A.Floats.min let fnResult = fn(i)
let max = t => T.get(t)->E.A.Floats.max switch fnResult {
let stdev = t => T.get(t)->E.A.Floats.stdev | Ok(v) => res->E.FloatArray.set(i, v)
let variance = t => T.get(t)->E.A.Floats.variance | Error(err) => err->Operation.Error.OperationException->raise
let percentile = (t, f) => T.get(t)->E.A.Floats.percentile(f) }
}
res->T.makeFromTypedArray
} catch {
| Operation.Error.OperationException(err) => Error.fromOperationError(err)->Error
}
}
let mean = t => T.get(t)->E.FloatArray.mean
let geomean = t => T.get(t)->E.FloatArray.geomean
let mode = t => T.get(t)->E.FloatArray.mode
let sum = t => T.get(t)->E.FloatArray.sum
let min = t => T.get(t)->E.FloatArray.min
let max = t => T.get(t)->E.FloatArray.max
let stdev = t => T.get(t)->E.FloatArray.stdev
let variance = t => T.get(t)->E.FloatArray.variance
let percentile = (t, f) => T.get(t)->E.FloatArray.percentile(f)
let cdf = (t: t, f: float) => { let cdf = (t: t, f: float) => {
let countBelowF = t->E.A.reduce(0, (acc, x) => acc + (x <= f ? 1 : 0)) let countBelowF = T.get(t)->E.FloatArray.reduce((. acc, x) => acc + (x <= f ? 1 : 0), 0)
countBelowF->Js.Int.toFloat /. t->length->Js.Int.toFloat countBelowF->Js.Int.toFloat /. t->length->Js.Int.toFloat
} }
@ -149,14 +241,14 @@ let mixture = (values: array<(t, float)>, intendedLength: int) => {
discreteSamples discreteSamples
->Belt.Array.mapWithIndex((index, distIndexToChoose) => { ->Belt.Array.mapWithIndex((index, distIndexToChoose) => {
let chosenDist = E.A.get(dists, E.Float.toInt(distIndexToChoose)) let chosenDist = E.A.get(dists, E.Float.toInt(distIndexToChoose))
chosenDist->E.O.bind(E.A.get(_, index)) chosenDist->E.O.bind(E.FloatArray.get(_, index))
}) })
->E.A.O.openIfAllSome ->E.A.O.openIfAllSome
samples->E.O2.toExn("Mixture unreachable error")->T.make samples->E.O2.toExn("Mixture unreachable error")->T.makeFromJsArray
} }
let truncateLeft = (t, f) => T.get(t)->E.A2.filter(x => x >= f)->T.make let truncateLeft = (t, f) => T.get(t)->E.FloatArray.filter((. x) => x >= f)->T.makeFromTypedArray
let truncateRight = (t, f) => T.get(t)->E.A2.filter(x => x <= f)->T.make let truncateRight = (t, f) => T.get(t)->E.FloatArray.filter((. x) => x <= f)->T.makeFromTypedArray
let truncate = (t, ~leftCutoff: option<float>, ~rightCutoff: option<float>) => { let truncate = (t, ~leftCutoff: option<float>, ~rightCutoff: option<float>) => {
let withTruncatedLeft = t => leftCutoff |> E.O.dimap(left => truncateLeft(t, left), _ => Ok(t)) let withTruncatedLeft = t => leftCutoff |> E.O.dimap(left => truncateLeft(t, left), _ => Ok(t))

View File

@ -2,16 +2,19 @@
let {iqr_percentile, nrd0_lo_denominator, one, nrd0_coef, nrd_coef, nrd_fractionalPower} = module( let {iqr_percentile, nrd0_lo_denominator, one, nrd0_coef, nrd_coef, nrd_fractionalPower} = module(
MagicNumbers.SampleSetBandwidth MagicNumbers.SampleSetBandwidth
) )
let len = x => E.A.length(x) |> float_of_int let len = x => E.FloatArray.length(x)->float_of_int
let iqr = x => let iqr = x => {
Jstat.percentile(x, iqr_percentile, true) -. Jstat.percentile(x, 1.0 -. iqr_percentile, true) let xArr = x->E.FloatArray.toArray
Jstat.percentile(xArr, iqr_percentile, true) -.
Jstat.percentile(xArr, 1.0 -. iqr_percentile, true)
}
// Silverman, B. W. (1986) Density Estimation. London: Chapman and Hall. // Silverman, B. W. (1986) Density Estimation. London: Chapman and Hall.
let nrd0 = x => { let nrd0 = x => {
let hi = Js_math.sqrt(Jstat.variance(x)) let hi = Js_math.sqrt(Stdlib.Base.variance(x))
let lo = Js_math.minMany_float([hi, iqr(x) /. nrd0_lo_denominator]) let lo = Js_math.minMany_float([hi, iqr(x) /. nrd0_lo_denominator])
let e = Js_math.abs_float(x[1]) let e = Js_math.abs_float(x->E.FloatArray.unsafe_get(1))
let lo' = switch (lo, hi, e) { let lo' = switch (lo, hi, e) {
| (lo, _, _) if !Js.Float.isNaN(lo) => lo | (lo, _, _) if !Js.Float.isNaN(lo) => lo
| (_, hi, _) if !Js.Float.isNaN(hi) => hi | (_, hi, _) if !Js.Float.isNaN(hi) => hi
@ -25,6 +28,6 @@ let nrd0 = x => {
let nrd = x => { let nrd = x => {
let h = iqr(x) /. nrd0_lo_denominator let h = iqr(x) /. nrd0_lo_denominator
nrd_coef *. nrd_coef *.
Js.Math.min_float(Js.Math.sqrt(Jstat.variance(x)), h) *. Js.Math.min_float(Js.Math.sqrt(Stdlib.Base.variance(x)), h) *.
Js.Math.pow_float(~base=len(x), ~exp=nrd_fractionalPower) Js.Math.pow_float(~base=len(x), ~exp=nrd_fractionalPower)
} }

View File

@ -37,40 +37,40 @@ module Internals = {
} }
module T = { module T = {
type t = array<float> type t = E.FloatArray.t
let xWidthToUnitWidth = (samples, outputXYPoints, xWidth) => { let xWidthToUnitWidth = (samples: t, outputXYPoints, xWidth) => {
let xyPointRange = E.A.Sorted.range(samples)->E.O2.default(0.0) let xyPointRange = E.FloatArray.Sorted.range(samples)->E.O2.default(0.0)
let xyPointWidth = xyPointRange /. float_of_int(outputXYPoints) let xyPointWidth = xyPointRange /. float_of_int(outputXYPoints)
xWidth /. xyPointWidth xWidth /. xyPointWidth
} }
let formatUnitWidth = w => Jstat.max([w, 1.0])->int_of_float let formatUnitWidth = w => Jstat.max([w, 1.0])->int_of_float
let suggestedUnitWidth = (samples, outputXYPoints) => { let suggestedUnitWidth = (samples: t, outputXYPoints) => {
let suggestedXWidth = SampleSetDist_Bandwidth.nrd0(samples) let suggestedXWidth = SampleSetDist_Bandwidth.nrd0(samples)
xWidthToUnitWidth(samples, outputXYPoints, suggestedXWidth) xWidthToUnitWidth(samples, outputXYPoints, suggestedXWidth)
} }
let kde = (~samples, ~outputXYPoints, width) => let kde = (~samples: t, ~outputXYPoints, width) =>
KDE.normalSampling(samples, outputXYPoints, width) KDE.normalSampling(samples->E.FloatArray.toArray, outputXYPoints, width)
} }
} }
let toPointSetDist = ( let toPointSetDist = (
~samples: Internals.T.t, ~samples: E.FloatArray.t,
~samplingInputs: SamplingInputs.samplingInputs, ~samplingInputs: SamplingInputs.samplingInputs,
(), (),
): Internals.Types.outputs => { ): Internals.Types.outputs => {
let samples = samples->E.A.Floats.sort let samples = samples->E.FloatArray.sort
let minDiscreteToKeep = MagicNumbers.ToPointSet.minDiscreteToKeep(samples) let minDiscreteToKeep = MagicNumbers.ToPointSet.minDiscreteToKeep(samples)
let (continuousPart, discretePart) = E.A.Floats.Sorted.splitContinuousAndDiscreteForMinWeight( let (continuousPart, discretePart) = E.FloatArray.Sorted.splitContinuousAndDiscreteForMinWeight(
samples, samples,
~minDiscreteWeight=minDiscreteToKeep, ~minDiscreteWeight=minDiscreteToKeep,
) )
let length = samples->E.A.length->float_of_int let length = samples->E.FloatArray.length->float_of_int
let discrete: PointSetTypes.discreteShape = let discrete: PointSetTypes.discreteShape =
discretePart discretePart
->E.FloatFloatMap.fmap(r => r /. length, _) ->E.FloatFloatMap.fmap(r => r /. length, _)
@ -79,7 +79,7 @@ let toPointSetDist = (
->Discrete.make ->Discrete.make
let pdf = let pdf =
continuousPart->E.A.length > 5 continuousPart->E.FloatArray.length > 5
? { ? {
let _suggestedXWidth = SampleSetDist_Bandwidth.nrd0(continuousPart) let _suggestedXWidth = SampleSetDist_Bandwidth.nrd0(continuousPart)
// todo: This does some recalculating from the last step. // todo: This does some recalculating from the last step.

View File

@ -28,7 +28,7 @@ module Internal = {
let fromFn = (aLambdaValue, environment: Reducer_T.environment, reducer: Reducer_T.reducerFn) => { let fromFn = (aLambdaValue, environment: Reducer_T.environment, reducer: Reducer_T.reducerFn) => {
let sampleCount = environment.sampleCount let sampleCount = environment.sampleCount
let fn = r => doLambdaCall(aLambdaValue, [IEvNumber(r)], environment, reducer) let fn = r => doLambdaCall(aLambdaValue, [IEvNumber(r)], environment, reducer)
Belt_Array.makeBy(sampleCount, r => fn(r->Js.Int.toFloat))->E.A.R.firstErrorOrOpen SampleSetDist.makeBy(sampleCount, r => fn(r->Js.Int.toFloat))
} }
let map1 = (sampleSetDist: t, aLambdaValue, environment: Reducer_T.environment, reducer) => { let map1 = (sampleSetDist: t, aLambdaValue, environment: Reducer_T.environment, reducer) => {
@ -116,7 +116,9 @@ let libaryBase = [
~run=(inputs, _, _) => { ~run=(inputs, _, _) => {
let sampleSet = let sampleSet =
inputs->Prepare.ToTypedArray.numbers inputs->Prepare.ToTypedArray.numbers
|> E.R2.bind(r => SampleSetDist.make(r)->E.R2.errMap(_ => "AM I HERE? WHYERE AMI??")) |> E.R2.bind(r =>
SampleSetDist.makeFromJsArray(r)->E.R2.errMap(_ => "AM I HERE? WHYERE AMI??")
)
sampleSet sampleSet
->E.R2.fmap(Wrappers.sampleSet) ->E.R2.fmap(Wrappers.sampleSet)
->E.R2.fmap(Wrappers.evDistribution) ->E.R2.fmap(Wrappers.evDistribution)
@ -140,7 +142,7 @@ let libaryBase = [
~run=(inputs, _, _) => ~run=(inputs, _, _) =>
switch inputs { switch inputs {
| [IEvDistribution(SampleSet(dist))] => | [IEvDistribution(SampleSet(dist))] =>
dist->E.A2.fmap(Wrappers.evNumber)->Wrappers.evArray->Ok dist->SampleSetDist.toJsArray->E.A2.fmap(Wrappers.evNumber)->Wrappers.evArray->Ok
| _ => Error(impossibleError) | _ => Error(impossibleError)
}, },
(), (),
@ -163,7 +165,7 @@ let libaryBase = [
| [IEvLambda(lambda)] => | [IEvLambda(lambda)] =>
switch Internal.fromFn(lambda, environment, reducer) { switch Internal.fromFn(lambda, environment, reducer) {
| Ok(r) => Ok(r->Wrappers.sampleSet->Wrappers.evDistribution) | Ok(r) => Ok(r->Wrappers.sampleSet->Wrappers.evDistribution)
| Error(e) => e->Reducer_ErrorValue.REOperationError->Error | Error(e) => e->SampleSetError->Reducer_ErrorValue.REDistributionError->Error
} }
| _ => Error(impossibleError) | _ => Error(impossibleError)
}, },

View File

@ -35,7 +35,7 @@ module ToPointSet = {
it would strike a reasonable trade-off, but Im really unsure whats it would strike a reasonable trade-off, but Im really unsure whats
best right now. best right now.
*/ */
let minDiscreteToKeep = samples => max(20, E.A.length(samples) / 50) let minDiscreteToKeep = samples => max(20, E.FloatArray.length(samples) / 50)
} }
module SampleSetBandwidth = { module SampleSetBandwidth = {

View File

@ -11,6 +11,7 @@ module B = E_B
module Dict = E_Dict module Dict = E_Dict
module F = E_F module F = E_F
module Float = E_Float module Float = E_Float
module FloatArray = E_FloatArray
module FloatFloatMap = E_FloatFloatMap module FloatFloatMap = E_FloatFloatMap
module I = E_I module I = E_I
module Int = E_Int module Int = E_Int

View File

@ -303,55 +303,6 @@ module Floats = {
let makeIncrementalDown = (a, b) => let makeIncrementalDown = (a, b) =>
Array.make(a - b + 1, a) |> Array.mapi((i, c) => c - i) |> Belt.Array.map(_, float_of_int) Array.make(a - b + 1, a) |> Array.mapi((i, c) => c - i) |> Belt.Array.map(_, float_of_int)
/*
This function goes through a sorted array and divides it into two different clusters:
continuous samples and discrete samples. The discrete samples are stored in a mutable map.
Samples are thought to be discrete if they have at least `minDiscreteWight` duplicates.
If the min discrete weight is 4, that would mean that at least four elements needed from a specific
value for that to be kept as discrete. This is important because in some cases, we can expect that
some common elements will be generated by regular operations. The final continuous array will be sorted.
This function is performance-critical, don't change it significantly without benchmarking
SampleSet->PointSet conversion performance.
*/
let splitContinuousAndDiscreteForMinWeight = (
sortedArray: array<float>,
~minDiscreteWeight: int,
) => {
let continuous: array<float> = []
let discrete = FloatFloatMap.empty()
let addData = (count: int, value: float): unit => {
if count >= minDiscreteWeight {
FloatFloatMap.add(value, count->Belt.Int.toFloat, discrete)
} else {
for _ in 1 to count {
continuous->Js.Array2.push(value)->ignore
}
}
}
let (finalCount, finalValue) = sortedArray->Belt.Array.reduce(
// initial prev value doesn't matter; if it collides with the first element of the array, flush won't do anything
(0, 0.),
((count, prev), element) => {
if element == prev {
(count + 1, prev)
} else {
// new value, process previous ones
addData(count, prev)
(1, element)
}
},
)
// flush final values
addData(finalCount, finalValue)
(continuous, discrete)
}
} }
} }
module Sorted = Floats.Sorted module Sorted = Floats.Sorted

View File

@ -0,0 +1,89 @@
// Thin wrapper around the JS Float64Array typed array; most bindings below are
// re-exported unchanged from Js.TypedArray2.Float64Array.
module FloatArray = Js.TypedArray2.Float64Array
// The wrapped typed-array type.
type t = FloatArray.t
// Re-export of Float64Array.make (used elsewhere in this change as `[1., 2.]->E.FloatArray.make`).
let make = FloatArray.make
// Re-export of Float64Array.fromLength (callers in this change fill the result with `set`).
let fromLength = FloatArray.fromLength
// Number of elements.
let length = FloatArray.length
// Re-export of Float64Array.reduce; the callback is uncurried: `(. acc, x) => ...`.
let reduce = FloatArray.reduce
// Re-export of Float64Array.filter; the predicate is uncurried: `(. x) => ...`.
let filter = FloatArray.filter
// Re-export of Float64Array.slice (called with ~start and ~end_ labels in this change).
let slice = FloatArray.slice
// Sorts a copy of `t` and returns it; the `copy` step means the argument itself is not mutated.
// NOTE(review): sortInPlace presumably maps to TypedArray.prototype.sort with no comparator — confirm.
let sort = (t: t) => t->FloatArray.copy->FloatArray.sortInPlace
// Converts to a plain JS array through raw `Array.from`, which allocates a new array on every call.
let toArray: t => array<float> = %raw(`a => Array.from(a)`)
// Re-export of Float64Array.map; the callback is uncurried: `(. x) => ...`.
let map = FloatArray.map
// Unchecked element read: no bounds check is performed here, callers must pass a valid index.
let unsafe_get = (t: t, i: int) => t->FloatArray.unsafe_get(i)
// Bounds-checked element read.
// Returns Some(value) when 0 <= i < length(t) and None for any other index.
// The lower bound is checked as well as the upper one: a negative index would otherwise
// fall through to the unchecked read.
let get = (t: t, i: int) =>
  if i < 0 || i >= t->length {
    None
  } else {
    Some(t->FloatArray.unsafe_get(i))
  }
// Unchecked element write: stores `v` at index `i` of `t`. No bounds check is performed.
// The value parameter is spelled out instead of relying on partial application of
// unsafe_set, so a fully-applied call `set(t, i, v)` is a direct call and does not
// build an intermediate closure on every iteration of the per-sample loops.
let set = (t: t, i: int, v: float) => t->FloatArray.unsafe_set(i, v)
// Summary statistics delegated to Jstat. Each helper converts the typed array to a plain
// array with toArray first and hands that array to the matching Jstat function.
let mean = (samples: t) => Jstat.mean(toArray(samples))
let geomean = (samples: t) => Jstat.geomean(toArray(samples))
let mode = (samples: t) => Jstat.mode(toArray(samples))
let variance = (samples: t) => Jstat.variance(toArray(samples))
let stdev = (samples: t) => Jstat.stdev(toArray(samples))
let sum = (samples: t) => Jstat.sum(toArray(samples))
// NOTE(review): unlike the helpers above, `product` is the raw Jstat binding with no toArray
// step, so it presumably takes a plain array rather than `t` — confirm this is intended.
let product = Jstat.product
// Direct re-export of Js.Math.random_int.
let random = Js.Math.random_int
// Smallest sample; `infinity` for an empty array (the same value Math.min yields with no arguments).
// Implemented as a fold over the typed array instead of copying it to a plain array and
// spreading it into a variadic Math.min call: the variadic form passes every sample as a
// separate call argument and throws a RangeError once the sample count exceeds the JS
// engine's argument limit. NaN samples still propagate, because Math.min returns NaN when
// either operand is NaN.
let min = (t: t) => t->FloatArray.reduce((. acc, x) => Js.Math.min_float(acc, x), infinity)
// Largest sample; `neg_infinity` for an empty array. Same reasoning as `min` above.
let max = (t: t) => t->FloatArray.reduce((. acc, x) => Js.Math.max_float(acc, x), neg_infinity)
// Percentile of the samples via Jstat.percentile, called on a plain-array copy of `t`
// with `b` as the second argument and `false` as the third.
let percentile = (t: t, b) => {
  let values = toArray(t)
  Jstat.percentile(values, b, false)
}
// Helpers for typed arrays that the caller has already sorted. `min` and `max` simply read the
// first and last element, so they are only meaningful for an ascending-sorted input.
module Sorted = {
// First element, or None when the array is empty.
let min = (t: t) => t->length > 0 ? Some(FloatArray.unsafe_get(t, 0)) : None
// Last element, or None when the array is empty.
let max = (t: t) => t->length > 0 ? Some(FloatArray.unsafe_get(t, t->length - 1)) : None
// Difference `max(a) -. min(a)`, or None when either lookup returns None.
// ~min and ~max are optional and default to the `min` / `max` defined just above.
let range = (~min=min, ~max=max, a) =>
switch (min(a), max(a)) {
| (Some(min), Some(max)) => Some(max -. min)
| _ => None
}
/*
This function goes through a sorted array and divides it into two different clusters:
continuous samples and discrete samples. The discrete samples are stored in a mutable map.
Samples are thought to be discrete if they have at least `minDiscreteWeight` duplicates.
If the min discrete weight is 4, that would mean that at least four elements needed from a specific
value for that to be kept as discrete. This is important because in some cases, we can expect that
some common elements will be generated by regular operations. The final continuous array will be sorted.
This function is performance-critical, don't change it significantly without benchmarking
SampleSet->PointSet conversion performance.

Returns a pair: the continuous samples as a new typed array, and the map from each discrete
value to its number of occurrences (stored as a float).
*/
let splitContinuousAndDiscreteForMinWeight = (sortedArray: t, ~minDiscreteWeight: int): (
t,
E_FloatFloatMap.t,
) => {
// TODO - preallocate as typed array, then trim
// Continuous samples are accumulated in a growable plain array and converted at the end.
let continuous: array<float> = []
let discrete = E_FloatFloatMap.empty()
// Flushes one run of `count` equal values: runs of at least minDiscreteWeight go to the
// discrete map (value -> count), shorter runs are pushed back one by one as continuous samples.
let addData = (count: int, value: float): unit => {
if count >= minDiscreteWeight {
E_FloatFloatMap.add(value, count->Belt.Int.toFloat, discrete)
} else {
for _ in 1 to count {
continuous->Js.Array2.push(value)->ignore
}
}
}
// Single pass over the array; the accumulator is (length of the current run, value of the current run).
let (finalCount, finalValue) = reduce(
sortedArray,
// initial prev value doesn't matter; if it collides with the first element of the array, flush won't do anything
(. (count, prev), element) => {
if element == prev {
(count + 1, prev)
} else {
// new value, process previous ones
addData(count, prev)
(1, element)
}
},
// Seed run of length 0.
// NOTE(review): when this seed is flushed with count 0 and minDiscreteWeight <= 0, the
// `count >= minDiscreteWeight` test passes and 0. is added to the discrete map with weight 0 —
// confirm callers always pass a positive minDiscreteWeight.
(0, 0.),
)
// flush final values
addData(finalCount, finalValue)
(continuous->make, discrete)
}
}

View File

@ -78,6 +78,8 @@ module Error = {
| NotYetImplemented => "This pathway is not yet implemented" | NotYetImplemented => "This pathway is not yet implemented"
| Other(t) => t | Other(t) => t
} }
exception OperationException(t)
} }
let power = (a: float, b: float): result<float, Error.t> => let power = (a: float, b: float): result<float, Error.t> =>

View File

@ -51,3 +51,11 @@ module Random = {
module Math = { module Math = {
@module external factorial: float => float = "@stdlib/math/base/special/factorial" @module external factorial: float => float = "@stdlib/math/base/special/factorial"
} }
// Bindings to @stdlib/stats/base that operate directly on Float64Array values.
module Base = {
// Raw binding to the default export of "@stdlib/stats/base/variance".
// NOTE(review): the four arguments are presumably (N, correction, x, stride) as in the
// @stdlib documentation — confirm against the installed version.
@module
external variance: (int, int, Js.TypedArray2.Float64Array.t, int) => float =
"@stdlib/stats/base/variance"
// Convenience wrapper that shadows the raw binding above: calls it with the array's full
// length as the first argument, 0 as the second and 1 as the last.
// NOTE(review): a correction of 0 would make this the population variance — confirm.
let variance = (t: Js.TypedArray2.Float64Array.t) =>
variance(t->Js.TypedArray2.Float64Array.length, 0, t, 1)
}