store SampleSet as typed array

This commit is contained in:
Vyacheslav Matyukhin 2022-10-06 00:02:10 +04:00
parent d60792aa93
commit 4bd961a808
No known key found for this signature in database
GPG Key ID: 3D2A774C5489F96C
17 changed files with 286 additions and 142 deletions

View File

@ -3,11 +3,11 @@ open Expect
describe("Bandwidth", () => {
test("nrd0()", () => {
let data = [1., 4., 3., 2.]
let data = [1., 4., 3., 2.]->E.FloatArray.make
expect(SampleSetDist_Bandwidth.nrd0(data))->toEqual(0.7625801874014622)
})
test("nrd()", () => {
let data = [1., 4., 3., 2.]
let data = [1., 4., 3., 2.]->E.FloatArray.make
expect(SampleSetDist_Bandwidth.nrd(data))->toEqual(0.8981499984950554)
})
})

View File

@ -2,9 +2,10 @@ open Jest
open TestHelpers
let prepareInputs = (ar, minWeight) =>
E.A.Floats.Sorted.splitContinuousAndDiscreteForMinWeight(ar, ~minDiscreteWeight=minWeight) |> (
((c, disc)) => (c, disc |> E.FloatFloatMap.toArray)
)
E.FloatArray.Sorted.splitContinuousAndDiscreteForMinWeight(
ar->E.FloatArray.make,
~minDiscreteWeight=minWeight,
) |> (((c, disc)) => (c->E.FloatArray.toArray, disc->E.FloatFloatMap.toArray))
describe("Continuous and discrete splits", () => {
makeTest(
@ -37,18 +38,10 @@ describe("Continuous and discrete splits", () => {
E.A.concatMany([sorted, sorted, sorted, sorted]) |> Belt.SortArray.stableSortBy(_, compare)
}
let (_, discrete1) = E.A.Floats.Sorted.splitContinuousAndDiscreteForMinWeight(
makeDuplicatedArray(10),
~minDiscreteWeight=2,
)
let toArr1 = discrete1 |> E.FloatFloatMap.toArray
makeTest("splitMedium at count=10", toArr1 |> Belt.Array.length, 10)
let (_, toArr1) = prepareInputs(makeDuplicatedArray(10), 2)
makeTest("splitMedium at count=10", toArr1->Belt.Array.length, 10)
let (_c, discrete2) = E.A.Floats.Sorted.splitContinuousAndDiscreteForMinWeight(
makeDuplicatedArray(500),
~minDiscreteWeight=2,
)
let toArr2 = discrete2 |> E.FloatFloatMap.toArray
makeTest("splitMedium at count=500", toArr2 |> Belt.Array.length, 500)
let (_, toArr2) = prepareInputs(makeDuplicatedArray(500), 2)
makeTest("splitMedium at count=500", toArr2->Belt.Array.length, 500)
// makeTest("foo", [] |> Belt.Array.length, 500)
})

View File

@ -90,9 +90,9 @@ export class SqPointSetDistribution extends SqAbstractDistribution {
export class SqSampleSetDistribution extends SqAbstractDistribution {
tag = Tag.SampleSet as const;
value(): number[] {
return this.valueMethod(RSDistribution.getSampleSet);
}
// value(): number[] {
// return this.valueMethod(RSDistribution.getSampleSet);
// }
}
export class SqSymbolicDistribution extends SqAbstractDistribution {

View File

@ -221,7 +221,7 @@ let rec run = (~env: env, functionCallInfo: functionCallInfo): outputType => {
->OutputLocal.fromResult
| FromSamples(xs) =>
xs
->SampleSetDist.make
->SampleSetDist.makeFromJsArray
->E.R2.errMap(x => DistributionTypes.SampleSetError(x))
->E.R2.fmap(x => x->DistributionTypes.SampleSet->Dist)
->OutputLocal.fromResult

View File

@ -98,7 +98,7 @@ module Constructors: {
@genType
let toSampleSet: (~env: GenericDist.env, genericDist, int) => result<genericDist, error>
@genType
let fromSamples: (~env: GenericDist.env, SampleSetDist.t) => result<genericDist, error>
let fromSamples: (~env: GenericDist.env, array<float>) => result<genericDist, error>
@genType
let truncate: (
~env: GenericDist.env,

View File

@ -38,6 +38,7 @@ module Error = {
| SampleSetError(TooFewSamples) => "Too Few Samples"
| SampleSetError(NonNumericInput(err)) => `Found a non-number in input: ${err}`
| SampleSetError(OperationError(err)) => Operation.Error.toString(err)
| SampleSetError(UnequalSizes) => "Expected sample sets of equal size"
| OperationError(err) => Operation.Error.toString(err)
| PointSetConversionError(err) => SampleSetDist.pointsetConversionErrorToString(err)
| SparklineError(err) => PointSetTypes.sparklineErrorToString(err)

View File

@ -40,7 +40,9 @@ let sampleN = (t: t, n) =>
let sample = (t: t) => sampleN(t, 1)->E.A.first |> E.O.toExn("Should not have happened")
let toSampleSetDist = (t: t, n) =>
SampleSetDist.make(sampleN(t, n))->E.R2.errMap(DistributionTypes.Error.sampleErrorToDistErr)
SampleSetDist.makeFromJsArray(sampleN(t, n))->E.R2.errMap(
DistributionTypes.Error.sampleErrorToDistErr,
)
let fromFloat = (f: float): t => Symbolic(SymbolicDist.Float.make(f))

View File

@ -2,14 +2,10 @@
module Error = {
@genType
type sampleSetError =
TooFewSamples | NonNumericInput(string) | OperationError(Operation.operationError)
let sampleSetErrorToString = (err: sampleSetError): string =>
switch err {
| TooFewSamples => "Too few samples when constructing sample set"
| NonNumericInput(err) => `Found a non-number in input: ${err}`
| OperationError(err) => Operation.Error.toString(err)
}
| TooFewSamples
| NonNumericInput(string)
| OperationError(Operation.operationError)
| UnequalSizes
@genType
type pointsetConversionError = TooFewSamplesForConversionToPointSet
@ -26,6 +22,7 @@ module Error = {
| TooFewSamples => "Too few samples when constructing sample set"
| NonNumericInput(err) => `Found a non-number in input: ${err}`
| OperationError(err) => Operation.Error.toString(err)
| UnequalSizes => "Expected sample sets of equal size"
}
}
}
@ -38,26 +35,29 @@ this constructor.
https://stackoverflow.com/questions/66909578/how-to-make-a-type-constructor-private-in-rescript-except-in-current-module
*/
module T: {
//This really should be hidden (remove the array<float>). The reason it isn't is to act as an escape hatch in JS__Test.ts.
//When we get a good functional library in TS, we could refactor that out.
@genType
type t = array<float>
let make: array<float> => result<t, sampleSetError>
let get: t => array<float>
@genType.opaque
type t
let makeFromTypedArray: E.FloatArray.t => result<t, sampleSetError>
let makeFromJsArray: array<float> => result<t, sampleSetError>
let toJsArray: t => array<float>
let get: t => E.FloatArray.t
} = {
type t = array<float>
let make = (a: array<float>) =>
if E.A.length(a) > 5 {
type t = E.FloatArray.t
let makeFromTypedArray = (a: E.FloatArray.t): result<t, sampleSetError> =>
if E.FloatArray.length(a) > 5 {
Ok(a)
} else {
Error(TooFewSamples)
}
let get = (a: t) => a
let makeFromJsArray = (a: array<float>): result<t, sampleSetError> =>
E.FloatArray.make(a)->makeFromTypedArray
let toJsArray = (t: t) => t->E.FloatArray.toArray
let get = (t: t) => t
}
include T
let length = (t: t) => get(t)->E.A.length
let length = (t: T.t) => T.get(t)->E.FloatArray.length
/*
TODO: Refactor to get a more precise estimate. Also, this code is just fairly messy, could use
@ -68,15 +68,15 @@ let toPointSetDist = (~samples: t, ~samplingInputs: SamplingInputs.samplingInput
pointsetConversionError,
> =>
SampleSetDist_ToPointSet.toPointSetDist(
~samples=get(samples),
~samples=T.get(samples),
~samplingInputs,
(),
).pointSetDist->E.O2.toResult(TooFewSamplesForConversionToPointSet)
//Randomly get one sample from the distribution
let sample = (t: t): float => {
let i = E.Int.random(~min=0, ~max=E.A.length(get(t)) - 1)
E.A.unsafe_get(get(t), i)
let i = E.Int.random(~min=0, ~max=E.FloatArray.length(get(t)) - 1)
E.FloatArray.unsafe_get(get(t), i)
}
/*
@ -87,52 +87,144 @@ The former helps in cases where multiple distributions are correlated.
However, if n > length(t), then there's no clear right answer, so we just randomly
sample everything.
*/
let sampleN = (t: t, n) => {
if n <= E.A.length(get(t)) {
E.A.slice(get(t), ~offset=0, ~len=n)
let sampleN = (t: t, n): array<float> => {
if n <= length(t) {
E.FloatArray.slice(~start=0, ~end_=n, get(t))->E.FloatArray.toArray
} else {
Belt.Array.makeBy(n, _ => sample(t))
}
}
let _fromSampleResultArray = (samples: array<result<float, QuriSquiggleLang.Operation.Error.t>>) =>
E.A.R.firstErrorOrOpen(samples)->E.R2.errMap(Error.fromOperationError) |> E.R2.bind(make)
let samplesMap = (~fn: float => result<float, Operation.Error.t>, t: t): result<
t,
sampleSetError,
> => T.get(t)->E.A2.fmap(fn)->_fromSampleResultArray
> => {
try {
T.get(t)
->E.FloatArray.map((. v) => {
switch fn(v) {
| Ok(res) => res
| Error(err) => err->Operation.Error.OperationException->raise
}
})
->T.makeFromTypedArray
} catch {
| Operation.Error.OperationException(err) => Error.fromOperationError(err)->Error
}
}
//TODO: Figure out what to do if distributions are different lengths. ``zip`` is kind of inelegant for this.
let map2 = (~fn: (float, float) => result<float, Operation.Error.t>, ~t1: t, ~t2: t): result<
t,
sampleSetError,
> => E.A.zip(get(t1), get(t2))->E.A2.fmap(E.Tuple2.toFnCall(fn))->_fromSampleResultArray
> => {
let length1 = t1->length
let length2 = t2->length
if length1 == length2 {
try {
let res = E.FloatArray.fromLength(length1)
for i in 0 to length1 - 1 {
let v = switch fn(
get(t1)->E.FloatArray.unsafe_get(i),
get(t2)->E.FloatArray.unsafe_get(i),
) {
| Ok(fnResult) => fnResult
| Error(err) => err->Operation.Error.OperationException->raise
}
res->E.FloatArray.set(i, v)
}
res->T.makeFromTypedArray
} catch {
| Operation.Error.OperationException(err) => Error.fromOperationError(err)->Error
}
} else {
Error.UnequalSizes->Error
}
}
let map3 = (
~fn: (float, float, float) => result<float, Operation.Error.t>,
~t1: t,
~t2: t,
~t3: t,
): result<t, sampleSetError> =>
E.A.zip3(get(t1), get(t2), get(t3))->E.A2.fmap(E.Tuple3.toFnCall(fn))->_fromSampleResultArray
): result<t, sampleSetError> => {
let length1 = t1->length
let length2 = t2->length
let length3 = t3->length
if length1 == length2 && length2 == length3 {
try {
let res = E.FloatArray.fromLength(length1)
for i in 0 to length1 - 1 {
let v = switch fn(
get(t1)->E.FloatArray.unsafe_get(i),
get(t2)->E.FloatArray.unsafe_get(i),
get(t3)->E.FloatArray.unsafe_get(i),
) {
| Ok(fnResult) => fnResult
| Error(err) => err->Operation.Error.OperationException->raise
}
res->E.FloatArray.set(i, v)
}
res->T.makeFromTypedArray
} catch {
| Operation.Error.OperationException(err) => Error.fromOperationError(err)->Error
}
} else {
Error.UnequalSizes->Error
}
}
let mapN = (~fn: array<float> => result<float, Operation.Error.t>, ~t1: array<t>): result<
t,
sampleSetError,
> => E.A.transpose(E.A.fmap(get, t1))->E.A2.fmap(fn)->_fromSampleResultArray
> => {
let lengths = t1->E.A2.fmap(t => t->length)
let l0 = lengths[0]
if lengths->E.A.all(l => l == l0, _) {
try {
let res = E.FloatArray.fromLength(l0)
for i in 0 to l0 - 1 {
let v = switch fn(t1->E.A2.fmap(t => get(t)->E.FloatArray.unsafe_get(i))) {
| Ok(fnResult) => fnResult
| Error(err) => err->Operation.Error.OperationException->raise
}
res->E.FloatArray.set(i, v)
}
res->T.makeFromTypedArray
} catch {
| Operation.Error.OperationException(err) => Error.fromOperationError(err)->Error
}
} else {
Error.UnequalSizes->Error
}
}
let mean = t => T.get(t)->E.A.Floats.mean
let geomean = t => T.get(t)->E.A.Floats.geomean
let mode = t => T.get(t)->E.A.Floats.mode
let sum = t => T.get(t)->E.A.Floats.sum
let min = t => T.get(t)->E.A.Floats.min
let max = t => T.get(t)->E.A.Floats.max
let stdev = t => T.get(t)->E.A.Floats.stdev
let variance = t => T.get(t)->E.A.Floats.variance
let percentile = (t, f) => T.get(t)->E.A.Floats.percentile(f)
// Builds a sample set of `n` samples by calling `fn` with each index from 0 to n - 1.
// The first `Error` returned by `fn` stops the loop (via OperationException) and is
// returned wrapped as a sampleSetError; otherwise the filled buffer is passed to
// T.makeFromTypedArray, whose own result is returned.
let makeBy = (n: int, fn: int => result<float, Operation.Error.t>): result<t, sampleSetError> => {
  let buffer = E.FloatArray.fromLength(n)
  try {
    for index in 0 to n - 1 {
      // Unwrap the sample first; an exception is used only to break out of the loop early.
      let sample = switch fn(index) {
      | Ok(value) => value
      | Error(opError) => raise(Operation.Error.OperationException(opError))
      }
      E.FloatArray.set(buffer, index, sample)
    }
    T.makeFromTypedArray(buffer)
  } catch {
  | Operation.Error.OperationException(opError) => Error(Error.fromOperationError(opError))
  }
}
let mean = t => T.get(t)->E.FloatArray.mean
let geomean = t => T.get(t)->E.FloatArray.geomean
let mode = t => T.get(t)->E.FloatArray.mode
let sum = t => T.get(t)->E.FloatArray.sum
let min = t => T.get(t)->E.FloatArray.min
let max = t => T.get(t)->E.FloatArray.max
let stdev = t => T.get(t)->E.FloatArray.stdev
let variance = t => T.get(t)->E.FloatArray.variance
let percentile = (t, f) => T.get(t)->E.FloatArray.percentile(f)
let cdf = (t: t, f: float) => {
let countBelowF = t->E.A.reduce(0, (acc, x) => acc + (x <= f ? 1 : 0))
let countBelowF = T.get(t)->E.FloatArray.reduce((. acc, x) => acc + (x <= f ? 1 : 0), 0)
countBelowF->Js.Int.toFloat /. t->length->Js.Int.toFloat
}
@ -149,14 +241,14 @@ let mixture = (values: array<(t, float)>, intendedLength: int) => {
discreteSamples
->Belt.Array.mapWithIndex((index, distIndexToChoose) => {
let chosenDist = E.A.get(dists, E.Float.toInt(distIndexToChoose))
chosenDist->E.O.bind(E.A.get(_, index))
chosenDist->E.O.bind(E.FloatArray.get(_, index))
})
->E.A.O.openIfAllSome
samples->E.O2.toExn("Mixture unreachable error")->T.make
samples->E.O2.toExn("Mixture unreachable error")->T.makeFromJsArray
}
let truncateLeft = (t, f) => T.get(t)->E.A2.filter(x => x >= f)->T.make
let truncateRight = (t, f) => T.get(t)->E.A2.filter(x => x <= f)->T.make
// Keeps only the samples that are >= f, then re-validates the result as a sample set.
let truncateLeft = (t, f) => {
  let kept = E.FloatArray.filter(T.get(t), (. sample) => sample >= f)
  T.makeFromTypedArray(kept)
}
// Keeps only the samples that are <= f, then re-validates the result as a sample set.
let truncateRight = (t, f) => {
  let kept = E.FloatArray.filter(T.get(t), (. sample) => sample <= f)
  T.makeFromTypedArray(kept)
}
let truncate = (t, ~leftCutoff: option<float>, ~rightCutoff: option<float>) => {
let withTruncatedLeft = t => leftCutoff |> E.O.dimap(left => truncateLeft(t, left), _ => Ok(t))

View File

@ -2,16 +2,19 @@
let {iqr_percentile, nrd0_lo_denominator, one, nrd0_coef, nrd_coef, nrd_fractionalPower} = module(
MagicNumbers.SampleSetBandwidth
)
let len = x => E.A.length(x) |> float_of_int
let len = x => E.FloatArray.length(x)->float_of_int
let iqr = x =>
Jstat.percentile(x, iqr_percentile, true) -. Jstat.percentile(x, 1.0 -. iqr_percentile, true)
let iqr = x => {
let xArr = x->E.FloatArray.toArray
Jstat.percentile(xArr, iqr_percentile, true) -.
Jstat.percentile(xArr, 1.0 -. iqr_percentile, true)
}
// Silverman, B. W. (1986) Density Estimation. London: Chapman and Hall.
let nrd0 = x => {
let hi = Js_math.sqrt(Jstat.variance(x))
let hi = Js_math.sqrt(Stdlib.Base.variance(x))
let lo = Js_math.minMany_float([hi, iqr(x) /. nrd0_lo_denominator])
let e = Js_math.abs_float(x[1])
let e = Js_math.abs_float(x->E.FloatArray.unsafe_get(1))
let lo' = switch (lo, hi, e) {
| (lo, _, _) if !Js.Float.isNaN(lo) => lo
| (_, hi, _) if !Js.Float.isNaN(hi) => hi
@ -25,6 +28,6 @@ let nrd0 = x => {
let nrd = x => {
let h = iqr(x) /. nrd0_lo_denominator
nrd_coef *.
Js.Math.min_float(Js.Math.sqrt(Jstat.variance(x)), h) *.
Js.Math.min_float(Js.Math.sqrt(Stdlib.Base.variance(x)), h) *.
Js.Math.pow_float(~base=len(x), ~exp=nrd_fractionalPower)
}

View File

@ -37,40 +37,40 @@ module Internals = {
}
module T = {
type t = array<float>
type t = E.FloatArray.t
let xWidthToUnitWidth = (samples, outputXYPoints, xWidth) => {
let xyPointRange = E.A.Sorted.range(samples)->E.O2.default(0.0)
let xWidthToUnitWidth = (samples: t, outputXYPoints, xWidth) => {
let xyPointRange = E.FloatArray.Sorted.range(samples)->E.O2.default(0.0)
let xyPointWidth = xyPointRange /. float_of_int(outputXYPoints)
xWidth /. xyPointWidth
}
let formatUnitWidth = w => Jstat.max([w, 1.0])->int_of_float
let suggestedUnitWidth = (samples, outputXYPoints) => {
let suggestedUnitWidth = (samples: t, outputXYPoints) => {
let suggestedXWidth = SampleSetDist_Bandwidth.nrd0(samples)
xWidthToUnitWidth(samples, outputXYPoints, suggestedXWidth)
}
let kde = (~samples, ~outputXYPoints, width) =>
KDE.normalSampling(samples, outputXYPoints, width)
let kde = (~samples: t, ~outputXYPoints, width) =>
KDE.normalSampling(samples->E.FloatArray.toArray, outputXYPoints, width)
}
}
let toPointSetDist = (
~samples: Internals.T.t,
~samples: E.FloatArray.t,
~samplingInputs: SamplingInputs.samplingInputs,
(),
): Internals.Types.outputs => {
let samples = samples->E.A.Floats.sort
let samples = samples->E.FloatArray.sort
let minDiscreteToKeep = MagicNumbers.ToPointSet.minDiscreteToKeep(samples)
let (continuousPart, discretePart) = E.A.Floats.Sorted.splitContinuousAndDiscreteForMinWeight(
let (continuousPart, discretePart) = E.FloatArray.Sorted.splitContinuousAndDiscreteForMinWeight(
samples,
~minDiscreteWeight=minDiscreteToKeep,
)
let length = samples->E.A.length->float_of_int
let length = samples->E.FloatArray.length->float_of_int
let discrete: PointSetTypes.discreteShape =
discretePart
->E.FloatFloatMap.fmap(r => r /. length, _)
@ -79,7 +79,7 @@ let toPointSetDist = (
->Discrete.make
let pdf =
continuousPart->E.A.length > 5
continuousPart->E.FloatArray.length > 5
? {
let _suggestedXWidth = SampleSetDist_Bandwidth.nrd0(continuousPart)
// todo: This does some recalculating from the last step.

View File

@ -28,7 +28,7 @@ module Internal = {
let fromFn = (aLambdaValue, environment: Reducer_T.environment, reducer: Reducer_T.reducerFn) => {
let sampleCount = environment.sampleCount
let fn = r => doLambdaCall(aLambdaValue, [IEvNumber(r)], environment, reducer)
Belt_Array.makeBy(sampleCount, r => fn(r->Js.Int.toFloat))->E.A.R.firstErrorOrOpen
SampleSetDist.makeBy(sampleCount, r => fn(r->Js.Int.toFloat))
}
let map1 = (sampleSetDist: t, aLambdaValue, environment: Reducer_T.environment, reducer) => {
@ -116,7 +116,9 @@ let libaryBase = [
~run=(inputs, _, _) => {
let sampleSet =
inputs->Prepare.ToTypedArray.numbers
|> E.R2.bind(r => SampleSetDist.make(r)->E.R2.errMap(_ => "AM I HERE? WHYERE AMI??"))
|> E.R2.bind(r =>
SampleSetDist.makeFromJsArray(r)->E.R2.errMap(_ => "AM I HERE? WHYERE AMI??")
)
sampleSet
->E.R2.fmap(Wrappers.sampleSet)
->E.R2.fmap(Wrappers.evDistribution)
@ -140,7 +142,7 @@ let libaryBase = [
~run=(inputs, _, _) =>
switch inputs {
| [IEvDistribution(SampleSet(dist))] =>
dist->E.A2.fmap(Wrappers.evNumber)->Wrappers.evArray->Ok
dist->SampleSetDist.toJsArray->E.A2.fmap(Wrappers.evNumber)->Wrappers.evArray->Ok
| _ => Error(impossibleError)
},
(),
@ -163,7 +165,7 @@ let libaryBase = [
| [IEvLambda(lambda)] =>
switch Internal.fromFn(lambda, environment, reducer) {
| Ok(r) => Ok(r->Wrappers.sampleSet->Wrappers.evDistribution)
| Error(e) => e->Reducer_ErrorValue.REOperationError->Error
| Error(e) => e->SampleSetError->Reducer_ErrorValue.REDistributionError->Error
}
| _ => Error(impossibleError)
},

View File

@ -35,7 +35,7 @@ module ToPointSet = {
it would strike a reasonable trade-off, but I'm really unsure what's
best right now.
*/
let minDiscreteToKeep = samples => max(20, E.A.length(samples) / 50)
let minDiscreteToKeep = samples => max(20, E.FloatArray.length(samples) / 50)
}
module SampleSetBandwidth = {

View File

@ -11,6 +11,7 @@ module B = E_B
module Dict = E_Dict
module F = E_F
module Float = E_Float
module FloatArray = E_FloatArray
module FloatFloatMap = E_FloatFloatMap
module I = E_I
module Int = E_Int

View File

@ -303,55 +303,6 @@ module Floats = {
let makeIncrementalDown = (a, b) =>
Array.make(a - b + 1, a) |> Array.mapi((i, c) => c - i) |> Belt.Array.map(_, float_of_int)
/*
This function goes through a sorted array and divides it into two different clusters:
continuous samples and discrete samples. The discrete samples are stored in a mutable map.
Samples are thought to be discrete if they have at least `minDiscreteWight` duplicates.
If the min discrete weight is 4, that would mean that at least four elements needed from a specific
value for that to be kept as discrete. This is important because in some cases, we can expect that
some common elements will be generated by regular operations. The final continuous array will be sorted.
This function is performance-critical, don't change it significantly without benchmarking
SampleSet->PointSet conversion performance.
*/
let splitContinuousAndDiscreteForMinWeight = (
sortedArray: array<float>,
~minDiscreteWeight: int,
) => {
let continuous: array<float> = []
let discrete = FloatFloatMap.empty()
let addData = (count: int, value: float): unit => {
if count >= minDiscreteWeight {
FloatFloatMap.add(value, count->Belt.Int.toFloat, discrete)
} else {
for _ in 1 to count {
continuous->Js.Array2.push(value)->ignore
}
}
}
let (finalCount, finalValue) = sortedArray->Belt.Array.reduce(
// initial prev value doesn't matter; if it collides with the first element of the array, flush won't do anything
(0, 0.),
((count, prev), element) => {
if element == prev {
(count + 1, prev)
} else {
// new value, process previous ones
addData(count, prev)
(1, element)
}
},
)
// flush final values
addData(finalCount, finalValue)
(continuous, discrete)
}
}
}
module Sorted = Floats.Sorted

View File

@ -0,0 +1,89 @@
// Helpers for working with JS Float64Array typed arrays.
module FloatArray = Js.TypedArray2.Float64Array
// The typed-array type itself; no wrapper is added around it.
type t = FloatArray.t
// Direct aliases of the underlying Float64Array bindings.
let make = FloatArray.make
let fromLength = FloatArray.fromLength
let length = FloatArray.length
let reduce = FloatArray.reduce
let filter = FloatArray.filter
let slice = FloatArray.slice
// Sorts a copy of the input (copy, then sortInPlace on the copy), leaving the argument untouched.
let sort = (t: t) => t->FloatArray.copy->FloatArray.sortInPlace
// Converts to a plain JS array using Array.from.
let toArray: t => array<float> = %raw(`a => Array.from(a)`)
let map = FloatArray.map
// Unchecked element read: no bounds check is performed here.
let unsafe_get = (t: t, i: int) => t->FloatArray.unsafe_get(i)
// Bounds-checked element read.
// Returns Some(value) when `i` is within [0, length) and None otherwise.
// Negative indices are rejected explicitly: the unchecked accessor would
// otherwise hand back `undefined` wrapped in Some.
let get = (t: t, i: int) =>
  i >= 0 && i < t->length ? Some(t->FloatArray.unsafe_get(i)) : None
// Unchecked element write. Only the array and the index are applied here;
// callers in this codebase supply the value as a third argument.
let set = (arr: t, index: int) => FloatArray.unsafe_set(arr, index)
// Summary statistics. Each one converts the samples to a plain JS array with
// toArray and hands that array to the matching Jstat function.
let mean = (samples: t) => Jstat.mean(toArray(samples))
let geomean = (samples: t) => Jstat.geomean(toArray(samples))
let mode = (samples: t) => Jstat.mode(toArray(samples))
let variance = (samples: t) => Jstat.variance(toArray(samples))
let stdev = (samples: t) => Jstat.stdev(toArray(samples))
let sum = (samples: t) => Jstat.sum(toArray(samples))
// Direct alias of Jstat.product; unlike the helpers above, no toArray conversion is applied.
// NOTE(review): presumably this still expects a plain array<float> rather than `t` - confirm.
let product = Jstat.product
// Direct alias of Js.Math.random_int.
let random = Js.Math.random_int
// Smallest sample, computed by folding Math.min over the typed array.
// Folding avoids copying the samples into a plain array and passing every
// sample as a separate argument to a variadic Math.min call, which can exceed
// engine argument limits for very large sample sets.
// Returns `infinity` for an empty array, the same value Math.min gives when
// called with no arguments; NaN samples still propagate through Math.min.
let min = (t: t) => t->reduce((. acc, x) => Js.Math.min_float(acc, x), infinity)
// Largest sample, computed the same way; returns `neg_infinity` for an empty array.
let max = (t: t) => t->reduce((. acc, x) => Js.Math.max_float(acc, x), neg_infinity)
// Percentile `b` of the samples: converts to a plain array and calls
// Jstat.percentile with `false` as its third argument.
let percentile = (t, b) => Jstat.percentile(t->toArray, b, false)
// Helpers that read the first/last elements or scan runs of equal values.
// They are meant for arrays that are already sorted; nothing here sorts or checks the order.
module Sorted = {
// First element, or None for an empty array.
let min = (t: t) => t->length > 0 ? Some(FloatArray.unsafe_get(t, 0)) : None
// Last element, or None for an empty array.
let max = (t: t) => t->length > 0 ? Some(FloatArray.unsafe_get(t, t->length - 1)) : None
// Difference between max(a) and min(a), or None if either accessor returns None.
// The accessors default to the `min`/`max` defined just above and can be overridden by the caller.
let range = (~min=min, ~max=max, a) =>
switch (min(a), max(a)) {
| (Some(min), Some(max)) => Some(max -. min)
| _ => None
}
/*
This function goes through a sorted array and divides it into two different clusters:
continuous samples and discrete samples. The discrete samples are stored in a mutable map.
Samples are thought to be discrete if they have at least `minDiscreteWeight` duplicates.
If the min discrete weight is 4, that would mean that at least four elements are needed from a specific
value for that to be kept as discrete. This is important because in some cases, we can expect that
some common elements will be generated by regular operations. The final continuous array will be sorted.
This function is performance-critical, don't change it significantly without benchmarking
SampleSet->PointSet conversion performance.

Returns a tuple of (continuous samples as a typed array, map from discrete value to its count).
*/
let splitContinuousAndDiscreteForMinWeight = (sortedArray: t, ~minDiscreteWeight: int): (
t,
E_FloatFloatMap.t,
) => {
// TODO - preallocate as typed array, then trim
let continuous: array<float> = []
let discrete = E_FloatFloatMap.empty()
// Records one finished run of `count` copies of `value`: runs that reach the threshold go
// into the discrete map with their count as a float, shorter runs are pushed back one by one
// into the continuous array.
let addData = (count: int, value: float): unit => {
if count >= minDiscreteWeight {
E_FloatFloatMap.add(value, count->Belt.Int.toFloat, discrete)
} else {
for _ in 1 to count {
continuous->Js.Array2.push(value)->ignore
}
}
}
// The accumulator is (length of the current run, value of the current run).
let (finalCount, finalValue) = reduce(
sortedArray,
// callback: extend the current run while the value repeats, otherwise flush it and start a new one
(. (count, prev), element) => {
if element == prev {
(count + 1, prev)
} else {
// new value, process previous ones
addData(count, prev)
(1, element)
}
},
// initial prev value doesn't matter; if it collides with the first element of the array,
// the run simply starts there, and otherwise an empty run (count 0) is flushed
(0, 0.),
)
// flush final values
addData(finalCount, finalValue)
// the collected continuous samples are converted to a typed array on return
(continuous->make, discrete)
}
}

View File

@ -78,6 +78,8 @@ module Error = {
| NotYetImplemented => "This pathway is not yet implemented"
| Other(t) => t
}
exception OperationException(t)
}
let power = (a: float, b: float): result<float, Error.t> =>

View File

@ -51,3 +51,11 @@ module Random = {
module Math = {
@module external factorial: float => float = "@stdlib/math/base/special/factorial"
}
// Bindings to @stdlib statistics routines that take a Float64Array directly.
module Base = {
// Raw binding to the "@stdlib/stats/base/variance" module.
// NOTE(review): per the @stdlib documentation the arguments are presumably
// (N, correction, x, stride) - confirm against the installed version.
@module
external variance: (int, int, Js.TypedArray2.Float64Array.t, int) => float =
"@stdlib/stats/base/variance"
// Variance of a whole typed array: calls the raw binding with the array's length, 0, the array, and 1.
// This definition shadows the raw binding above, so later code sees only the one-argument form.
// NOTE(review): if the second argument is the degrees-of-freedom correction, 0 means
// population variance - confirm this matches the Jstat.variance call it replaced.
let variance = (t: Js.TypedArray2.Float64Array.t) =>
variance(t->Js.TypedArray2.Float64Array.length, 0, t, 1)
}