store SampleSet as typed array
This commit is contained in:
parent
d60792aa93
commit
4bd961a808
|
@ -3,11 +3,11 @@ open Expect
|
|||
|
||||
describe("Bandwidth", () => {
|
||||
test("nrd0()", () => {
|
||||
let data = [1., 4., 3., 2.]
|
||||
let data = [1., 4., 3., 2.]->E.FloatArray.make
|
||||
expect(SampleSetDist_Bandwidth.nrd0(data))->toEqual(0.7625801874014622)
|
||||
})
|
||||
test("nrd()", () => {
|
||||
let data = [1., 4., 3., 2.]
|
||||
let data = [1., 4., 3., 2.]->E.FloatArray.make
|
||||
expect(SampleSetDist_Bandwidth.nrd(data))->toEqual(0.8981499984950554)
|
||||
})
|
||||
})
|
||||
|
|
|
@ -2,9 +2,10 @@ open Jest
|
|||
open TestHelpers
|
||||
|
||||
let prepareInputs = (ar, minWeight) =>
|
||||
E.A.Floats.Sorted.splitContinuousAndDiscreteForMinWeight(ar, ~minDiscreteWeight=minWeight) |> (
|
||||
((c, disc)) => (c, disc |> E.FloatFloatMap.toArray)
|
||||
)
|
||||
E.FloatArray.Sorted.splitContinuousAndDiscreteForMinWeight(
|
||||
ar->E.FloatArray.make,
|
||||
~minDiscreteWeight=minWeight,
|
||||
) |> (((c, disc)) => (c->E.FloatArray.toArray, disc->E.FloatFloatMap.toArray))
|
||||
|
||||
describe("Continuous and discrete splits", () => {
|
||||
makeTest(
|
||||
|
@ -37,18 +38,10 @@ describe("Continuous and discrete splits", () => {
|
|||
E.A.concatMany([sorted, sorted, sorted, sorted]) |> Belt.SortArray.stableSortBy(_, compare)
|
||||
}
|
||||
|
||||
let (_, discrete1) = E.A.Floats.Sorted.splitContinuousAndDiscreteForMinWeight(
|
||||
makeDuplicatedArray(10),
|
||||
~minDiscreteWeight=2,
|
||||
)
|
||||
let toArr1 = discrete1 |> E.FloatFloatMap.toArray
|
||||
makeTest("splitMedium at count=10", toArr1 |> Belt.Array.length, 10)
|
||||
let (_, toArr1) = prepareInputs(makeDuplicatedArray(10), 2)
|
||||
makeTest("splitMedium at count=10", toArr1->Belt.Array.length, 10)
|
||||
|
||||
let (_c, discrete2) = E.A.Floats.Sorted.splitContinuousAndDiscreteForMinWeight(
|
||||
makeDuplicatedArray(500),
|
||||
~minDiscreteWeight=2,
|
||||
)
|
||||
let toArr2 = discrete2 |> E.FloatFloatMap.toArray
|
||||
makeTest("splitMedium at count=500", toArr2 |> Belt.Array.length, 500)
|
||||
let (_, toArr2) = prepareInputs(makeDuplicatedArray(500), 2)
|
||||
makeTest("splitMedium at count=500", toArr2->Belt.Array.length, 500)
|
||||
// makeTest("foo", [] |> Belt.Array.length, 500)
|
||||
})
|
||||
|
|
|
@ -90,9 +90,9 @@ export class SqPointSetDistribution extends SqAbstractDistribution {
|
|||
export class SqSampleSetDistribution extends SqAbstractDistribution {
|
||||
tag = Tag.SampleSet as const;
|
||||
|
||||
value(): number[] {
|
||||
return this.valueMethod(RSDistribution.getSampleSet);
|
||||
}
|
||||
// value(): number[] {
|
||||
// return this.valueMethod(RSDistribution.getSampleSet);
|
||||
// }
|
||||
}
|
||||
|
||||
export class SqSymbolicDistribution extends SqAbstractDistribution {
|
||||
|
|
|
@ -221,7 +221,7 @@ let rec run = (~env: env, functionCallInfo: functionCallInfo): outputType => {
|
|||
->OutputLocal.fromResult
|
||||
| FromSamples(xs) =>
|
||||
xs
|
||||
->SampleSetDist.make
|
||||
->SampleSetDist.makeFromJsArray
|
||||
->E.R2.errMap(x => DistributionTypes.SampleSetError(x))
|
||||
->E.R2.fmap(x => x->DistributionTypes.SampleSet->Dist)
|
||||
->OutputLocal.fromResult
|
||||
|
|
|
@ -98,7 +98,7 @@ module Constructors: {
|
|||
@genType
|
||||
let toSampleSet: (~env: GenericDist.env, genericDist, int) => result<genericDist, error>
|
||||
@genType
|
||||
let fromSamples: (~env: GenericDist.env, SampleSetDist.t) => result<genericDist, error>
|
||||
let fromSamples: (~env: GenericDist.env, array<float>) => result<genericDist, error>
|
||||
@genType
|
||||
let truncate: (
|
||||
~env: GenericDist.env,
|
||||
|
|
|
@ -38,6 +38,7 @@ module Error = {
|
|||
| SampleSetError(TooFewSamples) => "Too Few Samples"
|
||||
| SampleSetError(NonNumericInput(err)) => `Found a non-number in input: ${err}`
|
||||
| SampleSetError(OperationError(err)) => Operation.Error.toString(err)
|
||||
| SampleSetError(UnequalSizes) => "Expected sample sets of equal size"
|
||||
| OperationError(err) => Operation.Error.toString(err)
|
||||
| PointSetConversionError(err) => SampleSetDist.pointsetConversionErrorToString(err)
|
||||
| SparklineError(err) => PointSetTypes.sparklineErrorToString(err)
|
||||
|
|
|
@ -40,7 +40,9 @@ let sampleN = (t: t, n) =>
|
|||
let sample = (t: t) => sampleN(t, 1)->E.A.first |> E.O.toExn("Should not have happened")
|
||||
|
||||
let toSampleSetDist = (t: t, n) =>
|
||||
SampleSetDist.make(sampleN(t, n))->E.R2.errMap(DistributionTypes.Error.sampleErrorToDistErr)
|
||||
SampleSetDist.makeFromJsArray(sampleN(t, n))->E.R2.errMap(
|
||||
DistributionTypes.Error.sampleErrorToDistErr,
|
||||
)
|
||||
|
||||
let fromFloat = (f: float): t => Symbolic(SymbolicDist.Float.make(f))
|
||||
|
||||
|
|
|
@ -2,14 +2,10 @@
|
|||
module Error = {
|
||||
@genType
|
||||
type sampleSetError =
|
||||
TooFewSamples | NonNumericInput(string) | OperationError(Operation.operationError)
|
||||
|
||||
let sampleSetErrorToString = (err: sampleSetError): string =>
|
||||
switch err {
|
||||
| TooFewSamples => "Too few samples when constructing sample set"
|
||||
| NonNumericInput(err) => `Found a non-number in input: ${err}`
|
||||
| OperationError(err) => Operation.Error.toString(err)
|
||||
}
|
||||
| TooFewSamples
|
||||
| NonNumericInput(string)
|
||||
| OperationError(Operation.operationError)
|
||||
| UnequalSizes
|
||||
|
||||
@genType
|
||||
type pointsetConversionError = TooFewSamplesForConversionToPointSet
|
||||
|
@ -26,6 +22,7 @@ module Error = {
|
|||
| TooFewSamples => "Too few samples when constructing sample set"
|
||||
| NonNumericInput(err) => `Found a non-number in input: ${err}`
|
||||
| OperationError(err) => Operation.Error.toString(err)
|
||||
| UnequalSizes => "Expected sample sets of equal size"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -38,26 +35,29 @@ this constructor.
|
|||
https://stackoverflow.com/questions/66909578/how-to-make-a-type-constructor-private-in-rescript-except-in-current-module
|
||||
*/
|
||||
module T: {
|
||||
//This really should be hidden (remove the array<float>). The reason it isn't is to act as an escape hatch in JS__Test.ts.
|
||||
//When we get a good functional library in TS, we could refactor that out.
|
||||
@genType
|
||||
type t = array<float>
|
||||
let make: array<float> => result<t, sampleSetError>
|
||||
let get: t => array<float>
|
||||
@genType.opaque
|
||||
type t
|
||||
let makeFromTypedArray: E.FloatArray.t => result<t, sampleSetError>
|
||||
let makeFromJsArray: array<float> => result<t, sampleSetError>
|
||||
let toJsArray: t => array<float>
|
||||
let get: t => E.FloatArray.t
|
||||
} = {
|
||||
type t = array<float>
|
||||
let make = (a: array<float>) =>
|
||||
if E.A.length(a) > 5 {
|
||||
type t = E.FloatArray.t
|
||||
let makeFromTypedArray = (a: E.FloatArray.t): result<t, sampleSetError> =>
|
||||
if E.FloatArray.length(a) > 5 {
|
||||
Ok(a)
|
||||
} else {
|
||||
Error(TooFewSamples)
|
||||
}
|
||||
let get = (a: t) => a
|
||||
let makeFromJsArray = (a: array<float>): result<t, sampleSetError> =>
|
||||
E.FloatArray.make(a)->makeFromTypedArray
|
||||
let toJsArray = (t: t) => t->E.FloatArray.toArray
|
||||
let get = (t: t) => t
|
||||
}
|
||||
|
||||
include T
|
||||
|
||||
let length = (t: t) => get(t)->E.A.length
|
||||
let length = (t: T.t) => T.get(t)->E.FloatArray.length
|
||||
|
||||
/*
|
||||
TODO: Refactor to get a more precise estimate. Also, this code is just fairly messy, could use
|
||||
|
@ -68,15 +68,15 @@ let toPointSetDist = (~samples: t, ~samplingInputs: SamplingInputs.samplingInput
|
|||
pointsetConversionError,
|
||||
> =>
|
||||
SampleSetDist_ToPointSet.toPointSetDist(
|
||||
~samples=get(samples),
|
||||
~samples=T.get(samples),
|
||||
~samplingInputs,
|
||||
(),
|
||||
).pointSetDist->E.O2.toResult(TooFewSamplesForConversionToPointSet)
|
||||
|
||||
//Randomly get one sample from the distribution
|
||||
let sample = (t: t): float => {
|
||||
let i = E.Int.random(~min=0, ~max=E.A.length(get(t)) - 1)
|
||||
E.A.unsafe_get(get(t), i)
|
||||
let i = E.Int.random(~min=0, ~max=E.FloatArray.length(get(t)) - 1)
|
||||
E.FloatArray.unsafe_get(get(t), i)
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -87,52 +87,144 @@ The former helps in cases where multiple distributions are correlated.
|
|||
However, if n > length(t), then there's no clear right answer, so we just randomly
|
||||
sample everything.
|
||||
*/
|
||||
let sampleN = (t: t, n) => {
|
||||
if n <= E.A.length(get(t)) {
|
||||
E.A.slice(get(t), ~offset=0, ~len=n)
|
||||
let sampleN = (t: t, n): array<float> => {
|
||||
if n <= length(t) {
|
||||
E.FloatArray.slice(~start=0, ~end_=n, get(t))->E.FloatArray.toArray
|
||||
} else {
|
||||
Belt.Array.makeBy(n, _ => sample(t))
|
||||
}
|
||||
}
|
||||
|
||||
let _fromSampleResultArray = (samples: array<result<float, QuriSquiggleLang.Operation.Error.t>>) =>
|
||||
E.A.R.firstErrorOrOpen(samples)->E.R2.errMap(Error.fromOperationError) |> E.R2.bind(make)
|
||||
|
||||
let samplesMap = (~fn: float => result<float, Operation.Error.t>, t: t): result<
|
||||
t,
|
||||
sampleSetError,
|
||||
> => T.get(t)->E.A2.fmap(fn)->_fromSampleResultArray
|
||||
> => {
|
||||
try {
|
||||
T.get(t)
|
||||
->E.FloatArray.map((. v) => {
|
||||
switch fn(v) {
|
||||
| Ok(res) => res
|
||||
| Error(err) => err->Operation.Error.OperationException->raise
|
||||
}
|
||||
})
|
||||
->T.makeFromTypedArray
|
||||
} catch {
|
||||
| Operation.Error.OperationException(err) => Error.fromOperationError(err)->Error
|
||||
}
|
||||
}
|
||||
|
||||
//TODO: Figure out what to do if distributions are different lengths. ``zip`` is kind of inelegant for this.
|
||||
let map2 = (~fn: (float, float) => result<float, Operation.Error.t>, ~t1: t, ~t2: t): result<
|
||||
t,
|
||||
sampleSetError,
|
||||
> => E.A.zip(get(t1), get(t2))->E.A2.fmap(E.Tuple2.toFnCall(fn))->_fromSampleResultArray
|
||||
> => {
|
||||
let length1 = t1->length
|
||||
let length2 = t2->length
|
||||
if length1 == length2 {
|
||||
try {
|
||||
let res = E.FloatArray.fromLength(length1)
|
||||
for i in 0 to length1 - 1 {
|
||||
let v = switch fn(
|
||||
get(t1)->E.FloatArray.unsafe_get(i),
|
||||
get(t2)->E.FloatArray.unsafe_get(i),
|
||||
) {
|
||||
| Ok(fnResult) => fnResult
|
||||
| Error(err) => err->Operation.Error.OperationException->raise
|
||||
}
|
||||
res->E.FloatArray.set(i, v)
|
||||
}
|
||||
res->T.makeFromTypedArray
|
||||
} catch {
|
||||
| Operation.Error.OperationException(err) => Error.fromOperationError(err)->Error
|
||||
}
|
||||
} else {
|
||||
Error.UnequalSizes->Error
|
||||
}
|
||||
}
|
||||
|
||||
let map3 = (
|
||||
~fn: (float, float, float) => result<float, Operation.Error.t>,
|
||||
~t1: t,
|
||||
~t2: t,
|
||||
~t3: t,
|
||||
): result<t, sampleSetError> =>
|
||||
E.A.zip3(get(t1), get(t2), get(t3))->E.A2.fmap(E.Tuple3.toFnCall(fn))->_fromSampleResultArray
|
||||
): result<t, sampleSetError> => {
|
||||
let length1 = t1->length
|
||||
let length2 = t2->length
|
||||
let length3 = t3->length
|
||||
if length1 == length2 && length2 == length3 {
|
||||
try {
|
||||
let res = E.FloatArray.fromLength(length1)
|
||||
for i in 0 to length1 - 1 {
|
||||
let v = switch fn(
|
||||
get(t1)->E.FloatArray.unsafe_get(i),
|
||||
get(t2)->E.FloatArray.unsafe_get(i),
|
||||
get(t3)->E.FloatArray.unsafe_get(i),
|
||||
) {
|
||||
| Ok(fnResult) => fnResult
|
||||
| Error(err) => err->Operation.Error.OperationException->raise
|
||||
}
|
||||
res->E.FloatArray.set(i, v)
|
||||
}
|
||||
res->T.makeFromTypedArray
|
||||
} catch {
|
||||
| Operation.Error.OperationException(err) => Error.fromOperationError(err)->Error
|
||||
}
|
||||
} else {
|
||||
Error.UnequalSizes->Error
|
||||
}
|
||||
}
|
||||
|
||||
/*
Combines any number of sample sets position by position: for every index i, `fn`
receives the i-th sample of each distribution in `t1` (in order) and its result
becomes the i-th sample of the output.

Returns:
- Error(TooFewSamples) when `t1` is empty. There is no first length to compare
  against, and zero samples could never pass makeFromTypedArray's size check, so
  this is reported as a result instead of indexing `lengths[0]` unguarded.
- Error(UnequalSizes) when the sample sets do not all have the same length.
- The first error produced by `fn`, converted with Error.fromOperationError.
- Otherwise whatever T.makeFromTypedArray returns for the filled array.
*/
let mapN = (~fn: array<float> => result<float, Operation.Error.t>, ~t1: array<t>): result<
  t,
  sampleSetError,
> => {
  let lengths = t1->E.A2.fmap(t => t->length)
  switch Belt.Array.get(lengths, 0) {
  | None => Error.TooFewSamples->Error
  | Some(l0) =>
    if lengths->E.A.all(l => l == l0, _) {
      // The exception is only used to break out of the loop on the first failing call.
      try {
        let res = E.FloatArray.fromLength(l0)
        for i in 0 to l0 - 1 {
          let v = switch fn(t1->E.A2.fmap(t => get(t)->E.FloatArray.unsafe_get(i))) {
          | Ok(fnResult) => fnResult
          | Error(err) => err->Operation.Error.OperationException->raise
          }
          res->E.FloatArray.set(i, v)
        }
        res->T.makeFromTypedArray
      } catch {
      | Operation.Error.OperationException(err) => Error.fromOperationError(err)->Error
      }
    } else {
      Error.UnequalSizes->Error
    }
  }
}
|
||||
|
||||
let mean = t => T.get(t)->E.A.Floats.mean
|
||||
let geomean = t => T.get(t)->E.A.Floats.geomean
|
||||
let mode = t => T.get(t)->E.A.Floats.mode
|
||||
let sum = t => T.get(t)->E.A.Floats.sum
|
||||
let min = t => T.get(t)->E.A.Floats.min
|
||||
let max = t => T.get(t)->E.A.Floats.max
|
||||
let stdev = t => T.get(t)->E.A.Floats.stdev
|
||||
let variance = t => T.get(t)->E.A.Floats.variance
|
||||
let percentile = (t, f) => T.get(t)->E.A.Floats.percentile(f)
|
||||
/*
Builds a sample set of `n` samples by calling `fn` with each index from 0 to n - 1.
The first Error returned by `fn` stops the loop and is returned after conversion with
Error.fromOperationError; otherwise the filled typed array goes through
T.makeFromTypedArray.
*/
let makeBy = (n: int, fn: int => result<float, Operation.Error.t>): result<t, sampleSetError> => {
  let samples = E.FloatArray.fromLength(n)
  // raise/catch is used purely as an early exit from the for loop
  try {
    for index in 0 to n - 1 {
      let value = switch fn(index) {
      | Ok(v) => v
      | Error(err) => raise(Operation.Error.OperationException(err))
      }
      E.FloatArray.set(samples, index, value)
    }
    T.makeFromTypedArray(samples)
  } catch {
  | Operation.Error.OperationException(err) => Error(Error.fromOperationError(err))
  }
}
|
||||
|
||||
let mean = t => T.get(t)->E.FloatArray.mean
|
||||
let geomean = t => T.get(t)->E.FloatArray.geomean
|
||||
let mode = t => T.get(t)->E.FloatArray.mode
|
||||
let sum = t => T.get(t)->E.FloatArray.sum
|
||||
let min = t => T.get(t)->E.FloatArray.min
|
||||
let max = t => T.get(t)->E.FloatArray.max
|
||||
let stdev = t => T.get(t)->E.FloatArray.stdev
|
||||
let variance = t => T.get(t)->E.FloatArray.variance
|
||||
let percentile = (t, f) => T.get(t)->E.FloatArray.percentile(f)
|
||||
let cdf = (t: t, f: float) => {
|
||||
let countBelowF = t->E.A.reduce(0, (acc, x) => acc + (x <= f ? 1 : 0))
|
||||
let countBelowF = T.get(t)->E.FloatArray.reduce((. acc, x) => acc + (x <= f ? 1 : 0), 0)
|
||||
countBelowF->Js.Int.toFloat /. t->length->Js.Int.toFloat
|
||||
}
|
||||
|
||||
|
@ -149,14 +241,14 @@ let mixture = (values: array<(t, float)>, intendedLength: int) => {
|
|||
discreteSamples
|
||||
->Belt.Array.mapWithIndex((index, distIndexToChoose) => {
|
||||
let chosenDist = E.A.get(dists, E.Float.toInt(distIndexToChoose))
|
||||
chosenDist->E.O.bind(E.A.get(_, index))
|
||||
chosenDist->E.O.bind(E.FloatArray.get(_, index))
|
||||
})
|
||||
->E.A.O.openIfAllSome
|
||||
samples->E.O2.toExn("Mixture unreachable error")->T.make
|
||||
samples->E.O2.toExn("Mixture unreachable error")->T.makeFromJsArray
|
||||
}
|
||||
|
||||
let truncateLeft = (t, f) => T.get(t)->E.A2.filter(x => x >= f)->T.make
|
||||
let truncateRight = (t, f) => T.get(t)->E.A2.filter(x => x <= f)->T.make
|
||||
let truncateLeft = (t, f) => T.get(t)->E.FloatArray.filter((. x) => x >= f)->T.makeFromTypedArray
|
||||
let truncateRight = (t, f) => T.get(t)->E.FloatArray.filter((. x) => x <= f)->T.makeFromTypedArray
|
||||
|
||||
let truncate = (t, ~leftCutoff: option<float>, ~rightCutoff: option<float>) => {
|
||||
let withTruncatedLeft = t => leftCutoff |> E.O.dimap(left => truncateLeft(t, left), _ => Ok(t))
|
||||
|
|
|
@ -2,16 +2,19 @@
|
|||
let {iqr_percentile, nrd0_lo_denominator, one, nrd0_coef, nrd_coef, nrd_fractionalPower} = module(
|
||||
MagicNumbers.SampleSetBandwidth
|
||||
)
|
||||
let len = x => E.A.length(x) |> float_of_int
|
||||
let len = x => E.FloatArray.length(x)->float_of_int
|
||||
|
||||
let iqr = x =>
|
||||
Jstat.percentile(x, iqr_percentile, true) -. Jstat.percentile(x, 1.0 -. iqr_percentile, true)
|
||||
// Difference between the `iqr_percentile` percentile and the `1.0 -. iqr_percentile`
// percentile of `x`. The typed array is converted to a plain array once and reused
// for both Jstat.percentile calls.
let iqr = x => {
  let values = E.FloatArray.toArray(x)
  let upper = Jstat.percentile(values, iqr_percentile, true)
  let lower = Jstat.percentile(values, 1.0 -. iqr_percentile, true)
  upper -. lower
}
|
||||
|
||||
// Silverman, B. W. (1986) Density Estimation. London: Chapman and Hall.
|
||||
let nrd0 = x => {
|
||||
let hi = Js_math.sqrt(Jstat.variance(x))
|
||||
let hi = Js_math.sqrt(Stdlib.Base.variance(x))
|
||||
let lo = Js_math.minMany_float([hi, iqr(x) /. nrd0_lo_denominator])
|
||||
let e = Js_math.abs_float(x[1])
|
||||
let e = Js_math.abs_float(x->E.FloatArray.unsafe_get(1))
|
||||
let lo' = switch (lo, hi, e) {
|
||||
| (lo, _, _) if !Js.Float.isNaN(lo) => lo
|
||||
| (_, hi, _) if !Js.Float.isNaN(hi) => hi
|
||||
|
@ -25,6 +28,6 @@ let nrd0 = x => {
|
|||
let nrd = x => {
|
||||
let h = iqr(x) /. nrd0_lo_denominator
|
||||
nrd_coef *.
|
||||
Js.Math.min_float(Js.Math.sqrt(Jstat.variance(x)), h) *.
|
||||
Js.Math.min_float(Js.Math.sqrt(Stdlib.Base.variance(x)), h) *.
|
||||
Js.Math.pow_float(~base=len(x), ~exp=nrd_fractionalPower)
|
||||
}
|
||||
|
|
|
@ -37,40 +37,40 @@ module Internals = {
|
|||
}
|
||||
|
||||
module T = {
|
||||
type t = array<float>
|
||||
type t = E.FloatArray.t
|
||||
|
||||
let xWidthToUnitWidth = (samples, outputXYPoints, xWidth) => {
|
||||
let xyPointRange = E.A.Sorted.range(samples)->E.O2.default(0.0)
|
||||
let xWidthToUnitWidth = (samples: t, outputXYPoints, xWidth) => {
|
||||
let xyPointRange = E.FloatArray.Sorted.range(samples)->E.O2.default(0.0)
|
||||
let xyPointWidth = xyPointRange /. float_of_int(outputXYPoints)
|
||||
xWidth /. xyPointWidth
|
||||
}
|
||||
|
||||
let formatUnitWidth = w => Jstat.max([w, 1.0])->int_of_float
|
||||
|
||||
let suggestedUnitWidth = (samples, outputXYPoints) => {
|
||||
let suggestedUnitWidth = (samples: t, outputXYPoints) => {
|
||||
let suggestedXWidth = SampleSetDist_Bandwidth.nrd0(samples)
|
||||
xWidthToUnitWidth(samples, outputXYPoints, suggestedXWidth)
|
||||
}
|
||||
|
||||
let kde = (~samples, ~outputXYPoints, width) =>
|
||||
KDE.normalSampling(samples, outputXYPoints, width)
|
||||
let kde = (~samples: t, ~outputXYPoints, width) =>
|
||||
KDE.normalSampling(samples->E.FloatArray.toArray, outputXYPoints, width)
|
||||
}
|
||||
}
|
||||
|
||||
let toPointSetDist = (
|
||||
~samples: Internals.T.t,
|
||||
~samples: E.FloatArray.t,
|
||||
~samplingInputs: SamplingInputs.samplingInputs,
|
||||
(),
|
||||
): Internals.Types.outputs => {
|
||||
let samples = samples->E.A.Floats.sort
|
||||
let samples = samples->E.FloatArray.sort
|
||||
|
||||
let minDiscreteToKeep = MagicNumbers.ToPointSet.minDiscreteToKeep(samples)
|
||||
let (continuousPart, discretePart) = E.A.Floats.Sorted.splitContinuousAndDiscreteForMinWeight(
|
||||
let (continuousPart, discretePart) = E.FloatArray.Sorted.splitContinuousAndDiscreteForMinWeight(
|
||||
samples,
|
||||
~minDiscreteWeight=minDiscreteToKeep,
|
||||
)
|
||||
|
||||
let length = samples->E.A.length->float_of_int
|
||||
let length = samples->E.FloatArray.length->float_of_int
|
||||
let discrete: PointSetTypes.discreteShape =
|
||||
discretePart
|
||||
->E.FloatFloatMap.fmap(r => r /. length, _)
|
||||
|
@ -79,7 +79,7 @@ let toPointSetDist = (
|
|||
->Discrete.make
|
||||
|
||||
let pdf =
|
||||
continuousPart->E.A.length > 5
|
||||
continuousPart->E.FloatArray.length > 5
|
||||
? {
|
||||
let _suggestedXWidth = SampleSetDist_Bandwidth.nrd0(continuousPart)
|
||||
// todo: This does some recalculating from the last step.
|
||||
|
|
|
@ -28,7 +28,7 @@ module Internal = {
|
|||
let fromFn = (aLambdaValue, environment: Reducer_T.environment, reducer: Reducer_T.reducerFn) => {
|
||||
let sampleCount = environment.sampleCount
|
||||
let fn = r => doLambdaCall(aLambdaValue, [IEvNumber(r)], environment, reducer)
|
||||
Belt_Array.makeBy(sampleCount, r => fn(r->Js.Int.toFloat))->E.A.R.firstErrorOrOpen
|
||||
SampleSetDist.makeBy(sampleCount, r => fn(r->Js.Int.toFloat))
|
||||
}
|
||||
|
||||
let map1 = (sampleSetDist: t, aLambdaValue, environment: Reducer_T.environment, reducer) => {
|
||||
|
@ -116,7 +116,9 @@ let libaryBase = [
|
|||
~run=(inputs, _, _) => {
|
||||
let sampleSet =
|
||||
inputs->Prepare.ToTypedArray.numbers
|
||||
|> E.R2.bind(r => SampleSetDist.make(r)->E.R2.errMap(_ => "AM I HERE? WHYERE AMI??"))
|
||||
|> E.R2.bind(r =>
|
||||
SampleSetDist.makeFromJsArray(r)->E.R2.errMap(_ => "AM I HERE? WHYERE AMI??")
|
||||
)
|
||||
sampleSet
|
||||
->E.R2.fmap(Wrappers.sampleSet)
|
||||
->E.R2.fmap(Wrappers.evDistribution)
|
||||
|
@ -140,7 +142,7 @@ let libaryBase = [
|
|||
~run=(inputs, _, _) =>
|
||||
switch inputs {
|
||||
| [IEvDistribution(SampleSet(dist))] =>
|
||||
dist->E.A2.fmap(Wrappers.evNumber)->Wrappers.evArray->Ok
|
||||
dist->SampleSetDist.toJsArray->E.A2.fmap(Wrappers.evNumber)->Wrappers.evArray->Ok
|
||||
| _ => Error(impossibleError)
|
||||
},
|
||||
(),
|
||||
|
@ -163,7 +165,7 @@ let libaryBase = [
|
|||
| [IEvLambda(lambda)] =>
|
||||
switch Internal.fromFn(lambda, environment, reducer) {
|
||||
| Ok(r) => Ok(r->Wrappers.sampleSet->Wrappers.evDistribution)
|
||||
| Error(e) => e->Reducer_ErrorValue.REOperationError->Error
|
||||
| Error(e) => e->SampleSetError->Reducer_ErrorValue.REDistributionError->Error
|
||||
}
|
||||
| _ => Error(impossibleError)
|
||||
},
|
||||
|
|
|
@ -35,7 +35,7 @@ module ToPointSet = {
|
|||
it would strike a reasonable trade-off, but I’m really unsure what’s
|
||||
best right now.
|
||||
*/
|
||||
let minDiscreteToKeep = samples => max(20, E.A.length(samples) / 50)
|
||||
let minDiscreteToKeep = samples => max(20, E.FloatArray.length(samples) / 50)
|
||||
}
|
||||
|
||||
module SampleSetBandwidth = {
|
||||
|
|
|
@ -11,6 +11,7 @@ module B = E_B
|
|||
module Dict = E_Dict
|
||||
module F = E_F
|
||||
module Float = E_Float
|
||||
module FloatArray = E_FloatArray
|
||||
module FloatFloatMap = E_FloatFloatMap
|
||||
module I = E_I
|
||||
module Int = E_Int
|
||||
|
|
|
@ -303,55 +303,6 @@ module Floats = {
|
|||
|
||||
let makeIncrementalDown = (a, b) =>
|
||||
Array.make(a - b + 1, a) |> Array.mapi((i, c) => c - i) |> Belt.Array.map(_, float_of_int)
|
||||
|
||||
/*
|
||||
This function goes through a sorted array and divides it into two different clusters:
|
||||
continuous samples and discrete samples. The discrete samples are stored in a mutable map.
|
||||
Samples are thought to be discrete if they have at least `minDiscreteWeight` duplicates.
|
||||
|
||||
If the min discrete weight is 4, that would mean that at least four elements needed from a specific
|
||||
value for that to be kept as discrete. This is important because in some cases, we can expect that
|
||||
some common elements will be generated by regular operations. The final continuous array will be sorted.
|
||||
|
||||
This function is performance-critical, don't change it significantly without benchmarking
|
||||
SampleSet->PointSet conversion performance.
|
||||
*/
|
||||
let splitContinuousAndDiscreteForMinWeight = (
|
||||
sortedArray: array<float>,
|
||||
~minDiscreteWeight: int,
|
||||
) => {
|
||||
let continuous: array<float> = []
|
||||
let discrete = FloatFloatMap.empty()
|
||||
|
||||
let addData = (count: int, value: float): unit => {
|
||||
if count >= minDiscreteWeight {
|
||||
FloatFloatMap.add(value, count->Belt.Int.toFloat, discrete)
|
||||
} else {
|
||||
for _ in 1 to count {
|
||||
continuous->Js.Array2.push(value)->ignore
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let (finalCount, finalValue) = sortedArray->Belt.Array.reduce(
|
||||
// initial prev value doesn't matter; if it collides with the first element of the array, flush won't do anything
|
||||
(0, 0.),
|
||||
((count, prev), element) => {
|
||||
if element == prev {
|
||||
(count + 1, prev)
|
||||
} else {
|
||||
// new value, process previous ones
|
||||
addData(count, prev)
|
||||
(1, element)
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
// flush final values
|
||||
addData(finalCount, finalValue)
|
||||
|
||||
(continuous, discrete)
|
||||
}
|
||||
}
|
||||
}
|
||||
module Sorted = Floats.Sorted
|
||||
|
|
|
@ -0,0 +1,89 @@
|
|||
module FloatArray = Js.TypedArray2.Float64Array
|
||||
|
||||
type t = FloatArray.t
|
||||
let make = FloatArray.make
|
||||
let fromLength = FloatArray.fromLength
|
||||
let length = FloatArray.length
|
||||
let reduce = FloatArray.reduce
|
||||
let filter = FloatArray.filter
|
||||
let slice = FloatArray.slice
|
||||
// Returns a sorted copy of `t`; the sort runs in place on the copy, so the argument is not modified.
let sort = (t: t) => FloatArray.sortInPlace(FloatArray.copy(t))
|
||||
// Copies the typed array into a plain JS array<float> using a raw `Array.from` binding.
let toArray: t => array<float> = %raw(`a => Array.from(a)`)
|
||||
let map = FloatArray.map
|
||||
|
||||
let unsafe_get = (t: t, i: int) => t->FloatArray.unsafe_get(i)
|
||||
// Bounds-checked read: Some(element) when 0 <= i < length, None otherwise.
// The lower bound matters because a negative index on a typed array reads `undefined`,
// which would otherwise be wrapped in Some and treated as a float.
let get = (t: t, i: int) => i >= 0 && i < t->length ? Some(t->FloatArray.unsafe_get(i)) : None
|
||||
// Writes `v` at index `i` with no bounds check (thin wrapper over unsafe_set).
let set = (t: t, i: int, v: float) => FloatArray.unsafe_set(t, i, v)
|
||||
|
||||
let mean = t => t->toArray->Jstat.mean
|
||||
let geomean = t => t->toArray->Jstat.geomean
|
||||
let mode = t => t->toArray->Jstat.mode
|
||||
let variance = t => t->toArray->Jstat.variance
|
||||
let stdev = t => t->toArray->Jstat.stdev
|
||||
let sum = t => t->toArray->Jstat.sum
|
||||
// NOTE(review): bound directly to Jstat.product with no `toArray` conversion, unlike the
// neighbouring statistics helpers — confirm which input type callers are expected to pass.
let product = Jstat.product
|
||||
let random = Js.Math.random_int
|
||||
let min = t => t->toArray->Js.Math.minMany_float
|
||||
let max = t => t->toArray->Js.Math.maxMany_float
|
||||
let percentile = (t, b) => Jstat.percentile(t->toArray, b, false)
|
||||
|
||||
module Sorted = {
|
||||
// First element, or None for an empty array (assumes `t` is already sorted ascending).
let min = (t: t) =>
  switch length(t) {
  | n if n > 0 => Some(FloatArray.unsafe_get(t, 0))
  | _ => None
  }
|
||||
// Last element, or None for an empty array (assumes `t` is already sorted ascending).
let max = (t: t) => {
  let n = length(t)
  n > 0 ? Some(FloatArray.unsafe_get(t, n - 1)) : None
}
|
||||
// Distance between the largest and smallest element of `a`, or None when either
// lookup yields None. `~min` / `~max` default to the positional lookups defined just above.
let range = (~min=min, ~max=max, a) => {
  let lowest = min(a)
  let highest = max(a)
  switch (lowest, highest) {
  | (Some(lo), Some(hi)) => Some(hi -. lo)
  | _ => None
  }
}
|
||||
|
||||
/*
|
||||
This function goes through a sorted array and divides it into two different clusters:
|
||||
continuous samples and discrete samples. The discrete samples are stored in a mutable map.
|
||||
Samples are thought to be discrete if they have at least `minDiscreteWeight` duplicates.
|
||||
|
||||
If the min discrete weight is 4, that would mean that at least four elements needed from a specific
|
||||
value for that to be kept as discrete. This is important because in some cases, we can expect that
|
||||
some common elements will be generated by regular operations. The final continuous array will be sorted.
|
||||
|
||||
This function is performance-critical, don't change it significantly without benchmarking
|
||||
SampleSet->PointSet conversion performance.
|
||||
*/
|
||||
// Walks `sortedArray` once, grouping runs of equal values, and returns
// (continuous values as a typed array, map from discrete value to its count as a float).
let splitContinuousAndDiscreteForMinWeight = (sortedArray: t, ~minDiscreteWeight: int): (
|
||||
t,
|
||||
E_FloatFloatMap.t,
|
||||
) => {
|
||||
// TODO - preallocate as typed array, then trim
|
||||
let continuous: array<float> = []
|
||||
let discrete = E_FloatFloatMap.empty()
|
||||
|
||||
// Handles one finished run of `count` equal values: a run of at least `minDiscreteWeight`
// is recorded in `discrete`; a shorter run is pushed back onto `continuous` once per occurrence.
let addData = (count: int, value: float): unit => {
|
||||
if count >= minDiscreteWeight {
|
||||
E_FloatFloatMap.add(value, count->Belt.Int.toFloat, discrete)
|
||||
} else {
|
||||
for _ in 1 to count {
|
||||
continuous->Js.Array2.push(value)->ignore
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The accumulator is (length of the current run, value of the current run).
let (finalCount, finalValue) = reduce(
|
||||
sortedArray,
|
||||
// initial prev value doesn't matter; if it collides with the first element of the array, flush won't do anything
|
||||
(. (count, prev), element) => {
|
||||
if element == prev {
|
||||
(count + 1, prev)
|
||||
} else {
|
||||
// new value, process previous ones
|
||||
addData(count, prev)
|
||||
(1, element)
|
||||
}
|
||||
},
|
||||
(0, 0.),
|
||||
)
|
||||
|
||||
// flush final values
|
||||
addData(finalCount, finalValue)
|
||||
|
||||
// the growable JS array is converted to a typed array only here, for the return value
(continuous->make, discrete)
|
||||
}
|
||||
}
|
|
@ -78,6 +78,8 @@ module Error = {
|
|||
| NotYetImplemented => "This pathway is not yet implemented"
|
||||
| Other(t) => t
|
||||
}
|
||||
|
||||
exception OperationException(t)
|
||||
}
|
||||
|
||||
let power = (a: float, b: float): result<float, Error.t> =>
|
||||
|
|
|
@ -51,3 +51,11 @@ module Random = {
|
|||
module Math = {
|
||||
@module external factorial: float => float = "@stdlib/math/base/special/factorial"
|
||||
}
|
||||
|
||||
module Base = {
  // Raw binding to @stdlib/stats/base/variance, whose arguments are
  // (N, correction, x, stride). It is shadowed by the wrapper below.
  @module
  external variance: (int, int, Js.TypedArray2.Float64Array.t, int) => float =
    "@stdlib/stats/base/variance"

  // Variance over every element of `t`: N is the array length, correction is 0 and stride is 1.
  let variance = (t: Js.TypedArray2.Float64Array.t) => {
    let n = Js.TypedArray2.Float64Array.length(t)
    variance(n, 0, t, 1)
  }
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user