store SampleSet as typed array
This commit is contained in:
parent
d60792aa93
commit
4bd961a808
|
@ -3,11 +3,11 @@ open Expect
|
||||||
|
|
||||||
describe("Bandwidth", () => {
|
describe("Bandwidth", () => {
|
||||||
test("nrd0()", () => {
|
test("nrd0()", () => {
|
||||||
let data = [1., 4., 3., 2.]
|
let data = [1., 4., 3., 2.]->E.FloatArray.make
|
||||||
expect(SampleSetDist_Bandwidth.nrd0(data))->toEqual(0.7625801874014622)
|
expect(SampleSetDist_Bandwidth.nrd0(data))->toEqual(0.7625801874014622)
|
||||||
})
|
})
|
||||||
test("nrd()", () => {
|
test("nrd()", () => {
|
||||||
let data = [1., 4., 3., 2.]
|
let data = [1., 4., 3., 2.]->E.FloatArray.make
|
||||||
expect(SampleSetDist_Bandwidth.nrd(data))->toEqual(0.8981499984950554)
|
expect(SampleSetDist_Bandwidth.nrd(data))->toEqual(0.8981499984950554)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
|
@ -2,9 +2,10 @@ open Jest
|
||||||
open TestHelpers
|
open TestHelpers
|
||||||
|
|
||||||
let prepareInputs = (ar, minWeight) =>
|
let prepareInputs = (ar, minWeight) =>
|
||||||
E.A.Floats.Sorted.splitContinuousAndDiscreteForMinWeight(ar, ~minDiscreteWeight=minWeight) |> (
|
E.FloatArray.Sorted.splitContinuousAndDiscreteForMinWeight(
|
||||||
((c, disc)) => (c, disc |> E.FloatFloatMap.toArray)
|
ar->E.FloatArray.make,
|
||||||
)
|
~minDiscreteWeight=minWeight,
|
||||||
|
) |> (((c, disc)) => (c->E.FloatArray.toArray, disc->E.FloatFloatMap.toArray))
|
||||||
|
|
||||||
describe("Continuous and discrete splits", () => {
|
describe("Continuous and discrete splits", () => {
|
||||||
makeTest(
|
makeTest(
|
||||||
|
@ -37,18 +38,10 @@ describe("Continuous and discrete splits", () => {
|
||||||
E.A.concatMany([sorted, sorted, sorted, sorted]) |> Belt.SortArray.stableSortBy(_, compare)
|
E.A.concatMany([sorted, sorted, sorted, sorted]) |> Belt.SortArray.stableSortBy(_, compare)
|
||||||
}
|
}
|
||||||
|
|
||||||
let (_, discrete1) = E.A.Floats.Sorted.splitContinuousAndDiscreteForMinWeight(
|
let (_, toArr1) = prepareInputs(makeDuplicatedArray(10), 2)
|
||||||
makeDuplicatedArray(10),
|
makeTest("splitMedium at count=10", toArr1->Belt.Array.length, 10)
|
||||||
~minDiscreteWeight=2,
|
|
||||||
)
|
|
||||||
let toArr1 = discrete1 |> E.FloatFloatMap.toArray
|
|
||||||
makeTest("splitMedium at count=10", toArr1 |> Belt.Array.length, 10)
|
|
||||||
|
|
||||||
let (_c, discrete2) = E.A.Floats.Sorted.splitContinuousAndDiscreteForMinWeight(
|
let (_, toArr2) = prepareInputs(makeDuplicatedArray(500), 2)
|
||||||
makeDuplicatedArray(500),
|
makeTest("splitMedium at count=500", toArr2->Belt.Array.length, 500)
|
||||||
~minDiscreteWeight=2,
|
|
||||||
)
|
|
||||||
let toArr2 = discrete2 |> E.FloatFloatMap.toArray
|
|
||||||
makeTest("splitMedium at count=500", toArr2 |> Belt.Array.length, 500)
|
|
||||||
// makeTest("foo", [] |> Belt.Array.length, 500)
|
// makeTest("foo", [] |> Belt.Array.length, 500)
|
||||||
})
|
})
|
||||||
|
|
|
@ -90,9 +90,9 @@ export class SqPointSetDistribution extends SqAbstractDistribution {
|
||||||
export class SqSampleSetDistribution extends SqAbstractDistribution {
|
export class SqSampleSetDistribution extends SqAbstractDistribution {
|
||||||
tag = Tag.SampleSet as const;
|
tag = Tag.SampleSet as const;
|
||||||
|
|
||||||
value(): number[] {
|
// value(): number[] {
|
||||||
return this.valueMethod(RSDistribution.getSampleSet);
|
// return this.valueMethod(RSDistribution.getSampleSet);
|
||||||
}
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
export class SqSymbolicDistribution extends SqAbstractDistribution {
|
export class SqSymbolicDistribution extends SqAbstractDistribution {
|
||||||
|
|
|
@ -221,7 +221,7 @@ let rec run = (~env: env, functionCallInfo: functionCallInfo): outputType => {
|
||||||
->OutputLocal.fromResult
|
->OutputLocal.fromResult
|
||||||
| FromSamples(xs) =>
|
| FromSamples(xs) =>
|
||||||
xs
|
xs
|
||||||
->SampleSetDist.make
|
->SampleSetDist.makeFromJsArray
|
||||||
->E.R2.errMap(x => DistributionTypes.SampleSetError(x))
|
->E.R2.errMap(x => DistributionTypes.SampleSetError(x))
|
||||||
->E.R2.fmap(x => x->DistributionTypes.SampleSet->Dist)
|
->E.R2.fmap(x => x->DistributionTypes.SampleSet->Dist)
|
||||||
->OutputLocal.fromResult
|
->OutputLocal.fromResult
|
||||||
|
|
|
@ -98,7 +98,7 @@ module Constructors: {
|
||||||
@genType
|
@genType
|
||||||
let toSampleSet: (~env: GenericDist.env, genericDist, int) => result<genericDist, error>
|
let toSampleSet: (~env: GenericDist.env, genericDist, int) => result<genericDist, error>
|
||||||
@genType
|
@genType
|
||||||
let fromSamples: (~env: GenericDist.env, SampleSetDist.t) => result<genericDist, error>
|
let fromSamples: (~env: GenericDist.env, array<float>) => result<genericDist, error>
|
||||||
@genType
|
@genType
|
||||||
let truncate: (
|
let truncate: (
|
||||||
~env: GenericDist.env,
|
~env: GenericDist.env,
|
||||||
|
|
|
@ -38,6 +38,7 @@ module Error = {
|
||||||
| SampleSetError(TooFewSamples) => "Too Few Samples"
|
| SampleSetError(TooFewSamples) => "Too Few Samples"
|
||||||
| SampleSetError(NonNumericInput(err)) => `Found a non-number in input: ${err}`
|
| SampleSetError(NonNumericInput(err)) => `Found a non-number in input: ${err}`
|
||||||
| SampleSetError(OperationError(err)) => Operation.Error.toString(err)
|
| SampleSetError(OperationError(err)) => Operation.Error.toString(err)
|
||||||
|
| SampleSetError(UnequalSizes) => "Expected sample sets of equal size"
|
||||||
| OperationError(err) => Operation.Error.toString(err)
|
| OperationError(err) => Operation.Error.toString(err)
|
||||||
| PointSetConversionError(err) => SampleSetDist.pointsetConversionErrorToString(err)
|
| PointSetConversionError(err) => SampleSetDist.pointsetConversionErrorToString(err)
|
||||||
| SparklineError(err) => PointSetTypes.sparklineErrorToString(err)
|
| SparklineError(err) => PointSetTypes.sparklineErrorToString(err)
|
||||||
|
|
|
@ -40,7 +40,9 @@ let sampleN = (t: t, n) =>
|
||||||
let sample = (t: t) => sampleN(t, 1)->E.A.first |> E.O.toExn("Should not have happened")
|
let sample = (t: t) => sampleN(t, 1)->E.A.first |> E.O.toExn("Should not have happened")
|
||||||
|
|
||||||
let toSampleSetDist = (t: t, n) =>
|
let toSampleSetDist = (t: t, n) =>
|
||||||
SampleSetDist.make(sampleN(t, n))->E.R2.errMap(DistributionTypes.Error.sampleErrorToDistErr)
|
SampleSetDist.makeFromJsArray(sampleN(t, n))->E.R2.errMap(
|
||||||
|
DistributionTypes.Error.sampleErrorToDistErr,
|
||||||
|
)
|
||||||
|
|
||||||
let fromFloat = (f: float): t => Symbolic(SymbolicDist.Float.make(f))
|
let fromFloat = (f: float): t => Symbolic(SymbolicDist.Float.make(f))
|
||||||
|
|
||||||
|
|
|
@ -2,14 +2,10 @@
|
||||||
module Error = {
|
module Error = {
|
||||||
@genType
|
@genType
|
||||||
type sampleSetError =
|
type sampleSetError =
|
||||||
TooFewSamples | NonNumericInput(string) | OperationError(Operation.operationError)
|
| TooFewSamples
|
||||||
|
| NonNumericInput(string)
|
||||||
let sampleSetErrorToString = (err: sampleSetError): string =>
|
| OperationError(Operation.operationError)
|
||||||
switch err {
|
| UnequalSizes
|
||||||
| TooFewSamples => "Too few samples when constructing sample set"
|
|
||||||
| NonNumericInput(err) => `Found a non-number in input: ${err}`
|
|
||||||
| OperationError(err) => Operation.Error.toString(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
@genType
|
@genType
|
||||||
type pointsetConversionError = TooFewSamplesForConversionToPointSet
|
type pointsetConversionError = TooFewSamplesForConversionToPointSet
|
||||||
|
@ -26,6 +22,7 @@ module Error = {
|
||||||
| TooFewSamples => "Too few samples when constructing sample set"
|
| TooFewSamples => "Too few samples when constructing sample set"
|
||||||
| NonNumericInput(err) => `Found a non-number in input: ${err}`
|
| NonNumericInput(err) => `Found a non-number in input: ${err}`
|
||||||
| OperationError(err) => Operation.Error.toString(err)
|
| OperationError(err) => Operation.Error.toString(err)
|
||||||
|
| UnequalSizes => "Expected sample sets of equal size"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -38,26 +35,29 @@ this constructor.
|
||||||
https://stackoverflow.com/questions/66909578/how-to-make-a-type-constructor-private-in-rescript-except-in-current-module
|
https://stackoverflow.com/questions/66909578/how-to-make-a-type-constructor-private-in-rescript-except-in-current-module
|
||||||
*/
|
*/
|
||||||
module T: {
|
module T: {
|
||||||
//This really should be hidden (remove the array<float>). The reason it isn't is to act as an escape hatch in JS__Test.ts.
|
@genType.opaque
|
||||||
//When we get a good functional library in TS, we could refactor that out.
|
type t
|
||||||
@genType
|
let makeFromTypedArray: E.FloatArray.t => result<t, sampleSetError>
|
||||||
type t = array<float>
|
let makeFromJsArray: array<float> => result<t, sampleSetError>
|
||||||
let make: array<float> => result<t, sampleSetError>
|
let toJsArray: t => array<float>
|
||||||
let get: t => array<float>
|
let get: t => E.FloatArray.t
|
||||||
} = {
|
} = {
|
||||||
type t = array<float>
|
type t = E.FloatArray.t
|
||||||
let make = (a: array<float>) =>
|
let makeFromTypedArray = (a: E.FloatArray.t): result<t, sampleSetError> =>
|
||||||
if E.A.length(a) > 5 {
|
if E.FloatArray.length(a) > 5 {
|
||||||
Ok(a)
|
Ok(a)
|
||||||
} else {
|
} else {
|
||||||
Error(TooFewSamples)
|
Error(TooFewSamples)
|
||||||
}
|
}
|
||||||
let get = (a: t) => a
|
let makeFromJsArray = (a: array<float>): result<t, sampleSetError> =>
|
||||||
|
E.FloatArray.make(a)->makeFromTypedArray
|
||||||
|
let toJsArray = (t: t) => t->E.FloatArray.toArray
|
||||||
|
let get = (t: t) => t
|
||||||
}
|
}
|
||||||
|
|
||||||
include T
|
include T
|
||||||
|
|
||||||
let length = (t: t) => get(t)->E.A.length
|
let length = (t: T.t) => T.get(t)->E.FloatArray.length
|
||||||
|
|
||||||
/*
|
/*
|
||||||
TODO: Refactor to get a more precise estimate. Also, this code is just fairly messy, could use
|
TODO: Refactor to get a more precise estimate. Also, this code is just fairly messy, could use
|
||||||
|
@ -68,15 +68,15 @@ let toPointSetDist = (~samples: t, ~samplingInputs: SamplingInputs.samplingInput
|
||||||
pointsetConversionError,
|
pointsetConversionError,
|
||||||
> =>
|
> =>
|
||||||
SampleSetDist_ToPointSet.toPointSetDist(
|
SampleSetDist_ToPointSet.toPointSetDist(
|
||||||
~samples=get(samples),
|
~samples=T.get(samples),
|
||||||
~samplingInputs,
|
~samplingInputs,
|
||||||
(),
|
(),
|
||||||
).pointSetDist->E.O2.toResult(TooFewSamplesForConversionToPointSet)
|
).pointSetDist->E.O2.toResult(TooFewSamplesForConversionToPointSet)
|
||||||
|
|
||||||
//Randomly get one sample from the distribution
|
//Randomly get one sample from the distribution
|
||||||
let sample = (t: t): float => {
|
let sample = (t: t): float => {
|
||||||
let i = E.Int.random(~min=0, ~max=E.A.length(get(t)) - 1)
|
let i = E.Int.random(~min=0, ~max=E.FloatArray.length(get(t)) - 1)
|
||||||
E.A.unsafe_get(get(t), i)
|
E.FloatArray.unsafe_get(get(t), i)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -87,52 +87,144 @@ The former helps in cases where multiple distributions are correlated.
|
||||||
However, if n > length(t), then there's no clear right answer, so we just randomly
|
However, if n > length(t), then there's no clear right answer, so we just randomly
|
||||||
sample everything.
|
sample everything.
|
||||||
*/
|
*/
|
||||||
let sampleN = (t: t, n) => {
|
let sampleN = (t: t, n): array<float> => {
|
||||||
if n <= E.A.length(get(t)) {
|
if n <= length(t) {
|
||||||
E.A.slice(get(t), ~offset=0, ~len=n)
|
E.FloatArray.slice(~start=0, ~end_=n, get(t))->E.FloatArray.toArray
|
||||||
} else {
|
} else {
|
||||||
Belt.Array.makeBy(n, _ => sample(t))
|
Belt.Array.makeBy(n, _ => sample(t))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let _fromSampleResultArray = (samples: array<result<float, QuriSquiggleLang.Operation.Error.t>>) =>
|
|
||||||
E.A.R.firstErrorOrOpen(samples)->E.R2.errMap(Error.fromOperationError) |> E.R2.bind(make)
|
|
||||||
|
|
||||||
let samplesMap = (~fn: float => result<float, Operation.Error.t>, t: t): result<
|
let samplesMap = (~fn: float => result<float, Operation.Error.t>, t: t): result<
|
||||||
t,
|
t,
|
||||||
sampleSetError,
|
sampleSetError,
|
||||||
> => T.get(t)->E.A2.fmap(fn)->_fromSampleResultArray
|
> => {
|
||||||
|
try {
|
||||||
|
T.get(t)
|
||||||
|
->E.FloatArray.map((. v) => {
|
||||||
|
switch fn(v) {
|
||||||
|
| Ok(res) => res
|
||||||
|
| Error(err) => err->Operation.Error.OperationException->raise
|
||||||
|
}
|
||||||
|
})
|
||||||
|
->T.makeFromTypedArray
|
||||||
|
} catch {
|
||||||
|
| Operation.Error.OperationException(err) => Error.fromOperationError(err)->Error
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//TODO: Figure out what to do if distributions are different lengths. ``zip`` is kind of inelegant for this.
|
|
||||||
let map2 = (~fn: (float, float) => result<float, Operation.Error.t>, ~t1: t, ~t2: t): result<
|
let map2 = (~fn: (float, float) => result<float, Operation.Error.t>, ~t1: t, ~t2: t): result<
|
||||||
t,
|
t,
|
||||||
sampleSetError,
|
sampleSetError,
|
||||||
> => E.A.zip(get(t1), get(t2))->E.A2.fmap(E.Tuple2.toFnCall(fn))->_fromSampleResultArray
|
> => {
|
||||||
|
let length1 = t1->length
|
||||||
|
let length2 = t2->length
|
||||||
|
if length1 == length2 {
|
||||||
|
try {
|
||||||
|
let res = E.FloatArray.fromLength(length1)
|
||||||
|
for i in 0 to length1 - 1 {
|
||||||
|
let v = switch fn(
|
||||||
|
get(t1)->E.FloatArray.unsafe_get(i),
|
||||||
|
get(t2)->E.FloatArray.unsafe_get(i),
|
||||||
|
) {
|
||||||
|
| Ok(fnResult) => fnResult
|
||||||
|
| Error(err) => err->Operation.Error.OperationException->raise
|
||||||
|
}
|
||||||
|
res->E.FloatArray.set(i, v)
|
||||||
|
}
|
||||||
|
res->T.makeFromTypedArray
|
||||||
|
} catch {
|
||||||
|
| Operation.Error.OperationException(err) => Error.fromOperationError(err)->Error
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Error.UnequalSizes->Error
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let map3 = (
|
let map3 = (
|
||||||
~fn: (float, float, float) => result<float, Operation.Error.t>,
|
~fn: (float, float, float) => result<float, Operation.Error.t>,
|
||||||
~t1: t,
|
~t1: t,
|
||||||
~t2: t,
|
~t2: t,
|
||||||
~t3: t,
|
~t3: t,
|
||||||
): result<t, sampleSetError> =>
|
): result<t, sampleSetError> => {
|
||||||
E.A.zip3(get(t1), get(t2), get(t3))->E.A2.fmap(E.Tuple3.toFnCall(fn))->_fromSampleResultArray
|
let length1 = t1->length
|
||||||
|
let length2 = t2->length
|
||||||
|
let length3 = t3->length
|
||||||
|
if length1 == length2 && length2 == length3 {
|
||||||
|
try {
|
||||||
|
let res = E.FloatArray.fromLength(length1)
|
||||||
|
for i in 0 to length1 - 1 {
|
||||||
|
let v = switch fn(
|
||||||
|
get(t1)->E.FloatArray.unsafe_get(i),
|
||||||
|
get(t2)->E.FloatArray.unsafe_get(i),
|
||||||
|
get(t3)->E.FloatArray.unsafe_get(i),
|
||||||
|
) {
|
||||||
|
| Ok(fnResult) => fnResult
|
||||||
|
| Error(err) => err->Operation.Error.OperationException->raise
|
||||||
|
}
|
||||||
|
res->E.FloatArray.set(i, v)
|
||||||
|
}
|
||||||
|
res->T.makeFromTypedArray
|
||||||
|
} catch {
|
||||||
|
| Operation.Error.OperationException(err) => Error.fromOperationError(err)->Error
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Error.UnequalSizes->Error
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let mapN = (~fn: array<float> => result<float, Operation.Error.t>, ~t1: array<t>): result<
|
let mapN = (~fn: array<float> => result<float, Operation.Error.t>, ~t1: array<t>): result<
|
||||||
t,
|
t,
|
||||||
sampleSetError,
|
sampleSetError,
|
||||||
> => E.A.transpose(E.A.fmap(get, t1))->E.A2.fmap(fn)->_fromSampleResultArray
|
> => {
|
||||||
|
let lengths = t1->E.A2.fmap(t => t->length)
|
||||||
|
let l0 = lengths[0]
|
||||||
|
if lengths->E.A.all(l => l == l0, _) {
|
||||||
|
try {
|
||||||
|
let res = E.FloatArray.fromLength(l0)
|
||||||
|
for i in 0 to l0 - 1 {
|
||||||
|
let v = switch fn(t1->E.A2.fmap(t => get(t)->E.FloatArray.unsafe_get(i))) {
|
||||||
|
| Ok(fnResult) => fnResult
|
||||||
|
| Error(err) => err->Operation.Error.OperationException->raise
|
||||||
|
}
|
||||||
|
res->E.FloatArray.set(i, v)
|
||||||
|
}
|
||||||
|
res->T.makeFromTypedArray
|
||||||
|
} catch {
|
||||||
|
| Operation.Error.OperationException(err) => Error.fromOperationError(err)->Error
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Error.UnequalSizes->Error
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let mean = t => T.get(t)->E.A.Floats.mean
|
let makeBy = (n: int, fn: int => result<float, Operation.Error.t>): result<t, sampleSetError> => {
|
||||||
let geomean = t => T.get(t)->E.A.Floats.geomean
|
let res = E.FloatArray.fromLength(n)
|
||||||
let mode = t => T.get(t)->E.A.Floats.mode
|
try {
|
||||||
let sum = t => T.get(t)->E.A.Floats.sum
|
for i in 0 to n - 1 {
|
||||||
let min = t => T.get(t)->E.A.Floats.min
|
let fnResult = fn(i)
|
||||||
let max = t => T.get(t)->E.A.Floats.max
|
switch fnResult {
|
||||||
let stdev = t => T.get(t)->E.A.Floats.stdev
|
| Ok(v) => res->E.FloatArray.set(i, v)
|
||||||
let variance = t => T.get(t)->E.A.Floats.variance
|
| Error(err) => err->Operation.Error.OperationException->raise
|
||||||
let percentile = (t, f) => T.get(t)->E.A.Floats.percentile(f)
|
}
|
||||||
|
}
|
||||||
|
res->T.makeFromTypedArray
|
||||||
|
} catch {
|
||||||
|
| Operation.Error.OperationException(err) => Error.fromOperationError(err)->Error
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mean = t => T.get(t)->E.FloatArray.mean
|
||||||
|
let geomean = t => T.get(t)->E.FloatArray.geomean
|
||||||
|
let mode = t => T.get(t)->E.FloatArray.mode
|
||||||
|
let sum = t => T.get(t)->E.FloatArray.sum
|
||||||
|
let min = t => T.get(t)->E.FloatArray.min
|
||||||
|
let max = t => T.get(t)->E.FloatArray.max
|
||||||
|
let stdev = t => T.get(t)->E.FloatArray.stdev
|
||||||
|
let variance = t => T.get(t)->E.FloatArray.variance
|
||||||
|
let percentile = (t, f) => T.get(t)->E.FloatArray.percentile(f)
|
||||||
let cdf = (t: t, f: float) => {
|
let cdf = (t: t, f: float) => {
|
||||||
let countBelowF = t->E.A.reduce(0, (acc, x) => acc + (x <= f ? 1 : 0))
|
let countBelowF = T.get(t)->E.FloatArray.reduce((. acc, x) => acc + (x <= f ? 1 : 0), 0)
|
||||||
countBelowF->Js.Int.toFloat /. t->length->Js.Int.toFloat
|
countBelowF->Js.Int.toFloat /. t->length->Js.Int.toFloat
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -149,14 +241,14 @@ let mixture = (values: array<(t, float)>, intendedLength: int) => {
|
||||||
discreteSamples
|
discreteSamples
|
||||||
->Belt.Array.mapWithIndex((index, distIndexToChoose) => {
|
->Belt.Array.mapWithIndex((index, distIndexToChoose) => {
|
||||||
let chosenDist = E.A.get(dists, E.Float.toInt(distIndexToChoose))
|
let chosenDist = E.A.get(dists, E.Float.toInt(distIndexToChoose))
|
||||||
chosenDist->E.O.bind(E.A.get(_, index))
|
chosenDist->E.O.bind(E.FloatArray.get(_, index))
|
||||||
})
|
})
|
||||||
->E.A.O.openIfAllSome
|
->E.A.O.openIfAllSome
|
||||||
samples->E.O2.toExn("Mixture unreachable error")->T.make
|
samples->E.O2.toExn("Mixture unreachable error")->T.makeFromJsArray
|
||||||
}
|
}
|
||||||
|
|
||||||
let truncateLeft = (t, f) => T.get(t)->E.A2.filter(x => x >= f)->T.make
|
let truncateLeft = (t, f) => T.get(t)->E.FloatArray.filter((. x) => x >= f)->T.makeFromTypedArray
|
||||||
let truncateRight = (t, f) => T.get(t)->E.A2.filter(x => x <= f)->T.make
|
let truncateRight = (t, f) => T.get(t)->E.FloatArray.filter((. x) => x <= f)->T.makeFromTypedArray
|
||||||
|
|
||||||
let truncate = (t, ~leftCutoff: option<float>, ~rightCutoff: option<float>) => {
|
let truncate = (t, ~leftCutoff: option<float>, ~rightCutoff: option<float>) => {
|
||||||
let withTruncatedLeft = t => leftCutoff |> E.O.dimap(left => truncateLeft(t, left), _ => Ok(t))
|
let withTruncatedLeft = t => leftCutoff |> E.O.dimap(left => truncateLeft(t, left), _ => Ok(t))
|
||||||
|
|
|
@ -2,16 +2,19 @@
|
||||||
let {iqr_percentile, nrd0_lo_denominator, one, nrd0_coef, nrd_coef, nrd_fractionalPower} = module(
|
let {iqr_percentile, nrd0_lo_denominator, one, nrd0_coef, nrd_coef, nrd_fractionalPower} = module(
|
||||||
MagicNumbers.SampleSetBandwidth
|
MagicNumbers.SampleSetBandwidth
|
||||||
)
|
)
|
||||||
let len = x => E.A.length(x) |> float_of_int
|
let len = x => E.FloatArray.length(x)->float_of_int
|
||||||
|
|
||||||
let iqr = x =>
|
let iqr = x => {
|
||||||
Jstat.percentile(x, iqr_percentile, true) -. Jstat.percentile(x, 1.0 -. iqr_percentile, true)
|
let xArr = x->E.FloatArray.toArray
|
||||||
|
Jstat.percentile(xArr, iqr_percentile, true) -.
|
||||||
|
Jstat.percentile(xArr, 1.0 -. iqr_percentile, true)
|
||||||
|
}
|
||||||
|
|
||||||
// Silverman, B. W. (1986) Density Estimation. London: Chapman and Hall.
|
// Silverman, B. W. (1986) Density Estimation. London: Chapman and Hall.
|
||||||
let nrd0 = x => {
|
let nrd0 = x => {
|
||||||
let hi = Js_math.sqrt(Jstat.variance(x))
|
let hi = Js_math.sqrt(Stdlib.Base.variance(x))
|
||||||
let lo = Js_math.minMany_float([hi, iqr(x) /. nrd0_lo_denominator])
|
let lo = Js_math.minMany_float([hi, iqr(x) /. nrd0_lo_denominator])
|
||||||
let e = Js_math.abs_float(x[1])
|
let e = Js_math.abs_float(x->E.FloatArray.unsafe_get(1))
|
||||||
let lo' = switch (lo, hi, e) {
|
let lo' = switch (lo, hi, e) {
|
||||||
| (lo, _, _) if !Js.Float.isNaN(lo) => lo
|
| (lo, _, _) if !Js.Float.isNaN(lo) => lo
|
||||||
| (_, hi, _) if !Js.Float.isNaN(hi) => hi
|
| (_, hi, _) if !Js.Float.isNaN(hi) => hi
|
||||||
|
@ -25,6 +28,6 @@ let nrd0 = x => {
|
||||||
let nrd = x => {
|
let nrd = x => {
|
||||||
let h = iqr(x) /. nrd0_lo_denominator
|
let h = iqr(x) /. nrd0_lo_denominator
|
||||||
nrd_coef *.
|
nrd_coef *.
|
||||||
Js.Math.min_float(Js.Math.sqrt(Jstat.variance(x)), h) *.
|
Js.Math.min_float(Js.Math.sqrt(Stdlib.Base.variance(x)), h) *.
|
||||||
Js.Math.pow_float(~base=len(x), ~exp=nrd_fractionalPower)
|
Js.Math.pow_float(~base=len(x), ~exp=nrd_fractionalPower)
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,40 +37,40 @@ module Internals = {
|
||||||
}
|
}
|
||||||
|
|
||||||
module T = {
|
module T = {
|
||||||
type t = array<float>
|
type t = E.FloatArray.t
|
||||||
|
|
||||||
let xWidthToUnitWidth = (samples, outputXYPoints, xWidth) => {
|
let xWidthToUnitWidth = (samples: t, outputXYPoints, xWidth) => {
|
||||||
let xyPointRange = E.A.Sorted.range(samples)->E.O2.default(0.0)
|
let xyPointRange = E.FloatArray.Sorted.range(samples)->E.O2.default(0.0)
|
||||||
let xyPointWidth = xyPointRange /. float_of_int(outputXYPoints)
|
let xyPointWidth = xyPointRange /. float_of_int(outputXYPoints)
|
||||||
xWidth /. xyPointWidth
|
xWidth /. xyPointWidth
|
||||||
}
|
}
|
||||||
|
|
||||||
let formatUnitWidth = w => Jstat.max([w, 1.0])->int_of_float
|
let formatUnitWidth = w => Jstat.max([w, 1.0])->int_of_float
|
||||||
|
|
||||||
let suggestedUnitWidth = (samples, outputXYPoints) => {
|
let suggestedUnitWidth = (samples: t, outputXYPoints) => {
|
||||||
let suggestedXWidth = SampleSetDist_Bandwidth.nrd0(samples)
|
let suggestedXWidth = SampleSetDist_Bandwidth.nrd0(samples)
|
||||||
xWidthToUnitWidth(samples, outputXYPoints, suggestedXWidth)
|
xWidthToUnitWidth(samples, outputXYPoints, suggestedXWidth)
|
||||||
}
|
}
|
||||||
|
|
||||||
let kde = (~samples, ~outputXYPoints, width) =>
|
let kde = (~samples: t, ~outputXYPoints, width) =>
|
||||||
KDE.normalSampling(samples, outputXYPoints, width)
|
KDE.normalSampling(samples->E.FloatArray.toArray, outputXYPoints, width)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let toPointSetDist = (
|
let toPointSetDist = (
|
||||||
~samples: Internals.T.t,
|
~samples: E.FloatArray.t,
|
||||||
~samplingInputs: SamplingInputs.samplingInputs,
|
~samplingInputs: SamplingInputs.samplingInputs,
|
||||||
(),
|
(),
|
||||||
): Internals.Types.outputs => {
|
): Internals.Types.outputs => {
|
||||||
let samples = samples->E.A.Floats.sort
|
let samples = samples->E.FloatArray.sort
|
||||||
|
|
||||||
let minDiscreteToKeep = MagicNumbers.ToPointSet.minDiscreteToKeep(samples)
|
let minDiscreteToKeep = MagicNumbers.ToPointSet.minDiscreteToKeep(samples)
|
||||||
let (continuousPart, discretePart) = E.A.Floats.Sorted.splitContinuousAndDiscreteForMinWeight(
|
let (continuousPart, discretePart) = E.FloatArray.Sorted.splitContinuousAndDiscreteForMinWeight(
|
||||||
samples,
|
samples,
|
||||||
~minDiscreteWeight=minDiscreteToKeep,
|
~minDiscreteWeight=minDiscreteToKeep,
|
||||||
)
|
)
|
||||||
|
|
||||||
let length = samples->E.A.length->float_of_int
|
let length = samples->E.FloatArray.length->float_of_int
|
||||||
let discrete: PointSetTypes.discreteShape =
|
let discrete: PointSetTypes.discreteShape =
|
||||||
discretePart
|
discretePart
|
||||||
->E.FloatFloatMap.fmap(r => r /. length, _)
|
->E.FloatFloatMap.fmap(r => r /. length, _)
|
||||||
|
@ -79,7 +79,7 @@ let toPointSetDist = (
|
||||||
->Discrete.make
|
->Discrete.make
|
||||||
|
|
||||||
let pdf =
|
let pdf =
|
||||||
continuousPart->E.A.length > 5
|
continuousPart->E.FloatArray.length > 5
|
||||||
? {
|
? {
|
||||||
let _suggestedXWidth = SampleSetDist_Bandwidth.nrd0(continuousPart)
|
let _suggestedXWidth = SampleSetDist_Bandwidth.nrd0(continuousPart)
|
||||||
// todo: This does some recalculating from the last step.
|
// todo: This does some recalculating from the last step.
|
||||||
|
|
|
@ -28,7 +28,7 @@ module Internal = {
|
||||||
let fromFn = (aLambdaValue, environment: Reducer_T.environment, reducer: Reducer_T.reducerFn) => {
|
let fromFn = (aLambdaValue, environment: Reducer_T.environment, reducer: Reducer_T.reducerFn) => {
|
||||||
let sampleCount = environment.sampleCount
|
let sampleCount = environment.sampleCount
|
||||||
let fn = r => doLambdaCall(aLambdaValue, [IEvNumber(r)], environment, reducer)
|
let fn = r => doLambdaCall(aLambdaValue, [IEvNumber(r)], environment, reducer)
|
||||||
Belt_Array.makeBy(sampleCount, r => fn(r->Js.Int.toFloat))->E.A.R.firstErrorOrOpen
|
SampleSetDist.makeBy(sampleCount, r => fn(r->Js.Int.toFloat))
|
||||||
}
|
}
|
||||||
|
|
||||||
let map1 = (sampleSetDist: t, aLambdaValue, environment: Reducer_T.environment, reducer) => {
|
let map1 = (sampleSetDist: t, aLambdaValue, environment: Reducer_T.environment, reducer) => {
|
||||||
|
@ -116,7 +116,9 @@ let libaryBase = [
|
||||||
~run=(inputs, _, _) => {
|
~run=(inputs, _, _) => {
|
||||||
let sampleSet =
|
let sampleSet =
|
||||||
inputs->Prepare.ToTypedArray.numbers
|
inputs->Prepare.ToTypedArray.numbers
|
||||||
|> E.R2.bind(r => SampleSetDist.make(r)->E.R2.errMap(_ => "AM I HERE? WHYERE AMI??"))
|
|> E.R2.bind(r =>
|
||||||
|
SampleSetDist.makeFromJsArray(r)->E.R2.errMap(_ => "AM I HERE? WHYERE AMI??")
|
||||||
|
)
|
||||||
sampleSet
|
sampleSet
|
||||||
->E.R2.fmap(Wrappers.sampleSet)
|
->E.R2.fmap(Wrappers.sampleSet)
|
||||||
->E.R2.fmap(Wrappers.evDistribution)
|
->E.R2.fmap(Wrappers.evDistribution)
|
||||||
|
@ -140,7 +142,7 @@ let libaryBase = [
|
||||||
~run=(inputs, _, _) =>
|
~run=(inputs, _, _) =>
|
||||||
switch inputs {
|
switch inputs {
|
||||||
| [IEvDistribution(SampleSet(dist))] =>
|
| [IEvDistribution(SampleSet(dist))] =>
|
||||||
dist->E.A2.fmap(Wrappers.evNumber)->Wrappers.evArray->Ok
|
dist->SampleSetDist.toJsArray->E.A2.fmap(Wrappers.evNumber)->Wrappers.evArray->Ok
|
||||||
| _ => Error(impossibleError)
|
| _ => Error(impossibleError)
|
||||||
},
|
},
|
||||||
(),
|
(),
|
||||||
|
@ -163,7 +165,7 @@ let libaryBase = [
|
||||||
| [IEvLambda(lambda)] =>
|
| [IEvLambda(lambda)] =>
|
||||||
switch Internal.fromFn(lambda, environment, reducer) {
|
switch Internal.fromFn(lambda, environment, reducer) {
|
||||||
| Ok(r) => Ok(r->Wrappers.sampleSet->Wrappers.evDistribution)
|
| Ok(r) => Ok(r->Wrappers.sampleSet->Wrappers.evDistribution)
|
||||||
| Error(e) => e->Reducer_ErrorValue.REOperationError->Error
|
| Error(e) => e->SampleSetError->Reducer_ErrorValue.REDistributionError->Error
|
||||||
}
|
}
|
||||||
| _ => Error(impossibleError)
|
| _ => Error(impossibleError)
|
||||||
},
|
},
|
||||||
|
|
|
@ -35,7 +35,7 @@ module ToPointSet = {
|
||||||
it would strike a reasonable trade-off, but I’m really unsure what’s
|
it would strike a reasonable trade-off, but I’m really unsure what’s
|
||||||
best right now.
|
best right now.
|
||||||
*/
|
*/
|
||||||
let minDiscreteToKeep = samples => max(20, E.A.length(samples) / 50)
|
let minDiscreteToKeep = samples => max(20, E.FloatArray.length(samples) / 50)
|
||||||
}
|
}
|
||||||
|
|
||||||
module SampleSetBandwidth = {
|
module SampleSetBandwidth = {
|
||||||
|
|
|
@ -11,6 +11,7 @@ module B = E_B
|
||||||
module Dict = E_Dict
|
module Dict = E_Dict
|
||||||
module F = E_F
|
module F = E_F
|
||||||
module Float = E_Float
|
module Float = E_Float
|
||||||
|
module FloatArray = E_FloatArray
|
||||||
module FloatFloatMap = E_FloatFloatMap
|
module FloatFloatMap = E_FloatFloatMap
|
||||||
module I = E_I
|
module I = E_I
|
||||||
module Int = E_Int
|
module Int = E_Int
|
||||||
|
|
|
@ -303,55 +303,6 @@ module Floats = {
|
||||||
|
|
||||||
let makeIncrementalDown = (a, b) =>
|
let makeIncrementalDown = (a, b) =>
|
||||||
Array.make(a - b + 1, a) |> Array.mapi((i, c) => c - i) |> Belt.Array.map(_, float_of_int)
|
Array.make(a - b + 1, a) |> Array.mapi((i, c) => c - i) |> Belt.Array.map(_, float_of_int)
|
||||||
|
|
||||||
/*
|
|
||||||
This function goes through a sorted array and divides it into two different clusters:
|
|
||||||
continuous samples and discrete samples. The discrete samples are stored in a mutable map.
|
|
||||||
Samples are thought to be discrete if they have at least `minDiscreteWeight` duplicates.
|
|
||||||
|
|
||||||
If the min discrete weight is 4, that would mean that at least four elements are needed from a specific
|
|
||||||
value for that to be kept as discrete. This is important because in some cases, we can expect that
|
|
||||||
some common elements will be generated by regular operations. The final continuous array will be sorted.
|
|
||||||
|
|
||||||
This function is performance-critical, don't change it significantly without benchmarking
|
|
||||||
SampleSet->PointSet conversion performance.
|
|
||||||
*/
|
|
||||||
let splitContinuousAndDiscreteForMinWeight = (
|
|
||||||
sortedArray: array<float>,
|
|
||||||
~minDiscreteWeight: int,
|
|
||||||
) => {
|
|
||||||
let continuous: array<float> = []
|
|
||||||
let discrete = FloatFloatMap.empty()
|
|
||||||
|
|
||||||
let addData = (count: int, value: float): unit => {
|
|
||||||
if count >= minDiscreteWeight {
|
|
||||||
FloatFloatMap.add(value, count->Belt.Int.toFloat, discrete)
|
|
||||||
} else {
|
|
||||||
for _ in 1 to count {
|
|
||||||
continuous->Js.Array2.push(value)->ignore
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let (finalCount, finalValue) = sortedArray->Belt.Array.reduce(
|
|
||||||
// initial prev value doesn't matter; if it collides with the first element of the array, flush won't do anything
|
|
||||||
(0, 0.),
|
|
||||||
((count, prev), element) => {
|
|
||||||
if element == prev {
|
|
||||||
(count + 1, prev)
|
|
||||||
} else {
|
|
||||||
// new value, process previous ones
|
|
||||||
addData(count, prev)
|
|
||||||
(1, element)
|
|
||||||
}
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
// flush final values
|
|
||||||
addData(finalCount, finalValue)
|
|
||||||
|
|
||||||
(continuous, discrete)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
module Sorted = Floats.Sorted
|
module Sorted = Floats.Sorted
|
||||||
|
|
|
@ -0,0 +1,89 @@
|
||||||
|
module FloatArray = Js.TypedArray2.Float64Array
|
||||||
|
|
||||||
|
type t = FloatArray.t
|
||||||
|
let make = FloatArray.make
|
||||||
|
let fromLength = FloatArray.fromLength
|
||||||
|
let length = FloatArray.length
|
||||||
|
let reduce = FloatArray.reduce
|
||||||
|
let filter = FloatArray.filter
|
||||||
|
let slice = FloatArray.slice
|
||||||
|
let sort = (t: t) => t->FloatArray.copy->FloatArray.sortInPlace
|
||||||
|
let toArray: t => array<float> = %raw(`a => Array.from(a)`)
|
||||||
|
let map = FloatArray.map
|
||||||
|
|
||||||
|
let unsafe_get = (t: t, i: int) => t->FloatArray.unsafe_get(i)
|
||||||
|
// Bounds-checked read: returns None when `i` is outside [0, length), which now
// includes negative indices, instead of handing them to unsafe_get.
let get = (t: t, i: int) =>
  i < 0 || i >= t->length ? None : Some(t->FloatArray.unsafe_get(i))
|
||||||
|
let set = (t: t, i: int) => t->FloatArray.unsafe_set(i)
|
||||||
|
|
||||||
|
let mean = t => t->toArray->Jstat.mean
|
||||||
|
let geomean = t => t->toArray->Jstat.geomean
|
||||||
|
let mode = t => t->toArray->Jstat.mode
|
||||||
|
let variance = t => t->toArray->Jstat.variance
|
||||||
|
let stdev = t => t->toArray->Jstat.stdev
|
||||||
|
let sum = t => t->toArray->Jstat.sum
|
||||||
|
let product = Jstat.product
|
||||||
|
let random = Js.Math.random_int
|
||||||
|
let min = t => t->toArray->Js.Math.minMany_float
|
||||||
|
let max = t => t->toArray->Js.Math.maxMany_float
|
||||||
|
let percentile = (t, b) => Jstat.percentile(t->toArray, b, false)
|
||||||
|
|
||||||
|
module Sorted = {
|
||||||
|
let min = (t: t) => t->length > 0 ? Some(FloatArray.unsafe_get(t, 0)) : None
|
||||||
|
let max = (t: t) => t->length > 0 ? Some(FloatArray.unsafe_get(t, t->length - 1)) : None
|
||||||
|
let range = (~min=min, ~max=max, a) =>
|
||||||
|
switch (min(a), max(a)) {
|
||||||
|
| (Some(min), Some(max)) => Some(max -. min)
|
||||||
|
| _ => None
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
This function goes through a sorted array and divides it into two different clusters:
|
||||||
|
continuous samples and discrete samples. The discrete samples are stored in a mutable map.
|
||||||
|
Samples are thought to be discrete if they have at least `minDiscreteWeight` duplicates.
|
||||||
|
|
||||||
|
If the min discrete weight is 4, that would mean that at least four elements are needed from a specific
|
||||||
|
value for that to be kept as discrete. This is important because in some cases, we can expect that
|
||||||
|
some common elements will be generated by regular operations. The final continuous array will be sorted.
|
||||||
|
|
||||||
|
This function is performance-critical, don't change it significantly without benchmarking
|
||||||
|
SampleSet->PointSet conversion performance.
|
||||||
|
*/
|
||||||
|
let splitContinuousAndDiscreteForMinWeight = (sortedArray: t, ~minDiscreteWeight: int): (
|
||||||
|
t,
|
||||||
|
E_FloatFloatMap.t,
|
||||||
|
) => {
|
||||||
|
// TODO - preallocate as typed array, then trim
|
||||||
|
let continuous: array<float> = []
|
||||||
|
let discrete = E_FloatFloatMap.empty()
|
||||||
|
|
||||||
|
let addData = (count: int, value: float): unit => {
|
||||||
|
if count >= minDiscreteWeight {
|
||||||
|
E_FloatFloatMap.add(value, count->Belt.Int.toFloat, discrete)
|
||||||
|
} else {
|
||||||
|
for _ in 1 to count {
|
||||||
|
continuous->Js.Array2.push(value)->ignore
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let (finalCount, finalValue) = reduce(
|
||||||
|
sortedArray,
|
||||||
|
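// the initial prev value in the (0, 0.) accumulator doesn't matter: its count is 0, so if it differs from the first element the flush adds nothing (for minDiscreteWeight > 0), and if it collides the count simply starts from that element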
|
||||||
|
(. (count, prev), element) => {
|
||||||
|
if element == prev {
|
||||||
|
(count + 1, prev)
|
||||||
|
} else {
|
||||||
|
// new value, process previous ones
|
||||||
|
addData(count, prev)
|
||||||
|
(1, element)
|
||||||
|
}
|
||||||
|
},
|
||||||
|
(0, 0.),
|
||||||
|
)
|
||||||
|
|
||||||
|
// flush final values
|
||||||
|
addData(finalCount, finalValue)
|
||||||
|
|
||||||
|
(continuous->make, discrete)
|
||||||
|
}
|
||||||
|
}
|
|
@ -78,6 +78,8 @@ module Error = {
|
||||||
| NotYetImplemented => "This pathway is not yet implemented"
|
| NotYetImplemented => "This pathway is not yet implemented"
|
||||||
| Other(t) => t
|
| Other(t) => t
|
||||||
}
|
}
|
||||||
|
|
||||||
|
exception OperationException(t)
|
||||||
}
|
}
|
||||||
|
|
||||||
let power = (a: float, b: float): result<float, Error.t> =>
|
let power = (a: float, b: float): result<float, Error.t> =>
|
||||||
|
|
|
@ -51,3 +51,11 @@ module Random = {
|
||||||
module Math = {
|
module Math = {
|
||||||
@module external factorial: float => float = "@stdlib/math/base/special/factorial"
|
@module external factorial: float => float = "@stdlib/math/base/special/factorial"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
module Base = {
|
||||||
|
@module
|
||||||
|
external variance: (int, int, Js.TypedArray2.Float64Array.t, int) => float =
|
||||||
|
"@stdlib/stats/base/variance"
|
||||||
|
let variance = (t: Js.TypedArray2.Float64Array.t) =>
|
||||||
|
variance(t->Js.TypedArray2.Float64Array.length, 0, t, 1)
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user