squiggle/packages/squiggle-lang/src/rescript/Distributions/SampleSetDist/SampleSetDist.res

146 lines
4.8 KiB
Plaintext

@genType
module Error = {
@genType
type sampleSetError =
TooFewSamples | NonNumericInput(string) | OperationError(Operation.operationError)
let sampleSetErrorToString = (err: sampleSetError): string =>
switch err {
| TooFewSamples => "Too few samples when constructing sample set"
| NonNumericInput(err) => `Found a non-number in input: ${err}`
| OperationError(err) => Operation.Error.toString(err)
}
@genType
type pointsetConversionError = TooFewSamplesForConversionToPointSet
let pointsetConversionErrorToString = (err: pointsetConversionError) =>
switch err {
| TooFewSamplesForConversionToPointSet => "Too Few Samples to convert to point set"
}
let fromOperationError = e => OperationError(e)
let toString = (err: sampleSetError) => {
switch err {
| TooFewSamples => "Too few samples when constructing sample set"
| NonNumericInput(err) => `Found a non-number in input: ${err}`
| OperationError(err) => Operation.Error.toString(err)
}
}
}
include Error
/*
This is used as a smart constructor. The only way to create a SampleSetDist.t is to call
this constructor.
https://stackoverflow.com/questions/66909578/how-to-make-a-type-constructor-private-in-rescript-except-in-current-module
*/
module T: {
//This really should be hidden (remove the array<float>). The reason it isn't is to act as an escape hatch in JS__Test.ts.
//When we get a good functional library in TS, we could refactor that out.
@genType
type t = array<float>
let make: array<float> => result<t, sampleSetError>
let get: t => array<float>
} = {
type t = array<float>
let make = (a: array<float>) =>
if E.A.length(a) > 5 {
Ok(a)
} else {
Error(TooFewSamples)
}
let get = (a: t) => a
}
include T
let length = (t: t) => get(t)->E.A.length
/*
TODO: Refactor to get a more precise estimate. Also, this code is just fairly messy, could use
some refactoring.
*/
let toPointSetDist = (~samples: t, ~samplingInputs: SamplingInputs.samplingInputs): result<
PointSetTypes.pointSetDist,
pointsetConversionError,
> =>
SampleSetDist_ToPointSet.toPointSetDist(
~samples=get(samples),
~samplingInputs,
(),
).pointSetDist->E.O2.toResult(TooFewSamplesForConversionToPointSet)
//Randomly get one sample from the distribution
let sample = (t: t): float => {
let i = E.Int.random(~min=0, ~max=E.A.length(get(t)) - 1)
E.A.unsafe_get(get(t), i)
}
/*
If asked for a length of samples shorter or equal the length of the distribution,
return this first n samples of this distribution.
Else, return n random samples of the distribution.
The former helps in cases where multiple distributions are correlated.
However, if n > length(t), then there's no clear right answer, so we just randomly
sample everything.
*/
let sampleN = (t: t, n) => {
if n <= E.A.length(get(t)) {
E.A.slice(get(t), ~offset=0, ~len=n)
} else {
Belt.Array.makeBy(n, _ => sample(t))
}
}
let _fromSampleResultArray = (samples: array<result<float, QuriSquiggleLang.Operation.Error.t>>) =>
E.A.R.firstErrorOrOpen(samples)->E.R2.errMap(Error.fromOperationError) |> E.R2.bind(make)
let samplesMap = (~fn: float => result<float, Operation.Error.t>, t: t): result<
t,
sampleSetError,
> => T.get(t)->E.A2.fmap(fn)->_fromSampleResultArray
//TODO: Figure out what to do if distributions are different lengths. ``zip`` is kind of inelegant for this.
let map2 = (~fn: (float, float) => result<float, Operation.Error.t>, ~t1: t, ~t2: t): result<
t,
sampleSetError,
> => E.A.zip(get(t1), get(t2))->E.A2.fmap(E.Tuple2.toFnCall(fn))->_fromSampleResultArray
let map3 = (
~fn: (float, float, float) => result<float, Operation.Error.t>,
~t1: t,
~t2: t,
~t3: t,
): result<t, sampleSetError> =>
E.A.zip3(get(t1), get(t2), get(t3))->E.A2.fmap(E.Tuple3.toFnCall(fn))->_fromSampleResultArray
let mapN = (~fn: array<float> => result<float, Operation.Error.t>, ~t1: array<t>): result<
t,
sampleSetError,
> => E.A.transpose(E.A.fmap(get, t1))->E.A2.fmap(fn)->_fromSampleResultArray
let mean = t => T.get(t)->E.A.Floats.mean
let geomean = t => T.get(t)->E.A.Floats.geomean
let mode = t => T.get(t)->E.A.Floats.mode
let sum = t => T.get(t)->E.A.Floats.sum
let min = t => T.get(t)->E.A.Floats.min
let max = t => T.get(t)->E.A.Floats.max
let stdev = t => T.get(t)->E.A.Floats.stdev
let variance = t => T.get(t)->E.A.Floats.variance
let percentile = (t, f) => T.get(t)->E.A.Floats.percentile(f)
let mixture = (values: array<(t, float)>, intendedLength: int) => {
let totalWeight = values->E.A2.fmap(E.Tuple2.second)->E.A.Floats.sum
values
->E.A2.fmap(((dist, weight)) => {
let adjustedWeight = weight /. totalWeight
let samplesToGet = adjustedWeight *. E.I.toFloat(intendedLength) |> E.Float.toInt
sampleN(dist, samplesToGet)
})
->E.A.concatMany
->T.make
}