squiggle/packages/squiggle-lang/src/rescript/Distributions/GenericDist.res

543 lines
18 KiB
Plaintext
Raw Normal View History

//TODO: multimodal, add interface, test somehow, track performance, refactor sampleSet, refactor ASTEvaluator.res.
make ts compatible commit 94803421acd2e5cb3a0f88e10f9244d374fab20b Author: Umur Ozkul <umur@hightechmind.io> Date: Mon Aug 22 08:48:33 2022 +0200 note on old habbits commit 2c47f80fce8fa6c12cb53b97f7150758eaa74b88 Author: Umur Ozkul <umur@hightechmind.io> Date: Mon Aug 22 08:18:16 2022 +0200 getTag returns enum value commit 733b9a820f1d01b618708896451a112d638ee811 Author: Umur Ozkul <umur@hightechmind.io> Date: Mon Aug 22 07:07:30 2022 +0200 result commit 64698f4a930182b3ccf122849824e4b6df251a9f Author: Umur Ozkul <umur@hightechmind.io> Date: Mon Aug 22 06:48:30 2022 +0200 return tags as ts enum commit 8ac802428a7aaac5367f5e8a9aaa592b89e305eb Author: Umur Ozkul <umur@hightechmind.io> Date: Mon Aug 22 06:16:26 2022 +0200 export tags commit 6c843e475a98ca1fcfa893d09d45ac9ad7c633ee Author: Umur Ozkul <umur@hightechmind.io> Date: Mon Aug 22 06:11:00 2022 +0200 distribution tag commit 9a43ec30fcaf967a672475431243949748d00bc7 Author: Umur Ozkul <umur@hightechmind.io> Date: Mon Aug 22 05:46:24 2022 +0200 opaque result commit f89bdd47c41135135baac99b18faf1c418cc4142 Author: Umur Ozkul <umur@hightechmind.io> Date: Mon Aug 22 05:24:18 2022 +0200 make ts compilable commit 6609bb3691b08405639e6f20da0fad309f2f232e Author: Umur Ozkul <umur@hightechmind.io> Date: Mon Aug 22 05:21:34 2022 +0200 compiles commit bace3eca63079de8f285069c65b219601e7310bf Author: Umur Ozkul <umur@hightechmind.io> Date: Mon Aug 22 04:33:34 2022 +0200 rescript compiles commit cd095f605c543902edec08fdcd407600296ec0cb Author: Umur Ozkul <umur@hightechmind.io> Date: Mon Aug 22 02:40:31 2022 +0200 squiggleValue commit 9b78b5d6c8b69287458fe392f142ceb3bca99407 Author: Umur Ozkul <umur@hightechmind.io> Date: Mon Aug 22 02:37:11 2022 +0200 project commit 20c8693b1eb6492f1662bedbb26b469aac11f8ff Author: Umur Ozkul <umur@hightechmind.io> Date: Mon Aug 22 00:59:44 2022 +0200 compiles
2022-08-22 06:50:59 +00:00
// The distribution type handled throughout this module; the code below matches
// on its PointSet, Symbolic and SampleSet constructors.
type t = DistributionTypes.genericDist
// Error type carried in the Error branch of the results returned by this module.
type error = DistributionTypes.error
2022-03-28 12:39:07 +00:00
// Callback that renders a distribution as a point set, or fails with an error.
type toPointSetFn = t => result<PointSetTypes.pointSetDist, error>
2022-04-09 22:10:06 +00:00
// Callback that converts a distribution to a sample set, or fails with an error.
type toSampleSetFn = t => result<SampleSetDist.t, error>
2022-03-28 12:39:07 +00:00
// Callback taking a distribution and a float factor; `mixture` calls it with each
// component and its weight divided by the total weight.
type scaleMultiplyFn = (t, float) => result<t, error>
// Callback combining two distributions into one; `mixture` folds the weighted
// components together with it.
type pointwiseAddFn = (t, t) => result<t, error>
2022-07-13 16:37:39 +00:00
// Evaluation settings threaded into the conversions performed by this module.
type env = {
// Passed as the sample count to toPointSet (Score) and to SampleSetDist.mixture (mixture).
sampleCount: int,
// Passed as the xyPointLength when Score renders a distribution to a point set.
xyPointLength: int,
}
// True only when `t` is held as a PointSet.
let isPointSet = (t: t): bool =>
  switch t {
  | PointSet(_) => true
  | Symbolic(_) | SampleSet(_) => false
  }
// True only when `t` is held as a SampleSet.
let isSampleSetSet = (t: t): bool =>
  switch t {
  | SampleSet(_) => true
  | PointSet(_) | Symbolic(_) => false
  }
// True only when `t` is held as a Symbolic distribution.
let isSymbolic = (t: t): bool =>
  switch t {
  | Symbolic(_) => true
  | PointSet(_) | SampleSet(_) => false
  }
/*
  Draws `n` samples from `t` by delegating to the sampler of whichever
  representation `t` currently has. Note that the argument order of the
  underlying samplers differs between representations.
*/
let sampleN = (t: t, n) =>
  switch t {
  | SampleSet(samples) => SampleSetDist.sampleN(samples, n)
  | Symbolic(symbolic) => SymbolicDist.T.sampleN(n, symbolic)
  | PointSet(pointSet) => PointSetDist.sampleNRendered(n, pointSet)
  }
2022-05-21 02:54:15 +00:00
// Draws a single sample: asks sampleN for a batch of one and unwraps its first element.
// The unwrap goes through E.O.toExn with the message below, so a batch with no first
// element presumably raises rather than returning a result.
let sample = (t: t) => {
  let firstDraw = E.A.first(sampleN(t, 1))
  E.O.toExn("Should not have happened", firstDraw)
}
2022-05-19 13:25:34 +00:00
2022-04-10 00:34:21 +00:00
// Builds a SampleSetDist from `n` samples of `t`, translating a construction
// failure into this module's error type.
let toSampleSetDist = (t: t, n) => {
  let samples = sampleN(t, n)
  SampleSetDist.make(samples)->E.R2.errMap(DistributionTypes.Error.sampleErrorToDistErr)
}
2022-03-27 18:22:26 +00:00
// Wraps a plain float as a Symbolic distribution built by SymbolicDist.Float.make.
let fromFloat = (f: float): t => {
  let symbolicFloat = SymbolicDist.Float.make(f)
  Symbolic(symbolicFloat)
}
2022-03-27 18:22:26 +00:00
// Human-readable label for `t`. Symbolic distributions defer to their own
// printer; the other representations get a fixed description.
let toString = (t: t) =>
  switch t {
  | Symbolic(symbolic) => SymbolicDist.T.toString(symbolic)
  | PointSet(_) => "Point Set Distribution"
  | SampleSet(_) => "Sample Set Distribution"
  }
// Normalizes `t`. Only point sets are actually rescaled; symbolic and sample-set
// distributions are returned unchanged.
let normalize = (t: t): t =>
  switch t {
  | Symbolic(_) | SampleSet(_) => t
  | PointSet(pointSet) => PointSet(PointSetDist.T.normalize(pointSet))
  }
// Final value of the integral of `t`. Point sets report their own value;
// symbolic and sample-set distributions are taken to be 1.0.
let integralEndY = (t: t): float =>
  switch t {
  | Symbolic(_) | SampleSet(_) => 1.0
  | PointSet(pointSet) => PointSetDist.T.integralEndY(pointSet)
  }
2022-04-15 20:28:51 +00:00
// A distribution counts as normalized when its integral end value is within 1e-7 of 1.0.
let isNormalized = (t: t): bool => {
  let deviationFromOne = Js.Math.abs_float(integralEndY(t) -. 1.0)
  deviationFromOne < 1e-7
}
/*
  Evaluates an operation that reduces a distribution to a single float.

  - #IntegralSum is answered directly from integralEndY.
  - #Pdf, #Cdf, #Inv, #Mean, #Sample, #Min, #Max: a symbolic distribution is asked
    first (a symbolic failure is discarded and the point-set route is used instead);
    a sample set answers #Mean, #Sample, #Inv, #Min and #Max itself; everything else
    is rendered with `toPointSetFn` and evaluated on the point set.
  - #Stdev, #Variance, #Mode are only available on sample sets; any other
    representation yields NotYetImplemented.
*/
let toFloatOperation = (
  t,
  ~toPointSetFn: toPointSetFn,
  ~distToFloatOperation: DistributionTypes.DistributionOperation.toFloat,
) =>
  switch distToFloatOperation {
  | #IntegralSum => Ok(integralEndY(t))
  | (#Pdf(_) | #Cdf(_) | #Inv(_) | #Mean | #Sample | #Min | #Max) as op => {
      // Generic fallback: render to a point set and evaluate the operation there.
      let viaPointSet = () => toPointSetFn(t)->E.R2.fmap(PointSetDist.operate(op))
      switch ((t: t), distToFloatOperation) {
      | (Symbolic(symbolic), _) =>
        switch SymbolicDist.T.operate(op, symbolic)->E.R.toOption {
        | Some(value) => Ok(value)
        | None => viaPointSet()
        }
      | (SampleSet(sampleSet), #Mean) => SampleSetDist.mean(sampleSet)->Ok
      | (SampleSet(sampleSet), #Sample) => SampleSetDist.sample(sampleSet)->Ok
      | (SampleSet(sampleSet), #Inv(p)) => SampleSetDist.percentile(sampleSet, p)->Ok
      | (SampleSet(sampleSet), #Min) => SampleSetDist.min(sampleSet)->Ok
      | (SampleSet(sampleSet), #Max) => SampleSetDist.max(sampleSet)->Ok
      | _ => viaPointSet()
      }
    }
  | (#Stdev | #Variance | #Mode) as op =>
    switch (t, op) {
    | (SampleSet(sampleSet), #Stdev) => SampleSetDist.stdev(sampleSet)->Ok
    | (SampleSet(sampleSet), #Variance) => SampleSetDist.variance(sampleSet)->Ok
    | (SampleSet(sampleSet), #Mode) => SampleSetDist.mode(sampleSet)->Ok
    | _ => Error(DistributionTypes.NotYetImplemented)
    }
  }
//TODO: If it's a pointSet but the requested xyPointLength differs from its current length, it should be converted to the requested length.
// This is tricky because of the case of discrete distributions.
// Also, change the outputXYPoints/pointSetDistLength details.
/*
  Converts any distribution to a point set.

  - A point set is returned as-is (its length is not adjusted to `xyPointLength`).
  - A symbolic distribution is rendered with `xyPointLength` points using `xSelection`.
  - A sample set is converted by SampleSetDist.toPointSetDist, with `xyPointLength`
    used for both outputXYPoints and pointSetDistLength and no explicit kernel width;
    a failure there is reported as PointSetConversionError.

  The trailing unit argument lets callers omit the optional `xSelection`.
*/
let toPointSet = (
  t,
  ~xyPointLength,
  ~sampleCount,
  ~xSelection: DistributionTypes.DistributionOperation.pointsetXSelection=#ByWeight,
  (),
): result<PointSetTypes.pointSetDist, error> =>
  switch (t: t) {
  | PointSet(existing) => Ok(existing)
  | Symbolic(symbolic) => SymbolicDist.T.toPointSetDist(~xSelection, xyPointLength, symbolic)->Ok
  | SampleSet(samples) =>
    SampleSetDist.toPointSetDist(
      ~samples,
      ~samplingInputs={
        sampleCount: sampleCount,
        outputXYPoints: xyPointLength,
        pointSetDistLength: xyPointLength,
        kernelWidth: None,
      },
    )->E.R2.errMap(conversionError => DistributionTypes.PointSetConversionError(conversionError))
  }
2022-05-25 12:17:45 +00:00
module Score = {
  type genericDistOrScalar = DistributionTypes.DistributionOperation.genericDistOrScalar

  /*
    Prepares the arguments for PointSetDist scoring.

    The estimate, the optional prior and (when the answer is a distribution) the
    answer are all rendered to point sets using the lengths in `env` and the
    #ByWeight x-selection. A scalar answer is passed through untouched.
    If the prior fails to render, that error is returned before the estimate
    or answer are looked at.
  */
  let argsMake = (~esti: t, ~answ: genericDistOrScalar, ~prior: option<t>, ~env: env): result<
    PointSetDist_Scoring.scoreArgs,
    error,
  > => {
    let render = (dist: t) =>
      toPointSet(
        dist,
        ~xyPointLength=env.xyPointLength,
        ~sampleCount=env.sampleCount,
        ~xSelection=#ByWeight,
        (),
      )

    // Turn the optional prior into "rendering succeeded with an optional point set".
    let renderedPrior: result<option<PointSetTypes.pointSetDist>, error> = switch prior {
    | None => Ok(None)
    | Some(priorDist) => render(priorDist)->E.R2.fmap(priorSet => Some(priorSet))
    }

    renderedPrior->E.R.bind(priorSet =>
      switch answ {
      | Score_Dist(answerDist) =>
        E.R.merge(render(esti), render(answerDist))->E.R2.fmap(((estimateSet, answerSet)) =>
          {
            estimate: estimateSet,
            answer: answerSet,
            prior: priorSet,
          }->PointSetDist_Scoring.DistAnswer
        )
      | Score_Scalar(answerValue) =>
        render(esti)->E.R2.fmap(estimateSet =>
          {
            estimate: estimateSet,
            answer: answerValue,
            prior: priorSet,
          }->PointSetDist_Scoring.ScalarAnswer
        )
      }
    )
  }

  // Log score of `estimate` against `answer` (optionally relative to `prior`).
  // Scoring failures are wrapped as OperationError.
  let logScore = (~estimate: t, ~answer: genericDistOrScalar, ~prior: option<t>, ~env: env): result<
    float,
    error,
  > => {
    let scoreArgs = argsMake(~esti=estimate, ~answ=answer, ~prior, ~env)
    scoreArgs->E.R.bind(args =>
      PointSetDist.logScore(args)->E.R2.errMap(scoreError =>
        DistributionTypes.OperationError(scoreError)
      )
    )
  }
}
2022-04-09 02:55:06 +00:00
/*
PointSetDist.toSparkline calls "downsampleEquallyOverX", which downsamples it to n=bucketCount.
It first needs a pointSetDist, so we convert to a pointSetDist. In this process we want the
xyPointLength to be a bit longer than the eventual toSparkline downsampling. I chose 3
fairly arbitrarily.
*/
2022-04-21 22:09:06 +00:00
// Renders `t` as a sparkline string with `bucketCount` buckets (20 by default).
// The distribution is first rendered to a point set with linear x-selection and
// three times as many points as buckets; sparkline failures become SparklineError.
let toSparkline = (t: t, ~sampleCount: int, ~bucketCount: int=20, ()): result<string, error> => {
  let rendered = toPointSet(
    t,
    ~xSelection=#Linear,
    ~xyPointLength=bucketCount * 3,
    ~sampleCount,
    (),
  )
  rendered->E.R.bind(pointSet =>
    PointSetDist.toSparkline(pointSet, bucketCount)->E.R2.errMap(sparklineError =>
      DistributionTypes.SparklineError(sparklineError)
    )
  )
}
2022-03-27 18:22:26 +00:00
module Truncate = {
  /*
    Tries to truncate without leaving the symbolic representation.

    Returns Some(truncated) only for a symbolic uniform with at least one cutoff.
    Returns None (meaning "use the generic path") when:
    - no cutoff is given,
    - the distribution is not a symbolic uniform, or
    - both cutoffs are given but the left one is not strictly below the right one.

    The last rule is what the original `if lc < rc` guard was aiming for: without
    it, inverted or equal cutoffs fell through to the general uniform arm and a
    uniform was built from them anyway.
  */
  let trySymbolicSimplification = (
    leftCutoff: option<float>,
    rightCutoff: option<float>,
    t: t,
  ): option<t> =>
    switch (leftCutoff, rightCutoff, t) {
    | (None, None, _) => None
    // `!(lc < rc)` rather than `lc >= rc` so that NaN cutoffs are rejected too.
    | (Some(lc), Some(rc), Symbolic(#Uniform(_))) if !(lc < rc) => None
    | (lc, rc, Symbolic(#Uniform(u))) =>
      Some(Symbolic(#Uniform(SymbolicDist.Uniform.truncate(lc, rc, u))))
    | _ => None
    }

  /*
    Truncates `t` to the range given by the optional cutoffs.

    - With no cutoffs, `t` is returned unchanged.
    - If the symbolic shortcut applies, its result is returned.
    - A sample set is truncated as a sample set; failures become SampleSetError.
    - Anything else is rendered with `toPointSetFn`, truncated and re-normalized.

    The trailing unit argument lets callers omit either cutoff.
  */
  let run = (
    t: t,
    ~toPointSetFn: toPointSetFn,
    ~leftCutoff=None: option<float>,
    ~rightCutoff=None: option<float>,
    (),
  ): result<t, error> => {
    let doesNotNeedCutoff = E.O.isNone(leftCutoff) && E.O.isNone(rightCutoff)
    if doesNotNeedCutoff {
      Ok(t)
    } else {
      switch trySymbolicSimplification(leftCutoff, rightCutoff, t) {
      | Some(simplified) => Ok(simplified)
      | None =>
        switch t {
        | SampleSet(sampleSet) =>
          switch SampleSetDist.truncate(sampleSet, ~leftCutoff, ~rightCutoff) {
          | Ok(truncated) => Ok(SampleSet(truncated))
          | Error(err) => Error(DistributionTypes.SampleSetError(err))
          }
        | _ =>
          toPointSetFn(t)->E.R2.fmap(pointSet => {
            DistributionTypes.PointSet(
              PointSetDist.T.truncate(leftCutoff, rightCutoff, pointSet)->PointSetDist.T.normalize,
            )
          })
        }
      }
    }
  }
}
2022-03-28 11:56:20 +00:00
// Top-level alias so callers can write `truncate` instead of `Truncate.run`.
let truncate = Truncate.run
2022-03-27 18:22:26 +00:00
/* Given two random variables A and B, this returns the distribution
 of a new variable that is the result of the operation on A and B.
 For instance, normal(0, 1) + normal(1, 1) -> normal(1, sqrt(2)), taking the
 second parameter to be the standard deviation.
 In general, this is implemented via convolution; depending on the inputs, an
 analytic (symbolic) solution or Monte Carlo sampling is used instead.
*/
2022-03-27 18:22:26 +00:00
module AlgebraicCombination = {
module InputValidator = {
  /*
    Checks the operands of a logarithm: both must lie completely above zero.

    The check asks for the CDF at MagicNumbers.Epsilon.ten; a CDF value above
    zero there means some probability sits at or below that point, so the
    operand is rejected. Returns None when both operands pass.

    It would be good to also check that the probability mass of the second
    operand at value 1.0 is 0 (or approximately 0). Ideally that check would
    cover both probability mass and probability density. There is not yet a way
    of getting probability mass, so this is left for later.
  */
  let getLogarithmInputError = (t1: t, t2: t, ~toPointSetFn: toPointSetFn): option<error> => {
    let hasProbabilityAtOrBelowEpsilon = operand =>
      toFloatOperation(
        operand,
        ~toPointSetFn,
        ~distToFloatOperation=#Cdf(MagicNumbers.Epsilon.ten),
      )->E.R2.fmap(cdfAtEpsilon => cdfAtEpsilon > 0.)

    let firstOperandFails = hasProbabilityAtOrBelowEpsilon(t1)
    let secondOperandFails = hasProbabilityAtOrBelowEpsilon(t2)

    switch E.A.R.firstErrorOrOpen([firstOperandFails, secondOperandFails]) {
    | Error(err) => Some(err)
    | Ok([false, false]) => None
    | Ok([true, _]) =>
      Some(LogarithmOfDistributionError("First input must be completely greater than 0"))
    | Ok([false, true]) =>
      Some(LogarithmOfDistributionError("Second input must be completely greater than 0"))
    // The array always has exactly two entries, so this arm should never be hit.
    | Ok(_) => Some(Unreachable)
    }
  }

  // Runs the validation that applies to `arithmeticOperation`.
  // Only #Logarithm has a check; every other operation is accepted.
  let run = (t1: t, t2: t, ~toPointSetFn: toPointSetFn, ~arithmeticOperation): option<error> =>
    switch arithmeticOperation {
    | #Logarithm => getLogarithmInputError(t1, t2, ~toPointSetFn)
    | _ => None
    }
}
// The three ways of combining two distributions. Inputs are expected to have
// passed InputValidator already.
module StrategyCallOnValidatedInputs = {
  // Renders both operands to point sets and combines them algebraically.
  let convolution = (
    toPointSet: toPointSetFn,
    arithmeticOperation: Operation.convolutionOperation,
    t1: t,
    t2: t,
  ): result<t, error> =>
    E.R.merge(toPointSet(t1), toPointSet(t2))->E.R2.fmap(((pointSet1, pointSet2)) =>
      DistributionTypes.PointSet(
        PointSetDist.combineAlgebraically(arithmeticOperation, pointSet1, pointSet2),
      )
    )

  // Converts both operands to sample sets and applies the operation pairwise
  // through SampleSetDist.map2; failures there become SampleSetError.
  let monteCarlo = (
    toSampleSet: toSampleSetFn,
    arithmeticOperation: Operation.algebraicOperation,
    t1: t,
    t2: t,
  ): result<t, error> => {
    let fn = Operation.Algebraic.toFn(arithmeticOperation)
    E.R.merge(toSampleSet(t1), toSampleSet(t2))->E.R.bind(((samples1, samples2)) =>
      SampleSetDist.map2(~fn, ~t1=samples1, ~t2=samples2)
      ->E.R2.fmap(combined => DistributionTypes.SampleSet(combined))
      ->E.R2.errMap(err => DistributionTypes.SampleSetError(err))
    )
  }

  // Asks SymbolicDist for an analytical simplification. Anything other than two
  // symbolic operands is reported as #NoSolution.
  let symbolic = (
    arithmeticOperation: Operation.algebraicOperation,
    t1: t,
    t2: t,
  ): SymbolicDistTypes.analyticalSimplificationResult =>
    switch (t1, t2) {
    | (DistributionTypes.Symbolic(left), DistributionTypes.Symbolic(right)) =>
      SymbolicDist.T.tryAnalyticalSimplification(left, right, arithmeticOperation)
    | _ => #NoSolution
    }
}
2022-04-26 22:41:57 +00:00
// Picks the strategy used when the caller asks for the default one.
module StrategyChooser = {
  type specificStrategy = [#AsSymbolic | #AsMonteCarlo | #AsConvolution]

  // Rough per-operand cost estimate for convolution.
  // I'm (Ozzie) really just guessing here, very little idea what's best.
  let expectedConvolutionCost = (dist: t): int =>
    switch dist {
    | Symbolic(#Float(_)) => MagicNumbers.OpCost.floatCost
    | Symbolic(_) => MagicNumbers.OpCost.symbolicCost
    | PointSet(Discrete(discrete)) => XYShape.T.length(discrete.xyShape)
    | PointSet(Mixed(_)) => MagicNumbers.OpCost.mixedCost
    | PointSet(Continuous(_)) => MagicNumbers.OpCost.continuousCost
    | _ => MagicNumbers.OpCost.wildcardCost
    }

  let hasSampleSetDist = (t1: t, t2: t): bool => isSampleSetSet(t1) || isSampleSetSet(t2)

  // Convolution wins when the product of both operand costs stays under the Monte Carlo cost.
  let convolutionIsFasterThanMonteCarlo = (t1: t, t2: t): bool => {
    let convolutionCost = expectedConvolutionCost(t1) * expectedConvolutionCost(t2)
    convolutionCost < MagicNumbers.OpCost.monteCarloCost
  }

  // Convolution is preferred only when neither operand is a sample set, the
  // operation can be done by convolution, and it is estimated to be cheaper.
  let preferConvolutionToMonteCarlo = (t1, t2, arithmeticOperation) =>
    !hasSampleSetDist(t1, t2) &&
    Operation.Convolution.canDoAlgebraicOperation(arithmeticOperation) &&
    convolutionIsFasterThanMonteCarlo(t1, t2)

  // A symbolic attempt that produces either a solution or an error selects
  // #AsSymbolic; otherwise the choice is between convolution and Monte Carlo.
  let run = (~t1: t, ~t2: t, ~arithmeticOperation): specificStrategy =>
    switch StrategyCallOnValidatedInputs.symbolic(arithmeticOperation, t1, t2) {
    | #NoSolution =>
      if preferConvolutionToMonteCarlo(t1, t2, arithmeticOperation) {
        #AsConvolution
      } else {
        #AsMonteCarlo
      }
    | #AnalyticalSolution(_) | #Error(_) => #AsSymbolic
    }
}
// Executes a strategy chosen by StrategyChooser. Because the chooser only
// selects a strategy it has found to be applicable, the "no solution" and
// "not a convolution operation" outcomes are reported as Unreachable.
let runStrategyOnValidatedInputs = (
  ~t1: t,
  ~t2: t,
  ~arithmeticOperation,
  ~strategy: StrategyChooser.specificStrategy,
  ~toPointSetFn: toPointSetFn,
  ~toSampleSetFn: toSampleSetFn,
): result<t, error> =>
  switch strategy {
  | #AsSymbolic =>
    switch StrategyCallOnValidatedInputs.symbolic(arithmeticOperation, t1, t2) {
    | #AnalyticalSolution(solution) => Ok(Symbolic(solution))
    | #NoSolution => Error(Unreachable)
    | #Error(operationError) => Error(OperationError(operationError))
    }
  | #AsConvolution =>
    switch Operation.Convolution.fromAlgebraicOperation(arithmeticOperation) {
    | None => Error(Unreachable)
    | Some(convolutionOp) =>
      StrategyCallOnValidatedInputs.convolution(toPointSetFn, convolutionOp, t1, t2)
    }
  | #AsMonteCarlo =>
    StrategyCallOnValidatedInputs.monteCarlo(toSampleSetFn, arithmeticOperation, t1, t2)
  }
2022-04-26 22:41:57 +00:00
/*
  Combines `t1` and `t2` with `arithmeticOperation`.

  Inputs are validated first; a validation error is returned as-is.
  - AsDefault lets StrategyChooser pick the strategy.
  - AsMonteCarlo, AsSymbolic and AsConvolution force that strategy. When the
    forced strategy cannot handle the inputs (no analytic solution, or an
    operation convolution does not support) a RequestedStrategyInvalidError
    is returned.
*/
let run = (
  ~strategy: DistributionTypes.asAlgebraicCombinationStrategy,
  t1: t,
  ~toPointSetFn: toPointSetFn,
  ~toSampleSetFn: toSampleSetFn,
  ~arithmeticOperation: Operation.algebraicOperation,
  ~t2: t,
): result<t, error> =>
  switch InputValidator.run(t1, t2, ~arithmeticOperation, ~toPointSetFn) {
  | Some(validationError) => Error(validationError)
  | None =>
    switch strategy {
    | AsDefault =>
      runStrategyOnValidatedInputs(
        ~t1,
        ~t2,
        ~strategy=StrategyChooser.run(~arithmeticOperation, ~t1, ~t2),
        ~arithmeticOperation,
        ~toPointSetFn,
        ~toSampleSetFn,
      )
    | AsMonteCarlo =>
      StrategyCallOnValidatedInputs.monteCarlo(toSampleSetFn, arithmeticOperation, t1, t2)
    | AsSymbolic =>
      switch StrategyCallOnValidatedInputs.symbolic(arithmeticOperation, t1, t2) {
      | #AnalyticalSolution(solution) => Ok(Symbolic(solution))
      | #Error(operationError) => Error(OperationError(operationError))
      | #NoSolution => Error(RequestedStrategyInvalidError(`No analytic solution for inputs`))
      }
    | AsConvolution =>
      switch Operation.Convolution.fromAlgebraicOperation(arithmeticOperation) {
      | Some(convolutionOp) =>
        StrategyCallOnValidatedInputs.convolution(toPointSetFn, convolutionOp, t1, t2)
      | None =>
        Error(
          RequestedStrategyInvalidError(
            `Convolution not supported for ${Operation.Algebraic.toString(arithmeticOperation)}`,
          ),
        )
      }
    }
  }
2022-03-27 18:22:26 +00:00
}
2022-03-28 11:56:20 +00:00
// Top-level alias so callers can write `algebraicCombination` instead of
// `AlgebraicCombination.run`.
let algebraicCombination = AlgebraicCombination.run
2022-03-27 18:22:26 +00:00
//TODO: Add faster pointwiseCombine fn
// Combines two distributions point by point: both are rendered with
// `toPointSetFn` and then merged by PointSetDist.combinePointwise using the
// function for `algebraicCombination`. Failures of the combination step are
// wrapped as OperationError.
let pointwiseCombination = (
  t1: t,
  ~toPointSetFn: toPointSetFn,
  ~algebraicCombination: Operation.algebraicOperation,
  ~t2: t,
): result<t, error> =>
  E.R.merge(toPointSetFn(t1), toPointSetFn(t2))->E.R.bind(((pointSet1, pointSet2)) => {
    let combineFn = Operation.Algebraic.toFn(algebraicCombination)
    switch PointSetDist.combinePointwise(combineFn, pointSet1, pointSet2) {
    | Ok(combined) => Ok(DistributionTypes.PointSet(combined))
    | Error(err) => Error(DistributionTypes.OperationError(err))
    }
  })
/*
  Applies `algebraicCombination` between every y-value of `t` and the float `f`.

  #Add and #Subtract are rejected with DistributionVerticalShiftIsInvalid.
  For the remaining operations the distribution is rendered with `toPointSetFn`
  and its y-values are mapped, with `f` as the second operand; the integral
  caches are updated through the matching Operation.Scale cache functions.
  Failures of the mapping step are wrapped as OperationError.
*/
let pointwiseCombinationFloat = (
  t: t,
  ~toPointSetFn: toPointSetFn,
  ~algebraicCombination: Operation.algebraicOperation,
  ~f: float,
): result<t, error> => {
  let scaleBy = scaleOp =>
    toPointSetFn(t)->E.R.bind(pointSet => {
      //TODO: Move to PointSet codebase
      let sumCacheFor = Operation.Scale.toIntegralSumCacheFn(scaleOp)
      let cacheFor = Operation.Scale.toIntegralCacheFn(scaleOp)
      PointSetDist.T.mapYResult(
        ~integralSumCacheFn=sumCacheFor(f),
        ~integralCacheFn=cacheFor(f),
        ~fn=y => Operation.Scale.toFn(scaleOp, y, f),
        pointSet,
      )->E.R2.errMap(err => DistributionTypes.OperationError(err))
    })

  let scaled = switch algebraicCombination {
  | (#Multiply | #Divide | #Power | #Logarithm) as scaleOp => scaleBy(scaleOp)
  | #LogarithmWithThreshold(eps) => scaleBy(#LogarithmWithThreshold(eps))
  | #Add | #Subtract => Error(DistributionTypes.DistributionVerticalShiftIsInvalid)
  }
  scaled->E.R2.fmap(pointSet => DistributionTypes.PointSet(pointSet))
}
//TODO: The result should always cumulatively sum to 1. This would be good to test.
//TODO: If the inputs are not normalized, this will return poor results. The weights probably refer to the post-normalized forms. It would be good to apply a catch to this.
/*
  Builds the weighted mixture of `values`, given as (distribution, weight) pairs.

  - An empty array is an error.
  - If every component is a sample set, the mixture is delegated to
    SampleSetDist.mixture with env.sampleCount.
  - Otherwise each component is scaled by weight / totalWeight with
    `scaleMultiplyFn` and the scaled components are summed left to right with
    `pointwiseAddFn`; the first failure is returned.

  NOTE(review): weights are divided by their sum without checking it, so a total
  weight of 0 presumably yields non-finite scale factors — confirm whether
  callers rule this out.
*/
let mixture = (
  values: array<(t, float)>,
  ~scaleMultiplyFn: scaleMultiplyFn,
  ~pointwiseAddFn: pointwiseAddFn,
  ~env: env,
) =>
  if E.A.isEmpty(values) {
    Error(DistributionTypes.OtherError("Mixture error: mixture must have at least 1 element"))
  } else if E.A.all(((dist, _)) => isSampleSetSet(dist), values) {
    // Every entry was just checked to be a SampleSet, so the unwrap below is
    // only a guard against a coding error.
    let sampleSetsWithWeights = values->E.A2.fmap(((value, weight)) => {
      let extracted = switch value {
      | SampleSet(sampleSet) => Ok((sampleSet, weight))
      | _ => Error("Unreachable")
      }
      extracted->E.R2.toExn("Mixture coding error: SampleSet expected. This should be inaccessible.")
    })
    SampleSetDist.mixture(sampleSetsWithWeights, env.sampleCount)
    ->E.R2.fmap(sampleSet => DistributionTypes.SampleSet(sampleSet))
    ->E.R2.errMap(DistributionTypes.Error.sampleErrorToDistErr)
  } else {
    let totalWeight = values->E.A2.fmap(E.Tuple2.second)->E.A.Floats.sum
    let scaledComponents =
      values
      ->E.A2.fmap(((dist, weight)) => scaleMultiplyFn(dist, weight /. totalWeight))
      ->E.A.R.firstErrorOrOpen
    scaledComponents->E.R.bind(components => {
      // `values` is non-empty here, so there is always a first component to start from.
      let firstComponent = E.A.unsafe_get(components, 0)
      let remainingComponents = Js.Array.sliceFrom(1, components)
      E.A.fold_left(
        (runningSum, next) => E.R.bind(runningSum, sum => pointwiseAddFn(sum, next)),
        Ok(firstComponent),
        remainingComponents,
      )
    })
  }