2022-03-27 20:59:46 +00:00
//TODO: multimodal, add interface, test somehow, track performance, refactor sampleSet, refactor ASTEvaluator.res.
2022-04-11 18:00:56 +00:00
// The generic distribution handled throughout this file. The functions below switch on its
// PointSet, Symbolic and SampleSet constructors.
type t = DistributionTypes.genericDist
// Error type carried in the `result` values returned by the functions in this file.
type error = DistributionTypes.error
2022-03-28 12:39:07 +00:00
// Signature of a caller-supplied converter from a generic distribution to a point-set
// distribution. Several functions below take one as `~toPointSetFn`.
type toPointSetFn = t => result<PointSetTypes.pointSetDist, error>
2022-04-09 22:10:06 +00:00
// Signature of a caller-supplied converter from a generic distribution to a sample-set
// distribution. Used by the Monte Carlo path of AlgebraicCombination.
type toSampleSetFn = t => result<SampleSetDist.t, error>
2022-03-28 12:39:07 +00:00
// Caller-supplied function that `mixture` uses to scale a distribution by a float weight.
type scaleMultiplyFn = (t, float) => result<t, error>
// Caller-supplied function that `mixture` uses to add two distributions together.
type pointwiseAddFn = (t, t) => result<t, error>
2022-04-26 00:55:16 +00:00
// Representation a caller can require from AlgebraicCombination.run: the result must be
// Symbolic, SampleSet (AsMontecarlo) or PointSet (AsConvolution) respectively.
type asMode = AsSymbolic | AsMontecarlo | AsConvolution
2022-04-10 01:24:44 +00:00
2022-03-29 18:36:54 +00:00
// Produces `n` samples from `t` by delegating to the sampling function of whichever
// representation `t` currently holds.
let sampleN = (t: t, n) =>
  switch t {
  | SampleSet(samples) => samples->SampleSetDist.sampleN(n)
  | Symbolic(symbolic) => SymbolicDist.T.sampleN(n, symbolic)
  | PointSet(pointSet) => PointSetDist.sampleNRendered(n, pointSet)
  }
2022-04-10 01:24:44 +00:00
2022-04-10 00:34:21 +00:00
// Samples `t` `n` times and wraps the samples in a SampleSetDist, translating any
// sample-set construction error into this module's error type.
let toSampleSetDist = (t: t, n) => {
  let samples = sampleN(t, n)
  samples->SampleSetDist.make->E.R2.errMap(DistributionTypes.Error.sampleErrorToDistErr)
}
2022-03-27 18:22:26 +00:00
2022-03-31 18:15:21 +00:00
// Wraps a float as a Symbolic distribution built by SymbolicDist.Float.make.
let fromFloat = (f: float): t => {
  let symbolicFloat = SymbolicDist.Float.make(f)
  Symbolic(symbolicFloat)
}
2022-03-27 21:37:27 +00:00
2022-03-27 18:22:26 +00:00
// Human-readable label for `t`. Symbolic distributions delegate to SymbolicDist;
// the other representations use a fixed description.
let toString = (t: t) =>
  switch t {
  | Symbolic(symbolic) => symbolic->SymbolicDist.T.toString
  | SampleSet(_) => "Sample Set Distribution"
  | PointSet(_) => "Point Set Distribution"
  }
2022-03-31 18:15:21 +00:00
// Normalizes point-set distributions via PointSetDist; Symbolic and SampleSet
// distributions are returned unchanged.
let normalize = (t: t): t =>
  switch t {
  | Symbolic(_) | SampleSet(_) => t
  | PointSet(pointSet) => {
      let normalized = PointSetDist.T.normalize(pointSet)
      PointSet(normalized)
    }
  }
2022-04-15 17:58:00 +00:00
// Final value of the integral of `t`. Only point sets are actually measured;
// Symbolic and SampleSet distributions are reported as 1.0.
let integralEndY = (t: t): float =>
  switch t {
  | Symbolic(_) | SampleSet(_) => 1.0
  | PointSet(pointSet) => pointSet->PointSetDist.T.integralEndY
  }
2022-04-15 20:28:51 +00:00
// True when integralEndY(t) is within 1e-7 of 1.0.
let isNormalized = (t: t): bool => {
  let deviation = integralEndY(t) -. 1.0
  Js.Math.abs_float(deviation) < 1e-7
}
2022-04-15 17:58:00 +00:00
2022-03-31 18:07:39 +00:00
// Evaluates a distribution-to-float operation on `t`.
// A Symbolic distribution is first asked for the answer directly. If that fails, or `t`
// is not Symbolic, `t` is converted with `toPointSetFn` and the operation is run on the
// resulting point set.
let toFloatOperation = (
  t,
  ~toPointSetFn: toPointSetFn,
  ~distToFloatOperation: Operation.distToFloatOperation,
) => {
  // Fallback path, only evaluated when no symbolic answer is available.
  let viaPointSet = () =>
    toPointSetFn(t)->E.R2.fmap(PointSetDist.operate(distToFloatOperation))

  switch (t: t) {
  | Symbolic(symbolic) =>
    switch SymbolicDist.T.operate(distToFloatOperation, symbolic) {
    | Ok(value) => Ok(value)
    | Error(_) => viaPointSet()
    }
  | PointSet(_) | SampleSet(_) => viaPointSet()
  }
}
2022-04-08 00:17:01 +00:00
//Todo: If it's a pointSet, but the xyPointLength is different from what it has, it should change.
2022-03-29 19:21:38 +00:00
// This is tricky because of the case of discrete distributions.
2022-03-31 18:07:39 +00:00
// Also, change the outputXYPoints/pointSetDistLength details
2022-04-08 12:44:04 +00:00
// Converts `t` to a point-set distribution.
// - PointSet: returned as-is (xyPointLength is not applied).
// - Symbolic: rendered by SymbolicDist with `xSelection` and `xyPointLength`.
// - SampleSet: converted by SampleSetDist; `xyPointLength` is used for both
//   outputXYPoints and pointSetDistLength, and no kernel width is supplied.
// The trailing unit argument is needed because `~xSelection` is optional.
let toPointSet = (
  t,
  ~xyPointLength,
  ~sampleCount,
  ~xSelection: DistributionTypes.DistributionOperation.pointsetXSelection=#ByWeight,
  (),
): result<PointSetTypes.pointSetDist, error> => {
  switch (t: t) {
  | SampleSet(samples) =>
    SampleSetDist.toPointSetDist(
      ~samples,
      ~samplingInputs={
        sampleCount: sampleCount,
        outputXYPoints: xyPointLength,
        pointSetDistLength: xyPointLength,
        kernelWidth: None,
      },
    )->E.R2.errMap(conversionError => DistributionTypes.PointSetConversionError(conversionError))
  | Symbolic(symbolic) => {
      let rendered = SymbolicDist.T.toPointSetDist(~xSelection, xyPointLength, symbolic)
      Ok(rendered)
    }
  | PointSet(existing) => Ok(existing)
  }
}
2022-04-09 02:55:06 +00:00
/*
PointSetDist.toSparkline calls "downsampleEquallyOverX", which downsamples it to n=bucketCount.
It first needs a pointSetDist, so we convert to a pointSetDist. In this process we want the
xyPointLength to be a bit longer than the eventual toSparkline downsampling. I chose 3
fairly arbitrarily.
*/
2022-04-21 22:09:06 +00:00
// Renders `t` as a sparkline string with `bucketCount` buckets.
// The intermediate point set is requested with linear x selection and three times as
// many points as buckets.
let toSparkline = (t: t, ~sampleCount: int, ~bucketCount: int=20, ()): result<string, error> => {
  let xyPointLength = bucketCount * 3
  let pointSetResult = toPointSet(t, ~xSelection=#Linear, ~xyPointLength, ~sampleCount, ())
  E.R.bind(pointSetResult, pointSet =>
    PointSetDist.toSparkline(pointSet, bucketCount)->E.R2.errMap(sparklineError =>
      DistributionTypes.SparklineError(sparklineError)
    )
  )
}
2022-04-08 00:17:01 +00:00
2022-03-27 18:22:26 +00:00
module Truncate = {
  /*
    Tries to truncate `t` without leaving the symbolic representation.
    Only Symbolic Uniform distributions are handled. Returns None when no symbolic
    shortcut applies, so that the caller can fall back to another method.

    When both cutoffs are given, the shortcut is only taken if leftCutoff < rightCutoff.
    Previously the guarded case fell through to the general Uniform case, so inverted or
    empty intervals were still truncated symbolically and the guard had no effect.
  */
  let trySymbolicSimplification = (
    leftCutoff: option<float>,
    rightCutoff: option<float>,
    t: t,
  ): option<t> =>
    switch (leftCutoff, rightCutoff, t) {
    | (None, None, _) => None
    | (Some(lc), Some(rc), Symbolic(#Uniform(u))) =>
      if lc < rc {
        Some(Symbolic(#Uniform(SymbolicDist.Uniform.truncate(Some(lc), Some(rc), u))))
      } else {
        // Not a valid interval: do not build a symbolic Uniform from it.
        None
      }
    | (lc, rc, Symbolic(#Uniform(u))) =>
      // Only reachable with exactly one cutoff present.
      Some(Symbolic(#Uniform(SymbolicDist.Uniform.truncate(lc, rc, u))))
    | _ => None
    }

  /*
    Truncates `t` to the given cutoffs.
    - With no cutoffs, `t` is returned unchanged.
    - If a symbolic simplification exists, it is used.
    - Otherwise `t` is converted with `toPointSetFn` and truncated as a point set.
    The trailing unit argument is needed because both cutoffs are optional.
  */
  let run = (
    t: t,
    ~toPointSetFn: toPointSetFn,
    ~leftCutoff=None: option<float>,
    ~rightCutoff=None: option<float>,
    (),
  ): result<t, error> => {
    let doesNotNeedCutoff = E.O.isNone(leftCutoff) && E.O.isNone(rightCutoff)
    if doesNotNeedCutoff {
      Ok(t)
    } else {
      switch trySymbolicSimplification(leftCutoff, rightCutoff, t) {
      | Some(r) => Ok(r)
      | None =>
        toPointSetFn(t)->E.R2.fmap(t => {
          DistributionTypes.PointSet(PointSetDist.T.truncate(leftCutoff, rightCutoff, t))
        })
      }
    }
  }
}
2022-03-28 11:56:20 +00:00
// Top-level alias for Truncate.run.
let truncate = Truncate.run
2022-03-27 18:22:26 +00:00
/* Given two random variables A and B, this returns the distribution
of a new variable that is the result of the operation on A and B.
For instance, normal(0, 1) + normal(1, 1) -> normal(1, 2).
2022-03-29 19:21:38 +00:00
In general, this is implemented via convolution.
TODO: It would be useful to be able to pass in a parameter to get this to run either with convolution or Monte Carlo.
*/
2022-03-27 18:22:26 +00:00
module AlgebraicCombination = {
  /*
    Attempts a closed-form combination of two distributions.
    Only applies when both operands are Symbolic. Returns None when either operand is
    not Symbolic or when SymbolicDist reports #NoSolution; Some(Error(_)) when
    SymbolicDist reports an error.
  */
  let tryAnalyticalSimplification = (
    arithmeticOperation: Operation.algebraicOperation,
    t1: t,
    t2: t,
  ): option<result<SymbolicDistTypes.symbolicDist, Operation.Error.t>> =>
    switch (arithmeticOperation, t1, t2) {
    | (arithmeticOperation, Symbolic(d1), Symbolic(d2)) =>
      switch SymbolicDist.T.tryAnalyticalSimplification(d1, d2, arithmeticOperation) {
      | #AnalyticalSolution(symbolicDist) => Some(Ok(symbolicDist))
      | #Error(er) => Some(Error(er))
      | #NoSolution => None
      }
    | _ => None
    }

  // Converts both operands to point sets and combines them with
  // PointSetDist.combineAlgebraically. Fails if either conversion fails.
  let runConvolution = (
    toPointSet: toPointSetFn,
    arithmeticOperation: Operation.convolutionOperation,
    t1: t,
    t2: t,
  ) =>
    E.R.merge(toPointSet(t1), toPointSet(t2))->E.R2.fmap(((a, b)) =>
      PointSetDist.combineAlgebraically(arithmeticOperation, a, b)
    )

  // Converts both operands to sample sets and combines them with SampleSetDist.map2,
  // using the float function for `arithmeticOperation`. The result is a SampleSet.
  let runMonteCarlo = (
    toSampleSet: toSampleSetFn,
    arithmeticOperation: Operation.algebraicOperation,
    t1: t,
    t2: t,
  ): result<t, error> => {
    let fn = Operation.Algebraic.toFn(arithmeticOperation)
    E.R.merge(toSampleSet(t1), toSampleSet(t2))
    ->E.R.bind(((t1, t2)) => {
      SampleSetDist.map2(~fn, ~t1, ~t2)->E.R2.errMap(x => DistributionTypes.OperationError(x))
    })
    ->E.R2.fmap(r => DistributionTypes.SampleSet(r))
  }

  /*
    Checks that both logarithm operands lie entirely above zero.
    Returns Some(error) when an operand fails the check (or when the check itself
    fails), and None when both operands are acceptable.

    It would be good to also do a check to make sure that probability mass for the second
    operand, at value 1.0, is 0 (or approximately 0). However, we'd ideally want to check
    that both the probability mass and the probability density are greater than zero.
    Right now we don't yet have a way of getting probability mass, so I'll leave this for later.
 */
  let getLogarithmInputError = (t1: t, t2: t, ~toPointSetFn: toPointSetFn): option<error> => {
    // True when the #Cdf(1e-10) result is positive, i.e. the operand is NOT entirely
    // above zero.
    let hasMassAtOrBelowZero = (operand: t) =>
      toFloatOperation(operand, ~toPointSetFn, ~distToFloatOperation=#Cdf(1e-10)) |> E.R.fmap(
        cdfNearZero => cdfNearZero > 0.,
      )
    let firstOperandHasMassAtOrBelowZero = hasMassAtOrBelowZero(t1)
    let secondOperandHasMassAtOrBelowZero = hasMassAtOrBelowZero(t2)
    let items = E.A.R.firstErrorOrOpen([
      firstOperandHasMassAtOrBelowZero,
      secondOperandHasMassAtOrBelowZero,
    ])
    switch items {
    | Error(r) => Some(r)
    | Ok([true, _]) =>
      Some(LogarithmOfDistributionError("First input must be completely greater than 0"))
    | Ok([false, true]) =>
      Some(LogarithmOfDistributionError("Second input must be completely greater than 0"))
    | Ok([false, false]) => None
    // The array always has two elements, so this case should never be hit.
    | Ok(_) => Some(Unreachable)
    }
  }

  // Operation-specific input validation. Only #Logarithm is currently checked.
  let getInvalidOperationError = (
    t1: t,
    t2: t,
    ~toPointSetFn: toPointSetFn,
    ~arithmeticOperation,
  ): option<error> => {
    if arithmeticOperation == #Logarithm {
      getLogarithmInputError(t1, t2, ~toPointSetFn)
    } else {
      None
    }
  }

  //I'm (Ozzie) really just guessing here, very little idea what's best
  let expectedConvolutionCost: t => int = x =>
    switch x {
    | Symbolic(#Float(_)) => 1
    | Symbolic(_) => 1000
    | PointSet(Discrete(m)) => m.xyShape->XYShape.T.length
    | PointSet(Mixed(_)) => 1000
    | PointSet(Continuous(_)) => 1000
    | _ => 1000
    }

  type calculationMethod = MonteCarlo | Convolution(Operation.convolutionOperation)

  // Picks the calculation method. #Divide, #Power and #Logarithm always use Monte Carlo.
  // #Add, #Subtract and #Multiply use convolution unless the product of the operands'
  // expected costs exceeds 10000.
  let chooseConvolutionOrMonteCarlo = (
    op: Operation.algebraicOperation,
    t1: t,
    t2: t,
  ): calculationMethod =>
    switch op {
    | #Divide
    | #Power
    | #Logarithm =>
      MonteCarlo
    | (#Add | #Subtract | #Multiply) as convOp =>
      expectedConvolutionCost(t1) * expectedConvolutionCost(t2) > 10000
        ? MonteCarlo
        : Convolution(convOp)
    }

  /*
    Combines t1 and t2 with `arithmeticOperation`:
    1. use the analytical solution if one exists (an analytical error is returned as
       OperationError);
    2. otherwise validate the inputs for the operation;
    3. otherwise run Monte Carlo or convolution, as chosen by chooseConvolutionOrMonteCarlo.
  */
  let run' = (
    t1: t,
    ~toPointSetFn: toPointSetFn,
    ~toSampleSetFn: toSampleSetFn,
    ~arithmeticOperation,
    ~t2: t,
  ): result<t, error> => {
    switch tryAnalyticalSimplification(arithmeticOperation, t1, t2) {
    | Some(Ok(symbolicDist)) => Ok(Symbolic(symbolicDist))
    | Some(Error(e)) => Error(OperationError(e))
    | None =>
      switch getInvalidOperationError(t1, t2, ~toPointSetFn, ~arithmeticOperation) {
      | Some(e) => Error(e)
      | None =>
        switch chooseConvolutionOrMonteCarlo(arithmeticOperation, t1, t2) {
        | MonteCarlo => runMonteCarlo(toSampleSetFn, arithmeticOperation, t1, t2)
        | Convolution(convOp) =>
          runConvolution(toPointSetFn, convOp, t1, t2)->E.R2.fmap(r => DistributionTypes.PointSet(r))
        }
      }
    }
  }

  /*
    Same as run', with an optional `~mode` constraint on the representation of the result.
    The combination is always computed first. The result is returned unchanged when no
    mode is given, when it is an Error, or when its representation matches the mode;
    otherwise RequestedModeInvalidError is returned.
  */
  let run = (
    ~mode: option<asMode>=?,
    t1: t,
    ~toPointSetFn: toPointSetFn,
    ~toSampleSetFn: toSampleSetFn,
    ~arithmeticOperation,
    ~t2: t,
  ): result<t, error> => {
    let algebraicResult = run'(t1, ~toPointSetFn, ~toSampleSetFn, ~arithmeticOperation, ~t2)
    switch (mode, algebraicResult) {
    | (None, _)
    | (Some(AsSymbolic), Ok(Symbolic(_)))
    | (Some(AsMontecarlo), Ok(DistributionTypes.SampleSet(_)))
    | (Some(AsConvolution), Ok(DistributionTypes.PointSet(_)))
    | (Some(_), Error(_)) => algebraicResult
    | (Some(_), Ok(_)) => Error(RequestedModeInvalidError)
    }
  }
}
2022-03-28 11:56:20 +00:00
// Top-level alias for AlgebraicCombination.run.
let algebraicCombination = AlgebraicCombination.run
2022-03-27 18:22:26 +00:00
//TODO: Add faster pointwiseCombine fn
2022-03-31 18:07:39 +00:00
// Converts both operands to point sets and combines them pointwise with the float
// function for `algebraicCombination`. Combination errors are reported as
// OperationError; the result is a PointSet.
let pointwiseCombination = (
  t1: t,
  ~toPointSetFn: toPointSetFn,
  ~algebraicCombination: Operation.algebraicOperation,
  ~t2: t,
): result<t, error> => {
  let bothPointSets = E.R.merge(toPointSetFn(t1), toPointSetFn(t2))
  E.R.bind(bothPointSets, ((first, second)) => {
    let combineFn = Operation.Algebraic.toFn(algebraicCombination)
    PointSetDist.combinePointwise(combineFn, first, second)
    ->E.R2.fmap(combined => DistributionTypes.PointSet(combined))
    ->E.R2.errMap(operationError => DistributionTypes.OperationError(operationError))
  })
}
// Applies a scale operation between every y value of `t` and the float `f`.
// #Add and #Subtract are rejected with DistributionVerticalShiftIsInvalid. The other
// operations convert `t` to a point set and map its y values; mapping errors are
// reported as OperationError.
let pointwiseCombinationFloat = (
  t: t,
  ~toPointSetFn: toPointSetFn,
  ~algebraicCombination: Operation.algebraicOperation,
  ~f: float,
): result<t, error> => {
  let scaledPointSet = switch algebraicCombination {
  | (#Multiply | #Divide | #Power | #Logarithm) as scaleOperation =>
    E.R.bind(toPointSetFn(t), pointSet => {
      //TODO: Move to PointSet codebase
      // `f` is the secondary operand; each y value is the main operand.
      let applyToY = (secondary, main) => Operation.Scale.toFn(scaleOperation, main, secondary)
      let sumCacheFn = Operation.Scale.toIntegralSumCacheFn(scaleOperation)
      let cacheFn = Operation.Scale.toIntegralCacheFn(scaleOperation)
      let mapped = PointSetDist.T.mapYResult(
        ~integralSumCacheFn=sumCacheFn(f),
        ~integralCacheFn=cacheFn(f),
        ~fn=applyToY(f),
        pointSet,
      )
      mapped->E.R2.errMap(operationError => DistributionTypes.OperationError(operationError))
    })
  | #Add | #Subtract => Error(DistributionTypes.DistributionVerticalShiftIsInvalid)
  }
  E.R2.fmap(scaledPointSet, pointSet => DistributionTypes.PointSet(pointSet))
}
2022-03-27 21:37:27 +00:00
2022-03-29 21:35:33 +00:00
//Note: The result should always cumulatively sum to 1. This would be good to test.
2022-03-30 01:28:14 +00:00
//Note: If the inputs are not normalized, this will return poor results. The weights probably refer to the post-normalized forms. It would be good to apply a catch to this.
2022-03-27 21:37:27 +00:00
// Builds a weighted mixture of the given (distribution, weight) pairs.
// Each distribution is scaled by weight / totalWeight with `scaleMultiplyFn`, then the
// scaled distributions are summed left to right with `pointwiseAddFn`.
// Returns an error when `values` is empty, when the weights sum to zero or to a
// non-finite number (the division would produce NaN or infinite scale factors), or when
// any scaling or addition step fails.
let mixture = (
  values: array<(t, float)>,
  ~scaleMultiplyFn: scaleMultiplyFn,
  ~pointwiseAddFn: pointwiseAddFn,
) => {
  if E.A.length(values) == 0 {
    Error(DistributionTypes.OtherError("Mixture error: mixture must have at least 1 element"))
  } else {
    let totalWeight = values->E.A2.fmap(E.Tuple2.second)->E.A.Floats.sum
    if totalWeight == 0.0 || !Js.Float.isFinite(totalWeight) {
      Error(
        DistributionTypes.OtherError(
          "Mixture error: mixture weights must sum to a finite, non-zero number",
        ),
      )
    } else {
      let properlyWeightedValues =
        values
        ->E.A2.fmap(((dist, weight)) => scaleMultiplyFn(dist, weight /. totalWeight))
        ->E.A.R.firstErrorOrOpen
      properlyWeightedValues->E.R.bind(values => {
        // `values` is non-empty here, so index 0 is safe to use as the fold seed.
        values
        |> Js.Array.sliceFrom(1)
        |> E.A.fold_left(
          (acc, x) => E.R.bind(acc, acc => pointwiseAddFn(acc, x)),
          Ok(E.A.unsafe_get(values, 0)),
        )
      })
    }
  }
}