//TODO: multimodal, add interface, test somehow, track performance, refactor sampleSet, refactor ASTEvaluator.res. type t = DistributionTypes.genericDist type error = DistributionTypes.error type toPointSetFn = t => result type toSampleSetFn = t => result type scaleMultiplyFn = (t, float) => result type pointwiseAddFn = (t, t) => result let isPointSet = (t: t) => switch t { | PointSet(_) => true | _ => false } let isSampleSetSet = (t: t) => switch t { | SampleSet(_) => true | _ => false } let isSymbolic = (t: t) => switch t { | Symbolic(_) => true | _ => false } let sampleN = (t: t, n) => switch t { | PointSet(r) => PointSetDist.sampleNRendered(n, r) | Symbolic(r) => SymbolicDist.T.sampleN(n, r) | SampleSet(r) => SampleSetDist.sampleN(r, n) } let toSampleSetDist = (t: t, n) => SampleSetDist.make(sampleN(t, n))->E.R2.errMap(DistributionTypes.Error.sampleErrorToDistErr) let fromFloat = (f: float): t => Symbolic(SymbolicDist.Float.make(f)) let toString = (t: t) => switch t { | PointSet(_) => "Point Set Distribution" | Symbolic(r) => SymbolicDist.T.toString(r) | SampleSet(_) => "Sample Set Distribution" } let normalize = (t: t): t => switch t { | PointSet(r) => PointSet(PointSetDist.T.normalize(r)) | Symbolic(_) => t | SampleSet(_) => t } let integralEndY = (t: t): float => switch t { | PointSet(r) => PointSetDist.T.integralEndY(r) | Symbolic(_) => 1.0 | SampleSet(_) => 1.0 } let isNormalized = (t: t): bool => Js.Math.abs_float(integralEndY(t) -. 1.0) < 1e-7 module Score = { let klDivergence = (prediction, answer, ~toPointSetFn: toPointSetFn): result => { let pointSets = E.R.merge(toPointSetFn(prediction), toPointSetFn(answer)) pointSets |> E.R2.bind(((predi, ans)) => PointSetDist.T.klDivergence(predi, ans)->E.R2.errMap(x => DistributionTypes.OperationError(x)) ) } let logScoreWithPointResolution = ( prior, prediction, answer, ~toPointSetFn: toPointSetFn, ): result => { switch prior { | Some(prior') => E.R.merge(toPointSetFn(prior'), toPointSetFn(prediction))->E.R.bind(((a, b)) => PointSetDist.T.logScoreWithPointResolution( a->Some, b, answer, )->E.R2.errMap(x => DistributionTypes.OperationError(x)) ) | None => prediction ->toPointSetFn ->E.R.bind(x => PointSetDist.T.logScoreWithPointResolution( None, x, answer, )->E.R2.errMap(x => DistributionTypes.OperationError(x)) ) } } } let toFloatOperation = ( t, ~toPointSetFn: toPointSetFn, ~distToFloatOperation: DistributionTypes.DistributionOperation.toFloat, ) => { switch distToFloatOperation { | #IntegralSum => Ok(integralEndY(t)) | (#Pdf(_) | #Cdf(_) | #Inv(_) | #Mean | #Sample) as op => { let trySymbolicSolution = switch (t: t) { | Symbolic(r) => SymbolicDist.T.operate(op, r)->E.R.toOption | _ => None } let trySampleSetSolution = switch ((t: t), distToFloatOperation) { | (SampleSet(sampleSet), #Mean) => SampleSetDist.mean(sampleSet)->Some | (SampleSet(sampleSet), #Sample) => SampleSetDist.sample(sampleSet)->Some | (SampleSet(sampleSet), #Inv(r)) => SampleSetDist.percentile(sampleSet, r)->Some | _ => None } switch trySymbolicSolution { | Some(r) => Ok(r) | None => switch trySampleSetSolution { | Some(r) => Ok(r) | None => toPointSetFn(t)->E.R2.fmap(PointSetDist.operate(op)) } } } } } //Todo: If it's a pointSet, but the xyPointLength is different from what it has, it should change. // This is tricky because the case of discrete distributions. // Also, change the outputXYPoints/pointSetDistLength details let toPointSet = ( t, ~xyPointLength, ~sampleCount, ~xSelection: DistributionTypes.DistributionOperation.pointsetXSelection=#ByWeight, (), ): result => { switch (t: t) { | PointSet(pointSet) => Ok(pointSet) | Symbolic(r) => Ok(SymbolicDist.T.toPointSetDist(~xSelection, xyPointLength, r)) | SampleSet(r) => SampleSetDist.toPointSetDist( ~samples=r, ~samplingInputs={ sampleCount: sampleCount, outputXYPoints: xyPointLength, pointSetDistLength: xyPointLength, kernelWidth: None, }, )->E.R2.errMap(x => DistributionTypes.PointSetConversionError(x)) } } /* PointSetDist.toSparkline calls "downsampleEquallyOverX", which downsamples it to n=bucketCount. It first needs a pointSetDist, so we convert to a pointSetDist. In this process we want the xyPointLength to be a bit longer than the eventual toSparkline downsampling. I chose 3 fairly arbitrarily. */ let toSparkline = (t: t, ~sampleCount: int, ~bucketCount: int=20, ()): result => t ->toPointSet(~xSelection=#Linear, ~xyPointLength=bucketCount * 3, ~sampleCount, ()) ->E.R.bind(r => r->PointSetDist.toSparkline(bucketCount)->E.R2.errMap(x => DistributionTypes.SparklineError(x)) ) module Truncate = { let trySymbolicSimplification = ( leftCutoff: option, rightCutoff: option, t: t, ): option => switch (leftCutoff, rightCutoff, t) { | (None, None, _) => None | (Some(lc), Some(rc), Symbolic(#Uniform(u))) if lc < rc => Some(Symbolic(#Uniform(SymbolicDist.Uniform.truncate(Some(lc), Some(rc), u)))) | (lc, rc, Symbolic(#Uniform(u))) => Some(Symbolic(#Uniform(SymbolicDist.Uniform.truncate(lc, rc, u)))) | _ => None } let run = ( t: t, ~toPointSetFn: toPointSetFn, ~leftCutoff=None: option, ~rightCutoff=None: option, (), ): result => { let doesNotNeedCutoff = E.O.isNone(leftCutoff) && E.O.isNone(rightCutoff) if doesNotNeedCutoff { Ok(t) } else { switch trySymbolicSimplification(leftCutoff, rightCutoff, t) { | Some(r) => Ok(r) | None => toPointSetFn(t)->E.R2.fmap(t => { DistributionTypes.PointSet(PointSetDist.T.truncate(leftCutoff, rightCutoff, t)) }) } } } } let truncate = Truncate.run /* Given two random variables A and B, this returns the distribution of a new variable that is the result of the operation on A and B. For instance, normal(0, 1) + normal(1, 1) -> normal(1, 2). In general, this is implemented via convolution. */ module AlgebraicCombination = { module InputValidator = { /* It would be good to also do a check to make sure that probability mass for the second operand, at value 1.0, is 0 (or approximately 0). However, we'd ideally want to check that both the probability mass and the probability density are greater than zero. Right now we don't yet have a way of getting probability mass, so I'll leave this for later. */ let getLogarithmInputError = (t1: t, t2: t, ~toPointSetFn: toPointSetFn): option => { let firstOperandIsGreaterThanZero = toFloatOperation( t1, ~toPointSetFn, ~distToFloatOperation=#Cdf(MagicNumbers.Epsilon.ten), ) |> E.R.fmap(r => r > 0.) let secondOperandIsGreaterThanZero = toFloatOperation( t2, ~toPointSetFn, ~distToFloatOperation=#Cdf(MagicNumbers.Epsilon.ten), ) |> E.R.fmap(r => r > 0.) let items = E.A.R.firstErrorOrOpen([ firstOperandIsGreaterThanZero, secondOperandIsGreaterThanZero, ]) switch items { | Error(r) => Some(r) | Ok([true, _]) => Some(LogarithmOfDistributionError("First input must be completely greater than 0")) | Ok([false, true]) => Some(LogarithmOfDistributionError("Second input must be completely greater than 0")) | Ok([false, false]) => None | Ok(_) => Some(Unreachable) } } let run = (t1: t, t2: t, ~toPointSetFn: toPointSetFn, ~arithmeticOperation): option => { if arithmeticOperation == #Logarithm { getLogarithmInputError(t1, t2, ~toPointSetFn) } else { None } } } module StrategyCallOnValidatedInputs = { let convolution = ( toPointSet: toPointSetFn, arithmeticOperation: Operation.convolutionOperation, t1: t, t2: t, ): result => E.R.merge(toPointSet(t1), toPointSet(t2)) ->E.R2.fmap(((a, b)) => PointSetDist.combineAlgebraically(arithmeticOperation, a, b)) ->E.R2.fmap(r => DistributionTypes.PointSet(r)) let monteCarlo = ( toSampleSet: toSampleSetFn, arithmeticOperation: Operation.algebraicOperation, t1: t, t2: t, ): result => { let fn = Operation.Algebraic.toFn(arithmeticOperation) E.R.merge(toSampleSet(t1), toSampleSet(t2)) ->E.R.bind(((t1, t2)) => { SampleSetDist.map2(~fn, ~t1, ~t2)->E.R2.errMap(x => DistributionTypes.OperationError(x)) }) ->E.R2.fmap(r => DistributionTypes.SampleSet(r)) } let symbolic = ( arithmeticOperation: Operation.algebraicOperation, t1: t, t2: t, ): SymbolicDistTypes.analyticalSimplificationResult => { switch (t1, t2) { | (DistributionTypes.Symbolic(d1), DistributionTypes.Symbolic(d2)) => SymbolicDist.T.tryAnalyticalSimplification(d1, d2, arithmeticOperation) | _ => #NoSolution } } } module StrategyChooser = { type specificStrategy = [#AsSymbolic | #AsMonteCarlo | #AsConvolution] //I'm (Ozzie) really just guessing here, very little idea what's best let expectedConvolutionCost: t => int = x => switch x { | Symbolic(#Float(_)) => MagicNumbers.OpCost.floatCost | Symbolic(_) => MagicNumbers.OpCost.symbolicCost | PointSet(Discrete(m)) => m.xyShape->XYShape.T.length | PointSet(Mixed(_)) => MagicNumbers.OpCost.mixedCost | PointSet(Continuous(_)) => MagicNumbers.OpCost.continuousCost | _ => MagicNumbers.OpCost.wildcardCost } let hasSampleSetDist = (t1: t, t2: t): bool => isSampleSetSet(t1) || isSampleSetSet(t2) let convolutionIsFasterThanMonteCarlo = (t1: t, t2: t): bool => expectedConvolutionCost(t1) * expectedConvolutionCost(t2) < MagicNumbers.OpCost.monteCarloCost let preferConvolutionToMonteCarlo = (t1, t2, arithmeticOperation) => { !hasSampleSetDist(t1, t2) && Operation.Convolution.canDoAlgebraicOperation(arithmeticOperation) && convolutionIsFasterThanMonteCarlo(t1, t2) } let run = (~t1: t, ~t2: t, ~arithmeticOperation): specificStrategy => { switch StrategyCallOnValidatedInputs.symbolic(arithmeticOperation, t1, t2) { | #AnalyticalSolution(_) | #Error(_) => #AsSymbolic | #NoSolution => preferConvolutionToMonteCarlo(t1, t2, arithmeticOperation) ? #AsConvolution : #AsMonteCarlo } } } let runStrategyOnValidatedInputs = ( ~t1: t, ~t2: t, ~arithmeticOperation, ~strategy: StrategyChooser.specificStrategy, ~toPointSetFn: toPointSetFn, ~toSampleSetFn: toSampleSetFn, ): result => { switch strategy { | #AsMonteCarlo => StrategyCallOnValidatedInputs.monteCarlo(toSampleSetFn, arithmeticOperation, t1, t2) | #AsSymbolic => switch StrategyCallOnValidatedInputs.symbolic(arithmeticOperation, t1, t2) { | #AnalyticalSolution(symbolicDist) => Ok(Symbolic(symbolicDist)) | #Error(e) => Error(OperationError(e)) | #NoSolution => Error(Unreachable) } | #AsConvolution => switch Operation.Convolution.fromAlgebraicOperation(arithmeticOperation) { | Some(convOp) => StrategyCallOnValidatedInputs.convolution(toPointSetFn, convOp, t1, t2) | None => Error(Unreachable) } } } let run = ( ~strategy: DistributionTypes.asAlgebraicCombinationStrategy, t1: t, ~toPointSetFn: toPointSetFn, ~toSampleSetFn: toSampleSetFn, ~arithmeticOperation: Operation.algebraicOperation, ~t2: t, ): result => { let invalidOperationError = InputValidator.run(t1, t2, ~arithmeticOperation, ~toPointSetFn) switch (invalidOperationError, strategy) { | (Some(e), _) => Error(e) | (None, AsDefault) => { let chooseStrategy = StrategyChooser.run(~arithmeticOperation, ~t1, ~t2) runStrategyOnValidatedInputs( ~t1, ~t2, ~strategy=chooseStrategy, ~arithmeticOperation, ~toPointSetFn, ~toSampleSetFn, ) } | (None, AsMonteCarlo) => StrategyCallOnValidatedInputs.monteCarlo(toSampleSetFn, arithmeticOperation, t1, t2) | (None, AsSymbolic) => switch StrategyCallOnValidatedInputs.symbolic(arithmeticOperation, t1, t2) { | #AnalyticalSolution(symbolicDist) => Ok(Symbolic(symbolicDist)) | #NoSolution => Error(RequestedStrategyInvalidError(`No analytic solution for inputs`)) | #Error(err) => Error(OperationError(err)) } | (None, AsConvolution) => switch Operation.Convolution.fromAlgebraicOperation(arithmeticOperation) { | None => { let errString = `Convolution not supported for ${Operation.Algebraic.toString( arithmeticOperation, )}` Error(RequestedStrategyInvalidError(errString)) } | Some(convOp) => StrategyCallOnValidatedInputs.convolution(toPointSetFn, convOp, t1, t2) } } } } let algebraicCombination = AlgebraicCombination.run //TODO: Add faster pointwiseCombine fn let pointwiseCombination = ( t1: t, ~toPointSetFn: toPointSetFn, ~algebraicCombination: Operation.algebraicOperation, ~t2: t, ): result => { E.R.merge(toPointSetFn(t1), toPointSetFn(t2))->E.R.bind(((t1, t2)) => PointSetDist.combinePointwise(Operation.Algebraic.toFn(algebraicCombination), t1, t2) ->E.R2.fmap(r => DistributionTypes.PointSet(r)) ->E.R2.errMap(err => DistributionTypes.OperationError(err)) ) } let pointwiseCombinationFloat = ( t: t, ~toPointSetFn: toPointSetFn, ~algebraicCombination: Operation.algebraicOperation, ~f: float, ): result => { let executeCombination = arithOp => toPointSetFn(t)->E.R.bind(t => { //TODO: Move to PointSet codebase let fn = (secondary, main) => Operation.Scale.toFn(arithOp, main, secondary) let integralSumCacheFn = Operation.Scale.toIntegralSumCacheFn(arithOp) let integralCacheFn = Operation.Scale.toIntegralCacheFn(arithOp) PointSetDist.T.mapYResult( ~integralSumCacheFn=integralSumCacheFn(f), ~integralCacheFn=integralCacheFn(f), ~fn=fn(f), t, )->E.R2.errMap(x => DistributionTypes.OperationError(x)) }) let m = switch algebraicCombination { | #Add | #Subtract => Error(DistributionTypes.DistributionVerticalShiftIsInvalid) | (#Multiply | #Divide | #Power | #Logarithm) as arithmeticOperation => executeCombination(arithmeticOperation) | #LogarithmWithThreshold(eps) => executeCombination(#LogarithmWithThreshold(eps)) } m->E.R2.fmap(r => DistributionTypes.PointSet(r)) } //Note: The result should always cumulatively sum to 1. This would be good to test. //Note: If the inputs are not normalized, this will return poor results. The weights probably refer to the post-normalized forms. It would be good to apply a catch to this. let mixture = ( values: array<(t, float)>, ~scaleMultiplyFn: scaleMultiplyFn, ~pointwiseAddFn: pointwiseAddFn, ) => { if E.A.length(values) == 0 { Error(DistributionTypes.OtherError("Mixture error: mixture must have at least 1 element")) } else { let totalWeight = values->E.A2.fmap(E.Tuple2.second)->E.A.Floats.sum let properlyWeightedValues = values ->E.A2.fmap(((dist, weight)) => scaleMultiplyFn(dist, weight /. totalWeight)) ->E.A.R.firstErrorOrOpen properlyWeightedValues->E.R.bind(values => { values |> Js.Array.sliceFrom(1) |> E.A.fold_left( (acc, x) => E.R.bind(acc, acc => pointwiseAddFn(acc, x)), Ok(E.A.unsafe_get(values, 0)), ) }) } }