open Distributions type t = PointSetTypes.continuousShape module Analysis = { let integrate = ( ~indefiniteIntegralStepwise=(p, h1) => h1 *. p, ~indefiniteIntegralLinear=(p, a, b) => a *. p +. b *. p ** 2.0 /. 2.0, t: t, ): float => { let xs = t.xyShape.xs let ys = t.xyShape.ys E.A.reducei(xs, 0.0, (acc, _x, i) => { let areaUnderIntegral = // TODO Take this switch statement out of the loop body switch (t.interpolation, i) { | (_, 0) => 0.0 | (#Stepwise, _) => indefiniteIntegralStepwise(xs[i], ys[i - 1]) -. indefiniteIntegralStepwise(xs[i - 1], ys[i - 1]) | (#Linear, _) => let x1 = xs[i - 1] let x2 = xs[i] if x1 == x2 { 0.0 } else { let h1 = ys[i - 1] let h2 = ys[i] let b = (h1 -. h2) /. (x1 -. x2) let a = h1 -. b *. x1 indefiniteIntegralLinear(x2, a, b) -. indefiniteIntegralLinear(x1, a, b) } } acc +. areaUnderIntegral }) } let getMeanOfSquares = (t: t) => { let indefiniteIntegralLinear = (p, a, b) => a *. p ** 3.0 /. 3.0 +. b *. p ** 4.0 /. 4.0 let indefiniteIntegralStepwise = (p, h1) => h1 *. p ** 3.0 /. 3.0 integrate(~indefiniteIntegralStepwise, ~indefiniteIntegralLinear, t) } } let getShape = (t: t) => t.xyShape let interpolation = (t: t) => t.interpolation let make = (~interpolation=#Linear, ~integralSumCache=None, ~integralCache=None, xyShape): t => { xyShape: xyShape, interpolation: interpolation, integralSumCache: integralSumCache, integralCache: integralCache, } let shapeMap = (fn, {xyShape, interpolation, integralSumCache, integralCache}: t): t => { xyShape: fn(xyShape), interpolation: interpolation, integralSumCache: integralSumCache, integralCache: integralCache, } let lastY = (t: t) => t |> getShape |> XYShape.T.lastY let oShapeMap = (fn, {xyShape, interpolation, integralSumCache, integralCache}: t): option< PointSetTypes.continuousShape, > => fn(xyShape) |> E.O.fmap(make(~interpolation, ~integralSumCache, ~integralCache)) let emptyIntegral: PointSetTypes.continuousShape = { xyShape: { xs: [neg_infinity], ys: [0.0], }, interpolation: #Linear, integralSumCache: Some(0.0), integralCache: None, } let empty: PointSetTypes.continuousShape = { xyShape: XYShape.T.empty, interpolation: #Linear, integralSumCache: Some(0.0), integralCache: Some(emptyIntegral), } let stepwiseToLinear = (t: t): t => make( ~integralSumCache=t.integralSumCache, ~integralCache=t.integralCache, XYShape.Range.stepwiseToLinear(t.xyShape), ) // Note: This results in a distribution with as many points as the sum of those in t1 and t2. let combinePointwise = ( ~combiner=XYShape.PointwiseCombination.combine, ~integralSumCachesFn=(_, _) => None, ~distributionType: PointSetTypes.distributionType=#PDF, fn: (float, float) => result, t1: PointSetTypes.continuousShape, t2: PointSetTypes.continuousShape, ): result => { // If we're adding the distributions, and we know the total of each, then we // can just sum them up. Otherwise, all bets are off. let combinedIntegralSum = Common.combineIntegralSums( integralSumCachesFn, t1.integralSumCache, t2.integralSumCache, ) // TODO: does it ever make sense to pointwise combine the integrals here? // It could be done for pointwise additions, but is that ever needed? // If combining stepwise and linear, we must convert the stepwise to linear first, // i.e. add a point at the bottom of each step let (t1, t2) = switch (t1.interpolation, t2.interpolation) { | (#Linear, #Linear) => (t1, t2) | (#Stepwise, #Stepwise) => (t1, t2) | (#Linear, #Stepwise) => (t1, stepwiseToLinear(t2)) | (#Stepwise, #Linear) => (stepwiseToLinear(t1), t2) } let extrapolation = switch distributionType { | #PDF => #UseZero | #CDF => #UseOutermostPoints } let interpolator = XYShape.XtoY.continuousInterpolator(t1.interpolation, extrapolation) combiner(fn, interpolator, t1.xyShape, t2.xyShape)->E.R2.fmap(x => make(~integralSumCache=combinedIntegralSum, x) ) } let toLinear = (t: t): option => switch t { | {interpolation: #Stepwise, xyShape, integralSumCache, integralCache} => xyShape |> XYShape.Range.stepsToContinuous |> E.O.fmap(make(~integralSumCache, ~integralCache)) | {interpolation: #Linear} => Some(t) } let shapeFn = (fn, t: t) => t |> getShape |> fn let updateIntegralSumCache = (integralSumCache, t: t): t => { ...t, integralSumCache: integralSumCache, } let updateIntegralCache = (integralCache, t: t): t => {...t, integralCache: integralCache} let sum = ( ~integralSumCachesFn: (float, float) => option=(_, _) => None, continuousShapes, ): t => continuousShapes |> E.A.fold_left( (x, y) => combinePointwise(~integralSumCachesFn, (a, b) => Ok(a +. b), x, y)->E.R.toExn( "Addition should never fail", _, ), empty, ) let reduce = ( ~integralSumCachesFn: (float, float) => option=(_, _) => None, fn: (float, float) => result, continuousShapes, ): result => { let merge = combinePointwise(~integralSumCachesFn, fn) continuousShapes |> E.A.R.foldM(merge, empty) } let mapYResult = ( ~integralSumCacheFn=_ => None, ~integralCacheFn=_ => None, ~fn: float => result, t: t, ): result => XYShape.T.mapYResult(fn, getShape(t))->E.R2.fmap(x => make( ~interpolation=t.interpolation, ~integralSumCache=t.integralSumCache |> E.O.bind(_, integralSumCacheFn), ~integralCache=t.integralCache |> E.O.bind(_, integralCacheFn), x, ) ) let mapY = ( ~integralSumCacheFn=_ => None, ~integralCacheFn=_ => None, ~fn: float => float, t: t, ): t => make( ~interpolation=t.interpolation, ~integralSumCache=t.integralSumCache |> E.O.bind(_, integralSumCacheFn), ~integralCache=t.integralCache |> E.O.bind(_, integralCacheFn), t |> getShape |> XYShape.T.mapY(fn), ) let rec scaleBy = (~scale=1.0, t: t): t => { let scaledIntegralSumCache = E.O.bind(t.integralSumCache, v => Some(scale *. v)) let scaledIntegralCache = E.O.bind(t.integralCache, v => Some(scaleBy(~scale, v))) t |> mapY(~fn=(r: float) => r *. scale) |> updateIntegralSumCache(scaledIntegralSumCache) |> updateIntegralCache(scaledIntegralCache) } module T = Dist({ type t = PointSetTypes.continuousShape type integral = PointSetTypes.continuousShape let minX = shapeFn(XYShape.T.minX) let maxX = shapeFn(XYShape.T.maxX) let mapY = mapY let mapYResult = mapYResult let updateIntegralCache = updateIntegralCache let toDiscreteProbabilityMassFraction = _ => 0.0 let toPointSetDist = (t: t): PointSetTypes.pointSetDist => Continuous(t) let xToY = (f, {interpolation, xyShape}: t) => switch interpolation { | #Stepwise => xyShape |> XYShape.XtoY.stepwiseIncremental(f) |> E.O.default(0.0) | #Linear => xyShape |> XYShape.XtoY.linear(f) } |> PointSetTypes.MixedPoint.makeContinuous let truncate = (leftCutoff: option, rightCutoff: option, t: t) => { let lc = E.O.default(neg_infinity, leftCutoff) let rc = E.O.default(infinity, rightCutoff) let truncatedZippedPairs = t |> getShape |> XYShape.T.zip |> XYShape.Zipped.filterByX(x => x >= lc && x <= rc) let leftNewPoint = leftCutoff |> E.O.dimap(lc => [(lc -. epsilon_float, 0.)], _ => []) let rightNewPoint = rightCutoff |> E.O.dimap(rc => [(rc +. epsilon_float, 0.)], _ => []) let truncatedZippedPairsWithNewPoints = E.A.concatMany([ leftNewPoint, truncatedZippedPairs, rightNewPoint, ]) let truncatedShape = XYShape.T.fromZippedArray(truncatedZippedPairsWithNewPoints) make(truncatedShape) } // TODO: This should work with stepwise plots. let integral = t => switch (getShape(t) |> XYShape.T.isEmpty, t.integralCache) { | (true, _) => emptyIntegral | (false, Some(cache)) => cache | (false, None) => t |> getShape |> XYShape.Range.integrateWithTriangles |> E.O.toExt("This should not have happened") |> make } let downsample = (length, t): t => t |> shapeMap(XYShape.XsConversion.proportionByProbabilityMass(length, integral(t).xyShape)) let integralEndY = (t: t) => t.integralSumCache |> E.O.default(t |> integral |> lastY) let integralXtoY = (f, t: t) => t |> integral |> shapeFn(XYShape.XtoY.linear(f)) let integralYtoX = (f, t: t) => t |> integral |> shapeFn(XYShape.YtoX.linear(f)) let toContinuous = t => Some(t) let toDiscrete = _ => None let normalize = (t: t): t => t |> updateIntegralCache(Some(integral(t))) |> scaleBy(~scale=1. /. integralEndY(t)) |> updateIntegralSumCache(Some(1.0)) let mean = (t: t) => { let indefiniteIntegralStepwise = (p, h1) => h1 *. p ** 2.0 /. 2.0 let indefiniteIntegralLinear = (p, a, b) => a *. p ** 2.0 /. 2.0 +. b *. p ** 3.0 /. 3.0 Analysis.integrate(~indefiniteIntegralStepwise, ~indefiniteIntegralLinear, t) } let variance = (t: t): float => XYShape.Analysis.getVarianceDangerously(t, mean, Analysis.getMeanOfSquares) let klDivergence = (prediction: t, answer: t) => { let newShape = XYShape.PointwiseCombination.combineAlongSupportOfSecondArgument( PointSetDist_Scoring.KLDivergence.integrand, prediction.xyShape, answer.xyShape, ) let xyShapeToContinuous: XYShape.xyShape => t = xyShape => { xyShape: xyShape, interpolation: #Linear, integralSumCache: None, integralCache: None, } newShape->E.R2.fmap(x => x->xyShapeToContinuous->integralEndY) } }) let isNormalized = (t: t): bool => { let areaUnderIntegral = t |> updateIntegralCache(Some(T.integral(t))) |> T.integralEndY areaUnderIntegral < 1. +. MagicNumbers.Epsilon.seven && areaUnderIntegral > 1. -. MagicNumbers.Epsilon.seven } let downsampleEquallyOverX = (length, t): t => t |> shapeMap(XYShape.XsConversion.proportionEquallyOverX(length)) /* This simply creates multiple copies of the continuous distribution, scaled and shifted according to each discrete data point, and then adds them all together. */ let combineAlgebraicallyWithDiscrete = ( op: Operation.convolutionOperation, t1: t, t2: PointSetTypes.discreteShape, ~discretePosition: AlgebraicShapeCombination.argumentPosition, ) => { let t1s = t1 |> getShape let t2s = t2.xyShape // TODO would like to use Discrete.getShape here, but current file structure doesn't allow for that if XYShape.T.isEmpty(t1s) || XYShape.T.isEmpty(t2s) { empty } else { let continuousAsLinear = switch t1.interpolation { | #Linear => t1 | #Stepwise => stepwiseToLinear(t1) } let combinedShape = AlgebraicShapeCombination.combineShapesContinuousDiscrete( op, continuousAsLinear |> getShape, t2s, ~discretePosition, ) let combinedIntegralSum = switch op { | #Multiply => Common.combineIntegralSums((a, b) => Some(a *. b), t1.integralSumCache, t2.integralSumCache) | _ => None } // TODO: It could make sense to automatically transform the integrals here (shift or scale) make(~interpolation=t1.interpolation, ~integralSumCache=combinedIntegralSum, combinedShape) } } let combineAlgebraically = (op: Operation.convolutionOperation, t1: t, t2: t) => { let s1 = t1 |> getShape let s2 = t2 |> getShape let t1n = s1 |> XYShape.T.length let t2n = s2 |> XYShape.T.length if t1n == 0 || t2n == 0 { empty } else { let combinedShape = AlgebraicShapeCombination.combineShapesContinuousContinuous(op, s1, s2) let combinedIntegralSum = Common.combineIntegralSums( (a, b) => Some(a *. b), t1.integralSumCache, t2.integralSumCache, ) // return a new Continuous distribution make(~integralSumCache=combinedIntegralSum, combinedShape) } }