From bd528571afbdaabd01328328acc50f7c016931ba Mon Sep 17 00:00:00 2001 From: Sebastian Kosch Date: Thu, 25 Jun 2020 23:38:14 -0700 Subject: [PATCH] Working on code reorganization, doesn't compile yet --- __tests__/Distributions__Test.re | 7 +- src/components/DistBuilder2.re | 6 +- src/components/DistBuilder3.re | 4 +- src/components/Drawer.re | 8 +- src/components/charts/DistPlusPlot.re | 28 +- src/distPlus/distribution/DistTypes.re | 8 +- src/distPlus/distribution/Distributions.re | 836 ++++++++++++------ .../distribution/MixedShapeBuilder.re | 23 +- src/distPlus/distribution/XYShape.re | 10 +- src/distPlus/renderers/RenderTypes.re | 2 +- src/distPlus/renderers/ShapeRenderer.re | 4 +- .../renderers/samplesRenderer/Guesstimator.re | 6 +- .../renderers/samplesRenderer/Samples.re | 11 +- src/distPlus/symbolic/MathJsParser.re | 78 +- src/distPlus/symbolic/SymbolicDist.re | 530 ++--------- src/distPlus/symbolic/TreeNode.re | 414 +++++++++ src/distPlus/utility/Jstat.re | 10 +- 17 files changed, 1142 insertions(+), 843 deletions(-) create mode 100644 src/distPlus/symbolic/TreeNode.re diff --git a/__tests__/Distributions__Test.re b/__tests__/Distributions__Test.re index d83c1ac2..20c7ce34 100644 --- a/__tests__/Distributions__Test.re +++ b/__tests__/Distributions__Test.re @@ -386,10 +386,9 @@ describe("Shape", () => { let numSamples = 10000; open Distributions.Shape; let normal: SymbolicDist.dist = `Normal({mean, stdev}); - let normalShape = SymbolicDist.GenericSimple.toShape(normal, numSamples); + let normalShape = TreeNode.toShape(numSamples, normal); let lognormal = SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev); - let lognormalShape = - SymbolicDist.GenericSimple.toShape(lognormal, numSamples); + let lognormalShape = TreeNode.toShape(numSamples, lognormal); makeTestCloseEquality( "Mean of a normal", @@ -416,4 +415,4 @@ describe("Shape", () => { ~digits=0, ); }); -}); \ No newline at end of file +}); diff --git a/src/components/DistBuilder2.re b/src/components/DistBuilder2.re index 9c7ad6bb..b912e223 100644 --- a/src/components/DistBuilder2.re +++ b/src/components/DistBuilder2.re @@ -44,14 +44,14 @@ module DemoDist = { Distributions.DistPlus.make( ~shape= Continuous( - Distributions.Continuous.make(`Linear, {xs, ys}), + Distributions.Continuous.make(`Linear, {xs, ys}, None), ), ~domain=Complete, ~unit=UnspecifiedDistribution, ~guesstimatorString=None, (), ) - |> Distributions.DistPlus.T.scaleToIntegralSum(~intendedSum=1.0); + |> Distributions.DistPlus.T.normalize; ; }; R.ste}> @@ -102,4 +102,4 @@ let make = () => {
; -}; \ No newline at end of file +}; diff --git a/src/components/DistBuilder3.re b/src/components/DistBuilder3.re index 662a3241..86bb1d2a 100644 --- a/src/components/DistBuilder3.re +++ b/src/components/DistBuilder3.re @@ -37,7 +37,7 @@ module DemoDist = { let parsed1 = MathJsParser.fromString(guesstimatorString); let shape = switch (parsed1) { - | Ok(r) => Some(SymbolicDist.toShape(10000, r)) + | Ok(r) => Some(TreeNode.toShape(10000, r)) | _ => None }; @@ -111,4 +111,4 @@ let make = () => {
; -}; \ No newline at end of file +}; diff --git a/src/components/Drawer.re b/src/components/Drawer.re index 090447b5..8dc0c7db 100644 --- a/src/components/Drawer.re +++ b/src/components/Drawer.re @@ -177,6 +177,7 @@ module Convert = { let continuousShape: Types.continuousShape = { xyShape, interpolation: `Linear, + knownIntegralSum: None, }; let integral = XYShape.Analysis.integrateContinuousShape(continuousShape); @@ -188,6 +189,7 @@ module Convert = { ys, }, interpolation: `Linear, + knownIntegralSum: Some(1.0), }; continuousShape; }; @@ -387,7 +389,7 @@ module Draw = { let numSamples = 3000; let normal: SymbolicDist.dist = `Normal({mean, stdev}); - let normalShape = SymbolicDist.GenericSimple.toShape(normal, numSamples); + let normalShape = TreeNode.toShape(numSamples, `DistData(`Symbolic(normal))); let xyShape: Types.xyShape = switch (normalShape) { | Mixed(_) => {xs: [||], ys: [||]} @@ -667,9 +669,7 @@ module State = { /* create a cdf from a pdf */ let _pdf = - Distributions.Continuous.T.scaleToIntegralSum( - ~cache=None, - ~intendedSum=1.0, + Distributions.Continuous.T.normalize( pdf, ); diff --git a/src/components/charts/DistPlusPlot.re b/src/components/charts/DistPlusPlot.re index 9eb484ef..93feb7d2 100644 --- a/src/components/charts/DistPlusPlot.re +++ b/src/components/charts/DistPlusPlot.re @@ -95,7 +95,7 @@ let table = (distPlus, x) => { {distPlus - |> Distributions.DistPlus.T.toScaledContinuous + |> Distributions.DistPlus.T.normalizedToContinuous |> E.O.fmap( Distributions.Continuous.T.Integral.sum(~cache=None), ) @@ -113,7 +113,7 @@ let table = (distPlus, x) => { {distPlus - |> Distributions.DistPlus.T.toScaledDiscrete + |> Distributions.DistPlus.T.normalizedToDiscrete |> E.O.fmap(Distributions.Discrete.T.Integral.sum(~cache=None)) |> E.O.fmap(E.Float.with2DigitsPrecision) |> E.O.default("") @@ -211,15 +211,13 @@ let percentiles = distPlus => { ; }; -let adjustBoth = discreteProbabilityMass => { - let yMaxDiscreteDomainFactor = discreteProbabilityMass; - let yMaxContinuousDomainFactor = 1.0 -. discreteProbabilityMass; - let yMax = - yMaxDiscreteDomainFactor > yMaxContinuousDomainFactor - ? yMaxDiscreteDomainFactor : yMaxContinuousDomainFactor; +let adjustBoth = discreteProbabilityMassFraction => { + let yMaxDiscreteDomainFactor = discreteProbabilityMassFraction; + let yMaxContinuousDomainFactor = 1.0 -. discreteProbabilityMassFraction; + let yMax = (yMaxDiscreteDomainFactor > 0.5 ? yMaxDiscreteDomainFactor : yMaxContinuousDomainFactor); ( - 1.0 /. (yMaxDiscreteDomainFactor /. yMax), - 1.0 /. (yMaxContinuousDomainFactor /. yMax), + yMax /. yMaxDiscreteDomainFactor, + yMax /. yMaxContinuousDomainFactor, ); }; @@ -227,10 +225,10 @@ module DistPlusChart = { [@react.component] let make = (~distPlus: DistTypes.distPlus, ~config: chartConfig, ~onHover) => { open Distributions.DistPlus; - let discrete = distPlus |> T.toScaledDiscrete; + let discrete = distPlus |> T.normalizedToDiscrete |> E.O.fmap(Distributions.Discrete.getShape); let continuous = distPlus - |> T.toScaledContinuous + |> T.normalizedToContinuous |> E.O.fmap(Distributions.Continuous.getShape); let range = T.xTotalRange(distPlus); @@ -254,10 +252,10 @@ module DistPlusChart = { }; let timeScale = distPlus.unit |> DistTypes.DistributionUnit.toJson; - let toDiscreteProbabilityMass = - distPlus |> Distributions.DistPlus.T.toDiscreteProbabilityMass; + let discreteProbabilityMassFraction = + distPlus |> Distributions.DistPlus.T.toDiscreteProbabilityMassFraction; let (yMaxDiscreteDomainFactor, yMaxContinuousDomainFactor) = - adjustBoth(toDiscreteProbabilityMass); + adjustBoth(discreteProbabilityMassFraction); float; let maxX: t => float; - let mapY: (float => float, t) => t; + let mapY: (~knownIntegralSumFn: float => option(float)=?, float => float, t) => t; let xToY: (float, t) => DistTypes.mixedPoint; let toShape: t => DistTypes.shape; let toContinuous: t => option(DistTypes.continuousShape); let toDiscrete: t => option(DistTypes.discreteShape); - let toScaledContinuous: t => option(DistTypes.continuousShape); - let toScaledDiscrete: t => option(DistTypes.discreteShape); - let toDiscreteProbabilityMass: t => float; - let truncate: (~cache: option(integral)=?, int, t) => t; + let normalize: t => t; + let normalizedToContinuous: t => option(DistTypes.continuousShape); + let normalizedToDiscrete: t => option(DistTypes.discreteShape); + let toDiscreteProbabilityMassFraction: t => float; + let downsample: (~cache: option(integral)=?, int, t) => t; let integral: (~cache: option(integral), t) => integral; let integralEndY: (~cache: option(integral), t) => float; @@ -31,19 +32,17 @@ module Dist = (T: dist) => { let xTotalRange = (t: t) => maxX(t) -. minX(t); let mapY = T.mapY; let xToY = T.xToY; - let truncate = T.truncate; + let downsample = T.downsample; let toShape = T.toShape; - let toDiscreteProbabilityMass = T.toDiscreteProbabilityMass; + let toDiscreteProbabilityMassFraction = T.toDiscreteProbabilityMassFraction; let toContinuous = T.toContinuous; let toDiscrete = T.toDiscrete; - let toScaledContinuous = T.toScaledContinuous; - let toScaledDiscrete = T.toScaledDiscrete; + let normalize = T.normalize; + let normalizedToContinuous = T.normalizedToContinuous; + let normalizedToDiscrete = T.normalizedToDiscrete; let mean = T.mean; let variance = T.variance; - // TODO: Move this to each class, have use integral to produce integral in DistPlus class. - let scaleBy = (~scale=1.0, t: t) => t |> mapY((r: float) => r *. scale); - module Integral = { type t = T.integral; let get = T.integral; @@ -51,52 +50,169 @@ module Dist = (T: dist) => { let yToX = T.integralYtoX; let sum = T.integralEndY; }; - - // This is suboptimal because it could get the cache but doesn't here. - let scaleToIntegralSum = - (~cache: option(integral)=None, ~intendedSum=1.0, t: t) => { - let scale = intendedSum /. Integral.sum(~cache, t); - scaleBy(~scale, t); - }; }; -module Continuous = { +module Continuous { type t = DistTypes.continuousShape; let getShape = (t: t) => t.xyShape; let interpolation = (t: t) => t.interpolation; - let make = (interpolation, xyShape): t => {xyShape, interpolation}; - let shapeMap = (fn, {xyShape, interpolation}: t): t => { + let make = (interpolation, xyShape, knownIntegralSum): t => { + xyShape, + interpolation, + knownIntegralSum, + }; + let shapeMap = (fn, {xyShape, interpolation, knownIntegralSum}: t): t => { xyShape: fn(xyShape), interpolation, + knownIntegralSum, }; let lastY = (t: t) => t |> getShape |> XYShape.T.lastY; let oShapeMap = - (fn, {xyShape, interpolation}: t): option(DistTypes.continuousShape) => - fn(xyShape) |> E.O.fmap(make(interpolation)); + (fn, {xyShape, interpolation, knownIntegralSum}: t) + : option(DistTypes.continuousShape) => + fn(xyShape) |> E.O.fmap(make(interpolation, _, knownIntegralSum)); - let empty: DistTypes.continuousShape = {xyShape: XYShape.T.empty, interpolation: `Linear}; + let empty: DistTypes.continuousShape = { + xyShape: XYShape.T.empty, + interpolation: `Linear, + knownIntegralSum: Some(0.0), + }; let combine = (fn, t1: DistTypes.continuousShape, t2: DistTypes.continuousShape) : DistTypes.continuousShape => { - make(`Linear, XYShape.Combine.combine( - ~xsSelection=ALL_XS, - ~xToYSelection=XYShape.XtoY.linear, - ~fn, - t1.xyShape, - t2.xyShape, - )); + + // If we're adding the distributions, and we know the total of each, then we + // can just sum them up. Otherwise, all bets are off. + let combinedIntegralSum = + switch (fn, t1.knownIntegralSum, t2.knownIntegralSum) { + | (_, None, _) + | (_, _, None) => None + | ((+.), Some(s1), Some(s2)) => Some(s1 +. s2) + }; + + make( + `Linear, + XYShape.Combine.combine( + ~xsSelection=ALL_XS, + ~xToYSelection=XYShape.XtoY.linear, + ~fn, + t1.xyShape, + t2.xyShape, + ), + combinedIntegralSum, + ); }; - let reduce = (fn, items) => - items |> E.A.fold_left(combine(fn), empty); + let reduce = (fn, items) => items |> E.A.fold_left(combine(fn), empty); let toLinear = (t: t): option(t) => { switch (t) { - | {interpolation: `Stepwise, xyShape} => - xyShape |> XYShape.Range.stepsToContinuous |> E.O.fmap(make(`Linear)) - | {interpolation: `Linear, _} => Some(t) + | {interpolation: `Stepwise, xyShape, knownIntegralSum} => + xyShape + |> XYShape.Range.stepsToContinuous + |> E.O.fmap(make(`Linear, _, knownIntegralSum)) + | {interpolation: `Linear} => Some(t) }; }; let shapeFn = (fn, t: t) => t |> getShape |> fn; + let updateKnownIntegralSum = (knownIntegralSum, t: t): t => ({...t, knownIntegralSum}); + + // Contracts every point in the continuous xyShape into a single dirac-Delta-like point, + // using the centerpoints between adjacent xs and the area under each trapezoid. + // This is essentially like integrateWithTriangles, without the accumulation. + let toDiscretePointMasses = (t: t): DistTypes.discreteShape => { + let tl = t |> getShape |> XYShape.T.length; + let pointMassesY: array(float) = Belt.Array.make(tl - 1, 0.0); + let {xs, ys}: XYShape.T.t = t |> getShape; + for (x in 0 to E.A.length(xs) - 2) { + let _ = + Belt.Array.set( + pointMassesY, + x, + (xs[x + 1] -. xs[x]) *. ((ys[x] +. ys[x + 1]) /. 2.)); // = dx * (1/2) * (avgY) + (); + }; + + {xyShape: {xs: xs, ys: pointMassesY}, knownIntegralSum: t.knownIntegralSum}; + }; + + /* Performs a discrete convolution between two continuous distributions A and B. + * It is an extremely good idea to downsample the distributions beforehand, + * because the number of samples in the convolution can be up to length(A) * length(B). + * + * Conventional convolution uses fn = (+.), but we also allow other operations to combine the xs. + * + * In practice, the convolution works by multiplying the ys for each possible combo of points of + * the two shapes. This creates a new shape for each point of A. These new shapes are then combined + * linearly. This may not always be the most efficient way, but it is probably the most robust for now. + * + * In the future, it may be possible to use a non-uniform fast Fourier transform instead (although only for addition). + */ + let convolveWithDiscrete = (fn, t1: t, t2: DistTypes.discreteShape) => { + let t1s = t1 |> getShape; + let t2s = t2.xyShape; // would like to use Discrete.getShape here, but current file structure doesn't allow for that + let t1n = t1s |> XYShape.T.length; + let t2n = t2s |> XYShape.T.length; + + let outXYShapes: array(array((float, float))) = Belt.Array.makeUninitializedUnsafe(t1n); + + for (i in 0 to t1n - 1) { + // create a new distribution + let dxyShape: array((float, float)) = Belt.Array.makeUninitializedUnsafe(t2n); + for (j in 0 to t2n - 1) { + let _ = Belt.Array.set(dxyShape, j, (fn(t1s.xs[i], t2s.xs[j]), t1s.ys[i] *. t2s.ys[j])); + (); + } + let _ = Belt.Array.set(outXYShapes, i, dxyShape); + (); + } + + let combinedIntegralSum = + switch (t1.knownIntegralSum, t2.knownIntegralSum) { + | (None, _) + | (_, None) => None + | (Some(s1), Some(s2)) => Some(s1 *. s2) + }; + + outXYShapes + |> E.A.fmap(s => { + let xyShape = XYShape.T.fromZippedArray(s); + make(`Linear, xyShape, None); + }) + |> reduce((+.)) + |> updateKnownIntegralSum(combinedIntegralSum); + }; + + let convolve = (fn, t1: t, t2: t) => + convolveWithDiscrete(fn, t1, toDiscretePointMasses(t2)); + + let mapY = (~knownIntegralSumFn=(previousKnownIntegralSum => None), fn, t: t) => { + let u = E.O.bind(_, knownIntegralSumFn); + let yMapFn = shapeMap(XYShape.T.mapY(fn)); + + t |> yMapFn |> updateKnownIntegralSum(u(t.knownIntegralSum)); + }; + + let scaleBy = (~scale=1.0, ~knownIntegralSum=None, t: t): t => + t |> mapY((r: float) => r *. scale) |> updateKnownIntegralSum(knownIntegralSum); + + let truncate = (leftCutoff: option(float), rightCutoff: option(float), t: t) => { + let truncatedZippedPairs = + t + |> getShape + |> XYShape.T.zip + |> XYShape.Zipped.filterByX(x => x >= E.O.default(neg_infinity, leftCutoff) || x <= E.O.default(infinity, rightCutoff)); + + let eps = (t |> getShape |> XYShape.T.xTotalRange) *. 0.0001; + + let leftNewPoint = leftCutoff |> E.O.dimap(lc => [| (lc -. eps, 0.) |], _ => [||]); + let rightNewPoint = rightCutoff |> E.O.dimap(rc => [| (rc +. eps, 0.) |], _ => [||]); + + let truncatedZippedPairsWithNewPoints = + E.A.concatMany([| leftNewPoint, truncatedZippedPairs, rightNewPoint |]); + let truncatedShape = XYShape.T.fromZippedArray(truncatedZippedPairsWithNewPoints); + + make(`Linear, truncatedShape, None); + }; module T = Dist({ @@ -104,8 +220,8 @@ module Continuous = { type integral = DistTypes.continuousShape; let minX = shapeFn(XYShape.T.minX); let maxX = shapeFn(XYShape.T.maxX); - let toDiscreteProbabilityMass = _ => 0.0; - let mapY = fn => shapeMap(XYShape.T.mapY(fn)); + let mapY = mapY; + let toDiscreteProbabilityMassFraction = _ => 0.0; let toShape = (t: t): DistTypes.shape => Continuous(t); let xToY = (f, {interpolation, xyShape}: t) => { ( @@ -136,9 +252,9 @@ module Continuous = { |> getShape |> XYShape.Range.integrateWithTriangles |> E.O.toExt("This should not have happened") - |> make(`Linear) + |> make(`Linear, _, None) }; - let truncate = (~cache=None, length, t) => + let downsample = (~cache=None, length, t): t => t |> shapeMap( XYShape.XsConversion.proportionByProbabilityMass( @@ -146,15 +262,23 @@ module Continuous = { integral(~cache, t).xyShape, ), ); - let integralEndY = (~cache, t) => t |> integral(~cache) |> lastY; - let integralXtoY = (~cache, f, t) => + let integralEndY = (~cache, t: t) => + t.knownIntegralSum |> E.O.default(t |> integral(~cache) |> lastY); + let integralXtoY = (~cache, f, t: t) => t |> integral(~cache) |> shapeFn(XYShape.XtoY.linear(f)); - let integralYtoX = (~cache, f, t) => + let integralYtoX = (~cache, f, t: t) => t |> integral(~cache) |> shapeFn(XYShape.YtoX.linear(f)); let toContinuous = t => Some(t); let toDiscrete = _ => None; - let toScaledContinuous = t => Some(t); - let toScaledDiscrete = _ => None; + + let normalize = (t: t): t => { + let continuousIntegralSum = integralEndY(~cache=None, t); + + scaleBy(~scale=(1. /. continuousIntegralSum), ~knownIntegralSum=Some(1.0), t); + }; + + let normalizedToContinuous = t => Some(t); // TODO: this should be normalized + let normalizedToDiscrete = _ => None; let mean = (t: t) => { let indefiniteIntegralStepwise = (p, h1) => h1 *. p ** 2.0 /. 2.0; @@ -176,27 +300,104 @@ module Continuous = { }; module Discrete = { - let sortedByY = (t: DistTypes.discreteShape) => - t |> XYShape.T.zip |> XYShape.Zipped.sortByY; - let sortedByX = (t: DistTypes.discreteShape) => - t |> XYShape.T.zip |> XYShape.Zipped.sortByX; - let empty = XYShape.T.empty; - let make = (s: DistTypes.discreteShape) => s; - let combine = - (fn, t1: DistTypes.discreteShape, t2: DistTypes.discreteShape) + type t = DistTypes.discreteShape; + + let make = (xyShape, knownIntegralSum): t => {xyShape, knownIntegralSum}; + let shapeMap = (fn, {xyShape, knownIntegralSum}: t): t => { + xyShape: fn(xyShape), + knownIntegralSum, + }; + let getShape = (t: t) => t.xyShape; + let oShapeMap = (fn, {xyShape, knownIntegralSum}: t): option(t) => + fn(xyShape) |> E.O.fmap(make(_, knownIntegralSum)); + + let empty: t = {xyShape: XYShape.T.empty, knownIntegralSum: Some(0.0)}; + let shapeFn = (fn, t: t) => t |> getShape |> fn; + + let lastY = (t: t) => t |> getShape |> XYShape.T.lastY; + + let combineIntegralSums = (combineFn: ((float, float) => option(float)), t1KnownIntegralSum: option(float), t2KnownIntegralSum: option(float)) => { + switch (t1KnownIntegralSum, t2KnownIntegralSum) { + | (None, _) + | (_, None) => None + | (Some(s1), Some(s2)) => combineFn(s1, s2) + }; + }; + + let combine = (combineIntegralSumsFn, fn, t1: DistTypes.discreteShape, t2: DistTypes.discreteShape) : DistTypes.discreteShape => { - XYShape.Combine.combine( - ~xsSelection=ALL_XS, - ~xToYSelection=XYShape.XtoY.stepwiseIfAtX, - ~fn, - t1, - t2, + + let combinedIntegralSum = combineIntegralSums(combineIntegralSumsFn, t1.knownIntegralSum, t2.knownIntegralSum); + + make( + XYShape.Combine.combine( + ~xsSelection=ALL_XS, + ~xToYSelection=XYShape.XtoY.stepwiseIfAtX, + ~fn, // stepwiseIfAtX returns option(float), so this fn needs to handle None, which is what the _default0 wrapper is for + t1.xyShape, + t2.xyShape, + ), + combinedIntegralSum, ); }; let _default0 = (fn, a, b) => fn(E.O.default(0.0, a), E.O.default(0.0, b)); let reduce = (fn, items) => - items |> E.A.fold_left(combine(_default0(fn)), empty); + items |> E.A.fold_left(combine((_, _) => None, _default0(fn)), empty); + // a special version of reduce that adds the results (which should be the most common case by far), + // and conveniently also adds the knownIntegralSums. + let reduceAdd = (fn, items) => + items |> E.A.fold_left(combine((s1, s2) => Some(s1 +. s2), _default0((+.))), empty); + + let updateKnownIntegralSum = (knownIntegralSum, t: t): t => ({...t, knownIntegralSum}); + + let convolve = (fn, t1: t, t2: t) => { + let t1s = t1 |> getShape; + let t2s = t2 |> getShape; + let t1n = t1s |> XYShape.T.length; + let t2n = t2s |> XYShape.T.length; + + let combinedIntegralSum = combineIntegralSums((s1, s2) => Some(s1 *. s2), t1.knownIntegralSum, t2.knownIntegralSum); + + let xToYMap = E.FloatFloatMap.empty(); + + for (i in 0 to t1n - 1) { + for (j in 0 to t2n - 1) { + let x = fn(t1s.xs[i], t2s.xs[j]); + let cv = xToYMap |> E.FloatFloatMap.get(x) |> E.O.default(0.); + let my = t1s.ys[i] *. t2s.ys[j]; + let _ = Belt.MutableMap.set(xToYMap, x, cv +. my); + (); + } + } + + let rxys = xToYMap |> E.FloatFloatMap.toArray |> XYShape.Zipped.sortByX; + + let convolvedShape = XYShape.T.fromZippedArray(rxys); + + make(convolvedShape, combinedIntegralSum); + }; + + let mapY = (~knownIntegralSumFn=(previousKnownIntegralSum => None), fn, t: t) => { + let u = E.O.bind(_, knownIntegralSumFn); + let yMapFn = shapeMap(XYShape.T.mapY(fn)); + + t |> yMapFn |> updateKnownIntegralSum(u(t.knownIntegralSum)); + }; + + let scaleBy = (~scale=1.0, ~knownIntegralSum=None, t: t): t => + t |> mapY((r: float) => r *. scale) |> updateKnownIntegralSum(knownIntegralSum); + + let truncate = (leftCutoff: option(float), rightCutoff: option(float), t: t) => { + let truncatedShape = + t + |> getShape + |> XYShape.T.zip + |> XYShape.Zipped.filterByX(x => x >= E.O.default(neg_infinity, leftCutoff) || x <= E.O.default(infinity, rightCutoff)) + |> XYShape.T.fromZippedArray; + + make(truncatedShape, None); + }; module T = Dist({ @@ -205,33 +406,54 @@ module Discrete = { let integral = (~cache, t) => switch (cache) { | Some(c) => c - | None => Continuous.make(`Stepwise, XYShape.T.accumulateYs((+.), t)) + | None => + Continuous.make( + `Stepwise, + XYShape.T.accumulateYs((+.), getShape(t)), + None, + ) }; - let integralEndY = (~cache, t) => - t |> integral(~cache) |> Continuous.lastY; - let minX = XYShape.T.minX; - let maxX = XYShape.T.maxX; - let toDiscreteProbabilityMass = _ => 1.0; - let mapY = XYShape.T.mapY; + let integralEndY = (~cache, t: t) => + t.knownIntegralSum |> E.O.default(t |> integral(~cache) |> Continuous.lastY); + let minX = shapeFn(XYShape.T.minX); + let maxX = shapeFn(XYShape.T.maxX); + let toDiscreteProbabilityMassFraction = _ => 1.0; + let mapY = mapY; let toShape = (t: t): DistTypes.shape => Discrete(t); let toContinuous = _ => None; let toDiscrete = t => Some(t); - let toScaledContinuous = _ => None; - let toScaledDiscrete = t => Some(t); - let truncate = (~cache=None, i, t: t): DistTypes.discreteShape => - t - |> XYShape.T.zip - |> XYShape.Zipped.sortByY - |> Belt.Array.reverse - |> Belt.Array.slice(_, ~offset=0, ~len=i) - |> XYShape.Zipped.sortByX - |> XYShape.T.fromZippedArray; - let xToY = (f, t) => { - XYShape.XtoY.stepwiseIfAtX(f, t) + let normalize = (t: t): t => { + let discreteIntegralSum = integralEndY(~cache=None, t); + + scaleBy(~scale=(1. /. discreteIntegralSum), ~knownIntegralSum=Some(1.0), t); + }; + + let normalizedToContinuous = _ => None; + let normalizedToDiscrete = t => Some(t); // TODO: this should be normalized! + + let downsample = (~cache=None, i, t: t): t => { + // It's not clear how to downsample a set of discrete points in a meaningful way. + // The best we can do is to clip off the smallest values. + let clippedShape = + t + |> getShape + |> XYShape.T.zip + |> XYShape.Zipped.sortByY + |> Belt.Array.reverse + |> Belt.Array.slice(_, ~offset=0, ~len=i) + |> XYShape.Zipped.sortByX + |> XYShape.T.fromZippedArray; + + make(clippedShape, None); // if someone needs the sum, they'll have to recompute it + }; + + let xToY = (f, t) => + t + |> getShape + |> XYShape.XtoY.stepwiseIfAtX(f) |> E.O.default(0.0) |> DistTypes.MixedPoint.makeDiscrete; - }; let integralXtoY = (~cache, f, t) => t @@ -245,49 +467,64 @@ module Discrete = { |> Continuous.getShape |> XYShape.YtoX.linear(f); - let mean = (t: t): float => - E.A.reducei(t.xs, 0.0, (acc, x, i) => acc +. x *. t.ys[i]); + let mean = (t: t): float => { + let s = getShape(t); + E.A.reducei(s.xs, 0.0, (acc, x, i) => acc +. x *. s.ys[i]); + }; let variance = (t: t): float => { let getMeanOfSquares = t => - mean(XYShape.Analysis.squareXYShape(t)); + t |> shapeMap(XYShape.Analysis.squareXYShape) |> mean; XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares); }; }); + }; // TODO: I think this shouldn't assume continuous/discrete are normalized to 1.0, and thus should not need the discreteProbabilityMassFraction being separate. module Mixed = { type t = DistTypes.mixedShape; - let make = - (~continuous, ~discrete, ~discreteProbabilityMassFraction) - : DistTypes.mixedShape => { + let make = (~continuous, ~discrete): t => { continuous, discrete, - discreteProbabilityMassFraction, }; - // todo: Put into scaling module - let scaleDiscreteFn = - ({discreteProbabilityMassFraction}: DistTypes.mixedShape, f) => - f *. discreteProbabilityMassFraction; + let totalLength = (t: t): int => { + let continuousLength = t.continuous |> Continuous.getShape |> XYShape.T.length; + let discreteLength = t.discrete |> Discrete.getShape |> XYShape.T.length; + + continuousLength + discreteLength; + }; + + // TODO: Put into scaling module + //let normalizeMixedPoint = (t, f) => f *. discreteProbabilityMassFraction;*/ //TODO: Warning: This currently computes the integral, which is expensive. - let scaleContinuousFn = + /*let scaleContinuousFn = ({discreteProbabilityMassFraction}: DistTypes.mixedShape, f) => - f *. (1.0 -. discreteProbabilityMassFraction); + f *. (1.0 -. discreteProbabilityMassFraction); */ //TODO: Warning: This currently computes the integral, which is expensive. - let scaleContinuous = ({discreteProbabilityMassFraction}: t, continuous) => - continuous - |> Continuous.T.scaleToIntegralSum( - ~intendedSum=1.0 -. discreteProbabilityMassFraction, - ); - let scaleDiscrete = ({discreteProbabilityMassFraction}: t, disrete) => - disrete - |> Discrete.T.scaleToIntegralSum( - ~intendedSum=discreteProbabilityMassFraction, - ); + // Normalizes to 1.0. + /*let scaleContinuous = ({discreteProbabilityMassFraction}: t, continuous) => + // get only the continuous, and scale it to the respective + continuous + |> Continuous.T.scaleToIntegralSum( + ~intendedSum=1.0 -. discreteProbabilityMassFraction, + ); + + let scaleDiscrete = ({discreteProbabilityMassFraction}: t, disrete) => + disrete + |> Discrete.T.scaleToIntegralSum( + ~intendedSum=discreteProbabilityMassFraction, + );*/ + + let truncate = (leftCutoff: option(float), rightCutoff: option(float), {discrete, continuous}: t) => { + let truncatedDiscrete = Discrete.truncate(leftCutoff, rightCutoff, discrete); + let truncatedContinuous = Continuous.truncate(leftCutoff, rightCutoff, continuous); + + make(~discrete=truncatedDiscrete, ~continuous=truncatedContinuous); + }; module T = Dist({ @@ -301,98 +538,92 @@ module Mixed = { let toShape = (t: t): DistTypes.shape => Mixed(t); let toContinuous = ({continuous}: t) => Some(continuous); let toDiscrete = ({discrete}: t) => Some(discrete); - let toDiscreteProbabilityMass = ({discreteProbabilityMassFraction}: t) => discreteProbabilityMassFraction; - let xToY = (f, {discrete, continuous} as t: t) => { - let c = - continuous - |> Continuous.T.xToY(f) - |> DistTypes.MixedPoint.fmap(scaleContinuousFn(t)); - let d = - discrete - |> Discrete.T.xToY(f) - |> DistTypes.MixedPoint.fmap(scaleDiscreteFn(t)); - DistTypes.MixedPoint.add(c, d); + let normalize = (t: t): t => { + let continuousIntegralSum = Continuous.T.Integral.sum(~cache=None, t.continuous); + let discreteIntegralSum = Discrete.T.Integral.sum(~cache=None, t.discrete); + let totalIntegralSum = continuousIntegralSum +. discreteIntegralSum; + + let newContinuousSum = continuousIntegralSum /. totalIntegralSum; + let newDiscreteSum = discreteIntegralSum /. totalIntegralSum; + + let normalizedContinuous = Continuous.scaleBy(~scale=(1. /. newContinuousSum), ~knownIntegralSum=Some(newContinuousSum), t.continuous); + let normalizedDiscrete = Discrete.scaleBy(~scale=(1. /. newDiscreteSum), ~knownIntegralSum=Some(newDiscreteSum), t.discrete); + + make(~continuous=normalizedContinuous, ~discrete=normalizedDiscrete); }; - // Warning: It's not clear how to update the discreteProbabilityMassFraction, so this may create small errors. - let truncate = - ( - ~cache=None, - count, - {discrete, continuous, discreteProbabilityMassFraction}: t, - ) - : t => { - { - discrete: - Discrete.T.truncate( - int_of_float( - float_of_int(count) *. discreteProbabilityMassFraction, - ), - discrete, - ), - continuous: - Continuous.T.truncate( - int_of_float( - float_of_int(count) - *. (1.0 -. discreteProbabilityMassFraction), - ), - continuous, - ), - discreteProbabilityMassFraction, - }; + let xToY = (x, t: t) => { + // This evaluates the mixedShape at x, interpolating if necessary. + // Note that we normalize entire mixedShape first. + let {continuous, discrete}: t = normalize(t); + let c = Continuous.T.xToY(x, continuous); + let d = Discrete.T.xToY(x, discrete); + DistTypes.MixedPoint.add(c, d); // "add" here just combines the two values into a single MixedPoint. }; - let toScaledContinuous = ({continuous} as t: t) => - Some(scaleContinuous(t, continuous)); + let toDiscreteProbabilityMassFraction = ({discrete, continuous}: t) => { + let discreteIntegralSum = Discrete.T.Integral.sum(~cache=None, discrete); + let continuousIntegralSum = Continuous.T.Integral.sum(~cache=None, continuous); + let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum; - let toScaledDiscrete = ({discrete} as t: t) => - Some(scaleDiscrete(t, discrete)); + discreteIntegralSum /. totalIntegralSum; + }; + + let downsample = (~cache=None, count, {discrete, continuous}: t): t => { + // We will need to distribute the new xs fairly between the discrete and continuous shapes. + // The easiest way to do this is to simply go by the previous probability masses. + + // The cache really isn't helpful here, because we would need two separate caches + let discreteIntegralSum = Discrete.T.Integral.sum(~cache=None, discrete); + let continuousIntegralSum = Continuous.T.Integral.sum(~cache=None, continuous); + let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum; + + let downsampledDiscrete = + Discrete.T.downsample( + int_of_float(float_of_int(count) *. (discreteIntegralSum /. totalIntegralSum)), + discrete, + ); + + let downsampledContinuous = + Continuous.T.downsample( + int_of_float( + float_of_int(count) *. (continuousIntegralSum /. totalIntegralSum), + ), + continuous, + ); + + {discrete: downsampledDiscrete, continuous: downsampledContinuous}; + }; + + let normalizedToContinuous = (t: t) => + Some(normalize(t).continuous); + + let normalizedToDiscrete = ({discrete} as t: t) => + Some(normalize(t).discrete); let integral = ( ~cache, - {continuous, discrete, discreteProbabilityMassFraction}: t, + {continuous, discrete}: t, ) => { switch (cache) { | Some(cache) => cache - | None => - let scaleContinuousBy = - (1.0 -. discreteProbabilityMassFraction) - /. (continuous |> Continuous.T.Integral.sum(~cache=None)); + | None => { + // note: if the underlying shapes aren't normalized, then these integrals won't be either! + let continuousIntegral = Continuous.T.Integral.get(~cache=None, continuous); + let discreteIntegral = Discrete.T.Integral.get(~cache=None, discrete); - let scaleDiscreteBy = - discreteProbabilityMassFraction - /. ( - discrete - |> Discrete.T.Integral.get(~cache=None) - |> Continuous.toLinear - |> E.O.fmap(Continuous.lastY) - |> E.O.toExn("") - ); - - let cont = - continuous - |> Continuous.T.Integral.get(~cache=None) - |> Continuous.T.scaleBy(~scale=scaleContinuousBy); - - let dist = - discrete - |> Discrete.T.Integral.get(~cache=None) - |> Continuous.toLinear - |> E.O.toExn("") - |> Continuous.T.scaleBy(~scale=scaleDiscreteBy); - - let result = - Continuous.make( - `Linear, - XYShape.Combine.combineLinear( - ~fn=(+.), - Continuous.getShape(cont), - Continuous.getShape(dist), - ), - ); - result; + Continuous.make( + `Linear, + XYShape.Combine.combineLinear( + ~fn=(+.), + Continuous.getShape(continuousIntegral), + Continuous.getShape(discreteIntegral), + ), + None, + ); + } }; }; @@ -414,80 +645,153 @@ module Mixed = { |> XYShape.YtoX.linear(f); }; - // TODO: This part really needs to be rethought, I'm quite sure this is just broken. Mapping Ys would change the desired discreteProbabilityMassFraction. - let mapY = - (fn, {discrete, continuous, discreteProbabilityMassFraction}: t): t => { + // This pipes all ys (continuous and discrete) through fn. + // If mapY is a linear operation, we might be able to update the knownIntegralSums as well; + // if not, they'll be set to None. + let mapY = (~knownIntegralSumFn=(previousIntegralSum => None), fn, {discrete, continuous}: t): t => { + let u = E.O.bind(_, knownIntegralSumFn); + + let yMappedDiscrete = + discrete |> Discrete.T.mapY(fn) |> Discrete.updateKnownIntegralSum(u(discrete.knownIntegralSum)); + + let yMappedContinuous = + continuous |> Continuous.T.mapY(fn) |> Continuous.updateKnownIntegralSum(u(continuous.knownIntegralSum)); + { - discrete: Discrete.T.mapY(fn, discrete), + discrete: yMappedDiscrete, continuous: Continuous.T.mapY(fn, continuous), - discreteProbabilityMassFraction, }; }; - let mean = (t: t): float => { - let discreteProbabilityMassFraction = - t.discreteProbabilityMassFraction; - switch (discreteProbabilityMassFraction) { - | 1.0 => Discrete.T.mean(t.discrete) - | 0.0 => Continuous.T.mean(t.continuous) - | _ => - Discrete.T.mean(t.discrete) - *. discreteProbabilityMassFraction - +. Continuous.T.mean(t.continuous) - *. (1.0 -. discreteProbabilityMassFraction) - }; + let mean = ({discrete, continuous}: t): float => { + let discreteMean = Discrete.T.mean(discrete); + let continuousMean = Continuous.T.mean(continuous); + + // the combined mean is the weighted sum of the two: + let discreteIntegralSum = Discrete.T.Integral.sum(~cache=None, discrete); + let continuousIntegralSum = Continuous.T.Integral.sum(~cache=None, continuous); + let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum; + + (discreteMean *. discreteIntegralSum +. continuousMean *. continuousIntegralSum) /. totalIntegralSum; }; - let variance = (t: t): float => { - let discreteProbabilityMassFraction = - t.discreteProbabilityMassFraction; - let getMeanOfSquares = (t: t) => { - Discrete.T.mean(XYShape.Analysis.squareXYShape(t.discrete)) - *. t.discreteProbabilityMassFraction - +. XYShape.Analysis.getMeanOfSquaresContinuousShape(t.continuous) - *. (1.0 -. t.discreteProbabilityMassFraction); + let variance = ({discrete, continuous} as t: t): float => { + // the combined mean is the weighted sum of the two: + let discreteIntegralSum = Discrete.T.Integral.sum(~cache=None, discrete); + let continuousIntegralSum = Continuous.T.Integral.sum(~cache=None, continuous); + let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum; + + let getMeanOfSquares = ({discrete, continuous} as t: t) => { + let discreteMean = discrete |> Discrete.shapeMap(XYShape.Analysis.squareXYShape) |> Discrete.T.mean; + let continuousMean = continuous |> XYShape.Analysis.getMeanOfSquaresContinuousShape; + (discreteMean *. discreteIntegralSum +. continuousMean *. continuousIntegralSum) /. totalIntegralSum }; - switch (discreteProbabilityMassFraction) { - | 1.0 => Discrete.T.variance(t.discrete) - | 0.0 => Continuous.T.variance(t.continuous) - | _ => - XYShape.Analysis.getVarianceDangerously( - t, - mean, - getMeanOfSquares, - ) + + switch (discreteIntegralSum /. totalIntegralSum) { + | 1.0 => Discrete.T.variance(discrete) + | 0.0 => Continuous.T.variance(continuous) + | _ => XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares) }; }; }); + + let convolve = (fn: ((float, float) => float), t1: t, t2: t): t => { + // Discrete convolution can cause a huge increase in the number of samples, + // so we'll first downsample. + + // An alternative (to be explored in the future) may be to first perform the full convolution and then to downsample the result; + // to use non-uniform fast Fourier transforms (for addition only), add web workers or gpu.js, etc. ... + + // TODO: make this optional or customizable + let downsampleIfTooLarge = (t: t) => { + let sqtl = sqrt(float_of_int(totalLength(t))); + sqtl > 10. ? T.downsample(int_of_float(sqtl), t) : t; + }; + + let t1d = downsampleIfTooLarge(t1); + let t2d = downsampleIfTooLarge(t2); + + // continuous (*) continuous => continuous, but also + // discrete (*) continuous => continuous (and vice versa). We have to take care of all combos and then combine them: + let ccConvResult = Continuous.convolve(fn, t1d.continuous, t2d.continuous); + let dcConvResult = Continuous.convolveWithDiscrete(fn, t2d.continuous, t1d.discrete); + let cdConvResult = Continuous.convolveWithDiscrete(fn, t1d.continuous, t2d.discrete); + let continuousConvResult = Continuous.reduce((+.), [|ccConvResult, dcConvResult, cdConvResult|]); + + // ... finally, discrete (*) discrete => discrete, obviously: + let discreteConvResult = Discrete.convolve(fn, t1d.discrete, t2d.discrete); + + {discrete: discreteConvResult, continuous: continuousConvResult}; + } }; module Shape = { + type t = DistTypes.shape; + let mapToAll = ((fn1, fn2, fn3), t: t) => + switch (t) { + | Mixed(m) => fn1(m) + | Discrete(m) => fn2(m) + | Continuous(m) => fn3(m) + }; + + let fmap = ((fn1, fn2, fn3), t: t): t => + switch (t) { + | Mixed(m) => Mixed(fn1(m)) + | Discrete(m) => Discrete(fn2(m)) + | Continuous(m) => Continuous(fn3(m)) + }; + + let toMixed = mapToAll(( + m => m, + d => Mixed.make(~discrete=d, ~continuous=Continuous.empty), + c => Mixed.make(~discrete=Discrete.empty, ~continuous=c), + )); + + let convolve = (fn, t1: t, t2: t): t => { + Mixed(Mixed.convolve(fn, toMixed(t1), toMixed(t2))); + }; + + let downsample = (~cache=None, i, t) => + fmap(( + Mixed.T.downsample(i), + Discrete.T.downsample(i), + Continuous.T.downsample(i), + ), t); + + let normalize = + fmap(( + Mixed.T.normalize, + Discrete.T.normalize, + Continuous.T.normalize, + )); + + let truncate (leftCutoff, rightCutoff, t): t = + fmap(( + Mixed.truncate(leftCutoff, rightCutoff), + Discrete.truncate(leftCutoff, rightCutoff), + Continuous.truncate(leftCutoff, rightCutoff), + ), t); + module T = Dist({ type t = DistTypes.shape; type integral = DistTypes.continuousShape; - let mapToAll = ((fn1, fn2, fn3), t: t) => - switch (t) { - | Mixed(m) => fn1(m) - | Discrete(m) => fn2(m) - | Continuous(m) => fn3(m) - }; - let fmap = ((fn1, fn2, fn3), t: t): t => - switch (t) { - | Mixed(m) => Mixed(fn1(m)) - | Discrete(m) => Discrete(fn2(m)) - | Continuous(m) => Continuous(fn3(m)) - }; - - let xToY = f => + let xToY = (f: float) => mapToAll(( Mixed.T.xToY(f), Discrete.T.xToY(f), Continuous.T.xToY(f), )); + let toShape = (t: t) => t; + + let toContinuous = t => None; + let toDiscrete = t => None; + let downsample = (~cache=None, i, t) => t; + let toDiscreteProbabilityMassFraction = t => 0.0; + let normalize = t => t; let toContinuous = mapToAll(( Mixed.T.toContinuous, @@ -501,31 +805,24 @@ module Shape = { Continuous.T.toDiscrete, )); - let truncate = (~cache=None, i) => - fmap(( - Mixed.T.truncate(i), - Discrete.T.truncate(i), - Continuous.T.truncate(i), + let toDiscreteProbabilityMassFraction = + mapToAll(( + Mixed.T.toDiscreteProbabilityMassFraction, + Discrete.T.toDiscreteProbabilityMassFraction, + Continuous.T.toDiscreteProbabilityMassFraction, )); - let toDiscreteProbabilityMass = + let normalizedToDiscrete = mapToAll(( - Mixed.T.toDiscreteProbabilityMass, - Discrete.T.toDiscreteProbabilityMass, - Continuous.T.toDiscreteProbabilityMass, + Mixed.T.normalizedToDiscrete, + Discrete.T.normalizedToDiscrete, + Continuous.T.normalizedToDiscrete, )); - - let toScaledDiscrete = + let normalizedToContinuous = mapToAll(( - Mixed.T.toScaledDiscrete, - Discrete.T.toScaledDiscrete, - Continuous.T.toScaledDiscrete, - )); - let toScaledContinuous = - mapToAll(( - Mixed.T.toScaledContinuous, - Discrete.T.toScaledContinuous, - Continuous.T.toScaledContinuous, + Mixed.T.normalizedToContinuous, + Discrete.T.normalizedToContinuous, + Continuous.T.normalizedToContinuous, )); let minX = mapToAll((Mixed.T.minX, Discrete.T.minX, Continuous.T.minX)); let integral = (~cache) => { @@ -556,11 +853,11 @@ module Shape = { )); }; let maxX = mapToAll((Mixed.T.maxX, Discrete.T.maxX, Continuous.T.maxX)); - let mapY = fn => + let mapY = (~knownIntegralSumFn=(previousIntegralSum => None), fn) => fmap(( - Mixed.T.mapY(fn), - Discrete.T.mapY(fn), - Continuous.T.mapY(fn), + Mixed.T.mapY(~knownIntegralSumFn, fn), + Discrete.T.mapY(~knownIntegralSumFn, fn), + Continuous.T.mapY(~knownIntegralSumFn, fn), )); let mean = (t: t): float => @@ -636,21 +933,30 @@ module DistPlus = { let toShape = toShape; let toContinuous = shapeFn(Shape.T.toContinuous); let toDiscrete = shapeFn(Shape.T.toDiscrete); - // todo: Adjust for total mass. - let toScaledContinuous = (t: t) => { + let normalize = (t: t): t => { + let normalizedShape = + t |> toShape |> Shape.T.normalize; + + t |> updateShape(normalizedShape); + + // TODO: also adjust for domainIncludedProbabilityMass here. + }; + + // TODO: replace this with + let normalizedToContinuous = (t: t) => { t |> toShape - |> Shape.T.toScaledContinuous + |> Shape.T.normalizedToContinuous |> E.O.fmap( Continuous.T.mapY(domainIncludedProbabilityMassAdjustment(t)), ); }; - let toScaledDiscrete = (t: t) => { + let normalizedToDiscrete = (t: t) => { t |> toShape - |> Shape.T.toScaledDiscrete + |> Shape.T.normalizedToDiscrete |> E.O.fmap( Discrete.T.mapY(domainIncludedProbabilityMassAdjustment(t)), ); @@ -664,18 +970,18 @@ module DistPlus = { let minX = shapeFn(Shape.T.minX); let maxX = shapeFn(Shape.T.maxX); - let toDiscreteProbabilityMass = - shapeFn(Shape.T.toDiscreteProbabilityMass); + let toDiscreteProbabilityMassFraction = + shapeFn(Shape.T.toDiscreteProbabilityMassFraction); // This bit is kind of awkward, could probably use rethinking. let integral = (~cache, t: t) => updateShape(Continuous(t.integralCache), t); - let truncate = (~cache=None, i, t) => - updateShape(t |> toShape |> Shape.T.truncate(i), t); + let downsample = (~cache=None, i, t): t => + updateShape(t |> toShape |> Shape.T.downsample(i), t); // todo: adjust for limit, maybe? - let mapY = (fn, {shape, _} as t: t): t => - Shape.T.mapY(fn, shape) |> updateShape(_, t); + let mapY = (~knownIntegralSumFn=(previousIntegralSum => None), fn, {shape, _} as t: t): t => + Shape.T.mapY(~knownIntegralSumFn, fn, shape) |> updateShape(_, t); let integralEndY = (~cache as _, t: t) => Shape.T.Integral.sum(~cache=Some(t.integralCache), toShape(t)); diff --git a/src/distPlus/distribution/MixedShapeBuilder.re b/src/distPlus/distribution/MixedShapeBuilder.re index 949a6f20..496e298c 100644 --- a/src/distPlus/distribution/MixedShapeBuilder.re +++ b/src/distPlus/distribution/MixedShapeBuilder.re @@ -8,14 +8,15 @@ type assumptions = { discreteProbabilityMass: option(float), }; -let buildSimple = (~continuous: option(DistTypes.continuousShape), ~discrete): option(DistTypes.shape) => { - let continuous = continuous |> E.O.default(Distributions.Continuous.make(`Linear, {xs: [||], ys: [||]})) +let buildSimple = (~continuous: option(DistTypes.continuousShape), ~discrete: option(DistTypes.discreteShape)): option(DistTypes.shape) => { + let continuous = continuous |> E.O.default(Distributions.Continuous.make(`Linear, {xs: [||], ys: [||]}, Some(0.0))); + let discrete = discrete |> E.O.default(Distributions.Discrete.make({xs: [||], ys: [||]}, Some(0.0))); let cLength = continuous |> Distributions.Continuous.getShape |> XYShape.T.xs |> E.A.length; - let dLength = discrete |> XYShape.T.xs |> E.A.length; + let dLength = discrete |> Distributions.Discrete.getShape |> XYShape.T.xs |> E.A.length; switch (cLength, dLength) { | (0 | 1, 0) => None | (0 | 1, _) => Some(Discrete(discrete)) @@ -23,18 +24,12 @@ let buildSimple = (~continuous: option(DistTypes.continuousShape), ~discrete): o | (_, _) => let discreteProbabilityMassFraction = Distributions.Discrete.T.Integral.sum(~cache=None, discrete); - let discrete = - Distributions.Discrete.T.scaleToIntegralSum(~intendedSum=1.0, discrete); - let continuous = - Distributions.Continuous.T.scaleToIntegralSum( - ~intendedSum=1.0, - continuous, - ); + let discrete = Distributions.Discrete.T.normalize(discrete); + let continuous = Distributions.Continuous.T.normalize(continuous); let mixedDist = Distributions.Mixed.make( ~continuous, - ~discrete, - ~discreteProbabilityMassFraction, + ~discrete ); Some(Mixed(mixedDist)); }; @@ -42,7 +37,7 @@ let buildSimple = (~continuous: option(DistTypes.continuousShape), ~discrete): o // TODO: Delete, only being used in tests -let build = (~continuous, ~discrete, ~assumptions) => +/*let build = (~continuous, ~discrete, ~assumptions) => switch (assumptions) { | { continuous: ADDS_TO_CORRECT_PROBABILITY, @@ -102,4 +97,4 @@ let build = (~continuous, ~discrete, ~assumptions) => ), ); | _ => None - }; \ No newline at end of file + };*/ diff --git a/src/distPlus/distribution/XYShape.re b/src/distPlus/distribution/XYShape.re index cf3600a9..9451fb23 100644 --- a/src/distPlus/distribution/XYShape.re +++ b/src/distPlus/distribution/XYShape.re @@ -17,6 +17,7 @@ module T = { type ts = array(xyShape); let xs = (t: t) => t.xs; let ys = (t: t) => t.ys; + let length = (t: t) => E.A.length(t.xs); let empty = {xs: [||], ys: [||]}; let minX = (t: t) => t |> xs |> E.A.Sorted.min |> extImp; let maxX = (t: t) => t |> xs |> E.A.Sorted.max |> extImp; @@ -154,7 +155,9 @@ module XsConversion = { let proportionByProbabilityMass = (newLength: int, integral: T.t, t: T.t): T.t => { - equallyDivideXByMass(newLength, integral) |> _replaceWithXs(_, t); + integral + |> equallyDivideXByMass(newLength) // creates a new set of xs at evenly spaced percentiles + |> _replaceWithXs(_, t); // linearly interpolates new ys for the new xs }; }; @@ -164,6 +167,7 @@ module Zipped = { let compareXs = ((x1, _), (x2, _)) => x1 > x2 ? 1 : 0; let sortByY = (t: zipped) => t |> E.A.stableSortBy(_, compareYs); let sortByX = (t: zipped) => t |> E.A.stableSortBy(_, compareXs); + let filterByX = (testFn: (float => bool), t: zipped) => t |> E.A.filter(((x, _)) => testFn(x)); }; module Combine = { @@ -253,8 +257,8 @@ module Range = { Belt.Array.set( cumulativeY, x + 1, - (xs[x + 1] -. xs[x]) - *. ((ys[x] +. ys[x + 1]) /. 2.) + (xs[x + 1] -. xs[x]) // dx + *. ((ys[x] +. ys[x + 1]) /. 2.) // (1/2) * (avgY) +. cumulativeY[x], ); (); diff --git a/src/distPlus/renderers/RenderTypes.re b/src/distPlus/renderers/RenderTypes.re index 99a53aae..c94ca69a 100644 --- a/src/distPlus/renderers/RenderTypes.re +++ b/src/distPlus/renderers/RenderTypes.re @@ -43,7 +43,7 @@ module ShapeRenderer = { module Symbolic = { type inputs = {length: int}; type outputs = { - graph: SymbolicDist.distTree, + graph: TreeNode.treeNode, shape: DistTypes.shape, }; let make = (graph, shape) => {graph, shape}; diff --git a/src/distPlus/renderers/ShapeRenderer.re b/src/distPlus/renderers/ShapeRenderer.re index c6f3dc0e..8542ba4a 100644 --- a/src/distPlus/renderers/ShapeRenderer.re +++ b/src/distPlus/renderers/ShapeRenderer.re @@ -21,7 +21,7 @@ let runSymbolic = (guesstimatorString, length) => { |> E.R.fmap(g => RenderTypes.ShapeRenderer.Symbolic.make( g, - SymbolicDist.toShape(length, g), + TreeNode.toShape(length, g), ) ); }; @@ -43,4 +43,4 @@ let run = }; Js.log3("IS SOME?", symbolic |> E.R.toOption |> E.O.isSome, symbolic); {symbolic: Some(symbolic), sampling}; -}; \ No newline at end of file +}; diff --git a/src/distPlus/renderers/samplesRenderer/Guesstimator.re b/src/distPlus/renderers/samplesRenderer/Guesstimator.re index e099889f..a08fb591 100644 --- a/src/distPlus/renderers/samplesRenderer/Guesstimator.re +++ b/src/distPlus/renderers/samplesRenderer/Guesstimator.re @@ -4,10 +4,10 @@ type discrete = { ys: array(float), }; -let jsToDistDiscrete = (d: discrete): DistTypes.discreteShape => { +let jsToDistDiscrete = (d: discrete): DistTypes.discreteShape => {xyShape: { xs: xsGet(d), ys: ysGet(d), -}; +}, knownIntegralSum: None}; [@bs.module "./GuesstimatorLibrary.js"] -external stringToSamples: (string, int) => array(float) = "stringToSamples"; \ No newline at end of file +external stringToSamples: (string, int) => array(float) = "stringToSamples"; diff --git a/src/distPlus/renderers/samplesRenderer/Samples.re b/src/distPlus/renderers/samplesRenderer/Samples.re index 7318a9dd..28f7bdce 100644 --- a/src/distPlus/renderers/samplesRenderer/Samples.re +++ b/src/distPlus/renderers/samplesRenderer/Samples.re @@ -115,11 +115,12 @@ module T = { Array.fast_sort(compare, samples); let (continuousPart, discretePart) = E.A.Sorted.Floats.split(samples); let length = samples |> E.A.length |> float_of_int; - let discrete: DistTypes.xyShape = + let discrete: DistTypes.discreteShape = discretePart |> E.FloatFloatMap.fmap(r => r /. length) |> E.FloatFloatMap.toArray - |> XYShape.T.fromZippedArray; + |> XYShape.T.fromZippedArray + |> Distributions.Discrete.make(_, None); let pdf = continuousPart |> E.A.length > 5 @@ -149,14 +150,14 @@ module T = { ~outputXYPoints=samplingInputs.outputXYPoints, formatUnitWidth(usedUnitWidth), ) - |> Distributions.Continuous.make(`Linear) + |> Distributions.Continuous.make(`Linear, _, None) |> (r => Some((r, foo))); } : None; let shape = MixedShapeBuilder.buildSimple( ~continuous=pdf |> E.O.fmap(fst), - ~discrete, + ~discrete=Some(discrete), ); let samplesParse: RenderTypes.ShapeRenderer.Sampling.outputs = { continuousParseParams: pdf |> E.O.fmap(snd), @@ -196,4 +197,4 @@ module T = { Some(fromSamples(~samplingInputs, samples)); }; }; -}; \ No newline at end of file +}; diff --git a/src/distPlus/symbolic/MathJsParser.re b/src/distPlus/symbolic/MathJsParser.re index f145253e..5353aba0 100644 --- a/src/distPlus/symbolic/MathJsParser.re +++ b/src/distPlus/symbolic/MathJsParser.re @@ -88,69 +88,69 @@ module MathAdtToDistDst = { ); }; - let normal: array(arg) => result(SymbolicDist.distTree, string) = + let normal: array(arg) => result(TreeNode.treeNode, string) = fun | [|Value(mean), Value(stdev)|] => - Ok(`Simple(`Normal({mean, stdev}))) + Ok(`DistData(`Symbolic(`Normal({mean, stdev})))) | _ => Error("Wrong number of variables in normal distribution"); - let lognormal: array(arg) => result(SymbolicDist.distTree, string) = + let lognormal: array(arg) => result(TreeNode.treeNode, string) = fun - | [|Value(mu), Value(sigma)|] => Ok(`Simple(`Lognormal({mu, sigma}))) + | [|Value(mu), Value(sigma)|] => Ok(`DistData(`Symbolic(`Lognormal({mu, sigma})))) | [|Object(o)|] => { let g = Js.Dict.get(o); switch (g("mean"), g("stdev"), g("mu"), g("sigma")) { | (Some(Value(mean)), Some(Value(stdev)), _, _) => - Ok(`Simple(SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev))) + Ok(`DistData(`Symbolic(SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev)))) | (_, _, Some(Value(mu)), Some(Value(sigma))) => - Ok(`Simple(`Lognormal({mu, sigma}))) + Ok(`DistData(`Symbolic(`Lognormal({mu, sigma})))) | _ => Error("Lognormal distribution would need mean and stdev") }; } | _ => Error("Wrong number of variables in lognormal distribution"); - let to_: array(arg) => result(SymbolicDist.distTree, string) = + let to_: array(arg) => result(TreeNode.treeNode, string) = fun | [|Value(low), Value(high)|] when low <= 0.0 && low < high=> { - Ok(`Simple(SymbolicDist.Normal.from90PercentCI(low, high))); + Ok(`DistData(`Symbolic(SymbolicDist.Normal.from90PercentCI(low, high)))); } | [|Value(low), Value(high)|] when low < high => { - Ok(`Simple(SymbolicDist.Lognormal.from90PercentCI(low, high))); + Ok(`DistData(`Symbolic(SymbolicDist.Lognormal.from90PercentCI(low, high)))); } | [|Value(_), Value(_)|] => Error("Low value must be less than high value.") | _ => Error("Wrong number of variables in lognormal distribution"); - let uniform: array(arg) => result(SymbolicDist.distTree, string) = + let uniform: array(arg) => result(TreeNode.treeNode, string) = fun - | [|Value(low), Value(high)|] => Ok(`Simple(`Uniform({low, high}))) + | [|Value(low), Value(high)|] => Ok(`DistData(`Symbolic(`Uniform({low, high})))) | _ => Error("Wrong number of variables in lognormal distribution"); - let beta: array(arg) => result(SymbolicDist.distTree, string) = + let beta: array(arg) => result(TreeNode.treeNode, string) = fun - | [|Value(alpha), Value(beta)|] => Ok(`Simple(`Beta({alpha, beta}))) + | [|Value(alpha), Value(beta)|] => Ok(`DistData(`Symbolic(`Beta({alpha, beta})))) | _ => Error("Wrong number of variables in lognormal distribution"); - let exponential: array(arg) => result(SymbolicDist.distTree, string) = + let exponential: array(arg) => result(TreeNode.treeNode, string) = fun - | [|Value(rate)|] => Ok(`Simple(`Exponential({rate: rate}))) + | [|Value(rate)|] => Ok(`DistData(`Symbolic(`Exponential({rate: rate})))) | _ => Error("Wrong number of variables in Exponential distribution"); - let cauchy: array(arg) => result(SymbolicDist.distTree, string) = + let cauchy: array(arg) => result(TreeNode.treeNode, string) = fun | [|Value(local), Value(scale)|] => - Ok(`Simple(`Cauchy({local, scale}))) + Ok(`DistData(`Symbolic(`Cauchy({local, scale})))) | _ => Error("Wrong number of variables in cauchy distribution"); - let triangular: array(arg) => result(SymbolicDist.distTree, string) = + let triangular: array(arg) => result(TreeNode.treeNode, string) = fun | [|Value(low), Value(medium), Value(high)|] => - Ok(`Simple(`Triangular({low, medium, high}))) + Ok(`DistData(`Symbolic(`Triangular({low, medium, high})))) | _ => Error("Wrong number of variables in triangle distribution"); let multiModal = ( - args: array(result(SymbolicDist.distTree, string)), + args: array(result(TreeNode.treeNode, string)), weights: option(array(float)), ) => { let weights = weights |> E.O.default([||]); @@ -158,17 +158,9 @@ module MathAdtToDistDst = { args |> E.A.fmap( fun - | Ok(`Simple(d)) => Ok(`Simple(d)) - | Ok(`Combination(t1, t2, op)) => Ok(`Combination(t1, t2, op)) - | Ok(`PointwiseSum(t1, t2)) => Ok(`PointwiseSum(t1, t2)) - | Ok(`PointwiseProduct(t1, t2)) => Ok(`PointwiseProduct(t1, t2)) - | Ok(`Normalize(t)) => Ok(`Normalize(t)) - | Ok(`LeftTruncate(t, x)) => Ok(`LeftTruncate(t, x)) - | Ok(`RightTruncate(t, x)) => Ok(`RightTruncate(t, x)) - | Ok(`Render(t)) => Ok(`Render(t)) - | Error(e) => Error(e) - | _ => Error("Unexpected dist") - ); + | Ok(a) => a + | Error(e) => Error(e) + ); let firstWithError = dists |> Belt.Array.getBy(_, Belt.Result.isError); let withoutErrors = dists |> E.A.fmap(E.R.toOption) |> E.A.O.concatSomes; @@ -182,7 +174,7 @@ module MathAdtToDistDst = { |> E.A.fmapi((index, t) => { let w = weights |> E.A.get(_, index) |> E.O.default(1.0); - `VerticalScaling(t, `Simple(`Float(w))) + `Operation(`ScaleBy(`Multiply, t, `DistData(`Symbolic(`Float(w))))) }); let pointwiseSum = components @@ -196,7 +188,7 @@ module MathAdtToDistDst = { }; }; - let arrayParser = (args:array(arg)):result(SymbolicDist.distTree, string) => { + let arrayParser = (args:array(arg)):result(TreeNode.treeNode, string) => { let samples = args |> E.A.fmap( fun @@ -207,18 +199,18 @@ module MathAdtToDistDst = { let outputs = Samples.T.fromSamples(samples); let pdf = outputs.shape |> E.O.bind(_,Distributions.Shape.T.toContinuous); let shape = pdf |> E.O.fmap(pdf => { - let _pdf = Distributions.Continuous.T.scaleToIntegralSum(~cache=None, ~intendedSum=1.0, pdf); + let _pdf = Distributions.Continuous.T.normalize(pdf); let cdf = Distributions.Continuous.T.integral(~cache=None, _pdf); SymbolicDist.ContinuousShape.make(_pdf, cdf) }); switch(shape){ - | Some(s) => Ok(`Simple(`ContinuousShape(s))) + | Some(s) => Ok(`DistData(`Symbolic(`ContinuousShape(s)))) | None => Error("Rendering did not work") } } - let rec functionParser = (r): result(SymbolicDist.distTree, string) => + let rec functionParser = (r): result(TreeNode.treeNode, string) => r |> ( fun @@ -230,7 +222,7 @@ module MathAdtToDistDst = { | Fn({name: "exponential", args}) => exponential(args) | Fn({name: "cauchy", args}) => cauchy(args) | Fn({name: "triangular", args}) => triangular(args) - | Value(f) => Ok(`Simple(`Float(f))) + | Value(f) => Ok(`DistData(`Symbolic(`Float(f)))) | Fn({name: "mm", args}) => { let weights = args @@ -283,7 +275,7 @@ module MathAdtToDistDst = { args |> E.A.fmap(functionParser) |> (fun - | [|Ok(l), Ok(`Simple(`Float(0.0)))|] => Error("Division by zero") + | [|Ok(l), Ok(`DistData(`Symbolic(`Float(0.0))))|] => Error("Division by zero") | [|Ok(l), Ok(r)|] => Ok(`Combination(l, r, `DivideOperation)) | _ => Error("Division needs two operands")) } @@ -298,14 +290,14 @@ module MathAdtToDistDst = { args |> E.A.fmap(functionParser) |> (fun - | [|Ok(l), Ok(`Simple(`Float(r)))|] => Ok(`LeftTruncate(l, r)) + | [|Ok(l), Ok(`DistData(`Symbolic(`Float(r))))|] => Ok(`LeftTruncate(l, r)) | _ => Error("leftTruncate needs two arguments: the expression and the cutoff")) } | Fn({name: "rightTruncate", args}) => { args |> E.A.fmap(functionParser) |> (fun - | [|Ok(l), Ok(`Simple(`Float(r)))|] => Ok(`RightTruncate(l, r)) + | [|Ok(l), Ok(`DistData(`Symbolic(`Float(r))))|] => Ok(`RightTruncate(l, r)) | _ => Error("rightTruncate needs two arguments: the expression and the cutoff")) } | Fn({name}) => Error(name ++ ": function not supported") @@ -314,18 +306,18 @@ module MathAdtToDistDst = { } ); - let topLevel = (r): result(SymbolicDist.distTree, string) => + let topLevel = (r): result(TreeNode.treeNode, string) => r |> ( fun | Fn(_) => functionParser(r) - | Value(r) => Ok(`Simple(`Float(r))) + | Value(r) => Ok(`DistData(`Symbolic(`Float(r)))) | Array(r) => arrayParser(r) | Symbol(_) => Error("Symbol not valid as top level") | Object(_) => Error("Object not valid as top level") ); - let run = (r): result(SymbolicDist.distTree, string) => + let run = (r): result(TreeNode.treeNode, string) => r |> MathAdtCleaner.run |> topLevel; }; diff --git a/src/distPlus/symbolic/SymbolicDist.re b/src/distPlus/symbolic/SymbolicDist.re index 48bc7ad5..8cab8227 100644 --- a/src/distPlus/symbolic/SymbolicDist.re +++ b/src/distPlus/symbolic/SymbolicDist.re @@ -36,7 +36,6 @@ type continuousShape = { cdf: DistTypes.continuousShape, }; -type contType = [ | `Continuous | `Discrete]; type dist = [ | `Normal(normal) @@ -50,29 +49,6 @@ type dist = [ | `Float(float) // Dirac delta at x. Practically useful only in the context of multimodals. ]; -type integral = float; -type cutoffX = float; -type operation = [ - | `AddOperation - | `SubtractOperation - | `MultiplyOperation - | `DivideOperation - | `ExponentiateOperation -]; - -type distTree = [ - | `Simple(dist) - | `Combination(distTree, distTree, operation) - | `PointwiseSum(distTree, distTree) - | `PointwiseProduct(distTree, distTree) - | `VerticalScaling(distTree, distTree) - | `Normalize(distTree) - | `LeftTruncate(distTree, cutoffX) - | `RightTruncate(distTree, cutoffX) - | `Render(distTree) -] -and weightedDists = array((distTree, float)); - module ContinuousShape = { type t = continuousShape; let make = (pdf, cdf): t => {pdf, cdf}; @@ -82,8 +58,9 @@ module ContinuousShape = { Distributions.Continuous.T.xToY(p, t.pdf).continuous; // TODO: Fix the sampling, to have it work correctly. let sample = (t: t) => 3.0; + // TODO: Fix the mean, to have it work correctly. + let mean = (t: t) => Ok(0.0); let toString = t => {j|CustomContinuousShape|j}; - let contType: contType = `Continuous; }; module Exponential = { @@ -91,8 +68,8 @@ module Exponential = { let pdf = (x, t: t) => Jstat.exponential##pdf(x, t.rate); let inv = (p, t: t) => Jstat.exponential##inv(p, t.rate); let sample = (t: t) => Jstat.exponential##sample(t.rate); + let mean = (t: t) => Ok(Jstat.exponential##mean(t.rate)); let toString = ({rate}: t) => {j|Exponential($rate)|j}; - let contType: contType = `Continuous; }; module Cauchy = { @@ -100,8 +77,8 @@ module Cauchy = { let pdf = (x, t: t) => Jstat.cauchy##pdf(x, t.local, t.scale); let inv = (p, t: t) => Jstat.cauchy##inv(p, t.local, t.scale); let sample = (t: t) => Jstat.cauchy##sample(t.local, t.scale); + let mean = (t: t) => Error("Cauchy distributions have no mean value.") let toString = ({local, scale}: t) => {j|Cauchy($local, $scale)|j}; - let contType: contType = `Continuous; }; module Triangular = { @@ -109,8 +86,8 @@ module Triangular = { let pdf = (x, t: t) => Jstat.triangular##pdf(x, t.low, t.high, t.medium); let inv = (p, t: t) => Jstat.triangular##inv(p, t.low, t.high, t.medium); let sample = (t: t) => Jstat.triangular##sample(t.low, t.high, t.medium); + let mean = (t: t) => Ok(Jstat.triangular##mean(t.low, t.high, t.medium)); let toString = ({low, medium, high}: t) => {j|Triangular($low, $medium, $high)|j}; - let contType: contType = `Continuous; }; module Normal = { @@ -124,8 +101,26 @@ module Normal = { }; let inv = (p, t: t) => Jstat.normal##inv(p, t.mean, t.stdev); let sample = (t: t) => Jstat.normal##sample(t.mean, t.stdev); + let mean = (t: t) => Ok(Jstat.normal##mean(t.mean, t.stdev)); let toString = ({mean, stdev}: t) => {j|Normal($mean,$stdev)|j}; - let contType: contType = `Continuous; + + let add = (n1: t, n2: t) => { + let mean = n1.mean +. n2.mean; + let stdev = sqrt(n1.stdev ** 2. +. n2.stdev ** 2.); + `Normal({mean, stdev}); + }; + let subtract = (n1: t, n2: t) => { + let mean = n1.mean -. n2.mean; + let stdev = sqrt(n1.stdev ** 2. +. n2.stdev ** 2.); + `Normal({mean, stdev}); + }; + + // TODO: is this useful here at all? would need the integral as well ... + let pointwiseProduct = (n1: t, n2: t) => { + let mean = (n1.mean *. n2.stdev**2. +. n2.mean *. n1.stdev**2.) /. (n1.stdev**2. +. n2.stdev**2.); + let stdev = 1. /. ((1. /. n1.stdev**2.) +. (1. /. n2.stdev**2.)); + `Normal({mean, stdev}); + }; }; module Beta = { @@ -133,17 +128,17 @@ module Beta = { let pdf = (x, t: t) => Jstat.beta##pdf(x, t.alpha, t.beta); let inv = (p, t: t) => Jstat.beta##inv(p, t.alpha, t.beta); let sample = (t: t) => Jstat.beta##sample(t.alpha, t.beta); + let mean = (t: t) => Ok(Jstat.beta##mean(t.alpha, t.beta)); let toString = ({alpha, beta}: t) => {j|Beta($alpha,$beta)|j}; - let contType: contType = `Continuous; }; module Lognormal = { type t = lognormal; let pdf = (x, t: t) => Jstat.lognormal##pdf(x, t.mu, t.sigma); let inv = (p, t: t) => Jstat.lognormal##inv(p, t.mu, t.sigma); + let mean = (t: t) => Ok(Jstat.lognormal##mean(t.mu, t.sigma)); let sample = (t: t) => Jstat.lognormal##sample(t.mu, t.sigma); let toString = ({mu, sigma}: t) => {j|Lognormal($mu,$sigma)|j}; - let contType: contType = `Continuous; let from90PercentCI = (low, high) => { let logLow = Js.Math.log(low); let logHigh = Js.Math.log(high); @@ -163,6 +158,17 @@ module Lognormal = { ); `Lognormal({mu, sigma}); }; + + let multiply = (l1, l2) => { + let mu = l1.mu +. l2.mu; + let sigma = l1.sigma +. l2.sigma; + `Lognormal({mu, sigma}) + }; + let divide = (l1, l2) => { + let mu = l1.mu -. l2.mu; + let sigma = l1.sigma +. l2.sigma; + `Lognormal({mu, sigma}) + }; }; module Uniform = { @@ -170,20 +176,20 @@ module Uniform = { let pdf = (x, t: t) => Jstat.uniform##pdf(x, t.low, t.high); let inv = (p, t: t) => Jstat.uniform##inv(p, t.low, t.high); let sample = (t: t) => Jstat.uniform##sample(t.low, t.high); + let mean = (t: t) => Ok(Jstat.uniform##mean(t.low, t.high)); let toString = ({low, high}: t) => {j|Uniform($low,$high)|j}; - let contType: contType = `Continuous; }; module Float = { type t = float; let pdf = (x, t: t) => x == t ? 1.0 : 0.0; let inv = (p, t: t) => p < t ? 0.0 : 1.0; + let mean = (t: t) => Ok(t); let sample = (t: t) => t; let toString = Js.Float.toString; - let contType: contType = `Discrete; }; -module GenericSimple = { +module GenericDistFunctions = { let minCdfValue = 0.0001; let maxCdfValue = 0.9999; @@ -200,19 +206,6 @@ module GenericSimple = { | `ContinuousShape(n) => ContinuousShape.pdf(x, n) }; - let contType = (dist: dist): contType => - switch (dist) { - | `Normal(_) => Normal.contType - | `Triangular(_) => Triangular.contType - | `Exponential(_) => Exponential.contType - | `Cauchy(_) => Cauchy.contType - | `Lognormal(_) => Lognormal.contType - | `Uniform(_) => Uniform.contType - | `Beta(_) => Beta.contType - | `Float(_) => Float.contType - | `ContinuousShape(_) => ContinuousShape.contType - }; - let inv = (x, dist) => switch (dist) { | `Normal(n) => Normal.inv(x, n) @@ -274,22 +267,25 @@ module GenericSimple = { | `Uniform({high}) => high | `Float(n) => n; - /* This function returns a list of x's at which to evaluate the overall distribution (for rendering). - This function is called separately for each individual distribution. + let mean: dist => result(float, string) = + fun + | `Triangular(n) => Triangular.mean(n) + | `Exponential(n) => Exponential.mean(n) + | `Cauchy(n) => Cauchy.mean(n) + | `Normal(n) => Normal.mean(n) + | `Lognormal(n) => Lognormal.mean(n) + | `Beta(n) => Beta.mean(n) + | `ContinuousShape(n) => ContinuousShape.mean(n) + | `Uniform(n) => Uniform.mean(n) + | `Float(n) => Float.mean(n) - When called with xSelection=`Linear, this function will return (n) x's, evenly - distributed between the min and max of the distribution (whatever those are defined to be above). - - When called with xSelection=`ByWeight, this function will distribute the x's such as to - match the cumulative shape of the distribution. This is slower but may give better results. - */ let interpolateXs = - (~xSelection: [ | `Linear | `ByWeight]=`Linear, dist: dist, n) => { + (~xSelection: [ | `Linear | `ByWeight]=`Linear, dist: dist, n) => { switch (xSelection, dist) { | (`Linear, _) => E.A.Floats.range(min(dist), max(dist), n) | (`ByWeight, `Uniform(n)) => - // In `ByWeight mode, uniform distributions get special treatment because we need two x's - // on either side for proper rendering (just left and right of the discontinuities). + // In `ByWeight mode, uniform distributions get special treatment because we need two x's + // on either side for proper rendering (just left and right of the discontinuities). let dx = 0.00001 *. (n.high -. n.low); [|n.low -. dx, n.low +. dx, n.high -. dx, n.high +. dx|]; | (`ByWeight, _) => @@ -297,423 +293,5 @@ module GenericSimple = { ys |> E.A.fmap(y => inv(y, dist)); }; }; - - let toShape = - (~xSelection: [ | `Linear | `ByWeight]=`Linear, dist: dist, n) - : DistTypes.shape => { - switch (dist) { - | `ContinuousShape(n) => n.pdf |> Distributions.Continuous.T.toShape - | dist => - let xs = interpolateXs(~xSelection, dist, n); - let ys = xs |> E.A.fmap(r => pdf(r, dist)); - XYShape.T.fromArrays(xs, ys) - |> Distributions.Continuous.make(`Linear, _) - |> Distributions.Continuous.T.toShape; - }; - }; }; -module DistTree = { - type nodeResult = [ - | `Simple(dist) - // RenderedShape: continuous xyShape, discrete xyShape, total value. - | `RenderedShape(DistTypes.continuousShape, DistTypes.discreteShape, integral) - ]; - - let evaluateDistribution = (d: dist): nodeResult => { - `Simple(d) - }; - - // This is a performance bottleneck! - // Using raw JS here so we can use native for loops and access array elements - // directly, without option checks. - let jsContinuousCombinationConvolve: (array(float), array(float), array(float), array(float), float => float => float) => array(array((float, float))) = [%bs.raw - {| - function (s1xs, s1ys, s2xs, s2ys, func) { - // For continuous-continuous convolution, use linear interpolation. - // Let's assume we got downsampled distributions - - const outXYShapes = new Array(s1xs.length); - for (let i = 0; i < s1xs.length; i++) { - // create a new distribution - const dxyShape = new Array(s2xs.length); - for (let j = 0; j < s2xs.length; j++) { - dxyShape[j] = [func(s1xs[i], s2xs[j]), (s1ys[i] * s2ys[j])]; - } - outXYShapes[i] = dxyShape; - } - - return outXYShapes; - } - |}]; - - let jsDiscreteCombinationConvolve: (array(float), array(float), array(float), array(float), float => float => float) => (array(float), array(float)) = [%bs.raw - {| - function (s1xs, s1ys, s2xs, s2ys, func) { - const r = new Map(); - - for (let i = 0; i < s1xs.length; i++) { - for (let j = 0; j < s2xs.length; j++) { - - const x = func(s1xs[i], s2xs[j]); - const cv = r.get(x) | 0; - - r.set(x, cv + s1ys[i] * s2ys[j]); // add up the ys, if same x - } - } - - const rxys = [...r.entries()]; - rxys.sort(([x1, y1], [x2, y2]) => x1 - x2); - - const rxs = new Array(rxys.length); - const rys = new Array(rxys.length); - - for (let i = 0; i < rxys.length; i++) { - rxs[i] = rxys[i][0]; - rys[i] = rxys[i][1]; - } - - return [rxs, rys]; - } - |}]; - - let funcFromOp = (op: operation) => { - switch (op) { - | `AddOperation => (+.) - | `SubtractOperation => (-.) - | `MultiplyOperation => (*.) - | `DivideOperation => (/.) - | `ExponentiateOperation => (**) - } - } - - let renderDistributionToXYShape = (d: dist, n: int): (DistTypes.continuousShape, DistTypes.discreteShape) => { - // render the distribution into an XY shape - switch (d) { - | `Float(v) => (Distributions.Continuous.empty, {xs: [|v|], ys: [|1.0|]}) - | _ => { - let xs = GenericSimple.interpolateXs(~xSelection=`ByWeight, d, n); - let ys = xs |> E.A.fmap(x => GenericSimple.pdf(x, d)); - (Distributions.Continuous.make(`Linear, {xs: xs, ys: ys}), XYShape.T.empty) - } - } - }; - - let combinationDistributionOfXYShapes = (sc1: DistTypes.continuousShape, // continuous shape - sd1: DistTypes.discreteShape, // discrete shape - sc2: DistTypes.continuousShape, - sd2: DistTypes.discreteShape, func): (DistTypes.continuousShape, DistTypes.discreteShape) => { - - // First, deal with the discrete-discrete convolution: - let (ddxs, ddys) = jsDiscreteCombinationConvolve(sd1.xs, sd1.ys, sd2.xs, sd2.ys, func); - let ddxy: DistTypes.discreteShape = {xs: ddxs, ys: ddys}; - - // Then, do the other three: - let downsample = (sc: DistTypes.continuousShape) => { - let scLength = E.A.length(sc.xyShape.xs); - let scSqLength = sqrt(float_of_int(scLength)); - scSqLength > 10. ? Distributions.Continuous.T.truncate(int_of_float(scSqLength), sc) : sc; - }; - - let combinePointConvolutionResults = ca => ca |> E.A.fmap(s => { - // s is an array of (x, y) objects - let (xs, ys) = Belt.Array.unzip(s); - Distributions.Continuous.make(`Linear, {xs, ys}); - }) - |> Distributions.Continuous.reduce((+.)); - - let sc1d = downsample(sc1); - let sc2d = downsample(sc2); - - let ccxy = jsContinuousCombinationConvolve(sc1d.xyShape.xs, sc1d.xyShape.ys, sc2d.xyShape.xs, sc2d.xyShape.ys, func) |> combinePointConvolutionResults; - let dcxy = jsContinuousCombinationConvolve(sc1d.xyShape.xs, sc1d.xyShape.ys, sc2d.xyShape.xs, sc2d.xyShape.ys, func) |> combinePointConvolutionResults; - let cdxy = jsContinuousCombinationConvolve(sc1d.xyShape.xs, sc1d.xyShape.ys, sc2d.xyShape.xs, sc2d.xyShape.ys, func) |> combinePointConvolutionResults; - let continuousShapeSum = Distributions.Continuous.reduce((+.), [|ccxy, dcxy, cdxy|]); - - (continuousShapeSum, ddxy) - }; - - let evaluateCombinationDistribution = (et1: nodeResult, et2: nodeResult, op: operation, n: int) => { - /* return either a Distribution or a RenderedShape. Must integrate to 1. */ - - let func = funcFromOp(op); - switch ((et1, et2, op)) { - /* Known cases: replace symbolic with symbolic distribution */ - | (`Simple(`Float(v1)), `Simple(`Float(v2)), _) => { - `Simple(`Float(func(v1, v2))) - } - - | (`Simple(`Normal(n2)), `Simple(`Float(v1)), `AddOperation) - | (`Simple(`Float(v1)), `Simple(`Normal(n2)), `AddOperation) => { - let n: normal = {mean: v1 +. n2.mean, stdev: n2.stdev}; - `Simple(`Normal(n)) - } - - | (`Simple(`Normal(n1)), `Simple(`Normal(n2)), `AddOperation) => { - let n: normal = {mean: n1.mean +. n2.mean, stdev: sqrt(n1.stdev ** 2. +. n2.stdev ** 2.)}; - `Simple(`Normal(n)); - } - - | (`Simple(`Normal(n1)), `Simple(`Normal(n2)), `SubtractOperation) => { - let n: normal = {mean: n1.mean -. n2.mean, stdev: sqrt(n1.stdev ** 2. +. n2.stdev ** 2.)}; - `Simple(`Normal(n)); - } - - | (`Simple(`Lognormal(l1)), `Simple(`Lognormal(l2)), `MultiplyOperation) => { - let l: lognormal = {mu: l1.mu +. l2.mu, sigma: l1.sigma +. l2.sigma}; - `Simple(`Lognormal(l)); - } - - | (`Simple(`Lognormal(l1)), `Simple(`Lognormal(l2)), `DivideOperation) => { - let l: lognormal = {mu: l1.mu -. l2.mu, sigma: l1.sigma +. l2.sigma}; - `Simple(`Lognormal(l)); - } - - - /* General cases: convolve the XYShapes */ - | (`Simple(d1), `Simple(d2), _) => { - let (sc1, sd1) = renderDistributionToXYShape(d1, n); - let (sc2, sd2) = renderDistributionToXYShape(d2, n); - let (sc, sd) = combinationDistributionOfXYShapes(sc1, sd1, sc2, sd2, func); - `RenderedShape(sc, sd, 1.0) - } - | (`Simple(d1), `RenderedShape(sc2, sd2, i2), _) - | (`RenderedShape(sc2, sd2, i2), `Simple(d1), _) => { - let (sc1, sd1) = renderDistributionToXYShape(d1, n); - let (sc, sd) = combinationDistributionOfXYShapes(sc1, sd1, sc2, sd2, func); - `RenderedShape(sc, sd, i2) - } - | (`RenderedShape(sc1, sd1, i1), `RenderedShape(sc2, sd2, i2), _) => { - // sum of two multimodals that have a continuous and discrete each. - let (sc, sd) = combinationDistributionOfXYShapes(sc1, sd1, sc2, sd2, func); - `RenderedShape(sc, sd, i1); - } - } - }; - - let evaluatePointwiseSum = (et1: nodeResult, et2: nodeResult, n: int) => { - switch ((et1, et2)) { - /* Known cases: */ - | (`Simple(`Float(v1)), `Simple(`Float(v2))) => { - v1 == v2 - ? `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.make({xs: [|v1|], ys: [|2.|]}), 2.) - : `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) // TODO: add warning: shouldn't pointwise add scalars. - } - | (`Simple(`Float(v1)), `Simple(d2)) - | (`Simple(d2), `Simple(`Float(v1))) => { - let sd1: DistTypes.xyShape = {xs: [|v1|], ys: [|1.|]}; - let (sc2, sd2) = renderDistributionToXYShape(d2, n); - `RenderedShape(sc2, Distributions.Discrete.reduce((+.), [|sd1, sd2|]), 2.) - } - | (`Simple(d1), `Simple(d2)) => { - let (sc1, sd1) = renderDistributionToXYShape(d1, n); - let (sc2, sd2) = renderDistributionToXYShape(d2, n); - `RenderedShape(Distributions.Continuous.reduce((+.), [|sc1, sc2|]), Distributions.Discrete.reduce((+.), [|sd1, sd2|]), 2.) - } - | (`Simple(d1), `RenderedShape(sc2, sd2, i2)) - | (`RenderedShape(sc2, sd2, i2), `Simple(d1)) => { - let (sc1, sd1) = renderDistributionToXYShape(d1, n); - `RenderedShape(Distributions.Continuous.reduce((+.), [|sc1, sc2|]), Distributions.Discrete.reduce((+.), [|sd1, sd2|]), 1. +. i2) - } - | (`RenderedShape(sc1, sd1, i1), `RenderedShape(sc2, sd2, i2)) => { - `RenderedShape(Distributions.Continuous.reduce((+.), [|sc1, sc2|]), Distributions.Discrete.reduce((+.), [|sd1, sd2|]), i1 +. i2) - } - } - }; - - let evaluatePointwiseProduct = (et1: nodeResult, et2: nodeResult, n: int) => { - switch ((et1, et2)) { - /* Known cases: */ - | (`Simple(`Float(v1)), `Simple(`Float(v2))) => { - v1 == v2 - ? `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.make({xs: [|v1|], ys: [|1.|]}), 1.) - : `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) // TODO: add warning: shouldn't pointwise multiply scalars. - } - | (`Simple(`Float(v1)), `Simple(d2)) => { - // evaluate d2 at v1 - let y = GenericSimple.pdf(v1, d2); - `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.make({xs: [|v1|], ys: [|y|]}), y) - } - | (`Simple(d1), `Simple(`Float(v2))) => { - // evaluate d1 at v2 - let y = GenericSimple.pdf(v2, d1); - `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.make({xs: [|v2|], ys: [|y|]}), y) - } - | (`Simple(`Normal(n1)), `Simple(`Normal(n2))) => { - let mean = (n1.mean *. n2.stdev**2. +. n2.mean *. n1.stdev**2.) /. (n1.stdev**2. +. n2.stdev**2.); - let stdev = 1. /. ((1. /. n1.stdev**2.) +. (1. /. n2.stdev**2.)); - let integral = 0; // TODO - `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) - } - /* General cases */ - | (`Simple(d1), `Simple(d2)) => { - // NOT IMPLEMENTED YET - // TODO: evaluate integral properly - let (sc1, sd1) = renderDistributionToXYShape(d1, n); - let (sc2, sd2) = renderDistributionToXYShape(d2, n); - `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) - } - | (`Simple(d1), `RenderedShape(sc2, sd2, i2)) => { - // NOT IMPLEMENTED YET - // TODO: evaluate integral properly - let (sc1, sd1) = renderDistributionToXYShape(d1, n); - `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) - } - | (`RenderedShape(sc1, sd1, i1), `Simple(d1)) => { - // NOT IMPLEMENTED YET - // TODO: evaluate integral properly - let (sc2, sd2) = renderDistributionToXYShape(d1, n); - `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) - } - | (`RenderedShape(sc1, sd1, i1), `RenderedShape(sc2, sd2, i2)) => { - `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) - } - } - }; - - - let evaluateNormalize = (et: nodeResult, n: int) => { - // just divide everything by the integral. - switch (et) { - | `RenderedShape(sc, sd, 0.) => { - `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) - } - | `RenderedShape(sc, sd, i) => { - // loop through all ys and divide them by i - let normalize = (s: DistTypes.xyShape): DistTypes.xyShape => {xs: s.xs, ys: s.ys |> E.A.fmap(y => y /. i)}; - - let scn = sc |> Distributions.Continuous.shapeMap(normalize); - let sdn = sd |> normalize; - - `RenderedShape(scn, sdn, 1.) - } - | `Simple(d) => `Simple(d) // any kind of atomic dist should already be normalized -- TODO: THIS IS ACTUALLY FALSE! E.g. pointwise product of normal * normal - } - }; - - let evaluateTruncate = (et: nodeResult, xc: cutoffX, compareFunc: (float, float) => bool, n: int) => { - let cut = (s: DistTypes.xyShape): DistTypes.xyShape => { - let (xs, ys) = s.ys - |> Belt.Array.zip(s.xs) - |> E.A.filter(((x, y)) => compareFunc(x, xc)) - |> Belt.Array.unzip - - let cutShape: DistTypes.xyShape = {xs, ys}; - cutShape; - }; - - switch (et) { - | `Simple(d) => { - let (sc, sd) = renderDistributionToXYShape(d, n); - - let scc = sc |> Distributions.Continuous.shapeMap(cut); - let sdc = sd |> cut; - - let newIntegral = 1.; // TODO - - `RenderedShape(scc, sdc, newIntegral); - } - | `RenderedShape(sc, sd, i) => { - let scc = sc |> Distributions.Continuous.shapeMap(cut); - let sdc = sd |> cut; - - let newIntegral = 1.; // TODO - - `RenderedShape(scc, sdc, newIntegral); - } - } - }; - - let evaluateVerticalScaling = (et1: nodeResult, et2: nodeResult, n: int) => { - let scale = (i: float, s: DistTypes.xyShape): DistTypes.xyShape => {xs: s.xs, ys: s.ys |> E.A.fmap(y => y *. i)}; - - switch ((et1, et2)) { - | (`Simple(`Float(v)), `Simple(d)) - | (`Simple(d), `Simple(`Float(v))) => { - let (sc, sd) = renderDistributionToXYShape(d, n); - - let scc = sc |> Distributions.Continuous.shapeMap(scale(v)); - let sdc = sd |> scale(v); - - let newIntegral = v; // TODO - - `RenderedShape(scc, sdc, newIntegral); - } - | (`Simple(`Float(v)), `RenderedShape(sc, sd, i)) - | (`RenderedShape(sc, sd, i), `Simple(`Float(v))) => { - let scc = sc |> Distributions.Continuous.shapeMap(scale(v)); - let sdc = sd |> scale(v); - - let newIntegral = v; // TODO - - `RenderedShape(scc, sdc, newIntegral); - } - | _ => `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) // TODO: give warning - } - } - - let renderNode = (et: nodeResult, n: int) => { - switch (et) { - | `Simple(d) => { - let (sc, sd) = renderDistributionToXYShape(d, n); - `RenderedShape(sc, sd, 1.0); - } - | s => s - } - } - - let rec evaluateNode = (treeNode: distTree, n: int): nodeResult => { - // returns either a new symbolic distribution - switch (treeNode) { - | `Simple(d) => evaluateDistribution(d) - | `Combination(t1, t2, op) => evaluateCombinationDistribution(evaluateNode(t1, n), evaluateNode(t2, n), op, n) - | `PointwiseSum(t1, t2) => evaluatePointwiseSum(evaluateNode(t1, n), evaluateNode(t2, n), n) - | `PointwiseProduct(t1, t2) => evaluatePointwiseProduct(evaluateNode(t1, n), evaluateNode(t2, n), n) - | `VerticalScaling(t1, t2) => evaluateVerticalScaling(evaluateNode(t1, n), evaluateNode(t2, n), n) - | `Normalize(t) => evaluateNormalize(evaluateNode(t, n), n) - | `LeftTruncate(t, x) => evaluateTruncate(evaluateNode(t, n), x, (>=), n) - | `RightTruncate(t, x) => evaluateTruncate(evaluateNode(t, n), x, (<=), n) - | `Render(t) => renderNode(evaluateNode(t, n), n) - } - }; - - let toShape = (treeNode: distTree, n: int) => { - let treeShape = evaluateNode(`Render(`Normalize(treeNode)), n); - - switch (treeShape) { - | `Simple(_) => E.O.toExn("No shape found!", None) - | `RenderedShape(sc, sd, _) => { - let shape = MixedShapeBuilder.buildSimple(~continuous=Some(sc), ~discrete=sd); - - shape |> E.O.toExt(""); - } - } - }; - - let rec toString = (treeNode: distTree): string => { - let stringFromOp = op => switch (op) { - | `AddOperation => " + " - | `SubtractOperation => " - " - | `MultiplyOperation => " * " - | `DivideOperation => " / " - | `ExponentiateOperation => "^" - }; - - switch (treeNode) { - | `Simple(d) => GenericSimple.toString(d) - | `Combination(t1, t2, op) => toString(t1) ++ stringFromOp(op) ++ toString(t2) - | `PointwiseSum(t1, t2) => toString(t1) ++ " .+ " ++ toString(t2) - | `PointwiseProduct(t1, t2) => toString(t1) ++ " .* " ++ toString(t2) - | `VerticalScaling(t1, t2) => toString(t1) ++ " @ " ++ toString(t2) - | `Normalize(t) => "normalize(" ++ toString(t) ++ ")" - | `LeftTruncate(t, x) => "leftTruncate(" ++ toString(t) ++ ", " ++ string_of_float(x) ++ ")" - | `RightTruncate(t, x) => "rightTruncate(" ++ toString(t) ++ ", " ++ string_of_float(x) ++ ")" - | `Render(t) => toString(t) - } - }; -}; - -let toString = (treeNode: distTree) => DistTree.toString(treeNode) - -let toShape = (sampleCount: int, treeNode: distTree) => - DistTree.toShape(treeNode, sampleCount) //~xSelection=`ByWeight, diff --git a/src/distPlus/symbolic/TreeNode.re b/src/distPlus/symbolic/TreeNode.re new file mode 100644 index 00000000..5f1aece7 --- /dev/null +++ b/src/distPlus/symbolic/TreeNode.re @@ -0,0 +1,414 @@ +/* This module represents a tree node. */ + +/* TreeNodes are either Data (i.e. symbolic or rendered distributions) or Operations. */ +type treeNode = [ + | `DistData(distData) + | `Operation(operation) +] and distData = [ + | `Symbolic(SymbolicDist.dist) + | `RenderedShape(DistTypes.shape) +] and operation = [ + // binary operations + | `StandardOperation(standardOperation, treeNode, treeNode) + | `PointwiseOperation(pointwiseOperation, treeNode, treeNode) + | `ScaleOperation(scaleOperation, treeNode, scaleBy) + // unary operations + | `Render(treeNode) // always evaluates to `DistData(`RenderedShape(...)) + | `Truncate(leftCutoff, rightCutoff, treeNode) + | `Normalize(treeNode) + // direct evaluations of dists (e.g. cdf, sample) + | `FloatFromDist(distToFloatOperation, treeNode) +] and standardOperation = [ + | `Add + | `Multiply + | `Subtract + | `Divide + | `Exponentiate +] and pointwiseOperation = [ + | `Add + | `Multiply +] and scaleOperation = [ + | `Multiply + | `Log +] +and scaleBy = treeNode and leftCutoff = option(float) and rightCutoff = option(float) +and distToFloatOperation = [ + | `Pdf(float) + | `Cdf(float) + | `Inv(float) + | `Sample +]; + +module TreeNode = { + type t = treeNode; + type simplifier = treeNode => result(treeNode, string); + + type renderParams = { + operationToDistData: (int, operation) => result(t, string), + sampleCount: int, + } + + let rec renderToShape = (renderParams, t: t): result(DistTypes.shape, string) => { + switch (t) { + | `DistData(`RenderedShape(s)) => Ok(s) // already a rendered shape, we're done here + | `DistData(`Symbolic(d)) => + switch (d) { + | `Float(v) => + Ok(Discrete(Distributions.Discrete.make({xs: [|v|], ys: [|1.0|]}, Some(1.0)))); + | _ => + let xs = SymbolicDist.GenericDistFunctions.interpolateXs(~xSelection=`ByWeight, d, renderParams.sampleCount); + let ys = xs |> E.A.fmap(x => SymbolicDist.GenericDistFunctions.pdf(x, d)); + Ok(Continuous(Distributions.Continuous.make(`Linear, {xs, ys}, Some(1.0)))); + } + | `Operation(op) => E.R.bind(renderParams.operationToDistData(renderParams.sampleCount, op), renderToShape(renderParams)) + }; + }; + + /* The following modules encapsulate everything we can do with + * different kinds of operations. */ + + /* Given two random variables A and B, this returns the distribution + of a new variable that is the result of the operation on A and B. + For instance, normal(0, 1) + normal(1, 1) -> normal(1, 2). + In general, this is implemented via convolution. */ + module StandardOperation = { + let funcFromOp: (standardOperation, float, float) => float = + fun + | `Add => (+.) + | `Subtract => (-.) + | `Multiply => ( *. ) + | `Divide => (/.) + | `Exponentiate => ( ** ); + + module Simplify = { + let tryCombiningFloats: simplifier = + fun + | `Operation( + `StandardOperation( + `Divide, + `DistData(`Symbolic(`Float(v1))), + `DistData(`Symbolic(`Float(0.))), + ), + ) => + Error("Cannot divide $v1 by zero.") + | `Operation( + `StandardOperation( + standardOp, + `DistData(`Symbolic(`Float(v1))), + `DistData(`Symbolic(`Float(v2))), + ), + ) => { + let func = funcFromOp(standardOp); + Ok(`DistData(`Symbolic(`Float(func(v1, v2))))); + } + | t => Ok(t); + + let tryCombiningNormals: simplifier = + fun + | `Operation( + `StandardOperation( + `Add, + `DistData(`Symbolic(`Normal(n1))), + `DistData(`Symbolic(`Normal(n2))), + ), + ) => + Ok(`DistData(`Symbolic(SymbolicDist.Normal.add(n1, n2)))) + | `Operation( + `StandardOperation( + `Subtract, + `DistData(`Symbolic(`Normal(n1))), + `DistData(`Symbolic(`Normal(n2))), + ), + ) => + Ok(`DistData(`Symbolic(SymbolicDist.Normal.subtract(n1, n2)))) + | t => Ok(t); + + let tryCombiningLognormals: simplifier = + fun + | `Operation( + `StandardOperation( + `Multiply, + `DistData(`Symbolic(`Lognormal(l1))), + `DistData(`Symbolic(`Lognormal(l2))), + ), + ) => + Ok(`DistData(`Symbolic(SymbolicDist.Lognormal.multiply(l1, l2)))) + | `Operation( + `StandardOperation( + `Divide, + `DistData(`Symbolic(`Lognormal(l1))), + `DistData(`Symbolic(`Lognormal(l2))), + ), + ) => + Ok(`DistData(`Symbolic(SymbolicDist.Lognormal.divide(l1, l2)))) + | t => Ok(t); + + let attempt = (standardOp, t1: t, t2: t): result(treeNode, string) => { + let originalTreeNode = + `Operation(`StandardOperation((standardOp, t1, t2))); + + originalTreeNode + |> tryCombiningFloats + |> E.R.bind(_, tryCombiningNormals) + |> E.R.bind(_, tryCombiningLognormals); + }; + }; + + let evaluateNumerically = (standardOp, renderParams, t1, t2) => { + let func = funcFromOp(standardOp); + + // TODO: downsample the two shapes + let renderedShape1 = t1 |> renderToShape(renderParams); + let renderedShape2 = t2 |> renderToShape(renderParams); + + // This will most likely require a mixed + + switch ((renderedShape1, renderedShape2)) { + | (Error(e1), _) => Error(e1) + | (_, Error(e2)) => Error(e2) + | (Ok(s1), Ok(s2)) => Ok(`DistData(`RenderedShape(Distributions.Shape.convolve(func, s1, s2)))) + }; + }; + + let evaluateToDistData = + (standardOp: standardOperation, renderParams, t1: t, t2: t): result(treeNode, string) => + standardOp + |> Simplify.attempt(_, t1, t2) + |> E.R.bind( + _, + fun + | `DistData(d) => Ok(`DistData(d)) // the analytical simplifaction worked, nice! + | `Operation(_) => // if not, run the convolution + evaluateNumerically(standardOp, renderParams, t1, t2), + ); + }; + + module ScaleOperation = { + let rec mean = (renderParams, t: t): result(float, string) => { + switch (t) { + | `DistData(`RenderedShape(s)) => Ok(Distributions.Shape.T.mean(s)) + | `DistData(`Symbolic(s)) => SymbolicDist.GenericDistFunctions.mean(s) + // evaluating the operation returns result(treeNode(distData)). We then want to make sure + | `Operation(op) => E.R.bind(renderParams.operationToDistData(renderParams.sampleCount, op), mean(renderParams)) + } + }; + + let fnFromOp = + fun + | `Multiply => (*.) + | `Log => ((a, b) => ( log(a) /. log(b) )); + + let knownIntegralSumFnFromOp = + fun + | `Multiply => (a, b) => Some(a *. b) + | `Log => ((_, _) => None); + + let evaluateToDistData = (scaleOp, renderParams, t, scaleBy) => { + let fn = fnFromOp(scaleOp); + let knownIntegralSumFn = knownIntegralSumFnFromOp(scaleOp); + let renderedShape = t |> renderToShape(renderParams); + let scaleByMeanValue = mean(renderParams, scaleBy); + + switch ((renderedShape, scaleByMeanValue)) { + | (Error(e1), _) => Error(e1) + | (_, Error(e2)) => Error(e2) + | (Ok(rs), Ok(sm)) => + Ok(`DistData(`RenderedShape(Distributions.Shape.T.mapY(~knownIntegralSumFn=knownIntegralSumFn(sm), fn(sm), rs)))) + } + }; + }; + + module PointwiseOperation = { + let funcFromOp: (pointwiseOperation => ((float, float) => float)) = + fun + | `Add => (+.) + | `Multiply => ( *. ); + + let evaluateToDistData = (pointwiseOp, renderParams, t1, t2) => { + let func = funcFromOp(pointwiseOp); + let renderedShape1 = t1 |> renderToShape(renderParams); + let renderedShape2 = t2 |> renderToShape(renderParams); + + // TODO: figure out integral, diff between pointwiseAdd and pointwiseProduct and other stuff + // Distributions.Shape.reduce(func, renderedShape1, renderedShape2); + + Error("Pointwise operations currently not supported.") + }; + }; + + module Truncate = { + module Simplify = { + let tryTruncatingNothing: simplifier = fun + | `Operation(`Truncate(None, None, `DistData(d))) => Ok(`DistData(d)) + | t => Ok(t); + + let tryTruncatingUniform: simplifier = fun + | `Operation(`Truncate(lc, rc, `DistData(`Symbolic(`Uniform(u))))) => { + // just create a new Uniform distribution + let newLow = max(E.O.default(neg_infinity, lc), u.low); + let newHigh = min(E.O.default(infinity, rc), u.high); + Ok(`DistData(`Symbolic(`Uniform({low: newLow, high: newHigh})))); + } + | t => Ok(t); + + let attempt = (leftCutoff, rightCutoff, t): result(treeNode, string) => { + let originalTreeNode = `Operation(`Truncate(leftCutoff, rightCutoff, t)); + + originalTreeNode + |> tryTruncatingNothing + |> E.R.bind(_, tryTruncatingUniform); + }; + }; + + let evaluateNumerically = (leftCutoff, rightCutoff, renderParams, t) => { + // TODO: use named args in renderToShape; if we're lucky we can at least get the tail + // of a distribution we otherwise wouldn't get at all + let renderedShape = t |> renderToShape(renderParams); + + E.R.bind(renderedShape, rs => { + let truncatedShape = rs |> Distributions.Shape.truncate(leftCutoff, rightCutoff); + Ok(`DistData(`RenderedShape(rs))); + }); + }; + + let evaluateToDistData = (leftCutoff: option(float), rightCutoff: option(float), renderParams, t: treeNode): result(treeNode, string) => { + t + |> Simplify.attempt(leftCutoff, rightCutoff) + |> E.R.bind( + _, + fun + | `DistData(d) => Ok(`DistData(d)) // the analytical simplifaction worked, nice! + | `Operation(_) => evaluateNumerically(leftCutoff, rightCutoff, renderParams, t), + ); // if not, run the convolution + }; + }; + + module Normalize = { + let rec evaluateToDistData = (renderParams, t: treeNode): result(treeNode, string) => { + switch (t) { + | `DistData(`Symbolic(_)) => Ok(t) + | `DistData(`RenderedShape(s)) => { + let normalized = Distributions.Shape.normalize(s); + Ok(`DistData(`RenderedShape(normalized))); + } + | `Operation(op) => E.R.bind(renderParams.operationToDistData(renderParams.sampleCount, op), evaluateToDistData(renderParams)) + } + } + }; + + module FloatFromDist = { + let evaluateFromSymbolic = (distToFloatOp: distToFloatOperation, s) => { + let value = switch (distToFloatOp) { + | `Pdf(f) => SymbolicDist.GenericDistFunctions.pdf(f, s) + | `Cdf(f) => 0.0 + | `Inv(f) => SymbolicDist.GenericDistFunctions.inv(f, s) + | `Sample => SymbolicDist.GenericDistFunctions.sample(s) + } + Ok(`DistData(`Symbolic(`Float(value)))); + }; + let evaluateFromRenderedShape = (distToFloatOp: distToFloatOperation, rs: DistTypes.shape): result(treeNode, string) => { + // evaluate the pdf, cdf, get sample, etc. from the renderedShape rs + // Should be a float like Ok(`DistData(`Symbolic(Float(0.0)))); + Error("Float from dist is not yet implemented."); + }; + let rec evaluateToDistData = (distToFloatOp: distToFloatOperation, renderParams, t: treeNode): result(treeNode, string) => { + switch (t) { + | `DistData(`Symbolic(s)) => evaluateFromSymbolic(distToFloatOp, s) // we want to evaluate the distToFloatOp on the symbolic dist + | `DistData(`RenderedShape(rs)) => evaluateFromRenderedShape(distToFloatOp, rs) + | `Operation(op) => E.R.bind(renderParams.operationToDistData(renderParams.sampleCount, op), evaluateToDistData(distToFloatOp, renderParams)) + } + } + }; + + module Render = { + let evaluateToRenderedShape = (renderParams, t: treeNode): result(t, string) => { + E.R.bind(renderToShape(renderParams, t), rs => Ok(`DistData(`RenderedShape(rs)))); + } + }; + + let rec operationToDistData = + (sampleCount: int, op: operation): result(t, string) => { + + // the functions that convert the Operation nodes to DistData nodes need to + // have a way to call this function on their children, if their children are themselves Operation nodes. + + let renderParams: renderParams = { + operationToDistData: operationToDistData, + sampleCount: sampleCount, + }; + + switch (op) { + | `StandardOperation(standardOp, t1, t2) => + StandardOperation.evaluateToDistData( + standardOp, renderParams, t1, t2 // we want to give it the option to render or simply leave it as is + ) + | `PointwiseOperation(pointwiseOp, t1, t2) => + PointwiseOperation.evaluateToDistData( + pointwiseOp, + renderParams, + t1, + t2, + ) + | `ScaleOperation(scaleOp, t, scaleBy) => + ScaleOperation.evaluateToDistData(scaleOp, renderParams, t, scaleBy) + | `Truncate(leftCutoff, rightCutoff, t) => Truncate.evaluateToDistData(leftCutoff, rightCutoff, renderParams, t) + | `FloatFromDist(distToFloatOp, t) => FloatFromDist.evaluateToDistData(distToFloatOp, renderParams, t) + | `Normalize(t) => Normalize.evaluateToDistData(renderParams, t) + | `Render(t) => Render.evaluateToRenderedShape(renderParams, t) + }; + }; + + /* This function recursively goes through the nodes of the parse tree, + replacing each Operation node and its subtree with a Data node. + Whenever possible, the replacement produces a new Symbolic Data node, + but most often it will produce a RenderedShape. + This function is used mainly to turn a parse tree into a single RenderedShape + that can then be displayed to the user. */ + let rec toDistData = (treeNode: t, sampleCount: int): result(t, string) => { + switch (treeNode) { + | `DistData(d) => Ok(`DistData(d)) + | `Operation(op) => operationToDistData(sampleCount, op) + }; + }; + + let rec toString = (t: t): string => { + let stringFromStandardOperation = fun + | `Add => " + " + | `Subtract => " - " + | `Multiply => " * " + | `Divide => " / " + | `Exponentiate => "^"; + + let stringFromPointwiseOperation = + fun + | `Add => " .+ " + | `Multiply => " .* "; + + switch (t) { + | `DistData(`Symbolic(d)) => SymbolicDist.GenericDistFunctions.toString(d) + | `DistData(`RenderedShape(s)) => "[shape]" + | `Operation(`StandardOperation(op, t1, t2)) => toString(t1) ++ stringFromStandardOperation(op) ++ toString(t2) + | `Operation(`PointwiseOperation(op, t1, t2)) => toString(t1) ++ stringFromPointwiseOperation(op) ++ toString(t2) + | `Operation(`ScaleOperation(_scaleOp, t, scaleBy)) => toString(t) ++ " @ " ++ toString(scaleBy) + | `Operation(`Normalize(t)) => "normalize(" ++ toString(t) ++ ")" + | `Operation(`Truncate(lc, rc, t)) => "truncate(" ++ toString(t) ++ ", " ++ E.O.dimap(string_of_float, () => "-inf", lc) ++ ", " ++ E.O.dimap(string_of_float, () => "inf", rc) ++ ")" + | `Operation(`Render(t)) => toString(t) + } + }; +}; + +let toShape = (sampleCount: int, treeNode: treeNode) => { + let renderResult = TreeNode.toDistData(`Operation(`Render(treeNode)), sampleCount); + + + switch (renderResult) { + | Ok(`DistData(`RenderedShape(rs))) => { + let continuous = Distributions.Shape.T.toContinuous(rs); + let discrete = Distributions.Shape.T.toDiscrete(rs); + let shape = MixedShapeBuilder.buildSimple(~continuous, ~discrete); + shape |> E.O.toExt(""); + } + | Ok(_) => E.O.toExn("Rendering failed.", None) + | Error(message) => E.O.toExn("No shape found!", None) + } +}; diff --git a/src/distPlus/utility/Jstat.re b/src/distPlus/utility/Jstat.re index 5f1c6c51..0a2cc13f 100644 --- a/src/distPlus/utility/Jstat.re +++ b/src/distPlus/utility/Jstat.re @@ -5,6 +5,7 @@ type normal = { [@bs.meth] "cdf": (float, float, float) => float, [@bs.meth] "inv": (float, float, float) => float, [@bs.meth] "sample": (float, float) => float, + [@bs.meth] "mean": (float, float) => float, }; type lognormal = { . @@ -12,6 +13,7 @@ type lognormal = { [@bs.meth] "cdf": (float, float, float) => float, [@bs.meth] "inv": (float, float, float) => float, [@bs.meth] "sample": (float, float) => float, + [@bs.meth] "mean": (float, float) => float, }; type uniform = { . @@ -19,6 +21,7 @@ type uniform = { [@bs.meth] "cdf": (float, float, float) => float, [@bs.meth] "inv": (float, float, float) => float, [@bs.meth] "sample": (float, float) => float, + [@bs.meth] "mean": (float, float) => float, }; type beta = { . @@ -26,6 +29,7 @@ type beta = { [@bs.meth] "cdf": (float, float, float) => float, [@bs.meth] "inv": (float, float, float) => float, [@bs.meth] "sample": (float, float) => float, + [@bs.meth] "mean": (float, float) => float, }; type exponential = { . @@ -33,6 +37,7 @@ type exponential = { [@bs.meth] "cdf": (float, float) => float, [@bs.meth] "inv": (float, float) => float, [@bs.meth] "sample": float => float, + [@bs.meth] "mean": float => float, }; type cauchy = { . @@ -47,6 +52,7 @@ type triangular = { [@bs.meth] "cdf": (float, float, float, float) => float, [@bs.meth] "inv": (float, float, float, float) => float, [@bs.meth] "sample": (float, float, float) => float, + [@bs.meth] "mean": (float, float, float) => float, }; // Pareto doesn't have sample for some reason @@ -61,6 +67,7 @@ type poisson = { [@bs.meth] "pdf": (float, float) => float, [@bs.meth] "cdf": (float, float) => float, [@bs.meth] "sample": float => float, + [@bs.meth] "mean": float => float, }; type weibull = { . @@ -68,6 +75,7 @@ type weibull = { [@bs.meth] "cdf": (float, float, float) => float, [@bs.meth] "inv": (float, float, float) => float, [@bs.meth] "sample": (float, float) => float, + [@bs.meth] "mean": (float, float) => float, }; type binomial = { . @@ -101,4 +109,4 @@ external quartiles: (array(float)) => array(float) = "quartiles"; [@bs.module "jstat"] external quantiles: (array(float), array(float)) => array(float) = "quantiles"; [@bs.module "jstat"] -external percentile: (array(float), float, bool) => float = "percentile"; \ No newline at end of file +external percentile: (array(float), float, bool) => float = "percentile";