From eb0ffdc6c3a4e238381765a3cc80a9b5e17611f8 Mon Sep 17 00:00:00 2001 From: Sebastian Kosch Date: Tue, 9 Jun 2020 21:28:03 -0700 Subject: [PATCH 01/31] First pass at nested multimodals, still needs lots of cleanup --- src/distPlus/symbolic/MathJsParser.re | 57 ++++-- src/distPlus/symbolic/SymbolicDist.re | 270 +++++++++++++++++++------- 2 files changed, 239 insertions(+), 88 deletions(-) diff --git a/src/distPlus/symbolic/MathJsParser.re b/src/distPlus/symbolic/MathJsParser.re index 07d24cb4..0a0c5e0c 100644 --- a/src/distPlus/symbolic/MathJsParser.re +++ b/src/distPlus/symbolic/MathJsParser.re @@ -148,6 +148,10 @@ module MathAdtToDistDst = { Ok(`Simple(`Triangular({low, medium, high}))) | _ => Error("Wrong number of variables in triangle distribution"); + /*let add: array(arg) => result(SymbolicDist.bigDist, string) = + fun + | */ + let multiModal = ( args: array(result(SymbolicDist.bigDist, string)), @@ -158,22 +162,25 @@ module MathAdtToDistDst = { args |> E.A.fmap( fun - | Ok(`Simple(n)) => Ok(n) + | Ok(`Simple(d)) => Ok(`Simple(d)) + | Ok(`PointwiseCombination(dists)) => Ok(`PointwiseCombination(dists)) | Error(e) => Error(e) - | Ok(k) => Error(SymbolicDist.toString(k)), + | _ => Error("Unexpected dist") ); + let firstWithError = dists |> Belt.Array.getBy(_, Belt.Result.isError); let withoutErrors = dists |> E.A.fmap(E.R.toOption) |> E.A.O.concatSomes; + switch (firstWithError) { - | Some(Error(e)) => Error(e) - | None when withoutErrors |> E.A.length == 0 => - Error("Multimodals need at least one input") - | _ => - withoutErrors - |> E.A.fmapi((index, item) => - (item, weights |> E.A.get(_, index) |> E.O.default(1.0)) - ) - |> (r => Ok(`PointwiseCombination(r))) + | Some(Error(e)) => Error(e) + | None when withoutErrors |> E.A.length == 0 => + Error("Multimodals need at least one input") + | _ => + withoutErrors + |> E.A.fmapi((index, item) => + (item, weights |> E.A.get(_, index) |> E.O.default(1.0)) + ) + |> (r => Ok(`PointwiseCombination(r))) }; }; @@ 
-186,12 +193,12 @@ module MathAdtToDistDst = { ) |> E.A.O.concatSomes let outputs = Samples.T.fromSamples(samples); - let pdf = outputs.shape |> E.O.bind(_,Distributions.Shape.T.toContinuous) + let pdf = outputs.shape |> E.O.bind(_,Distributions.Shape.T.toContinuous); let shape = pdf |> E.O.fmap(pdf => { let _pdf = Distributions.Continuous.T.scaleToIntegralSum(~cache=None, ~intendedSum=1.0, pdf); let cdf = Distributions.Continuous.T.integral(~cache=None, _pdf); SymbolicDist.ContinuousShape.make(_pdf, cdf) - }) + }); switch(shape){ | Some(s) => Ok(`Simple(`ContinuousShape(s))) | None => Error("Rendering did not work") @@ -238,6 +245,7 @@ module MathAdtToDistDst = { let dists = possibleDists |> E.A.fmap(functionParser); multiModal(dists, weights); } + //| Fn({name: "add", args}) => add(args) | Fn({name}) => Error(name ++ ": function not supported") | _ => { Error("This type not currently supported"); @@ -255,19 +263,32 @@ module MathAdtToDistDst = { | Object(_) => Error("Object not valid as top level") ); - let run = (r): result(SymbolicDist.bigDist, string) => - r |> MathAdtCleaner.run |> topLevel; + let run = (r): result(SymbolicDist.bigDist, string) => { + let o = r |> MathAdtCleaner.run |> topLevel; + Js.log2("parser output", o); + o + }; }; let fromString = str => { + /* We feed the user-typed string into Mathjs.parseMath, + which returns a JSON with (hopefully) a single-element array. + This array element is the top-level node of a nested-object tree + representing the functions/arguments/values/etc. in the string. + + The function MathJsonToMathJsAdt then recursively unpacks this JSON into a typed data structure we can use. + Inside of this function, MathAdtToDistDst is called whenever a distribution function is encountered. 
+ */ let mathJsToJson = Mathjs.parseMath(str); let mathJsParse = - E.R.bind(mathJsToJson, r => + E.R.bind(mathJsToJson, r => { + Js.log2("parsed", r); switch (MathJsonToMathJsAdt.run(r)) { | Some(r) => Ok(r) | None => Error("MathJsParse Error") } - ); +}); + let value = E.R.bind(mathJsParse, MathAdtToDistDst.run); value; -}; \ No newline at end of file +}; diff --git a/src/distPlus/symbolic/SymbolicDist.re b/src/distPlus/symbolic/SymbolicDist.re index d867eb22..38a4b272 100644 --- a/src/distPlus/symbolic/SymbolicDist.re +++ b/src/distPlus/symbolic/SymbolicDist.re @@ -47,12 +47,44 @@ type dist = [ | `Cauchy(cauchy) | `Triangular(triangular) | `ContinuousShape(continuousShape) - | `Float(float) + | `Float(float) // Dirac delta at x. Practically useful only in the context of multimodals. ]; -type pointwiseAdd = array((dist, float)); +/* Build a tree. -type bigDist = [ | `Simple(dist) | `PointwiseCombination(pointwiseAdd)]; + Multiple operations possible: + + - PointwiseSum(Scalar, Scalar) + - PointwiseSum(WeightedDist, WeightedDist) + - PointwiseProduct(Scalar, Scalar) + - PointwiseProduct(Scalar, WeightedDist) + - PointwiseProduct(WeightedDist, WeightedDist) + + - IndependentVariableSum(WeightedDist, WeightedDist) [i.e., convolution] + - IndependentVariableProduct(WeightedDist, WeightedDist) [i.e. distribution product] + */ + +type weightedDist = (float, dist); + +type bigDistTree = + /* | DistLeaf(dist) */ + /* | ScalarLeaf(float) */ + /* | PointwiseScalarDistProduct(DistLeaf(d), ScalarLeaf(s)) */ + | WeightedDistLeaf(weightedDist) + | PointwiseNormalizedDistSum(array(bigDistTree)); + +let rec treeIntegral = item => { + switch (item) { + | WeightedDistLeaf((w, d)) => w + | PointwiseNormalizedDistSum(childTrees) => + childTrees |> E.A.fmap(treeIntegral) |> E.A.Floats.sum + }; +}; + +/* bigDist can either be a single distribution, or a + PointwiseCombination, i.e. 
an array of (dist, weight) tuples */ +type bigDist = [ | `Simple(dist) | `PointwiseCombination(pointwiseAdd)] +and pointwiseAdd = array((bigDist, float)); module ContinuousShape = { type t = continuousShape; @@ -255,29 +287,27 @@ module GenericSimple = { | `Uniform({high}) => high | `Float(n) => n; - /* This function returns a list of x's at which to evaluate the overall distribution (for rendering). - This function is called separately for each individual distribution. + This function is called separately for each individual distribution. - When called with xSelection=`Linear, this function will return (sampleCount) x's, evenly - distributed between the min and max of the distribution (whatever those are defined to be above). + When called with xSelection=`Linear, this function will return (sampleCount) x's, evenly + distributed between the min and max of the distribution (whatever those are defined to be above). - When called with xSelection=`ByWeight, this function will distribute the x's such as to - match the cumulative shape of the distribution. This is slower but may give better results. - */ + When called with xSelection=`ByWeight, this function will distribute the x's such as to + match the cumulative shape of the distribution. This is slower but may give better results. + */ let interpolateXs = (~xSelection: [ | `Linear | `ByWeight]=`Linear, dist: dist, sampleCount) => { - switch (xSelection, dist) { | (`Linear, _) => E.A.Floats.range(min(dist), max(dist), sampleCount) | (`ByWeight, `Uniform(n)) => // In `ByWeight mode, uniform distributions get special treatment because we need two x's // on either side for proper rendering (just left and right of the discontinuities). let dx = 0.00001 *. (n.high -. n.low); - [|n.low -. dx, n.low +. dx, n.high -. dx, n.high +. dx|] - | (`ByWeight, _) => - let ys = E.A.Floats.range(minCdfValue, maxCdfValue, sampleCount) - ys |> E.A.fmap(y => inv(y, dist)) + [|n.low -. dx, n.low +. dx, n.high -. dx, n.high +. 
dx|]; + | (`ByWeight, _) => + let ys = E.A.Floats.range(minCdfValue, maxCdfValue, sampleCount); + ys |> E.A.fmap(y => inv(y, dist)); }; }; @@ -299,90 +329,190 @@ module GenericSimple = { module PointwiseAddDistributionsWeighted = { type t = pointwiseAdd; - let normalizeWeights = (dists: t) => { - let total = dists |> E.A.fmap(snd) |> E.A.Floats.sum; - dists |> E.A.fmap(((a, b)) => (a, b /. total)); + let normalizeWeights = (weightedDists: t) => { + let total = weightedDists |> E.A.fmap(snd) |> E.A.Floats.sum; + weightedDists |> E.A.fmap(((d, w)) => (d, w /. total)); }; - let pdf = (x: float, dists: t) => - dists - |> E.A.fmap(((e, w)) => GenericSimple.pdf(x, e) *. w) + let rec pdf = (x: float, weightedNormalizedDists: t) => + weightedNormalizedDists + |> E.A.fmap(((d, w)) => { + switch (d) { + | `PointwiseCombination(ts) => pdf(x, ts) *. w + | `Simple(d) => GenericSimple.pdf(x, d) *. w + } + }) |> E.A.Floats.sum; - let min = (dists: t) => - dists |> E.A.fmap(d => d |> fst |> GenericSimple.min) |> E.A.min; + // TODO: perhaps rename into minCdfX? + // TODO: how should nonexistent min values be handled? They should never happen + let rec min = (dists: t) => + dists + |> E.A.fmap(((d, w)) => { + switch (d) { + | `PointwiseCombination(ts) => E.O.toExn("Dist has no min", min(ts)) + | `Simple(d) => GenericSimple.min(d) + } + }) + |> E.A.min; - let max = (dists: t) => - dists |> E.A.fmap(d => d |> fst |> GenericSimple.max) |> E.A.max; + // TODO: perhaps rename into minCdfX? 
+ let rec max = (dists: t) => + dists + |> E.A.fmap(((d, w)) => { + switch (d) { + | `PointwiseCombination(ts) => E.O.toExn("Dist has no max", max(ts)) + | `Simple(d) => GenericSimple.max(d) + } + }) + |> E.A.max; - let discreteShape = (dists: t, sampleCount: int) => { + + /*let rec discreteShape = (dists: t, sampleCount: int) => { let discrete = dists - |> E.A.fmap(((r, e)) => - r - |> ( - fun - | `Float(r) => Some((r, e)) - | _ => None - ) - ) + |> E.A.fmap(((x, w)) => { + switch (d) { + | `Float(d) => Some((d, w)) // if the distribution is just a number, then the weight is considered the y + | _ => None + } + }) |> E.A.O.concatSomes |> E.A.fmap(((x, y)) => ({xs: [|x|], ys: [|y|]}: DistTypes.xyShape) ) + // take an array of xyShapes and combine them together + //* r + |> ( + fun + | `Float(r) => Some((r, e)) + | _ => None + ) + )*/ |> Distributions.Discrete.reduce((+.)); discrete; + };*/ + + + let rec findContinuousXs = (dists: t, sampleCount: int) => { + // we need to go through the tree of distributions and, for the continuous ones, find the xs at which + // later, all distributions will get evaluated. + + // we want to accumulate a set of xs. 
+ let xs: array(float) = + dists + |> E.A.fold_left((accXs, (d, w)) => { + switch (d) { + | `Simple(t) when (GenericSimple.contType(t) == `Discrete) => accXs + | `Simple(d) => { + let xs = GenericSimple.interpolateXs(~xSelection=`ByWeight, d, sampleCount) + + E.A.append(accXs, xs) + } + | `PointwiseCombination(ts) => { + let xs = findContinuousXs(ts, sampleCount); + E.A.append(accXs, xs) + } + } + }, [||]); + xs }; - let continuousShape = (dists: t, sampleCount: int) => { - let xs = - dists - |> E.A.fmap(r => - r - |> fst - |> GenericSimple.interpolateXs( - ~xSelection=`ByWeight, - _, - sampleCount / (dists |> E.A.length), - ) - ) - |> E.A.concatMany; - xs |> Array.fast_sort(compare); - let ys = xs |> E.A.fmap(pdf(_, dists)); - XYShape.T.fromArrays(xs, ys) |> Distributions.Continuous.make(`Linear, _); - }; - - let toShape = (dists: t, sampleCount: int) => { + /* Accumulate (accContShapes, accDistShapes), each of which is an array of {xs, ys} shapes. */ + let rec accumulateContAndDiscShapes = (dists: t, continuousXs: array(float), currentWeight) => { let normalized = normalizeWeights(dists); - let continuous = - normalized - |> E.A.filter(((r, _)) => GenericSimple.contType(r) == `Continuous) - |> continuousShape(_, sampleCount); - let discrete = - normalized - |> E.A.filter(((r, _)) => GenericSimple.contType(r) == `Discrete) - |> discreteShape(_, sampleCount); - let shape = - MixedShapeBuilder.buildSimple(~continuous=Some(continuous), ~discrete); + + normalized + |> E.A.fold_left(((accContShapes: array(DistTypes.xyShape), accDiscShapes: array(DistTypes.xyShape)), (d, w)) => { + switch (d) { + + | `Simple(`Float(x)) => { + let ds: DistTypes.xyShape = {xs: [|x|], ys: [|w *. currentWeight|]}; + (accContShapes, E.A.append(accDiscShapes, [|ds|])) + } + + | `Simple(d) when (GenericSimple.contType(d) == `Continuous) => { + let ys = continuousXs |> E.A.fmap(x => GenericSimple.pdf(x, d) *. w *. 
currentWeight); + let cs = XYShape.T.fromArrays(continuousXs, ys); + + (E.A.append(accContShapes, [|cs|]), accDiscShapes) + } + + | `Simple(d) => (accContShapes, accDiscShapes) // default -- should never happen + + | `PointwiseCombination(ts) => { + let (cs, ds) = accumulateContAndDiscShapes(ts, continuousXs, w *. currentWeight); + (E.A.append(accContShapes, cs), E.A.append(accDiscShapes, ds)) + } + } + + }, ([||]: array(DistTypes.xyShape), [||]: array(DistTypes.xyShape))) + }; + + /* + We will assume that each dist (of t) in the multimodal has a total of one. + We can therefore normalize the weights of the parts. + + However, a multimodal can consist of both discrete and continuous shapes. + These need to be added and collected individually. + */ + let toShape = (dists: t, sampleCount: int) => { + let continuousXs = findContinuousXs(dists, sampleCount); + continuousXs |> Array.fast_sort(compare); + + let (contShapes, distShapes) = accumulateContAndDiscShapes(dists, continuousXs, 1.0); + + let combinedContinuous = contShapes + |> E.A.fold_left((shapeAcc: DistTypes.xyShape, shape: DistTypes.xyShape) => { + let ys = E.A.fmapi((i, y) => y +. 
shape.ys[i], shapeAcc.ys); + {xs: continuousXs, ys: ys} + }, {xs: continuousXs, ys: Array.make(Array.length(continuousXs), 0.0)}) + |> Distributions.Continuous.make(`Linear); + + let combinedDiscrete = Distributions.Discrete.reduce((+.), distShapes) + + let shape = MixedShapeBuilder.buildSimple(~continuous=Some(combinedContinuous), ~discrete=combinedDiscrete); + shape |> E.O.toExt(""); }; - let toString = (dists: t) => { + let rec toString = (dists: t): string => { let distString = dists - |> E.A.fmap(d => GenericSimple.toString(fst(d))) - |> Js.Array.joinWith(","); - let weights = - dists - |> E.A.fmap(d => - snd(d) |> Js.Float.toPrecisionWithPrecision(~digits=2) + |> E.A.fmap(((d, _)) => + switch (d) { + | `Simple(d) => GenericSimple.toString(d) + | `PointwiseCombination(ts: t) => ts |> toString + } ) |> Js.Array.joinWith(","); + + // mm(normal(0,1), normal(1,2)) => "multimodal(normal(0,1), normal(1,2), ) + + let weights = + dists + |> E.A.fmap(((_, w)) => + Js.Float.toPrecisionWithPrecision(w, ~digits=2) + ) + |> Js.Array.joinWith(","); + {j|multimodal($distString, [$weights])|j}; }; }; +// assume that recursive pointwiseNormalizedDistSums are the only type of operation there is. +// in the original, it was a list of (dist, weight) tuples. Now, it's a tree of (dist, weight) tuples, just that every +// dist can be either a GenericSimple or another PointwiseAdd. + +/*let toString = (r: bigDistTree) => { + switch (r) { + | WeightedDistLeaf((w, d)) => GenericWeighted.toString(w) // "normal " + | PointwiseNormalizedDistSum(childTrees) => childTrees |> E.A.fmap(toString) |> Js.Array.joinWith("") + } + }*/ + let toString = (r: bigDist) => + // we need to recursively create the string representation of the tree. 
r |> ( fun From fa1bf75acc608da16f080fc8d5ecb0c7636769be Mon Sep 17 00:00:00 2001 From: Sebastian Kosch Date: Tue, 9 Jun 2020 21:43:35 -0700 Subject: [PATCH 02/31] Comment out some stuff, fix annoying react key prop error --- src/components/charts/DistPlusPlot.re | 4 ++-- src/distPlus/symbolic/MathJsParser.re | 8 ++------ src/distPlus/symbolic/SymbolicDist.re | 4 ++-- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/components/charts/DistPlusPlot.re b/src/components/charts/DistPlusPlot.re index a6b35f22..9eb484ef 100644 --- a/src/components/charts/DistPlusPlot.re +++ b/src/components/charts/DistPlusPlot.re @@ -339,7 +339,7 @@ let make = (~distPlus: DistTypes.distPlus) => {
{state.distributions |> E.L.fmapi((index, config) => -
+
{setX(_ => r)}} />
@@ -406,4 +406,4 @@ let make = (~distPlus: DistTypes.distPlus) => { {state.showStats ? table(distPlus, x) : ReasonReact.null} {state.showPercentiles ? percentiles(distPlus) : ReasonReact.null}
; -}; \ No newline at end of file +}; diff --git a/src/distPlus/symbolic/MathJsParser.re b/src/distPlus/symbolic/MathJsParser.re index 0a0c5e0c..41667ad8 100644 --- a/src/distPlus/symbolic/MathJsParser.re +++ b/src/distPlus/symbolic/MathJsParser.re @@ -263,11 +263,8 @@ module MathAdtToDistDst = { | Object(_) => Error("Object not valid as top level") ); - let run = (r): result(SymbolicDist.bigDist, string) => { - let o = r |> MathAdtCleaner.run |> topLevel; - Js.log2("parser output", o); - o - }; + let run = (r): result(SymbolicDist.bigDist, string) => + r |> MathAdtCleaner.run |> topLevel; }; let fromString = str => { @@ -282,7 +279,6 @@ let fromString = str => { let mathJsToJson = Mathjs.parseMath(str); let mathJsParse = E.R.bind(mathJsToJson, r => { - Js.log2("parsed", r); switch (MathJsonToMathJsAdt.run(r)) { | Some(r) => Ok(r) | None => Error("MathJsParse Error") diff --git a/src/distPlus/symbolic/SymbolicDist.re b/src/distPlus/symbolic/SymbolicDist.re index 38a4b272..05eec52c 100644 --- a/src/distPlus/symbolic/SymbolicDist.re +++ b/src/distPlus/symbolic/SymbolicDist.re @@ -64,7 +64,7 @@ type dist = [ - IndependentVariableProduct(WeightedDist, WeightedDist) [i.e. distribution product] */ -type weightedDist = (float, dist); +/*type weightedDist = (float, dist); type bigDistTree = /* | DistLeaf(dist) */ @@ -79,7 +79,7 @@ let rec treeIntegral = item => { | PointwiseNormalizedDistSum(childTrees) => childTrees |> E.A.fmap(treeIntegral) |> E.A.Floats.sum }; -}; +};*/ /* bigDist can either be a single distribution, or a PointwiseCombination, i.e. 
an array of (dist, weight) tuples */ From bc271a090b2c0ac5f5020c0da34715e951efa543 Mon Sep 17 00:00:00 2001 From: Sebastian Kosch Date: Tue, 9 Jun 2020 21:58:48 -0700 Subject: [PATCH 03/31] Make yMin=0 for discrete point y-axis on d3 chart --- src/components/charts/DistributionPlot/distPlotD3.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/charts/DistributionPlot/distPlotD3.js b/src/components/charts/DistributionPlot/distPlotD3.js index 3bb13fb2..03cc671c 100644 --- a/src/components/charts/DistributionPlot/distPlotD3.js +++ b/src/components/charts/DistributionPlot/distPlotD3.js @@ -427,7 +427,7 @@ export class DistPlotD3 { addLollipopsChart(common) { const data = this.getDataPoints('discrete'); - const yMin = d3.min(this.attrs.data.discrete.ys); + const yMin = 0.; //d3.min(this.attrs.data.discrete.ys); const yMax = d3.max(this.attrs.data.discrete.ys); // X axis. From f6c1918b122a9c4c28bed10d1c9a6b790f8b1271 Mon Sep 17 00:00:00 2001 From: Sebastian Kosch Date: Fri, 12 Jun 2020 23:30:51 -0700 Subject: [PATCH 04/31] Big refactor towards proper distTree, still slow and untested --- src/components/Drawer.re | 2 +- src/distPlus/distribution/DistTypes.re | 2 +- src/distPlus/distribution/Distributions.re | 18 +- src/distPlus/distribution/XYShape.re | 29 +- src/distPlus/renderers/RenderTypes.re | 4 +- src/distPlus/symbolic/MathJsParser.re | 121 +++-- src/distPlus/symbolic/SymbolicDist.re | 546 +++++++++++++-------- 7 files changed, 475 insertions(+), 247 deletions(-) diff --git a/src/components/Drawer.re b/src/components/Drawer.re index fa0babbe..090447b5 100644 --- a/src/components/Drawer.re +++ b/src/components/Drawer.re @@ -986,4 +986,4 @@ let make = () => { ; -}; \ No newline at end of file +}; diff --git a/src/distPlus/distribution/DistTypes.re b/src/distPlus/distribution/DistTypes.re index 7a598c01..948eb3ae 100644 --- a/src/distPlus/distribution/DistTypes.re +++ b/src/distPlus/distribution/DistTypes.re @@ -153,4 +153,4 @@ module 
MixedPoint = { }; let add = combine2((a, b) => a +. b); -}; \ No newline at end of file +}; diff --git a/src/distPlus/distribution/Distributions.re b/src/distPlus/distribution/Distributions.re index 2472f2ab..16be6872 100644 --- a/src/distPlus/distribution/Distributions.re +++ b/src/distPlus/distribution/Distributions.re @@ -74,6 +74,21 @@ module Continuous = { (fn, {xyShape, interpolation}: t): option(DistTypes.continuousShape) => fn(xyShape) |> E.O.fmap(make(interpolation)); + let empty: DistTypes.continuousShape = {xyShape: XYShape.T.empty, interpolation: `Linear}; + let combine = + (fn, t1: DistTypes.continuousShape, t2: DistTypes.continuousShape) + : DistTypes.continuousShape => { + make(`Linear, XYShape.Combine.combine( + ~xsSelection=ALL_XS, + ~xToYSelection=XYShape.XtoY.linear, + ~fn, + t1.xyShape, + t2.xyShape, + )); + }; + let reduce = (fn, items) => + items |> E.A.fold_left(combine(fn), empty); + let toLinear = (t: t): option(t) => { switch (t) { | {interpolation: `Stepwise, xyShape} => @@ -166,6 +181,7 @@ module Discrete = { let sortedByX = (t: DistTypes.discreteShape) => t |> XYShape.T.zip |> XYShape.Zipped.sortByX; let empty = XYShape.T.empty; + let make = (s: DistTypes.discreteShape) => s; let combine = (fn, t1: DistTypes.discreteShape, t2: DistTypes.discreteShape) : DistTypes.discreteShape => { @@ -708,4 +724,4 @@ module DistPlusTime = { |> E.O.fmap(x => DistPlus.T.Integral.xToY(~cache=None, x, t)); }; }; -}; \ No newline at end of file +}; diff --git a/src/distPlus/distribution/XYShape.re b/src/distPlus/distribution/XYShape.re index aeed1bae..cf3600a9 100644 --- a/src/distPlus/distribution/XYShape.re +++ b/src/distPlus/distribution/XYShape.re @@ -179,16 +179,25 @@ module Combine = { t1: T.t, t2: T.t, ) => { - let allXs = - switch (xsSelection) { - | ALL_XS => Ts.allXs([|t1, t2|]) - | XS_EVENLY_DIVIDED(sampleCount) => - Ts.equallyDividedXs([|t1, t2|], sampleCount) - }; - let allYs = - allXs |> E.A.fmap(x => fn(xToYSelection(x, t1), xToYSelection(x, 
t2))); - T.fromArrays(allXs, allYs); + switch ((E.A.length(t1.xs), E.A.length(t2.xs))) { + | (0, 0) => T.empty + | (0, _) => t2 + | (_, 0) => t1 + | (_, _) => { + let allXs = + switch (xsSelection) { + | ALL_XS => Ts.allXs([|t1, t2|]) + | XS_EVENLY_DIVIDED(sampleCount) => + Ts.equallyDividedXs([|t1, t2|], sampleCount) + }; + + let allYs = + allXs |> E.A.fmap(x => fn(xToYSelection(x, t1), xToYSelection(x, t2))); + + T.fromArrays(allXs, allYs); + } + } }; let combineLinear = combine(~xToYSelection=XtoY.linear); @@ -354,4 +363,4 @@ module Analysis = { }; let squareXYShape = T.mapX(x => x ** 2.0) -}; \ No newline at end of file +}; diff --git a/src/distPlus/renderers/RenderTypes.re b/src/distPlus/renderers/RenderTypes.re index 95a36204..99a53aae 100644 --- a/src/distPlus/renderers/RenderTypes.re +++ b/src/distPlus/renderers/RenderTypes.re @@ -43,7 +43,7 @@ module ShapeRenderer = { module Symbolic = { type inputs = {length: int}; type outputs = { - graph: SymbolicDist.bigDist, + graph: SymbolicDist.distTree, shape: DistTypes.shape, }; let make = (graph, shape) => {graph, shape}; @@ -124,4 +124,4 @@ module DistPlusRenderer = { let shapeRenderOutputs = (t:outputs) => t.shapeRenderOutputs let make = (shapeRenderOutputs, distPlus) => {shapeRenderOutputs, distPlus}; } -}; \ No newline at end of file +}; diff --git a/src/distPlus/symbolic/MathJsParser.re b/src/distPlus/symbolic/MathJsParser.re index 41667ad8..b962df90 100644 --- a/src/distPlus/symbolic/MathJsParser.re +++ b/src/distPlus/symbolic/MathJsParser.re @@ -88,73 +88,69 @@ module MathAdtToDistDst = { ); }; - let normal: array(arg) => result(SymbolicDist.bigDist, string) = + let normal: array(arg) => result(SymbolicDist.distTree, string) = fun | [|Value(mean), Value(stdev)|] => - Ok(`Simple(`Normal({mean, stdev}))) + Ok(`Distribution(`Normal({mean, stdev}))) | _ => Error("Wrong number of variables in normal distribution"); - let lognormal: array(arg) => result(SymbolicDist.bigDist, string) = + let lognormal: array(arg) 
=> result(SymbolicDist.distTree, string) = fun - | [|Value(mu), Value(sigma)|] => Ok(`Simple(`Lognormal({mu, sigma}))) + | [|Value(mu), Value(sigma)|] => Ok(`Distribution(`Lognormal({mu, sigma}))) | [|Object(o)|] => { let g = Js.Dict.get(o); switch (g("mean"), g("stdev"), g("mu"), g("sigma")) { | (Some(Value(mean)), Some(Value(stdev)), _, _) => - Ok(`Simple(SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev))) + Ok(`Distribution(SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev))) | (_, _, Some(Value(mu)), Some(Value(sigma))) => - Ok(`Simple(`Lognormal({mu, sigma}))) + Ok(`Distribution(`Lognormal({mu, sigma}))) | _ => Error("Lognormal distribution would need mean and stdev") }; } | _ => Error("Wrong number of variables in lognormal distribution"); - let to_: array(arg) => result(SymbolicDist.bigDist, string) = + let to_: array(arg) => result(SymbolicDist.distTree, string) = fun | [|Value(low), Value(high)|] when low <= 0.0 && low < high=> { - Ok(`Simple(SymbolicDist.Normal.from90PercentCI(low, high))); + Ok(`Distribution(SymbolicDist.Normal.from90PercentCI(low, high))); } | [|Value(low), Value(high)|] when low < high => { - Ok(`Simple(SymbolicDist.Lognormal.from90PercentCI(low, high))); + Ok(`Distribution(SymbolicDist.Lognormal.from90PercentCI(low, high))); } | [|Value(_), Value(_)|] => Error("Low value must be less than high value.") | _ => Error("Wrong number of variables in lognormal distribution"); - let uniform: array(arg) => result(SymbolicDist.bigDist, string) = + let uniform: array(arg) => result(SymbolicDist.distTree, string) = fun - | [|Value(low), Value(high)|] => Ok(`Simple(`Uniform({low, high}))) + | [|Value(low), Value(high)|] => Ok(`Distribution(`Uniform({low, high}))) | _ => Error("Wrong number of variables in lognormal distribution"); - let beta: array(arg) => result(SymbolicDist.bigDist, string) = + let beta: array(arg) => result(SymbolicDist.distTree, string) = fun - | [|Value(alpha), Value(beta)|] => Ok(`Simple(`Beta({alpha, beta}))) + | 
[|Value(alpha), Value(beta)|] => Ok(`Distribution(`Beta({alpha, beta}))) | _ => Error("Wrong number of variables in lognormal distribution"); - let exponential: array(arg) => result(SymbolicDist.bigDist, string) = + let exponential: array(arg) => result(SymbolicDist.distTree, string) = fun - | [|Value(rate)|] => Ok(`Simple(`Exponential({rate: rate}))) + | [|Value(rate)|] => Ok(`Distribution(`Exponential({rate: rate}))) | _ => Error("Wrong number of variables in Exponential distribution"); - let cauchy: array(arg) => result(SymbolicDist.bigDist, string) = + let cauchy: array(arg) => result(SymbolicDist.distTree, string) = fun | [|Value(local), Value(scale)|] => - Ok(`Simple(`Cauchy({local, scale}))) + Ok(`Distribution(`Cauchy({local, scale}))) | _ => Error("Wrong number of variables in cauchy distribution"); - let triangular: array(arg) => result(SymbolicDist.bigDist, string) = + let triangular: array(arg) => result(SymbolicDist.distTree, string) = fun | [|Value(low), Value(medium), Value(high)|] => - Ok(`Simple(`Triangular({low, medium, high}))) + Ok(`Distribution(`Triangular({low, medium, high}))) | _ => Error("Wrong number of variables in triangle distribution"); - /*let add: array(arg) => result(SymbolicDist.bigDist, string) = - fun - | */ - let multiModal = ( - args: array(result(SymbolicDist.bigDist, string)), + args: array(result(SymbolicDist.distTree, string)), weights: option(array(float)), ) => { let weights = weights |> E.O.default([||]); @@ -162,8 +158,14 @@ module MathAdtToDistDst = { args |> E.A.fmap( fun - | Ok(`Simple(d)) => Ok(`Simple(d)) - | Ok(`PointwiseCombination(dists)) => Ok(`PointwiseCombination(dists)) + | Ok(`Distribution(d)) => Ok(`Distribution(d)) + | Ok(`Combination(t1, t2, op)) => Ok(`Combination(t1, t2, op)) + | Ok(`PointwiseSum(t1, t2)) => Ok(`PointwiseSum(t1, t2)) + | Ok(`PointwiseProduct(t1, t2)) => Ok(`PointwiseProduct(t1, t2)) + | Ok(`Normalize(t)) => Ok(`Normalize(t)) + | Ok(`LeftTruncate(t, x)) => Ok(`LeftTruncate(t, x)) + | 
Ok(`RightTruncate(t, x)) => Ok(`RightTruncate(t, x)) + | Ok(`Render(t)) => Ok(`Render(t)) | Error(e) => Error(e) | _ => Error("Unexpected dist") ); @@ -175,16 +177,26 @@ module MathAdtToDistDst = { | Some(Error(e)) => Error(e) | None when withoutErrors |> E.A.length == 0 => Error("Multimodals need at least one input") - | _ => - withoutErrors - |> E.A.fmapi((index, item) => - (item, weights |> E.A.get(_, index) |> E.O.default(1.0)) - ) - |> (r => Ok(`PointwiseCombination(r))) + | _ => { + let components = withoutErrors + |> E.A.fmapi((index, t) => { + let w = weights |> E.A.get(_, index) |> E.O.default(1.0); + + `VerticalScaling(t, `Distribution(`Float(w))) + }); + + let pointwiseSum = components + |> Js.Array.sliceFrom(1) + |> E.A.fold_left((acc, x) => { + `PointwiseSum(acc, x) + }, E.A.unsafe_get(components, 0)) + + Ok(`Normalize(pointwiseSum)) + } }; }; - let arrayParser = (args:array(arg)):result(SymbolicDist.bigDist, string) => { + let arrayParser = (args:array(arg)):result(SymbolicDist.distTree, string) => { let samples = args |> E.A.fmap( fun @@ -200,13 +212,13 @@ module MathAdtToDistDst = { SymbolicDist.ContinuousShape.make(_pdf, cdf) }); switch(shape){ - | Some(s) => Ok(`Simple(`ContinuousShape(s))) + | Some(s) => Ok(`Distribution(`ContinuousShape(s))) | None => Error("Rendering did not work") } } - let rec functionParser = (r): result(SymbolicDist.bigDist, string) => + let rec functionParser = (r): result(SymbolicDist.distTree, string) => r |> ( fun @@ -218,7 +230,7 @@ module MathAdtToDistDst = { | Fn({name: "exponential", args}) => exponential(args) | Fn({name: "cauchy", args}) => cauchy(args) | Fn({name: "triangular", args}) => triangular(args) - | Value(f) => Ok(`Simple(`Float(f))) + | Value(f) => Ok(`Distribution(`Float(f))) | Fn({name: "mm", args}) => { let weights = args @@ -245,25 +257,54 @@ module MathAdtToDistDst = { let dists = possibleDists |> E.A.fmap(functionParser); multiModal(dists, weights); } - //| Fn({name: "add", args}) => add(args) + + 
| Fn({name: "add", args}) => { + args + |> E.A.fmap(functionParser) + |> (fun + | [|Ok(l), Ok(r)|] => Ok(`Combination(l, r, `AddOperation)) + | _ => Error("Addition needs two operands")) + } + | Fn({name: "subtract", args}) => { + args + |> E.A.fmap(functionParser) + |> (fun + | [|Ok(l), Ok(r)|] => Ok(`Combination(l, r, `SubtractOperation)) + | _ => Error("Subtraction needs two operands")) + } + | Fn({name: "multiply", args}) => { + args + |> E.A.fmap(functionParser) + |> (fun + | [|Ok(l), Ok(r)|] => Ok(`Combination(l, r, `MultiplyOperation)) + | _ => Error("Multiplication needs two operands")) + } + | Fn({name: "divide", args}) => { + args + |> E.A.fmap(functionParser) + |> (fun + | [|Ok(l), Ok(`Distribution(`Float(0.0)))|] => Error("Division by zero") + | [|Ok(l), Ok(r)|] => Ok(`Combination(l, r, `DivideOperation)) + | _ => Error("Division needs two operands")) + } | Fn({name}) => Error(name ++ ": function not supported") | _ => { Error("This type not currently supported"); } ); - let topLevel = (r): result(SymbolicDist.bigDist, string) => + let topLevel = (r): result(SymbolicDist.distTree, string) => r |> ( fun | Fn(_) => functionParser(r) - | Value(r) => Ok(`Simple(`Float(r))) + | Value(r) => Ok(`Distribution(`Float(r))) | Array(r) => arrayParser(r) | Symbol(_) => Error("Symbol not valid as top level") | Object(_) => Error("Object not valid as top level") ); - let run = (r): result(SymbolicDist.bigDist, string) => + let run = (r): result(SymbolicDist.distTree, string) => r |> MathAdtCleaner.run |> topLevel; }; diff --git a/src/distPlus/symbolic/SymbolicDist.re b/src/distPlus/symbolic/SymbolicDist.re index 05eec52c..f141bf92 100644 --- a/src/distPlus/symbolic/SymbolicDist.re +++ b/src/distPlus/symbolic/SymbolicDist.re @@ -50,41 +50,28 @@ type dist = [ | `Float(float) // Dirac delta at x. Practically useful only in the context of multimodals. ]; -/* Build a tree. 
+type integral = float; +type cutoffX = float; +type operation = [ + | `AddOperation + | `SubtractOperation + | `MultiplyOperation + | `DivideOperation + | `ExponentiateOperation +]; - Multiple operations possible: - - - PointwiseSum(Scalar, Scalar) - - PointwiseSum(WeightedDist, WeightedDist) - - PointwiseProduct(Scalar, Scalar) - - PointwiseProduct(Scalar, WeightedDist) - - PointwiseProduct(WeightedDist, WeightedDist) - - - IndependentVariableSum(WeightedDist, WeightedDist) [i.e., convolution] - - IndependentVariableProduct(WeightedDist, WeightedDist) [i.e. distribution product] - */ - -/*type weightedDist = (float, dist); - -type bigDistTree = - /* | DistLeaf(dist) */ - /* | ScalarLeaf(float) */ - /* | PointwiseScalarDistProduct(DistLeaf(d), ScalarLeaf(s)) */ - | WeightedDistLeaf(weightedDist) - | PointwiseNormalizedDistSum(array(bigDistTree)); - -let rec treeIntegral = item => { - switch (item) { - | WeightedDistLeaf((w, d)) => w - | PointwiseNormalizedDistSum(childTrees) => - childTrees |> E.A.fmap(treeIntegral) |> E.A.Floats.sum - }; -};*/ - -/* bigDist can either be a single distribution, or a - PointwiseCombination, i.e. 
an array of (dist, weight) tuples */ -type bigDist = [ | `Simple(dist) | `PointwiseCombination(pointwiseAdd)] -and pointwiseAdd = array((bigDist, float)); +type distTree = [ + | `Distribution(dist) + | `Combination(distTree, distTree, operation) + | `PointwiseSum(distTree, distTree) + | `PointwiseProduct(distTree, distTree) + | `VerticalScaling(distTree, distTree) + | `Normalize(distTree) + | `LeftTruncate(distTree, cutoffX) + | `RightTruncate(distTree, cutoffX) + | `Render(distTree) +] +and weightedDists = array((distTree, float)); module ContinuousShape = { type t = continuousShape; @@ -326,138 +313,331 @@ module GenericSimple = { }; }; -module PointwiseAddDistributionsWeighted = { - type t = pointwiseAdd; +module DistTree = { + type nodeResult = [ + | `Distribution(dist) + // RenderedShape: continuous xyShape, discrete xyShape, total value. + | `RenderedShape(DistTypes.continuousShape, DistTypes.discreteShape, integral) + ]; - let normalizeWeights = (weightedDists: t) => { - let total = weightedDists |> E.A.fmap(snd) |> E.A.Floats.sum; - weightedDists |> E.A.fmap(((d, w)) => (d, w /. total)); + let evaluateDistribution = (d: dist): nodeResult => { + // certain distributions we may want to evaluate to RenderedShapes right away, e.g. discrete + `Distribution(d) }; - let rec pdf = (x: float, weightedNormalizedDists: t) => - weightedNormalizedDists - |> E.A.fmap(((d, w)) => { - switch (d) { - | `PointwiseCombination(ts) => pdf(x, ts) *. w - | `Simple(d) => GenericSimple.pdf(x, d) *. w - } - }) - |> E.A.Floats.sum; + // This is a performance bottleneck! + // Using raw JS here so we can use native for loops and access array elements + // directly, without option checks. + let jsCombinationConvolve: (array(float), array(float), array(float), array(float), float => float => float) => (array(float), array(float)) = [%bs.raw + {| + function (s1xs, s1ys, s2xs, s2ys, func) { + const r = new Map(); - // TODO: perhaps rename into minCdfX? 
- // TODO: how should nonexistent min values be handled? They should never happen - let rec min = (dists: t) => - dists - |> E.A.fmap(((d, w)) => { - switch (d) { - | `PointwiseCombination(ts) => E.O.toExn("Dist has no min", min(ts)) - | `Simple(d) => GenericSimple.min(d) - } - }) - |> E.A.min; + // To convolve, add the xs and multiply the ys: + for (let i = 0; i < s1xs.length; i++) { + for (let j = 0; j < s2xs.length; j++) { + const x = func(s1xs[i], s2xs[j]); + const cv = r.get(x) | 0; + r.set(x, cv + s1ys[i] * s2ys[j]); // add up the ys, if same x + } + } - // TODO: perhaps rename into minCdfX? - let rec max = (dists: t) => - dists - |> E.A.fmap(((d, w)) => { - switch (d) { - | `PointwiseCombination(ts) => E.O.toExn("Dist has no max", max(ts)) - | `Simple(d) => GenericSimple.max(d) - } - }) - |> E.A.max; + const rxys = [...r.entries()]; + rxys.sort(([x1, y1], [x2, y2]) => x1 - x2); + const rxs = new Array(rxys.length); + const rys = new Array(rxys.length); - /*let rec discreteShape = (dists: t, sampleCount: int) => { - let discrete = - dists - |> E.A.fmap(((x, w)) => { - switch (d) { - | `Float(d) => Some((d, w)) // if the distribution is just a number, then the weight is considered the y - | _ => None - } - }) - |> E.A.O.concatSomes - |> E.A.fmap(((x, y)) => - ({xs: [|x|], ys: [|y|]}: DistTypes.xyShape) - ) - // take an array of xyShapes and combine them together - //* r - |> ( - fun - | `Float(r) => Some((r, e)) - | _ => None - ) - )*/ - |> Distributions.Discrete.reduce((+.)); - discrete; - };*/ + for (let i = 0; i < rxys.length; i++) { + rxs[i] = rxys[i][0]; + rys[i] = rxys[i][1]; + } + return [rxs, rys]; + } + |}]; - let rec findContinuousXs = (dists: t, sampleCount: int) => { - // we need to go through the tree of distributions and, for the continuous ones, find the xs at which - // later, all distributions will get evaluated. + let funcFromOp = (op: operation) => { + switch (op) { + | `AddOperation => (+.) + | `SubtractOperation => (-.) 
+ | `MultiplyOperation => (*.) + | `DivideOperation => (/.) + | `ExponentiateOperation => (**) + } + } - // we want to accumulate a set of xs. - let xs: array(float) = - dists - |> E.A.fold_left((accXs, (d, w)) => { - switch (d) { - | `Simple(t) when (GenericSimple.contType(t) == `Discrete) => accXs - | `Simple(d) => { - let xs = GenericSimple.interpolateXs(~xSelection=`ByWeight, d, sampleCount) - - E.A.append(accXs, xs) - } - | `PointwiseCombination(ts) => { - let xs = findContinuousXs(ts, sampleCount); - E.A.append(accXs, xs) - } - } - }, [||]); - xs + let renderDistributionToXYShape = (d: dist, sampleCount: int): (DistTypes.continuousShape, DistTypes.discreteShape) => { + // render the distribution into an XY shape + switch (d) { + | `Float(v) => (Distributions.Continuous.empty, {xs: [|v|], ys: [|1.0|]}) + | _ => { + let xs = GenericSimple.interpolateXs(~xSelection=`ByWeight, d, sampleCount); + let ys = xs |> E.A.fmap(x => GenericSimple.pdf(x, d)); + (Distributions.Continuous.make(`Linear, {xs: xs, ys: ys}), XYShape.T.empty) + } + } }; - /* Accumulate (accContShapes, accDistShapes), each of which is an array of {xs, ys} shapes. 
*/ - let rec accumulateContAndDiscShapes = (dists: t, continuousXs: array(float), currentWeight) => { - let normalized = normalizeWeights(dists); + let combinationDistributionOfXYShapes = (sc1: DistTypes.continuousShape, // continuous shape + sd1: DistTypes.discreteShape, // discrete shape + sc2: DistTypes.continuousShape, + sd2: DistTypes.discreteShape, func): (DistTypes.continuousShape, DistTypes.discreteShape) => { - normalized - |> E.A.fold_left(((accContShapes: array(DistTypes.xyShape), accDiscShapes: array(DistTypes.xyShape)), (d, w)) => { - switch (d) { + let (ccxs, ccys) = jsCombinationConvolve(sc1.xyShape.xs, sc1.xyShape.ys, sc2.xyShape.xs, sc2.xyShape.ys, func); + let (dcxs, dcys) = jsCombinationConvolve(sd1.xs, sd1.ys, sc2.xyShape.xs, sc2.xyShape.ys, func); + let (cdxs, cdys) = jsCombinationConvolve(sc1.xyShape.xs, sc1.xyShape.ys, sd2.xs, sd2.ys, func); + let (ddxs, ddys) = jsCombinationConvolve(sd1.xs, sd1.ys, sd2.xs, sd2.ys, func); - | `Simple(`Float(x)) => { - let ds: DistTypes.xyShape = {xs: [|x|], ys: [|w *. currentWeight|]}; - (accContShapes, E.A.append(accDiscShapes, [|ds|])) - } + let ccxy = Distributions.Continuous.make(`Linear, {xs: ccxs, ys: ccys}); + let dcxy = Distributions.Continuous.make(`Linear, {xs: dcxs, ys: dcys}); + let cdxy = Distributions.Continuous.make(`Linear, {xs: cdxs, ys: cdys}); + // the continuous parts are added up; only the discrete-discrete sum is discrete + let continuousShapeSum = Distributions.Continuous.reduce((+.), [|ccxy, dcxy, cdxy|]); - | `Simple(d) when (GenericSimple.contType(d) == `Continuous) => { - let ys = continuousXs |> E.A.fmap(x => GenericSimple.pdf(x, d) *. w *. 
currentWeight); - let cs = XYShape.T.fromArrays(continuousXs, ys); + let ddxy: DistTypes.discreteShape = {xs: cdxs, ys: cdys}; - (E.A.append(accContShapes, [|cs|]), accDiscShapes) - } + (continuousShapeSum, ddxy) + }; - | `Simple(d) => (accContShapes, accDiscShapes) // default -- should never happen + let evaluateCombinationDistribution = (et1: nodeResult, et2: nodeResult, op: operation, sampleCount: int) => { + /* return either a Distribution or a RenderedShape. Must integrate to 1. */ - | `PointwiseCombination(ts) => { - let (cs, ds) = accumulateContAndDiscShapes(ts, continuousXs, w *. currentWeight); - (E.A.append(accContShapes, cs), E.A.append(accDiscShapes, ds)) - } + let func = funcFromOp(op); + switch ((et1, et2, op)) { + /* Known cases: replace symbolic with symbolic distribution */ + | (`Distribution(`Float(v1)), `Distribution(`Float(v2)), _) => { + `Distribution(`Float(func(v1, v2))) + } + + | (`Distribution(`Float(v1)), `Distribution(`Normal(n2)), `AddOperation) => { + let n: normal = {mean: v1 +. n2.mean, stdev: n2.stdev}; + `Distribution(`Normal(n)) + } + + | (`Distribution(`Normal(n1)), `Distribution(`Normal(n2)), `AddOperation) => { + let n: normal = {mean: n1.mean +. n2.mean, stdev: sqrt(n1.stdev ** 2. +. 
n2.stdev ** 2.)}; + `Distribution(`Normal(n)); + } + + /* General cases: convolve the XYShapes */ + | (`Distribution(d1), `Distribution(d2), _) => { + let (sc1, sd1) = renderDistributionToXYShape(d1, sampleCount); + let (sc2, sd2) = renderDistributionToXYShape(d2, sampleCount); + let (sc, sd) = combinationDistributionOfXYShapes(sc1, sd1, sc2, sd2, func); + `RenderedShape(sc, sd, 1.0) + } + | (`Distribution(d1), `RenderedShape(sc2, sd2, i2), _) => { + let (sc1, sd1) = renderDistributionToXYShape(d1, sampleCount); + let (sc, sd) = combinationDistributionOfXYShapes(sc1, sd1, sc2, sd2, func); + `RenderedShape(sc, sd, i2) + } + | (`RenderedShape(sc1, sd1, i1), `Distribution(d2), _) => { + let (sc2, sd2) = renderDistributionToXYShape(d2, sampleCount); + let (sc, sd) = combinationDistributionOfXYShapes(sc1, sd1, sc2, sd2, func); + `RenderedShape(sc, sd, i1); + } + | (`RenderedShape(sc1, sd1, i1), `RenderedShape(sc2, sd2, i2), _) => { + // sum of two multimodals that have a continuous and discrete each. + let (sc, sd) = combinationDistributionOfXYShapes(sc1, sd1, sc2, sd2, func); + + `RenderedShape(sc, sd, i1); + } + } + }; + + let evaluatePointwiseSum = (et1: nodeResult, et2: nodeResult, sampleCount: int) => { + switch ((et1, et2)) { + /* Known cases: */ + | (`Distribution(`Float(v1)), `Distribution(`Float(v2))) => { + v1 == v2 + ? `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.make({xs: [|v1|], ys: [|2.|]}), 2.) + : `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) // TODO: add warning: shouldn't pointwise add scalars. + } + | (`Distribution(`Float(v1)), `Distribution(d2)) => { + let sd1: DistTypes.xyShape = {xs: [|v1|], ys: [|1.|]}; + let (sc2, sd2) = renderDistributionToXYShape(d2, sampleCount); + `RenderedShape(sc2, Distributions.Discrete.reduce((+.), [|sd1, sd2|]), 2.) 
+ } + | (`Distribution(d1), `Distribution(`Float(v2))) => { + let (sc1, sd1) = renderDistributionToXYShape(d1, sampleCount); + let sd2: DistTypes.xyShape = {xs: [|v2|], ys: [|1.|]}; + `RenderedShape(sc1, Distributions.Discrete.reduce((+.), [|sd1, sd2|]), 2.) + } + | (`Distribution(d1), `Distribution(d2)) => { + let (sc1, sd1) = renderDistributionToXYShape(d1, sampleCount); + let (sc2, sd2) = renderDistributionToXYShape(d2, sampleCount); + `RenderedShape(Distributions.Continuous.reduce((+.), [|sc1, sc2|]), Distributions.Discrete.reduce((+.), [|sd1, sd2|]), 2.) + } + | (`Distribution(d1), `RenderedShape(sc2, sd2, i2)) + | (`RenderedShape(sc2, sd2, i2), `Distribution(d1)) => { + let (sc1, sd1) = renderDistributionToXYShape(d1, sampleCount); + `RenderedShape(Distributions.Continuous.reduce((+.), [|sc1, sc2|]), Distributions.Discrete.reduce((+.), [|sd1, sd2|]), 1. +. i2) + } + | (`RenderedShape(sc1, sd1, i1), `RenderedShape(sc2, sd2, i2)) => { + Js.log3("Reducing continuous rr", sc1, sc2); + Js.log2("Continuous reduction:", Distributions.Continuous.reduce((+.), [|sc1, sc2|])); + Js.log2("Discrete reduction:", Distributions.Discrete.reduce((+.), [|sd1, sd2|])); + `RenderedShape(Distributions.Continuous.reduce((+.), [|sc1, sc2|]), Distributions.Discrete.reduce((+.), [|sd1, sd2|]), i1 +. i2) + } + } + }; + + let evaluatePointwiseProduct = (et1: nodeResult, et2: nodeResult, sampleCount: int) => { + switch ((et1, et2)) { + /* Known cases: */ + | (`Distribution(`Float(v1)), `Distribution(`Float(v2))) => { + v1 == v2 + ? `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.make({xs: [|v1|], ys: [|1.|]}), 1.) + : `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) // TODO: add warning: shouldn't pointwise multiply scalars. 
+ } + | (`Distribution(`Float(v1)), `Distribution(d2)) => { + // evaluate d2 at v1 + let y = GenericSimple.pdf(v1, d2); + `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.make({xs: [|v1|], ys: [|y|]}), y) + } + | (`Distribution(d1), `Distribution(`Float(v2))) => { + // evaluate d1 at v2 + let y = GenericSimple.pdf(v2, d1); + `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.make({xs: [|v2|], ys: [|y|]}), y) + } + | (`Distribution(`Normal(n1)), `Distribution(`Normal(n2))) => { + let mean = (n1.mean *. n2.stdev**2. +. n2.mean *. n1.stdev**2.) /. (n1.stdev**2. +. n2.stdev**2.); + let stdev = 1. /. ((1. /. n1.stdev**2.) +. (1. /. n2.stdev**2.)); + let integral = 0; // TODO + `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) + } + /* General cases */ + | (`Distribution(d1), `Distribution(d2)) => { + // NOT IMPLEMENTED YET + // TODO: evaluate integral properly + let (sc1, sd1) = renderDistributionToXYShape(d1, sampleCount); + let (sc2, sd2) = renderDistributionToXYShape(d2, sampleCount); + `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) + } + | (`Distribution(d1), `RenderedShape(sc2, sd2, i2)) => { + // NOT IMPLEMENTED YET + // TODO: evaluate integral properly + let (sc1, sd1) = renderDistributionToXYShape(d1, sampleCount); + `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) + } + | (`RenderedShape(sc1, sd1, i1), `Distribution(d1)) => { + // NOT IMPLEMENTED YET + // TODO: evaluate integral properly + let (sc2, sd2) = renderDistributionToXYShape(d1, sampleCount); + `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) + } + | (`RenderedShape(sc1, sd1, i1), `RenderedShape(sc2, sd2, i2)) => { + `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) + } + } + }; + + + let evaluateNormalize = (et: nodeResult, sampleCount: int) => { + // just divide everything by the integral. 
+ switch (et) { + | `RenderedShape(sc, sd, i) => { + // loop through all ys and divide them by i + let normalize = (s: DistTypes.xyShape): DistTypes.xyShape => {xs: s.xs, ys: s.ys |> E.A.fmap(y => y /. i)}; + + let scn = sc |> Distributions.Continuous.shapeMap(normalize); + let sdn = sd |> normalize; + + `RenderedShape(scn, sdn, 1.) + } + | `Distribution(d) => `Distribution(d) // any kind of atomic dist should already be normalized -- TODO: THIS IS ACTUALLY FALSE! E.g. pointwise product of normal * normal + } + }; + + let evaluateTruncate = (et: nodeResult, xc: cutoffX, compareFunc: (float, float) => bool, sampleCount: int) => { + let cut = (s: DistTypes.xyShape): DistTypes.xyShape => { + let (xs, ys) = s.ys + |> Belt.Array.zip(s.xs) + |> E.A.filter(((x, y)) => compareFunc(x, xc)) + |> Belt.Array.unzip + + let cutShape: DistTypes.xyShape = {xs, ys}; + cutShape; + }; + + switch (et) { + | `Distribution(d) => { + let (sc, sd) = renderDistributionToXYShape(d, sampleCount); + + let scc = sc |> Distributions.Continuous.shapeMap(cut); + let sdc = sd |> cut; + + let newIntegral = 1.; // TODO + + `RenderedShape(scc, sdc, newIntegral); + } + | `RenderedShape(sc, sd, i) => { + let scc = sc |> Distributions.Continuous.shapeMap(cut); + let sdc = sd |> cut; + + let newIntegral = 1.; // TODO + + `RenderedShape(scc, sdc, newIntegral); + } + } + }; + + let evaluateVerticalScaling = (et1: nodeResult, et2: nodeResult, sampleCount: int) => { + let scale = (i: float, s: DistTypes.xyShape): DistTypes.xyShape => {xs: s.xs, ys: s.ys |> E.A.fmap(y => y *. 
i)}; + + switch ((et1, et2)) { + | (`Distribution(`Float(v)), `Distribution(d)) + | (`Distribution(d), `Distribution(`Float(v))) => { + let (sc, sd) = renderDistributionToXYShape(d, sampleCount); + + let scc = sc |> Distributions.Continuous.shapeMap(scale(v)); + let sdc = sd |> scale(v); + + let newIntegral = v; // TODO + + `RenderedShape(scc, sdc, newIntegral); + } + | (`Distribution(`Float(v)), `RenderedShape(sc, sd, i)) + | (`RenderedShape(sc, sd, i), `Distribution(`Float(v))) => { + let scc = sc |> Distributions.Continuous.shapeMap(scale(v)); + let sdc = sd |> scale(v); + + let newIntegral = v; // TODO + + `RenderedShape(scc, sdc, newIntegral); } + | _ => `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) // TODO: give warning + } + } - }, ([||]: array(DistTypes.xyShape), [||]: array(DistTypes.xyShape))) + let renderNode = (et: nodeResult, sampleCount: int) => { + switch (et) { + | `Distribution(d) => { + let (sc, sd) = renderDistributionToXYShape(d, sampleCount); + `RenderedShape(sc, sd, 1.0); + } + | s => s + } + } + + let rec evaluateNode = (treeNode: distTree, sampleCount: int): nodeResult => { + // returns either a new symbolic distribution + switch (treeNode) { + | `Distribution(d) => evaluateDistribution(d) + | `Combination(t1, t2, op) => evaluateCombinationDistribution(evaluateNode(t1, sampleCount), evaluateNode(t2, sampleCount), op, sampleCount) + | `PointwiseSum(t1, t2) => evaluatePointwiseSum(evaluateNode(t1, sampleCount), evaluateNode(t2, sampleCount), sampleCount) + | `PointwiseProduct(t1, t2) => evaluatePointwiseProduct(evaluateNode(t1, sampleCount), evaluateNode(t2, sampleCount), sampleCount) + | `VerticalScaling(t1, t2) => evaluateVerticalScaling(evaluateNode(t1, sampleCount), evaluateNode(t2, sampleCount), sampleCount) + | `Normalize(t) => evaluateNormalize(evaluateNode(t, sampleCount), sampleCount) + | `LeftTruncate(t, x) => evaluateTruncate(evaluateNode(t, sampleCount), x, (<=), sampleCount) + | `RightTruncate(t, 
x) => evaluateTruncate(evaluateNode(t, sampleCount), x, (>=), sampleCount) + | `Render(t) => renderNode(evaluateNode(t, sampleCount), sampleCount) + } }; - /* - We will assume that each dist (of t) in the multimodal has a total of one. - We can therefore normalize the weights of the parts. - - However, a multimodal can consist of both discrete and continuous shapes. - These need to be added and collected individually. - */ - let toShape = (dists: t, sampleCount: int) => { - let continuousXs = findContinuousXs(dists, sampleCount); + let toShape = (treeNode: distTree, sampleCount: int) => { + /*let continuousXs = findContinuousXs(dists, sampleCount); continuousXs |> Array.fast_sort(compare); let (contShapes, distShapes) = accumulateContAndDiscShapes(dists, continuousXs, 1.0); @@ -469,60 +649,42 @@ module PointwiseAddDistributionsWeighted = { }, {xs: continuousXs, ys: Array.make(Array.length(continuousXs), 0.0)}) |> Distributions.Continuous.make(`Linear); - let combinedDiscrete = Distributions.Discrete.reduce((+.), distShapes) + let combinedDiscrete = Distributions.Discrete.reduce((+.), distShapes)*/ - let shape = MixedShapeBuilder.buildSimple(~continuous=Some(combinedContinuous), ~discrete=combinedDiscrete); + let treeShape = evaluateNode(`Render(`Normalize(treeNode)), sampleCount); + switch (treeShape) { + | `Distribution(_) => E.O.toExn("No shape found!", None) + | `RenderedShape(sc, sd, _) => { + let shape = MixedShapeBuilder.buildSimple(~continuous=Some(sc), ~discrete=sd); - shape |> E.O.toExt(""); + shape |> E.O.toExt(""); + } + } }; - let rec toString = (dists: t): string => { - let distString = - dists - |> E.A.fmap(((d, _)) => - switch (d) { - | `Simple(d) => GenericSimple.toString(d) - | `PointwiseCombination(ts: t) => ts |> toString - } - ) - |> Js.Array.joinWith(","); + let rec toString = (treeNode: distTree): string => { + let stringFromOp = op => switch (op) { + | `AddOperation => " + " + | `SubtractOperation => " - " + | `MultiplyOperation => " * " + | 
`DivideOperation => " / " + | `ExponentiateOperation => "^" + }; - // mm(normal(0,1), normal(1,2)) => "multimodal(normal(0,1), normal(1,2), ) - - let weights = - dists - |> E.A.fmap(((_, w)) => - Js.Float.toPrecisionWithPrecision(w, ~digits=2) - ) - |> Js.Array.joinWith(","); - - {j|multimodal($distString, [$weights])|j}; + switch (treeNode) { + | `Distribution(d) => GenericSimple.toString(d) + | `Combination(t1, t2, op) => toString(t1) ++ stringFromOp(op) ++ toString(t2) + | `PointwiseSum(t1, t2) => toString(t1) ++ " .+ " ++ toString(t2) + | `PointwiseProduct(t1, t2) => toString(t1) ++ " .* " ++ toString(t2) + | `VerticalScaling(t1, t2) => toString(t1) ++ " @ " ++ toString(t2) + | `Normalize(t) => "normalize(" ++ toString(t) ++ ")" + | `LeftTruncate(t, x) => "leftTruncate(" ++ toString(t) ++ ", " ++ string_of_float(x) ++ ")" + | `RightTruncate(t, x) => "rightTruncate(" ++ toString(t) ++ ", " ++ string_of_float(x) ++ ")" + } }; }; -// assume that recursive pointwiseNormalizedDistSums are the only type of operation there is. -// in the original, it was a list of (dist, weight) tuples. Now, it's a tree of (dist, weight) tuples, just that every -// dist can be either a GenericSimple or another PointwiseAdd. +let toString = (treeNode: distTree) => DistTree.toString(treeNode) -/*let toString = (r: bigDistTree) => { - switch (r) { - | WeightedDistLeaf((w, d)) => GenericWeighted.toString(w) // "normal " - | PointwiseNormalizedDistSum(childTrees) => childTrees |> E.A.fmap(toString) |> Js.Array.joinWith("") - } - }*/ - -let toString = (r: bigDist) => - // we need to recursively create the string representation of the tree. 
- r - |> ( - fun - | `Simple(d) => GenericSimple.toString(d) - | `PointwiseCombination(d) => - PointwiseAddDistributionsWeighted.toString(d) - ); - -let toShape = n => - fun - | `Simple(d) => GenericSimple.toShape(~xSelection=`ByWeight, d, n) - | `PointwiseCombination(d) => - PointwiseAddDistributionsWeighted.toShape(d, n); +let toShape = (sampleCount: int, treeNode: distTree) => + DistTree.toShape(treeNode, sampleCount) //~xSelection=`ByWeight, From 9b104521566ca6d1773b1072fe3266f46c7e9fc1 Mon Sep 17 00:00:00 2001 From: Sebastian Kosch Date: Fri, 12 Jun 2020 23:53:01 -0700 Subject: [PATCH 05/31] Fix division by zero --- src/distPlus/symbolic/SymbolicDist.re | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/distPlus/symbolic/SymbolicDist.re b/src/distPlus/symbolic/SymbolicDist.re index f141bf92..9743f587 100644 --- a/src/distPlus/symbolic/SymbolicDist.re +++ b/src/distPlus/symbolic/SymbolicDist.re @@ -410,6 +410,7 @@ module DistTree = { `Distribution(`Float(func(v1, v2))) } + | (`Distribution(`Normal(n2)), `Distribution(`Float(v1)), `AddOperation) | (`Distribution(`Float(v1)), `Distribution(`Normal(n2)), `AddOperation) => { let n: normal = {mean: v1 +. n2.mean, stdev: n2.stdev}; `Distribution(`Normal(n)) @@ -420,6 +421,22 @@ module DistTree = { `Distribution(`Normal(n)); } + | (`Distribution(`Normal(n1)), `Distribution(`Normal(n2)), `SubtractOperation) => { + let n: normal = {mean: n1.mean -. n2.mean, stdev: sqrt(n1.stdev ** 2. +. n2.stdev ** 2.)}; + `Distribution(`Normal(n)); + } + + | (`Distribution(`Lognormal(l1)), `Distribution(`Lognormal(l2)), `MultiplyOperation) => { + let l: lognormal = {mu: l1.mu +. l2.mu, sigma: l1.sigma +. l2.sigma}; + `Distribution(`Lognormal(l)); + } + + | (`Distribution(`Lognormal(l1)), `Distribution(`Lognormal(l2)), `DivideOperation) => { + let l: lognormal = {mu: l1.mu -. l2.mu, sigma: l1.sigma +. 
l2.sigma}; + `Distribution(`Lognormal(l)); + } + + /* General cases: convolve the XYShapes */ | (`Distribution(d1), `Distribution(d2), _) => { let (sc1, sd1) = renderDistributionToXYShape(d1, sampleCount); @@ -537,6 +554,9 @@ module DistTree = { let evaluateNormalize = (et: nodeResult, sampleCount: int) => { // just divide everything by the integral. switch (et) { + | `RenderedShape(sc, sd, 0.) => { + `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) + } | `RenderedShape(sc, sd, i) => { // loop through all ys and divide them by i let normalize = (s: DistTypes.xyShape): DistTypes.xyShape => {xs: s.xs, ys: s.ys |> E.A.fmap(y => y /. i)}; From 8827650da364062f757951ff7fa107e0762f188f Mon Sep 17 00:00:00 2001 From: Sebastian Kosch Date: Sat, 13 Jun 2020 18:46:38 -0700 Subject: [PATCH 06/31] Slightly cleaned up tree evaluation --- src/distPlus/symbolic/MathJsParser.re | 22 ++++ src/distPlus/symbolic/SymbolicDist.re | 176 ++++++++++++++------------ 2 files changed, 115 insertions(+), 83 deletions(-) diff --git a/src/distPlus/symbolic/MathJsParser.re b/src/distPlus/symbolic/MathJsParser.re index b962df90..6dba65e2 100644 --- a/src/distPlus/symbolic/MathJsParser.re +++ b/src/distPlus/symbolic/MathJsParser.re @@ -197,6 +197,7 @@ module MathAdtToDistDst = { }; let arrayParser = (args:array(arg)):result(SymbolicDist.distTree, string) => { + Js.log2("SAMPLING NOW!", args); let samples = args |> E.A.fmap( fun @@ -287,6 +288,27 @@ module MathAdtToDistDst = { | [|Ok(l), Ok(r)|] => Ok(`Combination(l, r, `DivideOperation)) | _ => Error("Division needs two operands")) } + | Fn({name: "pow", args}) => { + args + |> E.A.fmap(functionParser) + |> (fun + | [|Ok(l), Ok(r)|] => Ok(`Combination(l, r, `ExponentiateOperation)) + | _ => Error("Exponentiations needs two operands")) + } + | Fn({name: "leftTruncate", args}) => { + args + |> E.A.fmap(functionParser) + |> (fun + | [|Ok(l), Ok(`Distribution(`Float(r)))|] => Ok(`LeftTruncate(l, r)) + | _ => 
Error("leftTruncate needs two arguments: the expression and the cutoff")) + } + | Fn({name: "rightTruncate", args}) => { + args + |> E.A.fmap(functionParser) + |> (fun + | [|Ok(l), Ok(`Distribution(`Float(r)))|] => Ok(`RightTruncate(l, r)) + | _ => Error("rightTruncate needs two arguments: the expression and the cutoff")) + } | Fn({name}) => Error(name ++ ": function not supported") | _ => { Error("This type not currently supported"); diff --git a/src/distPlus/symbolic/SymbolicDist.re b/src/distPlus/symbolic/SymbolicDist.re index 9743f587..7f0fbd7b 100644 --- a/src/distPlus/symbolic/SymbolicDist.re +++ b/src/distPlus/symbolic/SymbolicDist.re @@ -277,34 +277,34 @@ module GenericSimple = { /* This function returns a list of x's at which to evaluate the overall distribution (for rendering). This function is called separately for each individual distribution. - When called with xSelection=`Linear, this function will return (sampleCount) x's, evenly + When called with xSelection=`Linear, this function will return (n) x's, evenly distributed between the min and max of the distribution (whatever those are defined to be above). When called with xSelection=`ByWeight, this function will distribute the x's such as to match the cumulative shape of the distribution. This is slower but may give better results. */ let interpolateXs = - (~xSelection: [ | `Linear | `ByWeight]=`Linear, dist: dist, sampleCount) => { + (~xSelection: [ | `Linear | `ByWeight]=`Linear, dist: dist, n) => { switch (xSelection, dist) { - | (`Linear, _) => E.A.Floats.range(min(dist), max(dist), sampleCount) + | (`Linear, _) => E.A.Floats.range(min(dist), max(dist), n) | (`ByWeight, `Uniform(n)) => // In `ByWeight mode, uniform distributions get special treatment because we need two x's // on either side for proper rendering (just left and right of the discontinuities). let dx = 0.00001 *. (n.high -. n.low); [|n.low -. dx, n.low +. dx, n.high -. dx, n.high +. 
dx|]; | (`ByWeight, _) => - let ys = E.A.Floats.range(minCdfValue, maxCdfValue, sampleCount); + let ys = E.A.Floats.range(minCdfValue, maxCdfValue, n); ys |> E.A.fmap(y => inv(y, dist)); }; }; let toShape = - (~xSelection: [ | `Linear | `ByWeight]=`Linear, dist: dist, sampleCount) + (~xSelection: [ | `Linear | `ByWeight]=`Linear, dist: dist, n) : DistTypes.shape => { switch (dist) { | `ContinuousShape(n) => n.pdf |> Distributions.Continuous.T.toShape | dist => - let xs = interpolateXs(~xSelection, dist, sampleCount); + let xs = interpolateXs(~xSelection, dist, n); let ys = xs |> E.A.fmap(r => pdf(r, dist)); XYShape.T.fromArrays(xs, ys) |> Distributions.Continuous.make(`Linear, _) @@ -321,23 +321,43 @@ module DistTree = { ]; let evaluateDistribution = (d: dist): nodeResult => { - // certain distributions we may want to evaluate to RenderedShapes right away, e.g. discrete `Distribution(d) }; // This is a performance bottleneck! // Using raw JS here so we can use native for loops and access array elements // directly, without option checks. - let jsCombinationConvolve: (array(float), array(float), array(float), array(float), float => float => float) => (array(float), array(float)) = [%bs.raw + let jsContinuousCombinationConvolve: (array(float), array(float), array(float), array(float), float => float => float) => array(array((float, float))) = [%bs.raw + {| + function (s1xs, s1ys, s2xs, s2ys, func) { + // For continuous-continuous convolution, use linear interpolation. 
+ // Let's assume we got downsampled distributions + + const outXYShapes = new Array(s1xs.length); + for (let i = 0; i < s1xs.length; i++) { + // create a new distribution + const dxyShape = new Array(s2xs.length); + for (let j = 0; j < s2xs.length; j++) { + dxyShape[j] = [func(s1xs[i], s2xs[j]), (s1ys[i] * s2ys[j])]; + } + outXYShapes[i] = dxyShape; + } + + return outXYShapes; + } + |}]; + + let jsDiscreteCombinationConvolve: (array(float), array(float), array(float), array(float), float => float => float) => (array(float), array(float)) = [%bs.raw {| function (s1xs, s1ys, s2xs, s2ys, func) { const r = new Map(); - // To convolve, add the xs and multiply the ys: for (let i = 0; i < s1xs.length; i++) { for (let j = 0; j < s2xs.length; j++) { + const x = func(s1xs[i], s2xs[j]); const cv = r.get(x) | 0; + r.set(x, cv + s1ys[i] * s2ys[j]); // add up the ys, if same x } } @@ -367,12 +387,12 @@ module DistTree = { } } - let renderDistributionToXYShape = (d: dist, sampleCount: int): (DistTypes.continuousShape, DistTypes.discreteShape) => { + let renderDistributionToXYShape = (d: dist, n: int): (DistTypes.continuousShape, DistTypes.discreteShape) => { // render the distribution into an XY shape switch (d) { | `Float(v) => (Distributions.Continuous.empty, {xs: [|v|], ys: [|1.0|]}) | _ => { - let xs = GenericSimple.interpolateXs(~xSelection=`ByWeight, d, sampleCount); + let xs = GenericSimple.interpolateXs(~xSelection=`ByWeight, d, n); let ys = xs |> E.A.fmap(x => GenericSimple.pdf(x, d)); (Distributions.Continuous.make(`Linear, {xs: xs, ys: ys}), XYShape.T.empty) } @@ -384,23 +404,37 @@ module DistTree = { sc2: DistTypes.continuousShape, sd2: DistTypes.discreteShape, func): (DistTypes.continuousShape, DistTypes.discreteShape) => { - let (ccxs, ccys) = jsCombinationConvolve(sc1.xyShape.xs, sc1.xyShape.ys, sc2.xyShape.xs, sc2.xyShape.ys, func); - let (dcxs, dcys) = jsCombinationConvolve(sd1.xs, sd1.ys, sc2.xyShape.xs, sc2.xyShape.ys, func); - let (cdxs, cdys) = 
jsCombinationConvolve(sc1.xyShape.xs, sc1.xyShape.ys, sd2.xs, sd2.ys, func); - let (ddxs, ddys) = jsCombinationConvolve(sd1.xs, sd1.ys, sd2.xs, sd2.ys, func); - - let ccxy = Distributions.Continuous.make(`Linear, {xs: ccxs, ys: ccys}); - let dcxy = Distributions.Continuous.make(`Linear, {xs: dcxs, ys: dcys}); - let cdxy = Distributions.Continuous.make(`Linear, {xs: cdxs, ys: cdys}); - // the continuous parts are added up; only the discrete-discrete sum is discrete - let continuousShapeSum = Distributions.Continuous.reduce((+.), [|ccxy, dcxy, cdxy|]); - + // First, deal with the discrete-discrete convolution: + let (ddxs, ddys) = jsDiscreteCombinationConvolve(sd1.xs, sd1.ys, sd2.xs, sd2.ys, func); let ddxy: DistTypes.discreteShape = {xs: cdxs, ys: cdys}; + // Then, do the other three: + let downsample = (sc: DistTypes.continuousShape) => { + let scLength = E.A.length(sc.xyShape.xs); + let scSqLength = sqrt(float_of_int(scLength)); + scSqLength > 10. ? Distributions.Continuous.T.truncate(int_of_float(scSqLength), sc) : sc; + }; + + let combinePointConvolutionResults = ccs + |> E.A.fmap(s => { + // s is an array of (x, y) objects + let (xs, ys) = Belt.Array.unzip(s); + Distributions.Continuous.make(`Linear, {xs, ys}); + }) + |> Distributions.Continuous.reduce((+.)); + + let sc1d = downsample(sc1); + let sc2d = downsample(sc2); + + let ccxy = jsContinuousCombinationConvolve(sc1d.xyShape.xs, sc1d.xyShape.ys, sc2d.xyShape.xs, sc2d.xyShape.ys, func) |> combinePointConvolutionResults; + let dcxy = jsContinuousCombinationConvolve(sc1d.xyShape.xs, sc1d.xyShape.ys, sc2d.xyShape.xs, sc2d.xyShape.ys, func) |> combinePointConvolutionResults; + let cdxy = jsContinuousCombinationConvolve(sc1d.xyShape.xs, sc1d.xyShape.ys, sc2d.xyShape.xs, sc2d.xyShape.ys, func) |> combinePointConvolutionResults; + let continuousShapeSum = Distributions.Continuous.reduce((+.), [|ccxy, dcxy, cdxy|]); + (continuousShapeSum, ddxy) }; - let evaluateCombinationDistribution = (et1: nodeResult, et2: 
nodeResult, op: operation, sampleCount: int) => { + let evaluateCombinationDistribution = (et1: nodeResult, et2: nodeResult, op: operation, n: int) => { /* return either a Distribution or a RenderedShape. Must integrate to 1. */ let func = funcFromOp(op); @@ -439,31 +473,26 @@ module DistTree = { /* General cases: convolve the XYShapes */ | (`Distribution(d1), `Distribution(d2), _) => { - let (sc1, sd1) = renderDistributionToXYShape(d1, sampleCount); - let (sc2, sd2) = renderDistributionToXYShape(d2, sampleCount); + let (sc1, sd1) = renderDistributionToXYShape(d1, n); + let (sc2, sd2) = renderDistributionToXYShape(d2, n); let (sc, sd) = combinationDistributionOfXYShapes(sc1, sd1, sc2, sd2, func); `RenderedShape(sc, sd, 1.0) } - | (`Distribution(d1), `RenderedShape(sc2, sd2, i2), _) => { - let (sc1, sd1) = renderDistributionToXYShape(d1, sampleCount); + | (`Distribution(d2), `RenderedShape(sc1, sd1, i1), _) + | (`RenderedShape(sc1, sd1, i1), `Distribution(d2), _) => { + let (sc1, sd1) = renderDistributionToXYShape(d1, n); let (sc, sd) = combinationDistributionOfXYShapes(sc1, sd1, sc2, sd2, func); `RenderedShape(sc, sd, i2) } - | (`RenderedShape(sc1, sd1, i1), `Distribution(d2), _) => { - let (sc2, sd2) = renderDistributionToXYShape(d2, sampleCount); - let (sc, sd) = combinationDistributionOfXYShapes(sc1, sd1, sc2, sd2, func); - `RenderedShape(sc, sd, i1); - } | (`RenderedShape(sc1, sd1, i1), `RenderedShape(sc2, sd2, i2), _) => { // sum of two multimodals that have a continuous and discrete each. let (sc, sd) = combinationDistributionOfXYShapes(sc1, sd1, sc2, sd2, func); - `RenderedShape(sc, sd, i1); } } }; - let evaluatePointwiseSum = (et1: nodeResult, et2: nodeResult, sampleCount: int) => { + let evaluatePointwiseSum = (et1: nodeResult, et2: nodeResult, n: int) => { switch ((et1, et2)) { /* Known cases: */ | (`Distribution(`Float(v1)), `Distribution(`Float(v2))) => { @@ -471,36 +500,29 @@ module DistTree = { ? 
`RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.make({xs: [|v1|], ys: [|2.|]}), 2.) : `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) // TODO: add warning: shouldn't pointwise add scalars. } - | (`Distribution(`Float(v1)), `Distribution(d2)) => { + | (`Distribution(`Float(v1)), `Distribution(d2)) + | (`Distribution(d2), `Distribution(`Float(v1))) => { let sd1: DistTypes.xyShape = {xs: [|v1|], ys: [|1.|]}; - let (sc2, sd2) = renderDistributionToXYShape(d2, sampleCount); + let (sc2, sd2) = renderDistributionToXYShape(d2, n); `RenderedShape(sc2, Distributions.Discrete.reduce((+.), [|sd1, sd2|]), 2.) } - | (`Distribution(d1), `Distribution(`Float(v2))) => { - let (sc1, sd1) = renderDistributionToXYShape(d1, sampleCount); - let sd2: DistTypes.xyShape = {xs: [|v2|], ys: [|1.|]}; - `RenderedShape(sc1, Distributions.Discrete.reduce((+.), [|sd1, sd2|]), 2.) - } | (`Distribution(d1), `Distribution(d2)) => { - let (sc1, sd1) = renderDistributionToXYShape(d1, sampleCount); - let (sc2, sd2) = renderDistributionToXYShape(d2, sampleCount); + let (sc1, sd1) = renderDistributionToXYShape(d1, n); + let (sc2, sd2) = renderDistributionToXYShape(d2, n); `RenderedShape(Distributions.Continuous.reduce((+.), [|sc1, sc2|]), Distributions.Discrete.reduce((+.), [|sd1, sd2|]), 2.) } | (`Distribution(d1), `RenderedShape(sc2, sd2, i2)) | (`RenderedShape(sc2, sd2, i2), `Distribution(d1)) => { - let (sc1, sd1) = renderDistributionToXYShape(d1, sampleCount); + let (sc1, sd1) = renderDistributionToXYShape(d1, n); `RenderedShape(Distributions.Continuous.reduce((+.), [|sc1, sc2|]), Distributions.Discrete.reduce((+.), [|sd1, sd2|]), 1. +. 
i2) } | (`RenderedShape(sc1, sd1, i1), `RenderedShape(sc2, sd2, i2)) => { - Js.log3("Reducing continuous rr", sc1, sc2); - Js.log2("Continuous reduction:", Distributions.Continuous.reduce((+.), [|sc1, sc2|])); - Js.log2("Discrete reduction:", Distributions.Discrete.reduce((+.), [|sd1, sd2|])); `RenderedShape(Distributions.Continuous.reduce((+.), [|sc1, sc2|]), Distributions.Discrete.reduce((+.), [|sd1, sd2|]), i1 +. i2) } } }; - let evaluatePointwiseProduct = (et1: nodeResult, et2: nodeResult, sampleCount: int) => { + let evaluatePointwiseProduct = (et1: nodeResult, et2: nodeResult, n: int) => { switch ((et1, et2)) { /* Known cases: */ | (`Distribution(`Float(v1)), `Distribution(`Float(v2))) => { @@ -528,20 +550,20 @@ module DistTree = { | (`Distribution(d1), `Distribution(d2)) => { // NOT IMPLEMENTED YET // TODO: evaluate integral properly - let (sc1, sd1) = renderDistributionToXYShape(d1, sampleCount); - let (sc2, sd2) = renderDistributionToXYShape(d2, sampleCount); + let (sc1, sd1) = renderDistributionToXYShape(d1, n); + let (sc2, sd2) = renderDistributionToXYShape(d2, n); `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) } | (`Distribution(d1), `RenderedShape(sc2, sd2, i2)) => { // NOT IMPLEMENTED YET // TODO: evaluate integral properly - let (sc1, sd1) = renderDistributionToXYShape(d1, sampleCount); + let (sc1, sd1) = renderDistributionToXYShape(d1, n); `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) } | (`RenderedShape(sc1, sd1, i1), `Distribution(d1)) => { // NOT IMPLEMENTED YET // TODO: evaluate integral properly - let (sc2, sd2) = renderDistributionToXYShape(d1, sampleCount); + let (sc2, sd2) = renderDistributionToXYShape(d1, n); `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) 
} | (`RenderedShape(sc1, sd1, i1), `RenderedShape(sc2, sd2, i2)) => { @@ -551,7 +573,7 @@ module DistTree = { }; - let evaluateNormalize = (et: nodeResult, sampleCount: int) => { + let evaluateNormalize = (et: nodeResult, n: int) => { // just divide everything by the integral. switch (et) { | `RenderedShape(sc, sd, 0.) => { @@ -570,7 +592,7 @@ module DistTree = { } }; - let evaluateTruncate = (et: nodeResult, xc: cutoffX, compareFunc: (float, float) => bool, sampleCount: int) => { + let evaluateTruncate = (et: nodeResult, xc: cutoffX, compareFunc: (float, float) => bool, n: int) => { let cut = (s: DistTypes.xyShape): DistTypes.xyShape => { let (xs, ys) = s.ys |> Belt.Array.zip(s.xs) @@ -583,7 +605,7 @@ module DistTree = { switch (et) { | `Distribution(d) => { - let (sc, sd) = renderDistributionToXYShape(d, sampleCount); + let (sc, sd) = renderDistributionToXYShape(d, n); let scc = sc |> Distributions.Continuous.shapeMap(cut); let sdc = sd |> cut; @@ -603,13 +625,13 @@ module DistTree = { } }; - let evaluateVerticalScaling = (et1: nodeResult, et2: nodeResult, sampleCount: int) => { + let evaluateVerticalScaling = (et1: nodeResult, et2: nodeResult, n: int) => { let scale = (i: float, s: DistTypes.xyShape): DistTypes.xyShape => {xs: s.xs, ys: s.ys |> E.A.fmap(y => y *. 
i)}; switch ((et1, et2)) { | (`Distribution(`Float(v)), `Distribution(d)) | (`Distribution(d), `Distribution(`Float(v))) => { - let (sc, sd) = renderDistributionToXYShape(d, sampleCount); + let (sc, sd) = renderDistributionToXYShape(d, n); let scc = sc |> Distributions.Continuous.shapeMap(scale(v)); let sdc = sd |> scale(v); @@ -631,47 +653,34 @@ module DistTree = { } } - let renderNode = (et: nodeResult, sampleCount: int) => { + let renderNode = (et: nodeResult, n: int) => { switch (et) { | `Distribution(d) => { - let (sc, sd) = renderDistributionToXYShape(d, sampleCount); + let (sc, sd) = renderDistributionToXYShape(d, n); `RenderedShape(sc, sd, 1.0); } | s => s } } - let rec evaluateNode = (treeNode: distTree, sampleCount: int): nodeResult => { + let rec evaluateNode = (treeNode: distTree, n: int): nodeResult => { // returns either a new symbolic distribution switch (treeNode) { | `Distribution(d) => evaluateDistribution(d) - | `Combination(t1, t2, op) => evaluateCombinationDistribution(evaluateNode(t1, sampleCount), evaluateNode(t2, sampleCount), op, sampleCount) - | `PointwiseSum(t1, t2) => evaluatePointwiseSum(evaluateNode(t1, sampleCount), evaluateNode(t2, sampleCount), sampleCount) - | `PointwiseProduct(t1, t2) => evaluatePointwiseProduct(evaluateNode(t1, sampleCount), evaluateNode(t2, sampleCount), sampleCount) - | `VerticalScaling(t1, t2) => evaluateVerticalScaling(evaluateNode(t1, sampleCount), evaluateNode(t2, sampleCount), sampleCount) - | `Normalize(t) => evaluateNormalize(evaluateNode(t, sampleCount), sampleCount) - | `LeftTruncate(t, x) => evaluateTruncate(evaluateNode(t, sampleCount), x, (<=), sampleCount) - | `RightTruncate(t, x) => evaluateTruncate(evaluateNode(t, sampleCount), x, (>=), sampleCount) - | `Render(t) => renderNode(evaluateNode(t, sampleCount), sampleCount) + | `Combination(t1, t2, op) => evaluateCombinationDistribution(evaluateNode(t1, n), evaluateNode(t2, n), op, n) + | `PointwiseSum(t1, t2) => evaluatePointwiseSum(evaluateNode(t1, 
n), evaluateNode(t2, n), n) + | `PointwiseProduct(t1, t2) => evaluatePointwiseProduct(evaluateNode(t1, n), evaluateNode(t2, n), n) + | `VerticalScaling(t1, t2) => evaluateVerticalScaling(evaluateNode(t1, n), evaluateNode(t2, n), n) + | `Normalize(t) => evaluateNormalize(evaluateNode(t, n), n) + | `LeftTruncate(t, x) => evaluateTruncate(evaluateNode(t, n), x, (>=), n) + | `RightTruncate(t, x) => evaluateTruncate(evaluateNode(t, n), x, (<=), n) + | `Render(t) => renderNode(evaluateNode(t, n), n) } }; - let toShape = (treeNode: distTree, sampleCount: int) => { - /*let continuousXs = findContinuousXs(dists, sampleCount); - continuousXs |> Array.fast_sort(compare); + let toShape = (treeNode: distTree, n: int) => { + let treeShape = evaluateNode(`Render(`Normalize(treeNode)), n); - let (contShapes, distShapes) = accumulateContAndDiscShapes(dists, continuousXs, 1.0); - - let combinedContinuous = contShapes - |> E.A.fold_left((shapeAcc: DistTypes.xyShape, shape: DistTypes.xyShape) => { - let ys = E.A.fmapi((i, y) => y +. 
shape.ys[i], shapeAcc.ys); - {xs: continuousXs, ys: ys} - }, {xs: continuousXs, ys: Array.make(Array.length(continuousXs), 0.0)}) - |> Distributions.Continuous.make(`Linear); - - let combinedDiscrete = Distributions.Discrete.reduce((+.), distShapes)*/ - - let treeShape = evaluateNode(`Render(`Normalize(treeNode)), sampleCount); switch (treeShape) { | `Distribution(_) => E.O.toExn("No shape found!", None) | `RenderedShape(sc, sd, _) => { @@ -700,6 +709,7 @@ module DistTree = { | `Normalize(t) => "normalize(" ++ toString(t) ++ ")" | `LeftTruncate(t, x) => "leftTruncate(" ++ toString(t) ++ ", " ++ string_of_float(x) ++ ")" | `RightTruncate(t, x) => "rightTruncate(" ++ toString(t) ++ ", " ++ string_of_float(x) ++ ")" + | `Render(t) => toString(t) } }; }; From 214f3b9e5831db7247280f7f24639bec58a354f0 Mon Sep 17 00:00:00 2001 From: Sebastian Kosch Date: Sat, 13 Jun 2020 18:54:54 -0700 Subject: [PATCH 07/31] Renaming, removed some Js.logs --- src/distPlus/symbolic/MathJsParser.re | 39 ++++++------ src/distPlus/symbolic/SymbolicDist.re | 89 +++++++++++++-------------- 2 files changed, 63 insertions(+), 65 deletions(-) diff --git a/src/distPlus/symbolic/MathJsParser.re b/src/distPlus/symbolic/MathJsParser.re index 6dba65e2..f145253e 100644 --- a/src/distPlus/symbolic/MathJsParser.re +++ b/src/distPlus/symbolic/MathJsParser.re @@ -91,19 +91,19 @@ module MathAdtToDistDst = { let normal: array(arg) => result(SymbolicDist.distTree, string) = fun | [|Value(mean), Value(stdev)|] => - Ok(`Distribution(`Normal({mean, stdev}))) + Ok(`Simple(`Normal({mean, stdev}))) | _ => Error("Wrong number of variables in normal distribution"); let lognormal: array(arg) => result(SymbolicDist.distTree, string) = fun - | [|Value(mu), Value(sigma)|] => Ok(`Distribution(`Lognormal({mu, sigma}))) + | [|Value(mu), Value(sigma)|] => Ok(`Simple(`Lognormal({mu, sigma}))) | [|Object(o)|] => { let g = Js.Dict.get(o); switch (g("mean"), g("stdev"), g("mu"), g("sigma")) { | (Some(Value(mean)), 
Some(Value(stdev)), _, _) => - Ok(`Distribution(SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev))) + Ok(`Simple(SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev))) | (_, _, Some(Value(mu)), Some(Value(sigma))) => - Ok(`Distribution(`Lognormal({mu, sigma}))) + Ok(`Simple(`Lognormal({mu, sigma}))) | _ => Error("Lognormal distribution would need mean and stdev") }; } @@ -112,10 +112,10 @@ module MathAdtToDistDst = { let to_: array(arg) => result(SymbolicDist.distTree, string) = fun | [|Value(low), Value(high)|] when low <= 0.0 && low < high=> { - Ok(`Distribution(SymbolicDist.Normal.from90PercentCI(low, high))); + Ok(`Simple(SymbolicDist.Normal.from90PercentCI(low, high))); } | [|Value(low), Value(high)|] when low < high => { - Ok(`Distribution(SymbolicDist.Lognormal.from90PercentCI(low, high))); + Ok(`Simple(SymbolicDist.Lognormal.from90PercentCI(low, high))); } | [|Value(_), Value(_)|] => Error("Low value must be less than high value.") @@ -123,29 +123,29 @@ module MathAdtToDistDst = { let uniform: array(arg) => result(SymbolicDist.distTree, string) = fun - | [|Value(low), Value(high)|] => Ok(`Distribution(`Uniform({low, high}))) + | [|Value(low), Value(high)|] => Ok(`Simple(`Uniform({low, high}))) | _ => Error("Wrong number of variables in lognormal distribution"); let beta: array(arg) => result(SymbolicDist.distTree, string) = fun - | [|Value(alpha), Value(beta)|] => Ok(`Distribution(`Beta({alpha, beta}))) + | [|Value(alpha), Value(beta)|] => Ok(`Simple(`Beta({alpha, beta}))) | _ => Error("Wrong number of variables in lognormal distribution"); let exponential: array(arg) => result(SymbolicDist.distTree, string) = fun - | [|Value(rate)|] => Ok(`Distribution(`Exponential({rate: rate}))) + | [|Value(rate)|] => Ok(`Simple(`Exponential({rate: rate}))) | _ => Error("Wrong number of variables in Exponential distribution"); let cauchy: array(arg) => result(SymbolicDist.distTree, string) = fun | [|Value(local), Value(scale)|] => - Ok(`Distribution(`Cauchy({local, 
scale}))) + Ok(`Simple(`Cauchy({local, scale}))) | _ => Error("Wrong number of variables in cauchy distribution"); let triangular: array(arg) => result(SymbolicDist.distTree, string) = fun | [|Value(low), Value(medium), Value(high)|] => - Ok(`Distribution(`Triangular({low, medium, high}))) + Ok(`Simple(`Triangular({low, medium, high}))) | _ => Error("Wrong number of variables in triangle distribution"); let multiModal = @@ -158,7 +158,7 @@ module MathAdtToDistDst = { args |> E.A.fmap( fun - | Ok(`Distribution(d)) => Ok(`Distribution(d)) + | Ok(`Simple(d)) => Ok(`Simple(d)) | Ok(`Combination(t1, t2, op)) => Ok(`Combination(t1, t2, op)) | Ok(`PointwiseSum(t1, t2)) => Ok(`PointwiseSum(t1, t2)) | Ok(`PointwiseProduct(t1, t2)) => Ok(`PointwiseProduct(t1, t2)) @@ -182,7 +182,7 @@ module MathAdtToDistDst = { |> E.A.fmapi((index, t) => { let w = weights |> E.A.get(_, index) |> E.O.default(1.0); - `VerticalScaling(t, `Distribution(`Float(w))) + `VerticalScaling(t, `Simple(`Float(w))) }); let pointwiseSum = components @@ -197,7 +197,6 @@ module MathAdtToDistDst = { }; let arrayParser = (args:array(arg)):result(SymbolicDist.distTree, string) => { - Js.log2("SAMPLING NOW!", args); let samples = args |> E.A.fmap( fun @@ -213,7 +212,7 @@ module MathAdtToDistDst = { SymbolicDist.ContinuousShape.make(_pdf, cdf) }); switch(shape){ - | Some(s) => Ok(`Distribution(`ContinuousShape(s))) + | Some(s) => Ok(`Simple(`ContinuousShape(s))) | None => Error("Rendering did not work") } } @@ -231,7 +230,7 @@ module MathAdtToDistDst = { | Fn({name: "exponential", args}) => exponential(args) | Fn({name: "cauchy", args}) => cauchy(args) | Fn({name: "triangular", args}) => triangular(args) - | Value(f) => Ok(`Distribution(`Float(f))) + | Value(f) => Ok(`Simple(`Float(f))) | Fn({name: "mm", args}) => { let weights = args @@ -284,7 +283,7 @@ module MathAdtToDistDst = { args |> E.A.fmap(functionParser) |> (fun - | [|Ok(l), Ok(`Distribution(`Float(0.0)))|] => Error("Division by zero") + | [|Ok(l), 
Ok(`Simple(`Float(0.0)))|] => Error("Division by zero") | [|Ok(l), Ok(r)|] => Ok(`Combination(l, r, `DivideOperation)) | _ => Error("Division needs two operands")) } @@ -299,14 +298,14 @@ module MathAdtToDistDst = { args |> E.A.fmap(functionParser) |> (fun - | [|Ok(l), Ok(`Distribution(`Float(r)))|] => Ok(`LeftTruncate(l, r)) + | [|Ok(l), Ok(`Simple(`Float(r)))|] => Ok(`LeftTruncate(l, r)) | _ => Error("leftTruncate needs two arguments: the expression and the cutoff")) } | Fn({name: "rightTruncate", args}) => { args |> E.A.fmap(functionParser) |> (fun - | [|Ok(l), Ok(`Distribution(`Float(r)))|] => Ok(`RightTruncate(l, r)) + | [|Ok(l), Ok(`Simple(`Float(r)))|] => Ok(`RightTruncate(l, r)) | _ => Error("rightTruncate needs two arguments: the expression and the cutoff")) } | Fn({name}) => Error(name ++ ": function not supported") @@ -320,7 +319,7 @@ module MathAdtToDistDst = { |> ( fun | Fn(_) => functionParser(r) - | Value(r) => Ok(`Distribution(`Float(r))) + | Value(r) => Ok(`Simple(`Float(r))) | Array(r) => arrayParser(r) | Symbol(_) => Error("Symbol not valid as top level") | Object(_) => Error("Object not valid as top level") diff --git a/src/distPlus/symbolic/SymbolicDist.re b/src/distPlus/symbolic/SymbolicDist.re index 7f0fbd7b..48bc7ad5 100644 --- a/src/distPlus/symbolic/SymbolicDist.re +++ b/src/distPlus/symbolic/SymbolicDist.re @@ -61,7 +61,7 @@ type operation = [ ]; type distTree = [ - | `Distribution(dist) + | `Simple(dist) | `Combination(distTree, distTree, operation) | `PointwiseSum(distTree, distTree) | `PointwiseProduct(distTree, distTree) @@ -315,13 +315,13 @@ module GenericSimple = { module DistTree = { type nodeResult = [ - | `Distribution(dist) + | `Simple(dist) // RenderedShape: continuous xyShape, discrete xyShape, total value. | `RenderedShape(DistTypes.continuousShape, DistTypes.discreteShape, integral) ]; let evaluateDistribution = (d: dist): nodeResult => { - `Distribution(d) + `Simple(d) }; // This is a performance bottleneck! 
@@ -406,7 +406,7 @@ module DistTree = { // First, deal with the discrete-discrete convolution: let (ddxs, ddys) = jsDiscreteCombinationConvolve(sd1.xs, sd1.ys, sd2.xs, sd2.ys, func); - let ddxy: DistTypes.discreteShape = {xs: cdxs, ys: cdys}; + let ddxy: DistTypes.discreteShape = {xs: ddxs, ys: ddys}; // Then, do the other three: let downsample = (sc: DistTypes.continuousShape) => { @@ -415,8 +415,7 @@ module DistTree = { scSqLength > 10. ? Distributions.Continuous.T.truncate(int_of_float(scSqLength), sc) : sc; }; - let combinePointConvolutionResults = ccs - |> E.A.fmap(s => { + let combinePointConvolutionResults = ca => ca |> E.A.fmap(s => { // s is an array of (x, y) objects let (xs, ys) = Belt.Array.unzip(s); Distributions.Continuous.make(`Linear, {xs, ys}); @@ -440,46 +439,46 @@ module DistTree = { let func = funcFromOp(op); switch ((et1, et2, op)) { /* Known cases: replace symbolic with symbolic distribution */ - | (`Distribution(`Float(v1)), `Distribution(`Float(v2)), _) => { - `Distribution(`Float(func(v1, v2))) + | (`Simple(`Float(v1)), `Simple(`Float(v2)), _) => { + `Simple(`Float(func(v1, v2))) } - | (`Distribution(`Normal(n2)), `Distribution(`Float(v1)), `AddOperation) - | (`Distribution(`Float(v1)), `Distribution(`Normal(n2)), `AddOperation) => { + | (`Simple(`Normal(n2)), `Simple(`Float(v1)), `AddOperation) + | (`Simple(`Float(v1)), `Simple(`Normal(n2)), `AddOperation) => { let n: normal = {mean: v1 +. n2.mean, stdev: n2.stdev}; - `Distribution(`Normal(n)) + `Simple(`Normal(n)) } - | (`Distribution(`Normal(n1)), `Distribution(`Normal(n2)), `AddOperation) => { + | (`Simple(`Normal(n1)), `Simple(`Normal(n2)), `AddOperation) => { let n: normal = {mean: n1.mean +. n2.mean, stdev: sqrt(n1.stdev ** 2. +. 
n2.stdev ** 2.)}; - `Distribution(`Normal(n)); + `Simple(`Normal(n)); } - | (`Distribution(`Normal(n1)), `Distribution(`Normal(n2)), `SubtractOperation) => { + | (`Simple(`Normal(n1)), `Simple(`Normal(n2)), `SubtractOperation) => { let n: normal = {mean: n1.mean -. n2.mean, stdev: sqrt(n1.stdev ** 2. +. n2.stdev ** 2.)}; - `Distribution(`Normal(n)); + `Simple(`Normal(n)); } - | (`Distribution(`Lognormal(l1)), `Distribution(`Lognormal(l2)), `MultiplyOperation) => { + | (`Simple(`Lognormal(l1)), `Simple(`Lognormal(l2)), `MultiplyOperation) => { let l: lognormal = {mu: l1.mu +. l2.mu, sigma: l1.sigma +. l2.sigma}; - `Distribution(`Lognormal(l)); + `Simple(`Lognormal(l)); } - | (`Distribution(`Lognormal(l1)), `Distribution(`Lognormal(l2)), `DivideOperation) => { + | (`Simple(`Lognormal(l1)), `Simple(`Lognormal(l2)), `DivideOperation) => { let l: lognormal = {mu: l1.mu -. l2.mu, sigma: l1.sigma +. l2.sigma}; - `Distribution(`Lognormal(l)); + `Simple(`Lognormal(l)); } /* General cases: convolve the XYShapes */ - | (`Distribution(d1), `Distribution(d2), _) => { + | (`Simple(d1), `Simple(d2), _) => { let (sc1, sd1) = renderDistributionToXYShape(d1, n); let (sc2, sd2) = renderDistributionToXYShape(d2, n); let (sc, sd) = combinationDistributionOfXYShapes(sc1, sd1, sc2, sd2, func); `RenderedShape(sc, sd, 1.0) } - | (`Distribution(d2), `RenderedShape(sc1, sd1, i1), _) - | (`RenderedShape(sc1, sd1, i1), `Distribution(d2), _) => { + | (`Simple(d1), `RenderedShape(sc2, sd2, i2), _) + | (`RenderedShape(sc2, sd2, i2), `Simple(d1), _) => { let (sc1, sd1) = renderDistributionToXYShape(d1, n); let (sc, sd) = combinationDistributionOfXYShapes(sc1, sd1, sc2, sd2, func); `RenderedShape(sc, sd, i2) @@ -495,24 +494,24 @@ module DistTree = { let evaluatePointwiseSum = (et1: nodeResult, et2: nodeResult, n: int) => { switch ((et1, et2)) { /* Known cases: */ - | (`Distribution(`Float(v1)), `Distribution(`Float(v2))) => { + | (`Simple(`Float(v1)), `Simple(`Float(v2))) => { v1 == v2 ? 
`RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.make({xs: [|v1|], ys: [|2.|]}), 2.) : `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) // TODO: add warning: shouldn't pointwise add scalars. } - | (`Distribution(`Float(v1)), `Distribution(d2)) - | (`Distribution(d2), `Distribution(`Float(v1))) => { + | (`Simple(`Float(v1)), `Simple(d2)) + | (`Simple(d2), `Simple(`Float(v1))) => { let sd1: DistTypes.xyShape = {xs: [|v1|], ys: [|1.|]}; let (sc2, sd2) = renderDistributionToXYShape(d2, n); `RenderedShape(sc2, Distributions.Discrete.reduce((+.), [|sd1, sd2|]), 2.) } - | (`Distribution(d1), `Distribution(d2)) => { + | (`Simple(d1), `Simple(d2)) => { let (sc1, sd1) = renderDistributionToXYShape(d1, n); let (sc2, sd2) = renderDistributionToXYShape(d2, n); `RenderedShape(Distributions.Continuous.reduce((+.), [|sc1, sc2|]), Distributions.Discrete.reduce((+.), [|sd1, sd2|]), 2.) } - | (`Distribution(d1), `RenderedShape(sc2, sd2, i2)) - | (`RenderedShape(sc2, sd2, i2), `Distribution(d1)) => { + | (`Simple(d1), `RenderedShape(sc2, sd2, i2)) + | (`RenderedShape(sc2, sd2, i2), `Simple(d1)) => { let (sc1, sd1) = renderDistributionToXYShape(d1, n); `RenderedShape(Distributions.Continuous.reduce((+.), [|sc1, sc2|]), Distributions.Discrete.reduce((+.), [|sd1, sd2|]), 1. +. i2) } @@ -525,42 +524,42 @@ module DistTree = { let evaluatePointwiseProduct = (et1: nodeResult, et2: nodeResult, n: int) => { switch ((et1, et2)) { /* Known cases: */ - | (`Distribution(`Float(v1)), `Distribution(`Float(v2))) => { + | (`Simple(`Float(v1)), `Simple(`Float(v2))) => { v1 == v2 ? `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.make({xs: [|v1|], ys: [|1.|]}), 1.) : `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) // TODO: add warning: shouldn't pointwise multiply scalars. 
} - | (`Distribution(`Float(v1)), `Distribution(d2)) => { + | (`Simple(`Float(v1)), `Simple(d2)) => { // evaluate d2 at v1 let y = GenericSimple.pdf(v1, d2); `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.make({xs: [|v1|], ys: [|y|]}), y) } - | (`Distribution(d1), `Distribution(`Float(v2))) => { + | (`Simple(d1), `Simple(`Float(v2))) => { // evaluate d1 at v2 let y = GenericSimple.pdf(v2, d1); `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.make({xs: [|v2|], ys: [|y|]}), y) } - | (`Distribution(`Normal(n1)), `Distribution(`Normal(n2))) => { + | (`Simple(`Normal(n1)), `Simple(`Normal(n2))) => { let mean = (n1.mean *. n2.stdev**2. +. n2.mean *. n1.stdev**2.) /. (n1.stdev**2. +. n2.stdev**2.); let stdev = 1. /. ((1. /. n1.stdev**2.) +. (1. /. n2.stdev**2.)); let integral = 0; // TODO `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) } /* General cases */ - | (`Distribution(d1), `Distribution(d2)) => { + | (`Simple(d1), `Simple(d2)) => { // NOT IMPLEMENTED YET // TODO: evaluate integral properly let (sc1, sd1) = renderDistributionToXYShape(d1, n); let (sc2, sd2) = renderDistributionToXYShape(d2, n); `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) } - | (`Distribution(d1), `RenderedShape(sc2, sd2, i2)) => { + | (`Simple(d1), `RenderedShape(sc2, sd2, i2)) => { // NOT IMPLEMENTED YET // TODO: evaluate integral properly let (sc1, sd1) = renderDistributionToXYShape(d1, n); `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) } - | (`RenderedShape(sc1, sd1, i1), `Distribution(d1)) => { + | (`RenderedShape(sc1, sd1, i1), `Simple(d1)) => { // NOT IMPLEMENTED YET // TODO: evaluate integral properly let (sc2, sd2) = renderDistributionToXYShape(d1, n); @@ -588,7 +587,7 @@ module DistTree = { `RenderedShape(scn, sdn, 1.) 
} - | `Distribution(d) => `Distribution(d) // any kind of atomic dist should already be normalized -- TODO: THIS IS ACTUALLY FALSE! E.g. pointwise product of normal * normal + | `Simple(d) => `Simple(d) // any kind of atomic dist should already be normalized -- TODO: THIS IS ACTUALLY FALSE! E.g. pointwise product of normal * normal } }; @@ -604,7 +603,7 @@ module DistTree = { }; switch (et) { - | `Distribution(d) => { + | `Simple(d) => { let (sc, sd) = renderDistributionToXYShape(d, n); let scc = sc |> Distributions.Continuous.shapeMap(cut); @@ -629,8 +628,8 @@ module DistTree = { let scale = (i: float, s: DistTypes.xyShape): DistTypes.xyShape => {xs: s.xs, ys: s.ys |> E.A.fmap(y => y *. i)}; switch ((et1, et2)) { - | (`Distribution(`Float(v)), `Distribution(d)) - | (`Distribution(d), `Distribution(`Float(v))) => { + | (`Simple(`Float(v)), `Simple(d)) + | (`Simple(d), `Simple(`Float(v))) => { let (sc, sd) = renderDistributionToXYShape(d, n); let scc = sc |> Distributions.Continuous.shapeMap(scale(v)); @@ -640,8 +639,8 @@ module DistTree = { `RenderedShape(scc, sdc, newIntegral); } - | (`Distribution(`Float(v)), `RenderedShape(sc, sd, i)) - | (`RenderedShape(sc, sd, i), `Distribution(`Float(v))) => { + | (`Simple(`Float(v)), `RenderedShape(sc, sd, i)) + | (`RenderedShape(sc, sd, i), `Simple(`Float(v))) => { let scc = sc |> Distributions.Continuous.shapeMap(scale(v)); let sdc = sd |> scale(v); @@ -655,7 +654,7 @@ module DistTree = { let renderNode = (et: nodeResult, n: int) => { switch (et) { - | `Distribution(d) => { + | `Simple(d) => { let (sc, sd) = renderDistributionToXYShape(d, n); `RenderedShape(sc, sd, 1.0); } @@ -666,7 +665,7 @@ module DistTree = { let rec evaluateNode = (treeNode: distTree, n: int): nodeResult => { // returns either a new symbolic distribution switch (treeNode) { - | `Distribution(d) => evaluateDistribution(d) + | `Simple(d) => evaluateDistribution(d) | `Combination(t1, t2, op) => evaluateCombinationDistribution(evaluateNode(t1, n), 
evaluateNode(t2, n), op, n) | `PointwiseSum(t1, t2) => evaluatePointwiseSum(evaluateNode(t1, n), evaluateNode(t2, n), n) | `PointwiseProduct(t1, t2) => evaluatePointwiseProduct(evaluateNode(t1, n), evaluateNode(t2, n), n) @@ -682,7 +681,7 @@ module DistTree = { let treeShape = evaluateNode(`Render(`Normalize(treeNode)), n); switch (treeShape) { - | `Distribution(_) => E.O.toExn("No shape found!", None) + | `Simple(_) => E.O.toExn("No shape found!", None) | `RenderedShape(sc, sd, _) => { let shape = MixedShapeBuilder.buildSimple(~continuous=Some(sc), ~discrete=sd); @@ -701,7 +700,7 @@ module DistTree = { }; switch (treeNode) { - | `Distribution(d) => GenericSimple.toString(d) + | `Simple(d) => GenericSimple.toString(d) | `Combination(t1, t2, op) => toString(t1) ++ stringFromOp(op) ++ toString(t2) | `PointwiseSum(t1, t2) => toString(t1) ++ " .+ " ++ toString(t2) | `PointwiseProduct(t1, t2) => toString(t1) ++ " .* " ++ toString(t2) From bd528571afbdaabd01328328acc50f7c016931ba Mon Sep 17 00:00:00 2001 From: Sebastian Kosch Date: Thu, 25 Jun 2020 23:38:14 -0700 Subject: [PATCH 08/31] Working on code reorganization, doesn't compile yet --- __tests__/Distributions__Test.re | 7 +- src/components/DistBuilder2.re | 6 +- src/components/DistBuilder3.re | 4 +- src/components/Drawer.re | 8 +- src/components/charts/DistPlusPlot.re | 28 +- src/distPlus/distribution/DistTypes.re | 8 +- src/distPlus/distribution/Distributions.re | 836 ++++++++++++------ .../distribution/MixedShapeBuilder.re | 23 +- src/distPlus/distribution/XYShape.re | 10 +- src/distPlus/renderers/RenderTypes.re | 2 +- src/distPlus/renderers/ShapeRenderer.re | 4 +- .../renderers/samplesRenderer/Guesstimator.re | 6 +- .../renderers/samplesRenderer/Samples.re | 11 +- src/distPlus/symbolic/MathJsParser.re | 78 +- src/distPlus/symbolic/SymbolicDist.re | 530 ++--------- src/distPlus/symbolic/TreeNode.re | 414 +++++++++ src/distPlus/utility/Jstat.re | 10 +- 17 files changed, 1142 insertions(+), 843 deletions(-) create 
mode 100644 src/distPlus/symbolic/TreeNode.re diff --git a/__tests__/Distributions__Test.re b/__tests__/Distributions__Test.re index d83c1ac2..20c7ce34 100644 --- a/__tests__/Distributions__Test.re +++ b/__tests__/Distributions__Test.re @@ -386,10 +386,9 @@ describe("Shape", () => { let numSamples = 10000; open Distributions.Shape; let normal: SymbolicDist.dist = `Normal({mean, stdev}); - let normalShape = SymbolicDist.GenericSimple.toShape(normal, numSamples); + let normalShape = TreeNode.toShape(numSamples, normal); let lognormal = SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev); - let lognormalShape = - SymbolicDist.GenericSimple.toShape(lognormal, numSamples); + let lognormalShape = TreeNode.toShape(numSamples, lognormal); makeTestCloseEquality( "Mean of a normal", @@ -416,4 +415,4 @@ describe("Shape", () => { ~digits=0, ); }); -}); \ No newline at end of file +}); diff --git a/src/components/DistBuilder2.re b/src/components/DistBuilder2.re index 9c7ad6bb..b912e223 100644 --- a/src/components/DistBuilder2.re +++ b/src/components/DistBuilder2.re @@ -44,14 +44,14 @@ module DemoDist = { Distributions.DistPlus.make( ~shape= Continuous( - Distributions.Continuous.make(`Linear, {xs, ys}), + Distributions.Continuous.make(`Linear, {xs, ys}, None), ), ~domain=Complete, ~unit=UnspecifiedDistribution, ~guesstimatorString=None, (), ) - |> Distributions.DistPlus.T.scaleToIntegralSum(~intendedSum=1.0); + |> Distributions.DistPlus.T.normalize; ; }; R.ste}> @@ -102,4 +102,4 @@ let make = () => {
; -}; \ No newline at end of file +}; diff --git a/src/components/DistBuilder3.re b/src/components/DistBuilder3.re index 662a3241..86bb1d2a 100644 --- a/src/components/DistBuilder3.re +++ b/src/components/DistBuilder3.re @@ -37,7 +37,7 @@ module DemoDist = { let parsed1 = MathJsParser.fromString(guesstimatorString); let shape = switch (parsed1) { - | Ok(r) => Some(SymbolicDist.toShape(10000, r)) + | Ok(r) => Some(TreeNode.toShape(10000, r)) | _ => None }; @@ -111,4 +111,4 @@ let make = () => {
; -}; \ No newline at end of file +}; diff --git a/src/components/Drawer.re b/src/components/Drawer.re index 090447b5..8dc0c7db 100644 --- a/src/components/Drawer.re +++ b/src/components/Drawer.re @@ -177,6 +177,7 @@ module Convert = { let continuousShape: Types.continuousShape = { xyShape, interpolation: `Linear, + knownIntegralSum: None, }; let integral = XYShape.Analysis.integrateContinuousShape(continuousShape); @@ -188,6 +189,7 @@ module Convert = { ys, }, interpolation: `Linear, + knownIntegralSum: Some(1.0), }; continuousShape; }; @@ -387,7 +389,7 @@ module Draw = { let numSamples = 3000; let normal: SymbolicDist.dist = `Normal({mean, stdev}); - let normalShape = SymbolicDist.GenericSimple.toShape(normal, numSamples); + let normalShape = TreeNode.toShape(numSamples, `DistData(`Symbolic(normal))); let xyShape: Types.xyShape = switch (normalShape) { | Mixed(_) => {xs: [||], ys: [||]} @@ -667,9 +669,7 @@ module State = { /* create a cdf from a pdf */ let _pdf = - Distributions.Continuous.T.scaleToIntegralSum( - ~cache=None, - ~intendedSum=1.0, + Distributions.Continuous.T.normalize( pdf, ); diff --git a/src/components/charts/DistPlusPlot.re b/src/components/charts/DistPlusPlot.re index 9eb484ef..93feb7d2 100644 --- a/src/components/charts/DistPlusPlot.re +++ b/src/components/charts/DistPlusPlot.re @@ -95,7 +95,7 @@ let table = (distPlus, x) => { {distPlus - |> Distributions.DistPlus.T.toScaledContinuous + |> Distributions.DistPlus.T.normalizedToContinuous |> E.O.fmap( Distributions.Continuous.T.Integral.sum(~cache=None), ) @@ -113,7 +113,7 @@ let table = (distPlus, x) => { {distPlus - |> Distributions.DistPlus.T.toScaledDiscrete + |> Distributions.DistPlus.T.normalizedToDiscrete |> E.O.fmap(Distributions.Discrete.T.Integral.sum(~cache=None)) |> E.O.fmap(E.Float.with2DigitsPrecision) |> E.O.default("") @@ -211,15 +211,13 @@ let percentiles = distPlus => {
; }; -let adjustBoth = discreteProbabilityMass => { - let yMaxDiscreteDomainFactor = discreteProbabilityMass; - let yMaxContinuousDomainFactor = 1.0 -. discreteProbabilityMass; - let yMax = - yMaxDiscreteDomainFactor > yMaxContinuousDomainFactor - ? yMaxDiscreteDomainFactor : yMaxContinuousDomainFactor; +let adjustBoth = discreteProbabilityMassFraction => { + let yMaxDiscreteDomainFactor = discreteProbabilityMassFraction; + let yMaxContinuousDomainFactor = 1.0 -. discreteProbabilityMassFraction; + let yMax = (yMaxDiscreteDomainFactor > 0.5 ? yMaxDiscreteDomainFactor : yMaxContinuousDomainFactor); ( - 1.0 /. (yMaxDiscreteDomainFactor /. yMax), - 1.0 /. (yMaxContinuousDomainFactor /. yMax), + yMax /. yMaxDiscreteDomainFactor, + yMax /. yMaxContinuousDomainFactor, ); }; @@ -227,10 +225,10 @@ module DistPlusChart = { [@react.component] let make = (~distPlus: DistTypes.distPlus, ~config: chartConfig, ~onHover) => { open Distributions.DistPlus; - let discrete = distPlus |> T.toScaledDiscrete; + let discrete = distPlus |> T.normalizedToDiscrete |> E.O.fmap(Distributions.Discrete.getShape); let continuous = distPlus - |> T.toScaledContinuous + |> T.normalizedToContinuous |> E.O.fmap(Distributions.Continuous.getShape); let range = T.xTotalRange(distPlus); @@ -254,10 +252,10 @@ module DistPlusChart = { }; let timeScale = distPlus.unit |> DistTypes.DistributionUnit.toJson; - let toDiscreteProbabilityMass = - distPlus |> Distributions.DistPlus.T.toDiscreteProbabilityMass; + let discreteProbabilityMassFraction = + distPlus |> Distributions.DistPlus.T.toDiscreteProbabilityMassFraction; let (yMaxDiscreteDomainFactor, yMaxContinuousDomainFactor) = - adjustBoth(toDiscreteProbabilityMass); + adjustBoth(discreteProbabilityMassFraction); float; let maxX: t => float; - let mapY: (float => float, t) => t; + let mapY: (~knownIntegralSumFn: float => option(float)=?, float => float, t) => t; let xToY: (float, t) => DistTypes.mixedPoint; let toShape: t => DistTypes.shape; let toContinuous: 
t => option(DistTypes.continuousShape); let toDiscrete: t => option(DistTypes.discreteShape); - let toScaledContinuous: t => option(DistTypes.continuousShape); - let toScaledDiscrete: t => option(DistTypes.discreteShape); - let toDiscreteProbabilityMass: t => float; - let truncate: (~cache: option(integral)=?, int, t) => t; + let normalize: t => t; + let normalizedToContinuous: t => option(DistTypes.continuousShape); + let normalizedToDiscrete: t => option(DistTypes.discreteShape); + let toDiscreteProbabilityMassFraction: t => float; + let downsample: (~cache: option(integral)=?, int, t) => t; let integral: (~cache: option(integral), t) => integral; let integralEndY: (~cache: option(integral), t) => float; @@ -31,19 +32,17 @@ module Dist = (T: dist) => { let xTotalRange = (t: t) => maxX(t) -. minX(t); let mapY = T.mapY; let xToY = T.xToY; - let truncate = T.truncate; + let downsample = T.downsample; let toShape = T.toShape; - let toDiscreteProbabilityMass = T.toDiscreteProbabilityMass; + let toDiscreteProbabilityMassFraction = T.toDiscreteProbabilityMassFraction; let toContinuous = T.toContinuous; let toDiscrete = T.toDiscrete; - let toScaledContinuous = T.toScaledContinuous; - let toScaledDiscrete = T.toScaledDiscrete; + let normalize = T.normalize; + let normalizedToContinuous = T.normalizedToContinuous; + let normalizedToDiscrete = T.normalizedToDiscrete; let mean = T.mean; let variance = T.variance; - // TODO: Move this to each class, have use integral to produce integral in DistPlus class. - let scaleBy = (~scale=1.0, t: t) => t |> mapY((r: float) => r *. scale); - module Integral = { type t = T.integral; let get = T.integral; @@ -51,52 +50,169 @@ module Dist = (T: dist) => { let yToX = T.integralYtoX; let sum = T.integralEndY; }; - - // This is suboptimal because it could get the cache but doesn't here. - let scaleToIntegralSum = - (~cache: option(integral)=None, ~intendedSum=1.0, t: t) => { - let scale = intendedSum /. 
Integral.sum(~cache, t); - scaleBy(~scale, t); - }; }; -module Continuous = { +module Continuous { type t = DistTypes.continuousShape; let getShape = (t: t) => t.xyShape; let interpolation = (t: t) => t.interpolation; - let make = (interpolation, xyShape): t => {xyShape, interpolation}; - let shapeMap = (fn, {xyShape, interpolation}: t): t => { + let make = (interpolation, xyShape, knownIntegralSum): t => { + xyShape, + interpolation, + knownIntegralSum, + }; + let shapeMap = (fn, {xyShape, interpolation, knownIntegralSum}: t): t => { xyShape: fn(xyShape), interpolation, + knownIntegralSum, }; let lastY = (t: t) => t |> getShape |> XYShape.T.lastY; let oShapeMap = - (fn, {xyShape, interpolation}: t): option(DistTypes.continuousShape) => - fn(xyShape) |> E.O.fmap(make(interpolation)); + (fn, {xyShape, interpolation, knownIntegralSum}: t) + : option(DistTypes.continuousShape) => + fn(xyShape) |> E.O.fmap(make(interpolation, _, knownIntegralSum)); - let empty: DistTypes.continuousShape = {xyShape: XYShape.T.empty, interpolation: `Linear}; + let empty: DistTypes.continuousShape = { + xyShape: XYShape.T.empty, + interpolation: `Linear, + knownIntegralSum: Some(0.0), + }; let combine = (fn, t1: DistTypes.continuousShape, t2: DistTypes.continuousShape) : DistTypes.continuousShape => { - make(`Linear, XYShape.Combine.combine( - ~xsSelection=ALL_XS, - ~xToYSelection=XYShape.XtoY.linear, - ~fn, - t1.xyShape, - t2.xyShape, - )); + + // If we're adding the distributions, and we know the total of each, then we + // can just sum them up. Otherwise, all bets are off. + let combinedIntegralSum = + switch (fn, t1.knownIntegralSum, t2.knownIntegralSum) { + | (_, None, _) + | (_, _, None) => None + | ((+.), Some(s1), Some(s2)) => Some(s1 +. 
s2) + }; + + make( + `Linear, + XYShape.Combine.combine( + ~xsSelection=ALL_XS, + ~xToYSelection=XYShape.XtoY.linear, + ~fn, + t1.xyShape, + t2.xyShape, + ), + combinedIntegralSum, + ); }; - let reduce = (fn, items) => - items |> E.A.fold_left(combine(fn), empty); + let reduce = (fn, items) => items |> E.A.fold_left(combine(fn), empty); let toLinear = (t: t): option(t) => { switch (t) { - | {interpolation: `Stepwise, xyShape} => - xyShape |> XYShape.Range.stepsToContinuous |> E.O.fmap(make(`Linear)) - | {interpolation: `Linear, _} => Some(t) + | {interpolation: `Stepwise, xyShape, knownIntegralSum} => + xyShape + |> XYShape.Range.stepsToContinuous + |> E.O.fmap(make(`Linear, _, knownIntegralSum)) + | {interpolation: `Linear} => Some(t) }; }; let shapeFn = (fn, t: t) => t |> getShape |> fn; + let updateKnownIntegralSum = (knownIntegralSum, t: t): t => ({...t, knownIntegralSum}); + + // Contracts every point in the continuous xyShape into a single dirac-Delta-like point, + // using the centerpoints between adjacent xs and the area under each trapezoid. + // This is essentially like integrateWithTriangles, without the accumulation. + let toDiscretePointMasses = (t: t): DistTypes.discreteShape => { + let tl = t |> getShape |> XYShape.T.length; + let pointMassesY: array(float) = Belt.Array.make(tl - 1, 0.0); + let {xs, ys}: XYShape.T.t = t |> getShape; + for (x in 0 to E.A.length(xs) - 2) { + let _ = + Belt.Array.set( + pointMassesY, + x, + (xs[x + 1] -. xs[x]) *. ((ys[x] +. ys[x + 1]) /. 2.)); // = dx * (1/2) * (avgY) + (); + }; + + {xyShape: {xs: xs, ys: pointMassesY}, knownIntegralSum: t.knownIntegralSum}; + }; + + /* Performs a discrete convolution between two continuous distributions A and B. + * It is an extremely good idea to downsample the distributions beforehand, + * because the number of samples in the convolution can be up to length(A) * length(B). + * + * Conventional convolution uses fn = (+.), but we also allow other operations to combine the xs. 
+ * + * In practice, the convolution works by multiplying the ys for each possible combo of points of + * the two shapes. This creates a new shape for each point of A. These new shapes are then combined + * linearly. This may not always be the most efficient way, but it is probably the most robust for now. + * + * In the future, it may be possible to use a non-uniform fast Fourier transform instead (although only for addition). + */ + let convolveWithDiscrete = (fn, t1: t, t2: DistTypes.discreteShape) => { + let t1s = t1 |> getShape; + let t2s = t2.xyShape; // would like to use Discrete.getShape here, but current file structure doesn't allow for that + let t1n = t1s |> XYShape.T.length; + let t2n = t2s |> XYShape.T.length; + + let outXYShapes: array(array((float, float))) = Belt.Array.makeUninitializedUnsafe(t1n); + + for (i in 0 to t1n - 1) { + // create a new distribution + let dxyShape: array((float, float)) = Belt.Array.makeUninitializedUnsafe(t2n); + for (j in 0 to t2n - 1) { + let _ = Belt.Array.set(dxyShape, j, (fn(t1s.xs[i], t2s.xs[j]), t1s.ys[i] *. t2s.ys[j])); + (); + } + let _ = Belt.Array.set(outXYShapes, i, dxyShape); + (); + } + + let combinedIntegralSum = + switch (t1.knownIntegralSum, t2.knownIntegralSum) { + | (None, _) + | (_, None) => None + | (Some(s1), Some(s2)) => Some(s1 *. s2) + }; + + outXYShapes + |> E.A.fmap(s => { + let xyShape = XYShape.T.fromZippedArray(s); + make(`Linear, xyShape, None); + }) + |> reduce((+.)) + |> updateKnownIntegralSum(combinedIntegralSum); + }; + + let convolve = (fn, t1: t, t2: t) => + convolveWithDiscrete(fn, t1, toDiscretePointMasses(t2)); + + let mapY = (~knownIntegralSumFn=(previousKnownIntegralSum => None), fn, t: t) => { + let u = E.O.bind(_, knownIntegralSumFn); + let yMapFn = shapeMap(XYShape.T.mapY(fn)); + + t |> yMapFn |> updateKnownIntegralSum(u(t.knownIntegralSum)); + }; + + let scaleBy = (~scale=1.0, ~knownIntegralSum=None, t: t): t => + t |> mapY((r: float) => r *. 
scale) |> updateKnownIntegralSum(knownIntegralSum); + + let truncate = (leftCutoff: option(float), rightCutoff: option(float), t: t) => { + let truncatedZippedPairs = + t + |> getShape + |> XYShape.T.zip + |> XYShape.Zipped.filterByX(x => x >= E.O.default(neg_infinity, leftCutoff) || x <= E.O.default(infinity, rightCutoff)); + + let eps = (t |> getShape |> XYShape.T.xTotalRange) *. 0.0001; + + let leftNewPoint = leftCutoff |> E.O.dimap(lc => [| (lc -. eps, 0.) |], _ => [||]); + let rightNewPoint = rightCutoff |> E.O.dimap(rc => [| (rc +. eps, 0.) |], _ => [||]); + + let truncatedZippedPairsWithNewPoints = + E.A.concatMany([| leftNewPoint, truncatedZippedPairs, rightNewPoint |]); + let truncatedShape = XYShape.T.fromZippedArray(truncatedZippedPairsWithNewPoints); + + make(`Linear, truncatedShape, None); + }; module T = Dist({ @@ -104,8 +220,8 @@ module Continuous = { type integral = DistTypes.continuousShape; let minX = shapeFn(XYShape.T.minX); let maxX = shapeFn(XYShape.T.maxX); - let toDiscreteProbabilityMass = _ => 0.0; - let mapY = fn => shapeMap(XYShape.T.mapY(fn)); + let mapY = mapY; + let toDiscreteProbabilityMassFraction = _ => 0.0; let toShape = (t: t): DistTypes.shape => Continuous(t); let xToY = (f, {interpolation, xyShape}: t) => { ( @@ -136,9 +252,9 @@ module Continuous = { |> getShape |> XYShape.Range.integrateWithTriangles |> E.O.toExt("This should not have happened") - |> make(`Linear) + |> make(`Linear, _, None) }; - let truncate = (~cache=None, length, t) => + let downsample = (~cache=None, length, t): t => t |> shapeMap( XYShape.XsConversion.proportionByProbabilityMass( @@ -146,15 +262,23 @@ module Continuous = { integral(~cache, t).xyShape, ), ); - let integralEndY = (~cache, t) => t |> integral(~cache) |> lastY; - let integralXtoY = (~cache, f, t) => + let integralEndY = (~cache, t: t) => + t.knownIntegralSum |> E.O.default(t |> integral(~cache) |> lastY); + let integralXtoY = (~cache, f, t: t) => t |> integral(~cache) |> 
shapeFn(XYShape.XtoY.linear(f)); - let integralYtoX = (~cache, f, t) => + let integralYtoX = (~cache, f, t: t) => t |> integral(~cache) |> shapeFn(XYShape.YtoX.linear(f)); let toContinuous = t => Some(t); let toDiscrete = _ => None; - let toScaledContinuous = t => Some(t); - let toScaledDiscrete = _ => None; + + let normalize = (t: t): t => { + let continuousIntegralSum = integralEndY(~cache=None, t); + + scaleBy(~scale=(1. /. continuousIntegralSum), ~knownIntegralSum=Some(1.0), t); + }; + + let normalizedToContinuous = t => Some(t); // TODO: this should be normalized + let normalizedToDiscrete = _ => None; let mean = (t: t) => { let indefiniteIntegralStepwise = (p, h1) => h1 *. p ** 2.0 /. 2.0; @@ -176,27 +300,104 @@ module Continuous = { }; module Discrete = { - let sortedByY = (t: DistTypes.discreteShape) => - t |> XYShape.T.zip |> XYShape.Zipped.sortByY; - let sortedByX = (t: DistTypes.discreteShape) => - t |> XYShape.T.zip |> XYShape.Zipped.sortByX; - let empty = XYShape.T.empty; - let make = (s: DistTypes.discreteShape) => s; - let combine = - (fn, t1: DistTypes.discreteShape, t2: DistTypes.discreteShape) + type t = DistTypes.discreteShape; + + let make = (xyShape, knownIntegralSum): t => {xyShape, knownIntegralSum}; + let shapeMap = (fn, {xyShape, knownIntegralSum}: t): t => { + xyShape: fn(xyShape), + knownIntegralSum, + }; + let getShape = (t: t) => t.xyShape; + let oShapeMap = (fn, {xyShape, knownIntegralSum}: t): option(t) => + fn(xyShape) |> E.O.fmap(make(_, knownIntegralSum)); + + let empty: t = {xyShape: XYShape.T.empty, knownIntegralSum: Some(0.0)}; + let shapeFn = (fn, t: t) => t |> getShape |> fn; + + let lastY = (t: t) => t |> getShape |> XYShape.T.lastY; + + let combineIntegralSums = (combineFn: ((float, float) => option(float)), t1KnownIntegralSum: option(float), t2KnownIntegralSum: option(float)) => { + switch (t1KnownIntegralSum, t2KnownIntegralSum) { + | (None, _) + | (_, None) => None + | (Some(s1), Some(s2)) => combineFn(s1, s2) + }; + }; + 
+ let combine = (combineIntegralSumsFn, fn, t1: DistTypes.discreteShape, t2: DistTypes.discreteShape) : DistTypes.discreteShape => { - XYShape.Combine.combine( - ~xsSelection=ALL_XS, - ~xToYSelection=XYShape.XtoY.stepwiseIfAtX, - ~fn, - t1, - t2, + + let combinedIntegralSum = combineIntegralSums(combineIntegralSumsFn, t1.knownIntegralSum, t2.knownIntegralSum); + + make( + XYShape.Combine.combine( + ~xsSelection=ALL_XS, + ~xToYSelection=XYShape.XtoY.stepwiseIfAtX, + ~fn, // stepwiseIfAtX returns option(float), so this fn needs to handle None, which is what the _default0 wrapper is for + t1.xyShape, + t2.xyShape, + ), + combinedIntegralSum, ); }; let _default0 = (fn, a, b) => fn(E.O.default(0.0, a), E.O.default(0.0, b)); let reduce = (fn, items) => - items |> E.A.fold_left(combine(_default0(fn)), empty); + items |> E.A.fold_left(combine((_, _) => None, _default0(fn)), empty); + // a special version of reduce that adds the results (which should be the most common case by far), + // and conveniently also adds the knownIntegralSums. + let reduceAdd = (fn, items) => + items |> E.A.fold_left(combine((s1, s2) => Some(s1 +. s2), _default0((+.))), empty); + + let updateKnownIntegralSum = (knownIntegralSum, t: t): t => ({...t, knownIntegralSum}); + + let convolve = (fn, t1: t, t2: t) => { + let t1s = t1 |> getShape; + let t2s = t2 |> getShape; + let t1n = t1s |> XYShape.T.length; + let t2n = t2s |> XYShape.T.length; + + let combinedIntegralSum = combineIntegralSums((s1, s2) => Some(s1 *. s2), t1.knownIntegralSum, t2.knownIntegralSum); + + let xToYMap = E.FloatFloatMap.empty(); + + for (i in 0 to t1n - 1) { + for (j in 0 to t2n - 1) { + let x = fn(t1s.xs[i], t2s.xs[j]); + let cv = xToYMap |> E.FloatFloatMap.get(x) |> E.O.default(0.); + let my = t1s.ys[i] *. t2s.ys[j]; + let _ = Belt.MutableMap.set(xToYMap, x, cv +. 
my); + (); + } + } + + let rxys = xToYMap |> E.FloatFloatMap.toArray |> XYShape.Zipped.sortByX; + + let convolvedShape = XYShape.T.fromZippedArray(rxys); + + make(convolvedShape, combinedIntegralSum); + }; + + let mapY = (~knownIntegralSumFn=(previousKnownIntegralSum => None), fn, t: t) => { + let u = E.O.bind(_, knownIntegralSumFn); + let yMapFn = shapeMap(XYShape.T.mapY(fn)); + + t |> yMapFn |> updateKnownIntegralSum(u(t.knownIntegralSum)); + }; + + let scaleBy = (~scale=1.0, ~knownIntegralSum=None, t: t): t => + t |> mapY((r: float) => r *. scale) |> updateKnownIntegralSum(knownIntegralSum); + + let truncate = (leftCutoff: option(float), rightCutoff: option(float), t: t) => { + let truncatedShape = + t + |> getShape + |> XYShape.T.zip + |> XYShape.Zipped.filterByX(x => x >= E.O.default(neg_infinity, leftCutoff) || x <= E.O.default(infinity, rightCutoff)) + |> XYShape.T.fromZippedArray; + + make(truncatedShape, None); + }; module T = Dist({ @@ -205,33 +406,54 @@ module Discrete = { let integral = (~cache, t) => switch (cache) { | Some(c) => c - | None => Continuous.make(`Stepwise, XYShape.T.accumulateYs((+.), t)) + | None => + Continuous.make( + `Stepwise, + XYShape.T.accumulateYs((+.), getShape(t)), + None, + ) }; - let integralEndY = (~cache, t) => - t |> integral(~cache) |> Continuous.lastY; - let minX = XYShape.T.minX; - let maxX = XYShape.T.maxX; - let toDiscreteProbabilityMass = _ => 1.0; - let mapY = XYShape.T.mapY; + let integralEndY = (~cache, t: t) => + t.knownIntegralSum |> E.O.default(t |> integral(~cache) |> Continuous.lastY); + let minX = shapeFn(XYShape.T.minX); + let maxX = shapeFn(XYShape.T.maxX); + let toDiscreteProbabilityMassFraction = _ => 1.0; + let mapY = mapY; let toShape = (t: t): DistTypes.shape => Discrete(t); let toContinuous = _ => None; let toDiscrete = t => Some(t); - let toScaledContinuous = _ => None; - let toScaledDiscrete = t => Some(t); - let truncate = (~cache=None, i, t: t): DistTypes.discreteShape => - t - |> XYShape.T.zip - 
|> XYShape.Zipped.sortByY - |> Belt.Array.reverse - |> Belt.Array.slice(_, ~offset=0, ~len=i) - |> XYShape.Zipped.sortByX - |> XYShape.T.fromZippedArray; - let xToY = (f, t) => { - XYShape.XtoY.stepwiseIfAtX(f, t) + let normalize = (t: t): t => { + let discreteIntegralSum = integralEndY(~cache=None, t); + + scaleBy(~scale=(1. /. discreteIntegralSum), ~knownIntegralSum=Some(1.0), t); + }; + + let normalizedToContinuous = _ => None; + let normalizedToDiscrete = t => Some(t); // TODO: this should be normalized! + + let downsample = (~cache=None, i, t: t): t => { + // It's not clear how to downsample a set of discrete points in a meaningful way. + // The best we can do is to clip off the smallest values. + let clippedShape = + t + |> getShape + |> XYShape.T.zip + |> XYShape.Zipped.sortByY + |> Belt.Array.reverse + |> Belt.Array.slice(_, ~offset=0, ~len=i) + |> XYShape.Zipped.sortByX + |> XYShape.T.fromZippedArray; + + make(clippedShape, None); // if someone needs the sum, they'll have to recompute it + }; + + let xToY = (f, t) => + t + |> getShape + |> XYShape.XtoY.stepwiseIfAtX(f) |> E.O.default(0.0) |> DistTypes.MixedPoint.makeDiscrete; - }; let integralXtoY = (~cache, f, t) => t @@ -245,49 +467,64 @@ module Discrete = { |> Continuous.getShape |> XYShape.YtoX.linear(f); - let mean = (t: t): float => - E.A.reducei(t.xs, 0.0, (acc, x, i) => acc +. x *. t.ys[i]); + let mean = (t: t): float => { + let s = getShape(t); + E.A.reducei(s.xs, 0.0, (acc, x, i) => acc +. x *. s.ys[i]); + }; let variance = (t: t): float => { let getMeanOfSquares = t => - mean(XYShape.Analysis.squareXYShape(t)); + t |> shapeMap(XYShape.Analysis.squareXYShape) |> mean; XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares); }; }); + }; // TODO: I think this shouldn't assume continuous/discrete are normalized to 1.0, and thus should not need the discreteProbabilityMassFraction being separate. 
module Mixed = { type t = DistTypes.mixedShape; - let make = - (~continuous, ~discrete, ~discreteProbabilityMassFraction) - : DistTypes.mixedShape => { + let make = (~continuous, ~discrete): t => { continuous, discrete, - discreteProbabilityMassFraction, }; - // todo: Put into scaling module - let scaleDiscreteFn = - ({discreteProbabilityMassFraction}: DistTypes.mixedShape, f) => - f *. discreteProbabilityMassFraction; + let totalLength = (t: t): int => { + let continuousLength = t.continuous |> Continuous.getShape |> XYShape.T.length; + let discreteLength = t.discrete |> Discrete.getShape |> XYShape.T.length; + + continuousLength + discreteLength; + }; + + // TODO: Put into scaling module + //let normalizeMixedPoint = (t, f) => f *. discreteProbabilityMassFraction;*/ //TODO: Warning: This currently computes the integral, which is expensive. - let scaleContinuousFn = + /*let scaleContinuousFn = ({discreteProbabilityMassFraction}: DistTypes.mixedShape, f) => - f *. (1.0 -. discreteProbabilityMassFraction); + f *. (1.0 -. discreteProbabilityMassFraction); */ //TODO: Warning: This currently computes the integral, which is expensive. - let scaleContinuous = ({discreteProbabilityMassFraction}: t, continuous) => - continuous - |> Continuous.T.scaleToIntegralSum( - ~intendedSum=1.0 -. discreteProbabilityMassFraction, - ); - let scaleDiscrete = ({discreteProbabilityMassFraction}: t, disrete) => - disrete - |> Discrete.T.scaleToIntegralSum( - ~intendedSum=discreteProbabilityMassFraction, - ); + // Normalizes to 1.0. + /*let scaleContinuous = ({discreteProbabilityMassFraction}: t, continuous) => + // get only the continuous, and scale it to the respective + continuous + |> Continuous.T.scaleToIntegralSum( + ~intendedSum=1.0 -. 
discreteProbabilityMassFraction, + ); + + let scaleDiscrete = ({discreteProbabilityMassFraction}: t, disrete) => + disrete + |> Discrete.T.scaleToIntegralSum( + ~intendedSum=discreteProbabilityMassFraction, + );*/ + + let truncate = (leftCutoff: option(float), rightCutoff: option(float), {discrete, continuous}: t) => { + let truncatedDiscrete = Discrete.truncate(leftCutoff, rightCutoff, discrete); + let truncatedContinuous = Continuous.truncate(leftCutoff, rightCutoff, continuous); + + make(~discrete=truncatedDiscrete, ~continuous=truncatedContinuous); + }; module T = Dist({ @@ -301,98 +538,92 @@ module Mixed = { let toShape = (t: t): DistTypes.shape => Mixed(t); let toContinuous = ({continuous}: t) => Some(continuous); let toDiscrete = ({discrete}: t) => Some(discrete); - let toDiscreteProbabilityMass = ({discreteProbabilityMassFraction}: t) => discreteProbabilityMassFraction; - let xToY = (f, {discrete, continuous} as t: t) => { - let c = - continuous - |> Continuous.T.xToY(f) - |> DistTypes.MixedPoint.fmap(scaleContinuousFn(t)); - let d = - discrete - |> Discrete.T.xToY(f) - |> DistTypes.MixedPoint.fmap(scaleDiscreteFn(t)); - DistTypes.MixedPoint.add(c, d); + let normalize = (t: t): t => { + let continuousIntegralSum = Continuous.T.Integral.sum(~cache=None, t.continuous); + let discreteIntegralSum = Discrete.T.Integral.sum(~cache=None, t.discrete); + let totalIntegralSum = continuousIntegralSum +. discreteIntegralSum; + + let newContinuousSum = continuousIntegralSum /. totalIntegralSum; + let newDiscreteSum = discreteIntegralSum /. totalIntegralSum; + + let normalizedContinuous = Continuous.scaleBy(~scale=(1. /. newContinuousSum), ~knownIntegralSum=Some(newContinuousSum), t.continuous); + let normalizedDiscrete = Discrete.scaleBy(~scale=(1. /. 
newDiscreteSum), ~knownIntegralSum=Some(newDiscreteSum), t.discrete); + + make(~continuous=normalizedContinuous, ~discrete=normalizedDiscrete); }; - // Warning: It's not clear how to update the discreteProbabilityMassFraction, so this may create small errors. - let truncate = - ( - ~cache=None, - count, - {discrete, continuous, discreteProbabilityMassFraction}: t, - ) - : t => { - { - discrete: - Discrete.T.truncate( - int_of_float( - float_of_int(count) *. discreteProbabilityMassFraction, - ), - discrete, - ), - continuous: - Continuous.T.truncate( - int_of_float( - float_of_int(count) - *. (1.0 -. discreteProbabilityMassFraction), - ), - continuous, - ), - discreteProbabilityMassFraction, - }; + let xToY = (x, t: t) => { + // This evaluates the mixedShape at x, interpolating if necessary. + // Note that we normalize entire mixedShape first. + let {continuous, discrete}: t = normalize(t); + let c = Continuous.T.xToY(x, continuous); + let d = Discrete.T.xToY(x, discrete); + DistTypes.MixedPoint.add(c, d); // "add" here just combines the two values into a single MixedPoint. }; - let toScaledContinuous = ({continuous} as t: t) => - Some(scaleContinuous(t, continuous)); + let toDiscreteProbabilityMassFraction = ({discrete, continuous}: t) => { + let discreteIntegralSum = Discrete.T.Integral.sum(~cache=None, discrete); + let continuousIntegralSum = Continuous.T.Integral.sum(~cache=None, continuous); + let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum; - let toScaledDiscrete = ({discrete} as t: t) => - Some(scaleDiscrete(t, discrete)); + discreteIntegralSum /. totalIntegralSum; + }; + + let downsample = (~cache=None, count, {discrete, continuous}: t): t => { + // We will need to distribute the new xs fairly between the discrete and continuous shapes. + // The easiest way to do this is to simply go by the previous probability masses. 
+ + // The cache really isn't helpful here, because we would need two separate caches + let discreteIntegralSum = Discrete.T.Integral.sum(~cache=None, discrete); + let continuousIntegralSum = Continuous.T.Integral.sum(~cache=None, continuous); + let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum; + + let downsampledDiscrete = + Discrete.T.downsample( + int_of_float(float_of_int(count) *. (discreteIntegralSum /. totalIntegralSum)), + discrete, + ); + + let downsampledContinuous = + Continuous.T.downsample( + int_of_float( + float_of_int(count) *. (continuousIntegralSum /. totalIntegralSum), + ), + continuous, + ); + + {discrete: downsampledDiscrete, continuous: downsampledContinuous}; + }; + + let normalizedToContinuous = (t: t) => + Some(normalize(t).continuous); + + let normalizedToDiscrete = ({discrete} as t: t) => + Some(normalize(t).discrete); let integral = ( ~cache, - {continuous, discrete, discreteProbabilityMassFraction}: t, + {continuous, discrete}: t, ) => { switch (cache) { | Some(cache) => cache - | None => - let scaleContinuousBy = - (1.0 -. discreteProbabilityMassFraction) - /. (continuous |> Continuous.T.Integral.sum(~cache=None)); + | None => { + // note: if the underlying shapes aren't normalized, then these integrals won't be either! + let continuousIntegral = Continuous.T.Integral.get(~cache=None, continuous); + let discreteIntegral = Discrete.T.Integral.get(~cache=None, discrete); - let scaleDiscreteBy = - discreteProbabilityMassFraction - /. 
( - discrete - |> Discrete.T.Integral.get(~cache=None) - |> Continuous.toLinear - |> E.O.fmap(Continuous.lastY) - |> E.O.toExn("") - ); - - let cont = - continuous - |> Continuous.T.Integral.get(~cache=None) - |> Continuous.T.scaleBy(~scale=scaleContinuousBy); - - let dist = - discrete - |> Discrete.T.Integral.get(~cache=None) - |> Continuous.toLinear - |> E.O.toExn("") - |> Continuous.T.scaleBy(~scale=scaleDiscreteBy); - - let result = - Continuous.make( - `Linear, - XYShape.Combine.combineLinear( - ~fn=(+.), - Continuous.getShape(cont), - Continuous.getShape(dist), - ), - ); - result; + Continuous.make( + `Linear, + XYShape.Combine.combineLinear( + ~fn=(+.), + Continuous.getShape(continuousIntegral), + Continuous.getShape(discreteIntegral), + ), + None, + ); + } }; }; @@ -414,80 +645,153 @@ module Mixed = { |> XYShape.YtoX.linear(f); }; - // TODO: This part really needs to be rethought, I'm quite sure this is just broken. Mapping Ys would change the desired discreteProbabilityMassFraction. - let mapY = - (fn, {discrete, continuous, discreteProbabilityMassFraction}: t): t => { + // This pipes all ys (continuous and discrete) through fn. + // If mapY is a linear operation, we might be able to update the knownIntegralSums as well; + // if not, they'll be set to None. 
+ let mapY = (~knownIntegralSumFn=(previousIntegralSum => None), fn, {discrete, continuous}: t): t => { + let u = E.O.bind(_, knownIntegralSumFn); + + let yMappedDiscrete = + discrete |> Discrete.T.mapY(fn) |> Discrete.updateKnownIntegralSum(u(discrete.knownIntegralSum)); + + let yMappedContinuous = + continuous |> Continuous.T.mapY(fn) |> Continuous.updateKnownIntegralSum(u(continuous.knownIntegralSum)); + { - discrete: Discrete.T.mapY(fn, discrete), + discrete: yMappedDiscrete, continuous: Continuous.T.mapY(fn, continuous), - discreteProbabilityMassFraction, }; }; - let mean = (t: t): float => { - let discreteProbabilityMassFraction = - t.discreteProbabilityMassFraction; - switch (discreteProbabilityMassFraction) { - | 1.0 => Discrete.T.mean(t.discrete) - | 0.0 => Continuous.T.mean(t.continuous) - | _ => - Discrete.T.mean(t.discrete) - *. discreteProbabilityMassFraction - +. Continuous.T.mean(t.continuous) - *. (1.0 -. discreteProbabilityMassFraction) - }; + let mean = ({discrete, continuous}: t): float => { + let discreteMean = Discrete.T.mean(discrete); + let continuousMean = Continuous.T.mean(continuous); + + // the combined mean is the weighted sum of the two: + let discreteIntegralSum = Discrete.T.Integral.sum(~cache=None, discrete); + let continuousIntegralSum = Continuous.T.Integral.sum(~cache=None, continuous); + let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum; + + (discreteMean *. discreteIntegralSum +. continuousMean *. continuousIntegralSum) /. totalIntegralSum; }; - let variance = (t: t): float => { - let discreteProbabilityMassFraction = - t.discreteProbabilityMassFraction; - let getMeanOfSquares = (t: t) => { - Discrete.T.mean(XYShape.Analysis.squareXYShape(t.discrete)) - *. t.discreteProbabilityMassFraction - +. XYShape.Analysis.getMeanOfSquaresContinuousShape(t.continuous) - *. (1.0 -. 
t.discreteProbabilityMassFraction); + let variance = ({discrete, continuous} as t: t): float => { + // the combined mean is the weighted sum of the two: + let discreteIntegralSum = Discrete.T.Integral.sum(~cache=None, discrete); + let continuousIntegralSum = Continuous.T.Integral.sum(~cache=None, continuous); + let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum; + + let getMeanOfSquares = ({discrete, continuous} as t: t) => { + let discreteMean = discrete |> Discrete.shapeMap(XYShape.Analysis.squareXYShape) |> Discrete.T.mean; + let continuousMean = continuous |> XYShape.Analysis.getMeanOfSquaresContinuousShape; + (discreteMean *. discreteIntegralSum +. continuousMean *. continuousIntegralSum) /. totalIntegralSum }; - switch (discreteProbabilityMassFraction) { - | 1.0 => Discrete.T.variance(t.discrete) - | 0.0 => Continuous.T.variance(t.continuous) - | _ => - XYShape.Analysis.getVarianceDangerously( - t, - mean, - getMeanOfSquares, - ) + + switch (discreteIntegralSum /. totalIntegralSum) { + | 1.0 => Discrete.T.variance(discrete) + | 0.0 => Continuous.T.variance(continuous) + | _ => XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares) }; }; }); + + let convolve = (fn: ((float, float) => float), t1: t, t2: t): t => { + // Discrete convolution can cause a huge increase in the number of samples, + // so we'll first downsample. + + // An alternative (to be explored in the future) may be to first perform the full convolution and then to downsample the result; + // to use non-uniform fast Fourier transforms (for addition only), add web workers or gpu.js, etc. ... + + // TODO: make this optional or customizable + let downsampleIfTooLarge = (t: t) => { + let sqtl = sqrt(float_of_int(totalLength(t))); + sqtl > 10. ? 
T.downsample(int_of_float(sqtl), t) : t; + }; + + let t1d = downsampleIfTooLarge(t1); + let t2d = downsampleIfTooLarge(t2); + + // continuous (*) continuous => continuous, but also + // discrete (*) continuous => continuous (and vice versa). We have to take care of all combos and then combine them: + let ccConvResult = Continuous.convolve(fn, t1d.continuous, t2d.continuous); + let dcConvResult = Continuous.convolveWithDiscrete(fn, t2d.continuous, t1d.discrete); + let cdConvResult = Continuous.convolveWithDiscrete(fn, t1d.continuous, t2d.discrete); + let continuousConvResult = Continuous.reduce((+.), [|ccConvResult, dcConvResult, cdConvResult|]); + + // ... finally, discrete (*) discrete => discrete, obviously: + let discreteConvResult = Discrete.convolve(fn, t1d.discrete, t2d.discrete); + + {discrete: discreteConvResult, continuous: continuousConvResult}; + } }; module Shape = { + type t = DistTypes.shape; + let mapToAll = ((fn1, fn2, fn3), t: t) => + switch (t) { + | Mixed(m) => fn1(m) + | Discrete(m) => fn2(m) + | Continuous(m) => fn3(m) + }; + + let fmap = ((fn1, fn2, fn3), t: t): t => + switch (t) { + | Mixed(m) => Mixed(fn1(m)) + | Discrete(m) => Discrete(fn2(m)) + | Continuous(m) => Continuous(fn3(m)) + }; + + let toMixed = mapToAll(( + m => m, + d => Mixed.make(~discrete=d, ~continuous=Continuous.empty), + c => Mixed.make(~discrete=Discrete.empty, ~continuous=c), + )); + + let convolve = (fn, t1: t, t2: t): t => { + Mixed(Mixed.convolve(fn, toMixed(t1), toMixed(t2))); + }; + + let downsample = (~cache=None, i, t) => + fmap(( + Mixed.T.downsample(i), + Discrete.T.downsample(i), + Continuous.T.downsample(i), + ), t); + + let normalize = + fmap(( + Mixed.T.normalize, + Discrete.T.normalize, + Continuous.T.normalize, + )); + + let truncate (leftCutoff, rightCutoff, t): t = + fmap(( + Mixed.truncate(leftCutoff, rightCutoff), + Discrete.truncate(leftCutoff, rightCutoff), + Continuous.truncate(leftCutoff, rightCutoff), + ), t); + module T = Dist({ type t = 
DistTypes.shape; type integral = DistTypes.continuousShape; - let mapToAll = ((fn1, fn2, fn3), t: t) => - switch (t) { - | Mixed(m) => fn1(m) - | Discrete(m) => fn2(m) - | Continuous(m) => fn3(m) - }; - let fmap = ((fn1, fn2, fn3), t: t): t => - switch (t) { - | Mixed(m) => Mixed(fn1(m)) - | Discrete(m) => Discrete(fn2(m)) - | Continuous(m) => Continuous(fn3(m)) - }; - - let xToY = f => + let xToY = (f: float) => mapToAll(( Mixed.T.xToY(f), Discrete.T.xToY(f), Continuous.T.xToY(f), )); + let toShape = (t: t) => t; + + let toContinuous = t => None; + let toDiscrete = t => None; + let downsample = (~cache=None, i, t) => t; + let toDiscreteProbabilityMassFraction = t => 0.0; + let normalize = t => t; let toContinuous = mapToAll(( Mixed.T.toContinuous, @@ -501,31 +805,24 @@ module Shape = { Continuous.T.toDiscrete, )); - let truncate = (~cache=None, i) => - fmap(( - Mixed.T.truncate(i), - Discrete.T.truncate(i), - Continuous.T.truncate(i), + let toDiscreteProbabilityMassFraction = + mapToAll(( + Mixed.T.toDiscreteProbabilityMassFraction, + Discrete.T.toDiscreteProbabilityMassFraction, + Continuous.T.toDiscreteProbabilityMassFraction, )); - let toDiscreteProbabilityMass = + let normalizedToDiscrete = mapToAll(( - Mixed.T.toDiscreteProbabilityMass, - Discrete.T.toDiscreteProbabilityMass, - Continuous.T.toDiscreteProbabilityMass, + Mixed.T.normalizedToDiscrete, + Discrete.T.normalizedToDiscrete, + Continuous.T.normalizedToDiscrete, )); - - let toScaledDiscrete = + let normalizedToContinuous = mapToAll(( - Mixed.T.toScaledDiscrete, - Discrete.T.toScaledDiscrete, - Continuous.T.toScaledDiscrete, - )); - let toScaledContinuous = - mapToAll(( - Mixed.T.toScaledContinuous, - Discrete.T.toScaledContinuous, - Continuous.T.toScaledContinuous, + Mixed.T.normalizedToContinuous, + Discrete.T.normalizedToContinuous, + Continuous.T.normalizedToContinuous, )); let minX = mapToAll((Mixed.T.minX, Discrete.T.minX, Continuous.T.minX)); let integral = (~cache) => { @@ -556,11 +853,11 @@ 
module Shape = { )); }; let maxX = mapToAll((Mixed.T.maxX, Discrete.T.maxX, Continuous.T.maxX)); - let mapY = fn => + let mapY = (~knownIntegralSumFn=(previousIntegralSum => None), fn) => fmap(( - Mixed.T.mapY(fn), - Discrete.T.mapY(fn), - Continuous.T.mapY(fn), + Mixed.T.mapY(~knownIntegralSumFn, fn), + Discrete.T.mapY(~knownIntegralSumFn, fn), + Continuous.T.mapY(~knownIntegralSumFn, fn), )); let mean = (t: t): float => @@ -636,21 +933,30 @@ module DistPlus = { let toShape = toShape; let toContinuous = shapeFn(Shape.T.toContinuous); let toDiscrete = shapeFn(Shape.T.toDiscrete); - // todo: Adjust for total mass. - let toScaledContinuous = (t: t) => { + let normalize = (t: t): t => { + let normalizedShape = + t |> toShape |> Shape.T.normalize; + + t |> updateShape(normalizedShape); + + // TODO: also adjust for domainIncludedProbabilityMass here. + }; + + // TODO: replace this with + let normalizedToContinuous = (t: t) => { t |> toShape - |> Shape.T.toScaledContinuous + |> Shape.T.normalizedToContinuous |> E.O.fmap( Continuous.T.mapY(domainIncludedProbabilityMassAdjustment(t)), ); }; - let toScaledDiscrete = (t: t) => { + let normalizedToDiscrete = (t: t) => { t |> toShape - |> Shape.T.toScaledDiscrete + |> Shape.T.normalizedToDiscrete |> E.O.fmap( Discrete.T.mapY(domainIncludedProbabilityMassAdjustment(t)), ); @@ -664,18 +970,18 @@ module DistPlus = { let minX = shapeFn(Shape.T.minX); let maxX = shapeFn(Shape.T.maxX); - let toDiscreteProbabilityMass = - shapeFn(Shape.T.toDiscreteProbabilityMass); + let toDiscreteProbabilityMassFraction = + shapeFn(Shape.T.toDiscreteProbabilityMassFraction); // This bit is kind of awkward, could probably use rethinking. let integral = (~cache, t: t) => updateShape(Continuous(t.integralCache), t); - let truncate = (~cache=None, i, t) => - updateShape(t |> toShape |> Shape.T.truncate(i), t); + let downsample = (~cache=None, i, t): t => + updateShape(t |> toShape |> Shape.T.downsample(i), t); // todo: adjust for limit, maybe? 
- let mapY = (fn, {shape, _} as t: t): t => - Shape.T.mapY(fn, shape) |> updateShape(_, t); + let mapY = (~knownIntegralSumFn=(previousIntegralSum => None), fn, {shape, _} as t: t): t => + Shape.T.mapY(~knownIntegralSumFn, fn, shape) |> updateShape(_, t); let integralEndY = (~cache as _, t: t) => Shape.T.Integral.sum(~cache=Some(t.integralCache), toShape(t)); diff --git a/src/distPlus/distribution/MixedShapeBuilder.re b/src/distPlus/distribution/MixedShapeBuilder.re index 949a6f20..496e298c 100644 --- a/src/distPlus/distribution/MixedShapeBuilder.re +++ b/src/distPlus/distribution/MixedShapeBuilder.re @@ -8,14 +8,15 @@ type assumptions = { discreteProbabilityMass: option(float), }; -let buildSimple = (~continuous: option(DistTypes.continuousShape), ~discrete): option(DistTypes.shape) => { - let continuous = continuous |> E.O.default(Distributions.Continuous.make(`Linear, {xs: [||], ys: [||]})) +let buildSimple = (~continuous: option(DistTypes.continuousShape), ~discrete: option(DistTypes.discreteShape)): option(DistTypes.shape) => { + let continuous = continuous |> E.O.default(Distributions.Continuous.make(`Linear, {xs: [||], ys: [||]}, Some(0.0))); + let discrete = discrete |> E.O.default(Distributions.Discrete.make({xs: [||], ys: [||]}, Some(0.0))); let cLength = continuous |> Distributions.Continuous.getShape |> XYShape.T.xs |> E.A.length; - let dLength = discrete |> XYShape.T.xs |> E.A.length; + let dLength = discrete |> Distributions.Discrete.getShape |> XYShape.T.xs |> E.A.length; switch (cLength, dLength) { | (0 | 1, 0) => None | (0 | 1, _) => Some(Discrete(discrete)) @@ -23,18 +24,12 @@ let buildSimple = (~continuous: option(DistTypes.continuousShape), ~discrete): o | (_, _) => let discreteProbabilityMassFraction = Distributions.Discrete.T.Integral.sum(~cache=None, discrete); - let discrete = - Distributions.Discrete.T.scaleToIntegralSum(~intendedSum=1.0, discrete); - let continuous = - Distributions.Continuous.T.scaleToIntegralSum( - ~intendedSum=1.0, - 
continuous, - ); + let discrete = Distributions.Discrete.T.normalize(discrete); + let continuous = Distributions.Continuous.T.normalize(continuous); let mixedDist = Distributions.Mixed.make( ~continuous, - ~discrete, - ~discreteProbabilityMassFraction, + ~discrete ); Some(Mixed(mixedDist)); }; @@ -42,7 +37,7 @@ let buildSimple = (~continuous: option(DistTypes.continuousShape), ~discrete): o // TODO: Delete, only being used in tests -let build = (~continuous, ~discrete, ~assumptions) => +/*let build = (~continuous, ~discrete, ~assumptions) => switch (assumptions) { | { continuous: ADDS_TO_CORRECT_PROBABILITY, @@ -102,4 +97,4 @@ let build = (~continuous, ~discrete, ~assumptions) => ), ); | _ => None - }; \ No newline at end of file + };*/ diff --git a/src/distPlus/distribution/XYShape.re b/src/distPlus/distribution/XYShape.re index cf3600a9..9451fb23 100644 --- a/src/distPlus/distribution/XYShape.re +++ b/src/distPlus/distribution/XYShape.re @@ -17,6 +17,7 @@ module T = { type ts = array(xyShape); let xs = (t: t) => t.xs; let ys = (t: t) => t.ys; + let length = (t: t) => E.A.length(t.xs); let empty = {xs: [||], ys: [||]}; let minX = (t: t) => t |> xs |> E.A.Sorted.min |> extImp; let maxX = (t: t) => t |> xs |> E.A.Sorted.max |> extImp; @@ -154,7 +155,9 @@ module XsConversion = { let proportionByProbabilityMass = (newLength: int, integral: T.t, t: T.t): T.t => { - equallyDivideXByMass(newLength, integral) |> _replaceWithXs(_, t); + integral + |> equallyDivideXByMass(newLength) // creates a new set of xs at evenly spaced percentiles + |> _replaceWithXs(_, t); // linearly interpolates new ys for the new xs }; }; @@ -164,6 +167,7 @@ module Zipped = { let compareXs = ((x1, _), (x2, _)) => x1 > x2 ? 
1 : 0; let sortByY = (t: zipped) => t |> E.A.stableSortBy(_, compareYs); let sortByX = (t: zipped) => t |> E.A.stableSortBy(_, compareXs); + let filterByX = (testFn: (float => bool), t: zipped) => t |> E.A.filter(((x, _)) => testFn(x)); }; module Combine = { @@ -253,8 +257,8 @@ module Range = { Belt.Array.set( cumulativeY, x + 1, - (xs[x + 1] -. xs[x]) - *. ((ys[x] +. ys[x + 1]) /. 2.) + (xs[x + 1] -. xs[x]) // dx + *. ((ys[x] +. ys[x + 1]) /. 2.) // (1/2) * (avgY) +. cumulativeY[x], ); (); diff --git a/src/distPlus/renderers/RenderTypes.re b/src/distPlus/renderers/RenderTypes.re index 99a53aae..c94ca69a 100644 --- a/src/distPlus/renderers/RenderTypes.re +++ b/src/distPlus/renderers/RenderTypes.re @@ -43,7 +43,7 @@ module ShapeRenderer = { module Symbolic = { type inputs = {length: int}; type outputs = { - graph: SymbolicDist.distTree, + graph: TreeNode.treeNode, shape: DistTypes.shape, }; let make = (graph, shape) => {graph, shape}; diff --git a/src/distPlus/renderers/ShapeRenderer.re b/src/distPlus/renderers/ShapeRenderer.re index c6f3dc0e..8542ba4a 100644 --- a/src/distPlus/renderers/ShapeRenderer.re +++ b/src/distPlus/renderers/ShapeRenderer.re @@ -21,7 +21,7 @@ let runSymbolic = (guesstimatorString, length) => { |> E.R.fmap(g => RenderTypes.ShapeRenderer.Symbolic.make( g, - SymbolicDist.toShape(length, g), + TreeNode.toShape(length, g), ) ); }; @@ -43,4 +43,4 @@ let run = }; Js.log3("IS SOME?", symbolic |> E.R.toOption |> E.O.isSome, symbolic); {symbolic: Some(symbolic), sampling}; -}; \ No newline at end of file +}; diff --git a/src/distPlus/renderers/samplesRenderer/Guesstimator.re b/src/distPlus/renderers/samplesRenderer/Guesstimator.re index e099889f..a08fb591 100644 --- a/src/distPlus/renderers/samplesRenderer/Guesstimator.re +++ b/src/distPlus/renderers/samplesRenderer/Guesstimator.re @@ -4,10 +4,10 @@ type discrete = { ys: array(float), }; -let jsToDistDiscrete = (d: discrete): DistTypes.discreteShape => { +let jsToDistDiscrete = (d: discrete): 
DistTypes.discreteShape => {xyShape: { xs: xsGet(d), ys: ysGet(d), -}; +}, knownIntegralSum: None}; [@bs.module "./GuesstimatorLibrary.js"] -external stringToSamples: (string, int) => array(float) = "stringToSamples"; \ No newline at end of file +external stringToSamples: (string, int) => array(float) = "stringToSamples"; diff --git a/src/distPlus/renderers/samplesRenderer/Samples.re b/src/distPlus/renderers/samplesRenderer/Samples.re index 7318a9dd..28f7bdce 100644 --- a/src/distPlus/renderers/samplesRenderer/Samples.re +++ b/src/distPlus/renderers/samplesRenderer/Samples.re @@ -115,11 +115,12 @@ module T = { Array.fast_sort(compare, samples); let (continuousPart, discretePart) = E.A.Sorted.Floats.split(samples); let length = samples |> E.A.length |> float_of_int; - let discrete: DistTypes.xyShape = + let discrete: DistTypes.discreteShape = discretePart |> E.FloatFloatMap.fmap(r => r /. length) |> E.FloatFloatMap.toArray - |> XYShape.T.fromZippedArray; + |> XYShape.T.fromZippedArray + |> Distributions.Discrete.make(_, None); let pdf = continuousPart |> E.A.length > 5 @@ -149,14 +150,14 @@ module T = { ~outputXYPoints=samplingInputs.outputXYPoints, formatUnitWidth(usedUnitWidth), ) - |> Distributions.Continuous.make(`Linear) + |> Distributions.Continuous.make(`Linear, _, None) |> (r => Some((r, foo))); } : None; let shape = MixedShapeBuilder.buildSimple( ~continuous=pdf |> E.O.fmap(fst), - ~discrete, + ~discrete=Some(discrete), ); let samplesParse: RenderTypes.ShapeRenderer.Sampling.outputs = { continuousParseParams: pdf |> E.O.fmap(snd), @@ -196,4 +197,4 @@ module T = { Some(fromSamples(~samplingInputs, samples)); }; }; -}; \ No newline at end of file +}; diff --git a/src/distPlus/symbolic/MathJsParser.re b/src/distPlus/symbolic/MathJsParser.re index f145253e..5353aba0 100644 --- a/src/distPlus/symbolic/MathJsParser.re +++ b/src/distPlus/symbolic/MathJsParser.re @@ -88,69 +88,69 @@ module MathAdtToDistDst = { ); }; - let normal: array(arg) => 
result(SymbolicDist.distTree, string) = + let normal: array(arg) => result(TreeNode.treeNode, string) = fun | [|Value(mean), Value(stdev)|] => - Ok(`Simple(`Normal({mean, stdev}))) + Ok(`DistData(`Symbolic(`Normal({mean, stdev})))) | _ => Error("Wrong number of variables in normal distribution"); - let lognormal: array(arg) => result(SymbolicDist.distTree, string) = + let lognormal: array(arg) => result(TreeNode.treeNode, string) = fun - | [|Value(mu), Value(sigma)|] => Ok(`Simple(`Lognormal({mu, sigma}))) + | [|Value(mu), Value(sigma)|] => Ok(`DistData(`Symbolic(`Lognormal({mu, sigma})))) | [|Object(o)|] => { let g = Js.Dict.get(o); switch (g("mean"), g("stdev"), g("mu"), g("sigma")) { | (Some(Value(mean)), Some(Value(stdev)), _, _) => - Ok(`Simple(SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev))) + Ok(`DistData(`Symbolic(SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev)))) | (_, _, Some(Value(mu)), Some(Value(sigma))) => - Ok(`Simple(`Lognormal({mu, sigma}))) + Ok(`DistData(`Symbolic(`Lognormal({mu, sigma})))) | _ => Error("Lognormal distribution would need mean and stdev") }; } | _ => Error("Wrong number of variables in lognormal distribution"); - let to_: array(arg) => result(SymbolicDist.distTree, string) = + let to_: array(arg) => result(TreeNode.treeNode, string) = fun | [|Value(low), Value(high)|] when low <= 0.0 && low < high=> { - Ok(`Simple(SymbolicDist.Normal.from90PercentCI(low, high))); + Ok(`DistData(`Symbolic(SymbolicDist.Normal.from90PercentCI(low, high)))); } | [|Value(low), Value(high)|] when low < high => { - Ok(`Simple(SymbolicDist.Lognormal.from90PercentCI(low, high))); + Ok(`DistData(`Symbolic(SymbolicDist.Lognormal.from90PercentCI(low, high)))); } | [|Value(_), Value(_)|] => Error("Low value must be less than high value.") | _ => Error("Wrong number of variables in lognormal distribution"); - let uniform: array(arg) => result(SymbolicDist.distTree, string) = + let uniform: array(arg) => result(TreeNode.treeNode, string) = fun - | 
[|Value(low), Value(high)|] => Ok(`Simple(`Uniform({low, high}))) + | [|Value(low), Value(high)|] => Ok(`DistData(`Symbolic(`Uniform({low, high})))) | _ => Error("Wrong number of variables in lognormal distribution"); - let beta: array(arg) => result(SymbolicDist.distTree, string) = + let beta: array(arg) => result(TreeNode.treeNode, string) = fun - | [|Value(alpha), Value(beta)|] => Ok(`Simple(`Beta({alpha, beta}))) + | [|Value(alpha), Value(beta)|] => Ok(`DistData(`Symbolic(`Beta({alpha, beta})))) | _ => Error("Wrong number of variables in lognormal distribution"); - let exponential: array(arg) => result(SymbolicDist.distTree, string) = + let exponential: array(arg) => result(TreeNode.treeNode, string) = fun - | [|Value(rate)|] => Ok(`Simple(`Exponential({rate: rate}))) + | [|Value(rate)|] => Ok(`DistData(`Symbolic(`Exponential({rate: rate})))) | _ => Error("Wrong number of variables in Exponential distribution"); - let cauchy: array(arg) => result(SymbolicDist.distTree, string) = + let cauchy: array(arg) => result(TreeNode.treeNode, string) = fun | [|Value(local), Value(scale)|] => - Ok(`Simple(`Cauchy({local, scale}))) + Ok(`DistData(`Symbolic(`Cauchy({local, scale})))) | _ => Error("Wrong number of variables in cauchy distribution"); - let triangular: array(arg) => result(SymbolicDist.distTree, string) = + let triangular: array(arg) => result(TreeNode.treeNode, string) = fun | [|Value(low), Value(medium), Value(high)|] => - Ok(`Simple(`Triangular({low, medium, high}))) + Ok(`DistData(`Symbolic(`Triangular({low, medium, high})))) | _ => Error("Wrong number of variables in triangle distribution"); let multiModal = ( - args: array(result(SymbolicDist.distTree, string)), + args: array(result(TreeNode.treeNode, string)), weights: option(array(float)), ) => { let weights = weights |> E.O.default([||]); @@ -158,17 +158,9 @@ module MathAdtToDistDst = { args |> E.A.fmap( fun - | Ok(`Simple(d)) => Ok(`Simple(d)) - | Ok(`Combination(t1, t2, op)) => Ok(`Combination(t1, 
t2, op)) - | Ok(`PointwiseSum(t1, t2)) => Ok(`PointwiseSum(t1, t2)) - | Ok(`PointwiseProduct(t1, t2)) => Ok(`PointwiseProduct(t1, t2)) - | Ok(`Normalize(t)) => Ok(`Normalize(t)) - | Ok(`LeftTruncate(t, x)) => Ok(`LeftTruncate(t, x)) - | Ok(`RightTruncate(t, x)) => Ok(`RightTruncate(t, x)) - | Ok(`Render(t)) => Ok(`Render(t)) - | Error(e) => Error(e) - | _ => Error("Unexpected dist") - ); + | Ok(a) => a + | Error(e) => Error(e) + ); let firstWithError = dists |> Belt.Array.getBy(_, Belt.Result.isError); let withoutErrors = dists |> E.A.fmap(E.R.toOption) |> E.A.O.concatSomes; @@ -182,7 +174,7 @@ module MathAdtToDistDst = { |> E.A.fmapi((index, t) => { let w = weights |> E.A.get(_, index) |> E.O.default(1.0); - `VerticalScaling(t, `Simple(`Float(w))) + `Operation(`ScaleBy(`Multiply, t, `DistData(`Symbolic(`Float(w))))) }); let pointwiseSum = components @@ -196,7 +188,7 @@ module MathAdtToDistDst = { }; }; - let arrayParser = (args:array(arg)):result(SymbolicDist.distTree, string) => { + let arrayParser = (args:array(arg)):result(TreeNode.treeNode, string) => { let samples = args |> E.A.fmap( fun @@ -207,18 +199,18 @@ module MathAdtToDistDst = { let outputs = Samples.T.fromSamples(samples); let pdf = outputs.shape |> E.O.bind(_,Distributions.Shape.T.toContinuous); let shape = pdf |> E.O.fmap(pdf => { - let _pdf = Distributions.Continuous.T.scaleToIntegralSum(~cache=None, ~intendedSum=1.0, pdf); + let _pdf = Distributions.Continuous.T.normalize(pdf); let cdf = Distributions.Continuous.T.integral(~cache=None, _pdf); SymbolicDist.ContinuousShape.make(_pdf, cdf) }); switch(shape){ - | Some(s) => Ok(`Simple(`ContinuousShape(s))) + | Some(s) => Ok(`DistData(`Symbolic(`ContinuousShape(s)))) | None => Error("Rendering did not work") } } - let rec functionParser = (r): result(SymbolicDist.distTree, string) => + let rec functionParser = (r): result(TreeNode.treeNode, string) => r |> ( fun @@ -230,7 +222,7 @@ module MathAdtToDistDst = { | Fn({name: "exponential", args}) => 
exponential(args) | Fn({name: "cauchy", args}) => cauchy(args) | Fn({name: "triangular", args}) => triangular(args) - | Value(f) => Ok(`Simple(`Float(f))) + | Value(f) => Ok(`DistData(`Symbolic(`Float(f)))) | Fn({name: "mm", args}) => { let weights = args @@ -283,7 +275,7 @@ module MathAdtToDistDst = { args |> E.A.fmap(functionParser) |> (fun - | [|Ok(l), Ok(`Simple(`Float(0.0)))|] => Error("Division by zero") + | [|Ok(l), Ok(`DistData(`Symbolic(`Float(0.0))))|] => Error("Division by zero") | [|Ok(l), Ok(r)|] => Ok(`Combination(l, r, `DivideOperation)) | _ => Error("Division needs two operands")) } @@ -298,14 +290,14 @@ module MathAdtToDistDst = { args |> E.A.fmap(functionParser) |> (fun - | [|Ok(l), Ok(`Simple(`Float(r)))|] => Ok(`LeftTruncate(l, r)) + | [|Ok(l), Ok(`DistData(`Symbolic(`Float(r))))|] => Ok(`LeftTruncate(l, r)) | _ => Error("leftTruncate needs two arguments: the expression and the cutoff")) } | Fn({name: "rightTruncate", args}) => { args |> E.A.fmap(functionParser) |> (fun - | [|Ok(l), Ok(`Simple(`Float(r)))|] => Ok(`RightTruncate(l, r)) + | [|Ok(l), Ok(`DistData(`Symbolic(`Float(r))))|] => Ok(`RightTruncate(l, r)) | _ => Error("rightTruncate needs two arguments: the expression and the cutoff")) } | Fn({name}) => Error(name ++ ": function not supported") @@ -314,18 +306,18 @@ module MathAdtToDistDst = { } ); - let topLevel = (r): result(SymbolicDist.distTree, string) => + let topLevel = (r): result(TreeNode.treeNode, string) => r |> ( fun | Fn(_) => functionParser(r) - | Value(r) => Ok(`Simple(`Float(r))) + | Value(r) => Ok(`DistData(`Symbolic(`Float(r)))) | Array(r) => arrayParser(r) | Symbol(_) => Error("Symbol not valid as top level") | Object(_) => Error("Object not valid as top level") ); - let run = (r): result(SymbolicDist.distTree, string) => + let run = (r): result(TreeNode.treeNode, string) => r |> MathAdtCleaner.run |> topLevel; }; diff --git a/src/distPlus/symbolic/SymbolicDist.re b/src/distPlus/symbolic/SymbolicDist.re index 
48bc7ad5..8cab8227 100644 --- a/src/distPlus/symbolic/SymbolicDist.re +++ b/src/distPlus/symbolic/SymbolicDist.re @@ -36,7 +36,6 @@ type continuousShape = { cdf: DistTypes.continuousShape, }; -type contType = [ | `Continuous | `Discrete]; type dist = [ | `Normal(normal) @@ -50,29 +49,6 @@ type dist = [ | `Float(float) // Dirac delta at x. Practically useful only in the context of multimodals. ]; -type integral = float; -type cutoffX = float; -type operation = [ - | `AddOperation - | `SubtractOperation - | `MultiplyOperation - | `DivideOperation - | `ExponentiateOperation -]; - -type distTree = [ - | `Simple(dist) - | `Combination(distTree, distTree, operation) - | `PointwiseSum(distTree, distTree) - | `PointwiseProduct(distTree, distTree) - | `VerticalScaling(distTree, distTree) - | `Normalize(distTree) - | `LeftTruncate(distTree, cutoffX) - | `RightTruncate(distTree, cutoffX) - | `Render(distTree) -] -and weightedDists = array((distTree, float)); - module ContinuousShape = { type t = continuousShape; let make = (pdf, cdf): t => {pdf, cdf}; @@ -82,8 +58,9 @@ module ContinuousShape = { Distributions.Continuous.T.xToY(p, t.pdf).continuous; // TODO: Fix the sampling, to have it work correctly. let sample = (t: t) => 3.0; + // TODO: Fix the mean, to have it work correctly. 
+ let mean = (t: t) => Ok(0.0); let toString = t => {j|CustomContinuousShape|j}; - let contType: contType = `Continuous; }; module Exponential = { @@ -91,8 +68,8 @@ module Exponential = { let pdf = (x, t: t) => Jstat.exponential##pdf(x, t.rate); let inv = (p, t: t) => Jstat.exponential##inv(p, t.rate); let sample = (t: t) => Jstat.exponential##sample(t.rate); + let mean = (t: t) => Ok(Jstat.exponential##mean(t.rate)); let toString = ({rate}: t) => {j|Exponential($rate)|j}; - let contType: contType = `Continuous; }; module Cauchy = { @@ -100,8 +77,8 @@ module Cauchy = { let pdf = (x, t: t) => Jstat.cauchy##pdf(x, t.local, t.scale); let inv = (p, t: t) => Jstat.cauchy##inv(p, t.local, t.scale); let sample = (t: t) => Jstat.cauchy##sample(t.local, t.scale); + let mean = (t: t) => Error("Cauchy distributions have no mean value.") let toString = ({local, scale}: t) => {j|Cauchy($local, $scale)|j}; - let contType: contType = `Continuous; }; module Triangular = { @@ -109,8 +86,8 @@ module Triangular = { let pdf = (x, t: t) => Jstat.triangular##pdf(x, t.low, t.high, t.medium); let inv = (p, t: t) => Jstat.triangular##inv(p, t.low, t.high, t.medium); let sample = (t: t) => Jstat.triangular##sample(t.low, t.high, t.medium); + let mean = (t: t) => Ok(Jstat.triangular##mean(t.low, t.high, t.medium)); let toString = ({low, medium, high}: t) => {j|Triangular($low, $medium, $high)|j}; - let contType: contType = `Continuous; }; module Normal = { @@ -124,8 +101,26 @@ module Normal = { }; let inv = (p, t: t) => Jstat.normal##inv(p, t.mean, t.stdev); let sample = (t: t) => Jstat.normal##sample(t.mean, t.stdev); + let mean = (t: t) => Ok(Jstat.normal##mean(t.mean, t.stdev)); let toString = ({mean, stdev}: t) => {j|Normal($mean,$stdev)|j}; - let contType: contType = `Continuous; + + let add = (n1: t, n2: t) => { + let mean = n1.mean +. n2.mean; + let stdev = sqrt(n1.stdev ** 2. +. 
n2.stdev ** 2.); + `Normal({mean, stdev}); + }; + let subtract = (n1: t, n2: t) => { + let mean = n1.mean -. n2.mean; + let stdev = sqrt(n1.stdev ** 2. +. n2.stdev ** 2.); + `Normal({mean, stdev}); + }; + + // TODO: is this useful here at all? would need the integral as well ... + let pointwiseProduct = (n1: t, n2: t) => { + let mean = (n1.mean *. n2.stdev**2. +. n2.mean *. n1.stdev**2.) /. (n1.stdev**2. +. n2.stdev**2.); + let stdev = 1. /. ((1. /. n1.stdev**2.) +. (1. /. n2.stdev**2.)); + `Normal({mean, stdev}); + }; }; module Beta = { @@ -133,17 +128,17 @@ module Beta = { let pdf = (x, t: t) => Jstat.beta##pdf(x, t.alpha, t.beta); let inv = (p, t: t) => Jstat.beta##inv(p, t.alpha, t.beta); let sample = (t: t) => Jstat.beta##sample(t.alpha, t.beta); + let mean = (t: t) => Ok(Jstat.beta##mean(t.alpha, t.beta)); let toString = ({alpha, beta}: t) => {j|Beta($alpha,$beta)|j}; - let contType: contType = `Continuous; }; module Lognormal = { type t = lognormal; let pdf = (x, t: t) => Jstat.lognormal##pdf(x, t.mu, t.sigma); let inv = (p, t: t) => Jstat.lognormal##inv(p, t.mu, t.sigma); + let mean = (t: t) => Ok(Jstat.lognormal##mean(t.mu, t.sigma)); let sample = (t: t) => Jstat.lognormal##sample(t.mu, t.sigma); let toString = ({mu, sigma}: t) => {j|Lognormal($mu,$sigma)|j}; - let contType: contType = `Continuous; let from90PercentCI = (low, high) => { let logLow = Js.Math.log(low); let logHigh = Js.Math.log(high); @@ -163,6 +158,17 @@ module Lognormal = { ); `Lognormal({mu, sigma}); }; + + let multiply = (l1, l2) => { + let mu = l1.mu +. l2.mu; + let sigma = l1.sigma +. l2.sigma; + `Lognormal({mu, sigma}) + }; + let divide = (l1, l2) => { + let mu = l1.mu -. l2.mu; + let sigma = l1.sigma +. 
l2.sigma; + `Lognormal({mu, sigma}) + }; }; module Uniform = { @@ -170,20 +176,20 @@ module Uniform = { let pdf = (x, t: t) => Jstat.uniform##pdf(x, t.low, t.high); let inv = (p, t: t) => Jstat.uniform##inv(p, t.low, t.high); let sample = (t: t) => Jstat.uniform##sample(t.low, t.high); + let mean = (t: t) => Ok(Jstat.uniform##mean(t.low, t.high)); let toString = ({low, high}: t) => {j|Uniform($low,$high)|j}; - let contType: contType = `Continuous; }; module Float = { type t = float; let pdf = (x, t: t) => x == t ? 1.0 : 0.0; let inv = (p, t: t) => p < t ? 0.0 : 1.0; + let mean = (t: t) => Ok(t); let sample = (t: t) => t; let toString = Js.Float.toString; - let contType: contType = `Discrete; }; -module GenericSimple = { +module GenericDistFunctions = { let minCdfValue = 0.0001; let maxCdfValue = 0.9999; @@ -200,19 +206,6 @@ module GenericSimple = { | `ContinuousShape(n) => ContinuousShape.pdf(x, n) }; - let contType = (dist: dist): contType => - switch (dist) { - | `Normal(_) => Normal.contType - | `Triangular(_) => Triangular.contType - | `Exponential(_) => Exponential.contType - | `Cauchy(_) => Cauchy.contType - | `Lognormal(_) => Lognormal.contType - | `Uniform(_) => Uniform.contType - | `Beta(_) => Beta.contType - | `Float(_) => Float.contType - | `ContinuousShape(_) => ContinuousShape.contType - }; - let inv = (x, dist) => switch (dist) { | `Normal(n) => Normal.inv(x, n) @@ -274,22 +267,25 @@ module GenericSimple = { | `Uniform({high}) => high | `Float(n) => n; - /* This function returns a list of x's at which to evaluate the overall distribution (for rendering). - This function is called separately for each individual distribution. 
+ let mean: dist => result(float, string) = + fun + | `Triangular(n) => Triangular.mean(n) + | `Exponential(n) => Exponential.mean(n) + | `Cauchy(n) => Cauchy.mean(n) + | `Normal(n) => Normal.mean(n) + | `Lognormal(n) => Lognormal.mean(n) + | `Beta(n) => Beta.mean(n) + | `ContinuousShape(n) => ContinuousShape.mean(n) + | `Uniform(n) => Uniform.mean(n) + | `Float(n) => Float.mean(n) - When called with xSelection=`Linear, this function will return (n) x's, evenly - distributed between the min and max of the distribution (whatever those are defined to be above). - - When called with xSelection=`ByWeight, this function will distribute the x's such as to - match the cumulative shape of the distribution. This is slower but may give better results. - */ let interpolateXs = - (~xSelection: [ | `Linear | `ByWeight]=`Linear, dist: dist, n) => { + (~xSelection: [ | `Linear | `ByWeight]=`Linear, dist: dist, n) => { switch (xSelection, dist) { | (`Linear, _) => E.A.Floats.range(min(dist), max(dist), n) | (`ByWeight, `Uniform(n)) => - // In `ByWeight mode, uniform distributions get special treatment because we need two x's - // on either side for proper rendering (just left and right of the discontinuities). + // In `ByWeight mode, uniform distributions get special treatment because we need two x's + // on either side for proper rendering (just left and right of the discontinuities). let dx = 0.00001 *. (n.high -. n.low); [|n.low -. dx, n.low +. dx, n.high -. dx, n.high +. 
dx|]; | (`ByWeight, _) => @@ -297,423 +293,5 @@ module GenericSimple = { ys |> E.A.fmap(y => inv(y, dist)); }; }; - - let toShape = - (~xSelection: [ | `Linear | `ByWeight]=`Linear, dist: dist, n) - : DistTypes.shape => { - switch (dist) { - | `ContinuousShape(n) => n.pdf |> Distributions.Continuous.T.toShape - | dist => - let xs = interpolateXs(~xSelection, dist, n); - let ys = xs |> E.A.fmap(r => pdf(r, dist)); - XYShape.T.fromArrays(xs, ys) - |> Distributions.Continuous.make(`Linear, _) - |> Distributions.Continuous.T.toShape; - }; - }; }; -module DistTree = { - type nodeResult = [ - | `Simple(dist) - // RenderedShape: continuous xyShape, discrete xyShape, total value. - | `RenderedShape(DistTypes.continuousShape, DistTypes.discreteShape, integral) - ]; - - let evaluateDistribution = (d: dist): nodeResult => { - `Simple(d) - }; - - // This is a performance bottleneck! - // Using raw JS here so we can use native for loops and access array elements - // directly, without option checks. - let jsContinuousCombinationConvolve: (array(float), array(float), array(float), array(float), float => float => float) => array(array((float, float))) = [%bs.raw - {| - function (s1xs, s1ys, s2xs, s2ys, func) { - // For continuous-continuous convolution, use linear interpolation. 
- // Let's assume we got downsampled distributions - - const outXYShapes = new Array(s1xs.length); - for (let i = 0; i < s1xs.length; i++) { - // create a new distribution - const dxyShape = new Array(s2xs.length); - for (let j = 0; j < s2xs.length; j++) { - dxyShape[j] = [func(s1xs[i], s2xs[j]), (s1ys[i] * s2ys[j])]; - } - outXYShapes[i] = dxyShape; - } - - return outXYShapes; - } - |}]; - - let jsDiscreteCombinationConvolve: (array(float), array(float), array(float), array(float), float => float => float) => (array(float), array(float)) = [%bs.raw - {| - function (s1xs, s1ys, s2xs, s2ys, func) { - const r = new Map(); - - for (let i = 0; i < s1xs.length; i++) { - for (let j = 0; j < s2xs.length; j++) { - - const x = func(s1xs[i], s2xs[j]); - const cv = r.get(x) | 0; - - r.set(x, cv + s1ys[i] * s2ys[j]); // add up the ys, if same x - } - } - - const rxys = [...r.entries()]; - rxys.sort(([x1, y1], [x2, y2]) => x1 - x2); - - const rxs = new Array(rxys.length); - const rys = new Array(rxys.length); - - for (let i = 0; i < rxys.length; i++) { - rxs[i] = rxys[i][0]; - rys[i] = rxys[i][1]; - } - - return [rxs, rys]; - } - |}]; - - let funcFromOp = (op: operation) => { - switch (op) { - | `AddOperation => (+.) - | `SubtractOperation => (-.) - | `MultiplyOperation => (*.) - | `DivideOperation => (/.) 
- | `ExponentiateOperation => (**) - } - } - - let renderDistributionToXYShape = (d: dist, n: int): (DistTypes.continuousShape, DistTypes.discreteShape) => { - // render the distribution into an XY shape - switch (d) { - | `Float(v) => (Distributions.Continuous.empty, {xs: [|v|], ys: [|1.0|]}) - | _ => { - let xs = GenericSimple.interpolateXs(~xSelection=`ByWeight, d, n); - let ys = xs |> E.A.fmap(x => GenericSimple.pdf(x, d)); - (Distributions.Continuous.make(`Linear, {xs: xs, ys: ys}), XYShape.T.empty) - } - } - }; - - let combinationDistributionOfXYShapes = (sc1: DistTypes.continuousShape, // continuous shape - sd1: DistTypes.discreteShape, // discrete shape - sc2: DistTypes.continuousShape, - sd2: DistTypes.discreteShape, func): (DistTypes.continuousShape, DistTypes.discreteShape) => { - - // First, deal with the discrete-discrete convolution: - let (ddxs, ddys) = jsDiscreteCombinationConvolve(sd1.xs, sd1.ys, sd2.xs, sd2.ys, func); - let ddxy: DistTypes.discreteShape = {xs: ddxs, ys: ddys}; - - // Then, do the other three: - let downsample = (sc: DistTypes.continuousShape) => { - let scLength = E.A.length(sc.xyShape.xs); - let scSqLength = sqrt(float_of_int(scLength)); - scSqLength > 10. ? 
Distributions.Continuous.T.truncate(int_of_float(scSqLength), sc) : sc; - }; - - let combinePointConvolutionResults = ca => ca |> E.A.fmap(s => { - // s is an array of (x, y) objects - let (xs, ys) = Belt.Array.unzip(s); - Distributions.Continuous.make(`Linear, {xs, ys}); - }) - |> Distributions.Continuous.reduce((+.)); - - let sc1d = downsample(sc1); - let sc2d = downsample(sc2); - - let ccxy = jsContinuousCombinationConvolve(sc1d.xyShape.xs, sc1d.xyShape.ys, sc2d.xyShape.xs, sc2d.xyShape.ys, func) |> combinePointConvolutionResults; - let dcxy = jsContinuousCombinationConvolve(sc1d.xyShape.xs, sc1d.xyShape.ys, sc2d.xyShape.xs, sc2d.xyShape.ys, func) |> combinePointConvolutionResults; - let cdxy = jsContinuousCombinationConvolve(sc1d.xyShape.xs, sc1d.xyShape.ys, sc2d.xyShape.xs, sc2d.xyShape.ys, func) |> combinePointConvolutionResults; - let continuousShapeSum = Distributions.Continuous.reduce((+.), [|ccxy, dcxy, cdxy|]); - - (continuousShapeSum, ddxy) - }; - - let evaluateCombinationDistribution = (et1: nodeResult, et2: nodeResult, op: operation, n: int) => { - /* return either a Distribution or a RenderedShape. Must integrate to 1. */ - - let func = funcFromOp(op); - switch ((et1, et2, op)) { - /* Known cases: replace symbolic with symbolic distribution */ - | (`Simple(`Float(v1)), `Simple(`Float(v2)), _) => { - `Simple(`Float(func(v1, v2))) - } - - | (`Simple(`Normal(n2)), `Simple(`Float(v1)), `AddOperation) - | (`Simple(`Float(v1)), `Simple(`Normal(n2)), `AddOperation) => { - let n: normal = {mean: v1 +. n2.mean, stdev: n2.stdev}; - `Simple(`Normal(n)) - } - - | (`Simple(`Normal(n1)), `Simple(`Normal(n2)), `AddOperation) => { - let n: normal = {mean: n1.mean +. n2.mean, stdev: sqrt(n1.stdev ** 2. +. n2.stdev ** 2.)}; - `Simple(`Normal(n)); - } - - | (`Simple(`Normal(n1)), `Simple(`Normal(n2)), `SubtractOperation) => { - let n: normal = {mean: n1.mean -. n2.mean, stdev: sqrt(n1.stdev ** 2. +. 
n2.stdev ** 2.)}; - `Simple(`Normal(n)); - } - - | (`Simple(`Lognormal(l1)), `Simple(`Lognormal(l2)), `MultiplyOperation) => { - let l: lognormal = {mu: l1.mu +. l2.mu, sigma: l1.sigma +. l2.sigma}; - `Simple(`Lognormal(l)); - } - - | (`Simple(`Lognormal(l1)), `Simple(`Lognormal(l2)), `DivideOperation) => { - let l: lognormal = {mu: l1.mu -. l2.mu, sigma: l1.sigma +. l2.sigma}; - `Simple(`Lognormal(l)); - } - - - /* General cases: convolve the XYShapes */ - | (`Simple(d1), `Simple(d2), _) => { - let (sc1, sd1) = renderDistributionToXYShape(d1, n); - let (sc2, sd2) = renderDistributionToXYShape(d2, n); - let (sc, sd) = combinationDistributionOfXYShapes(sc1, sd1, sc2, sd2, func); - `RenderedShape(sc, sd, 1.0) - } - | (`Simple(d1), `RenderedShape(sc2, sd2, i2), _) - | (`RenderedShape(sc2, sd2, i2), `Simple(d1), _) => { - let (sc1, sd1) = renderDistributionToXYShape(d1, n); - let (sc, sd) = combinationDistributionOfXYShapes(sc1, sd1, sc2, sd2, func); - `RenderedShape(sc, sd, i2) - } - | (`RenderedShape(sc1, sd1, i1), `RenderedShape(sc2, sd2, i2), _) => { - // sum of two multimodals that have a continuous and discrete each. - let (sc, sd) = combinationDistributionOfXYShapes(sc1, sd1, sc2, sd2, func); - `RenderedShape(sc, sd, i1); - } - } - }; - - let evaluatePointwiseSum = (et1: nodeResult, et2: nodeResult, n: int) => { - switch ((et1, et2)) { - /* Known cases: */ - | (`Simple(`Float(v1)), `Simple(`Float(v2))) => { - v1 == v2 - ? `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.make({xs: [|v1|], ys: [|2.|]}), 2.) - : `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) // TODO: add warning: shouldn't pointwise add scalars. - } - | (`Simple(`Float(v1)), `Simple(d2)) - | (`Simple(d2), `Simple(`Float(v1))) => { - let sd1: DistTypes.xyShape = {xs: [|v1|], ys: [|1.|]}; - let (sc2, sd2) = renderDistributionToXYShape(d2, n); - `RenderedShape(sc2, Distributions.Discrete.reduce((+.), [|sd1, sd2|]), 2.) 
- } - | (`Simple(d1), `Simple(d2)) => { - let (sc1, sd1) = renderDistributionToXYShape(d1, n); - let (sc2, sd2) = renderDistributionToXYShape(d2, n); - `RenderedShape(Distributions.Continuous.reduce((+.), [|sc1, sc2|]), Distributions.Discrete.reduce((+.), [|sd1, sd2|]), 2.) - } - | (`Simple(d1), `RenderedShape(sc2, sd2, i2)) - | (`RenderedShape(sc2, sd2, i2), `Simple(d1)) => { - let (sc1, sd1) = renderDistributionToXYShape(d1, n); - `RenderedShape(Distributions.Continuous.reduce((+.), [|sc1, sc2|]), Distributions.Discrete.reduce((+.), [|sd1, sd2|]), 1. +. i2) - } - | (`RenderedShape(sc1, sd1, i1), `RenderedShape(sc2, sd2, i2)) => { - `RenderedShape(Distributions.Continuous.reduce((+.), [|sc1, sc2|]), Distributions.Discrete.reduce((+.), [|sd1, sd2|]), i1 +. i2) - } - } - }; - - let evaluatePointwiseProduct = (et1: nodeResult, et2: nodeResult, n: int) => { - switch ((et1, et2)) { - /* Known cases: */ - | (`Simple(`Float(v1)), `Simple(`Float(v2))) => { - v1 == v2 - ? `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.make({xs: [|v1|], ys: [|1.|]}), 1.) - : `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) // TODO: add warning: shouldn't pointwise multiply scalars. - } - | (`Simple(`Float(v1)), `Simple(d2)) => { - // evaluate d2 at v1 - let y = GenericSimple.pdf(v1, d2); - `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.make({xs: [|v1|], ys: [|y|]}), y) - } - | (`Simple(d1), `Simple(`Float(v2))) => { - // evaluate d1 at v2 - let y = GenericSimple.pdf(v2, d1); - `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.make({xs: [|v2|], ys: [|y|]}), y) - } - | (`Simple(`Normal(n1)), `Simple(`Normal(n2))) => { - let mean = (n1.mean *. n2.stdev**2. +. n2.mean *. n1.stdev**2.) /. (n1.stdev**2. +. n2.stdev**2.); - let stdev = 1. /. ((1. /. n1.stdev**2.) +. (1. /. n2.stdev**2.)); - let integral = 0; // TODO - `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) 
- } - /* General cases */ - | (`Simple(d1), `Simple(d2)) => { - // NOT IMPLEMENTED YET - // TODO: evaluate integral properly - let (sc1, sd1) = renderDistributionToXYShape(d1, n); - let (sc2, sd2) = renderDistributionToXYShape(d2, n); - `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) - } - | (`Simple(d1), `RenderedShape(sc2, sd2, i2)) => { - // NOT IMPLEMENTED YET - // TODO: evaluate integral properly - let (sc1, sd1) = renderDistributionToXYShape(d1, n); - `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) - } - | (`RenderedShape(sc1, sd1, i1), `Simple(d1)) => { - // NOT IMPLEMENTED YET - // TODO: evaluate integral properly - let (sc2, sd2) = renderDistributionToXYShape(d1, n); - `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) - } - | (`RenderedShape(sc1, sd1, i1), `RenderedShape(sc2, sd2, i2)) => { - `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) - } - } - }; - - - let evaluateNormalize = (et: nodeResult, n: int) => { - // just divide everything by the integral. - switch (et) { - | `RenderedShape(sc, sd, 0.) => { - `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) - } - | `RenderedShape(sc, sd, i) => { - // loop through all ys and divide them by i - let normalize = (s: DistTypes.xyShape): DistTypes.xyShape => {xs: s.xs, ys: s.ys |> E.A.fmap(y => y /. i)}; - - let scn = sc |> Distributions.Continuous.shapeMap(normalize); - let sdn = sd |> normalize; - - `RenderedShape(scn, sdn, 1.) - } - | `Simple(d) => `Simple(d) // any kind of atomic dist should already be normalized -- TODO: THIS IS ACTUALLY FALSE! E.g. 
pointwise product of normal * normal - } - }; - - let evaluateTruncate = (et: nodeResult, xc: cutoffX, compareFunc: (float, float) => bool, n: int) => { - let cut = (s: DistTypes.xyShape): DistTypes.xyShape => { - let (xs, ys) = s.ys - |> Belt.Array.zip(s.xs) - |> E.A.filter(((x, y)) => compareFunc(x, xc)) - |> Belt.Array.unzip - - let cutShape: DistTypes.xyShape = {xs, ys}; - cutShape; - }; - - switch (et) { - | `Simple(d) => { - let (sc, sd) = renderDistributionToXYShape(d, n); - - let scc = sc |> Distributions.Continuous.shapeMap(cut); - let sdc = sd |> cut; - - let newIntegral = 1.; // TODO - - `RenderedShape(scc, sdc, newIntegral); - } - | `RenderedShape(sc, sd, i) => { - let scc = sc |> Distributions.Continuous.shapeMap(cut); - let sdc = sd |> cut; - - let newIntegral = 1.; // TODO - - `RenderedShape(scc, sdc, newIntegral); - } - } - }; - - let evaluateVerticalScaling = (et1: nodeResult, et2: nodeResult, n: int) => { - let scale = (i: float, s: DistTypes.xyShape): DistTypes.xyShape => {xs: s.xs, ys: s.ys |> E.A.fmap(y => y *. i)}; - - switch ((et1, et2)) { - | (`Simple(`Float(v)), `Simple(d)) - | (`Simple(d), `Simple(`Float(v))) => { - let (sc, sd) = renderDistributionToXYShape(d, n); - - let scc = sc |> Distributions.Continuous.shapeMap(scale(v)); - let sdc = sd |> scale(v); - - let newIntegral = v; // TODO - - `RenderedShape(scc, sdc, newIntegral); - } - | (`Simple(`Float(v)), `RenderedShape(sc, sd, i)) - | (`RenderedShape(sc, sd, i), `Simple(`Float(v))) => { - let scc = sc |> Distributions.Continuous.shapeMap(scale(v)); - let sdc = sd |> scale(v); - - let newIntegral = v; // TODO - - `RenderedShape(scc, sdc, newIntegral); - } - | _ => `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) 
// TODO: give warning - } - } - - let renderNode = (et: nodeResult, n: int) => { - switch (et) { - | `Simple(d) => { - let (sc, sd) = renderDistributionToXYShape(d, n); - `RenderedShape(sc, sd, 1.0); - } - | s => s - } - } - - let rec evaluateNode = (treeNode: distTree, n: int): nodeResult => { - // returns either a new symbolic distribution - switch (treeNode) { - | `Simple(d) => evaluateDistribution(d) - | `Combination(t1, t2, op) => evaluateCombinationDistribution(evaluateNode(t1, n), evaluateNode(t2, n), op, n) - | `PointwiseSum(t1, t2) => evaluatePointwiseSum(evaluateNode(t1, n), evaluateNode(t2, n), n) - | `PointwiseProduct(t1, t2) => evaluatePointwiseProduct(evaluateNode(t1, n), evaluateNode(t2, n), n) - | `VerticalScaling(t1, t2) => evaluateVerticalScaling(evaluateNode(t1, n), evaluateNode(t2, n), n) - | `Normalize(t) => evaluateNormalize(evaluateNode(t, n), n) - | `LeftTruncate(t, x) => evaluateTruncate(evaluateNode(t, n), x, (>=), n) - | `RightTruncate(t, x) => evaluateTruncate(evaluateNode(t, n), x, (<=), n) - | `Render(t) => renderNode(evaluateNode(t, n), n) - } - }; - - let toShape = (treeNode: distTree, n: int) => { - let treeShape = evaluateNode(`Render(`Normalize(treeNode)), n); - - switch (treeShape) { - | `Simple(_) => E.O.toExn("No shape found!", None) - | `RenderedShape(sc, sd, _) => { - let shape = MixedShapeBuilder.buildSimple(~continuous=Some(sc), ~discrete=sd); - - shape |> E.O.toExt(""); - } - } - }; - - let rec toString = (treeNode: distTree): string => { - let stringFromOp = op => switch (op) { - | `AddOperation => " + " - | `SubtractOperation => " - " - | `MultiplyOperation => " * " - | `DivideOperation => " / " - | `ExponentiateOperation => "^" - }; - - switch (treeNode) { - | `Simple(d) => GenericSimple.toString(d) - | `Combination(t1, t2, op) => toString(t1) ++ stringFromOp(op) ++ toString(t2) - | `PointwiseSum(t1, t2) => toString(t1) ++ " .+ " ++ toString(t2) - | `PointwiseProduct(t1, t2) => toString(t1) ++ " .* " ++ toString(t2) - | 
`VerticalScaling(t1, t2) => toString(t1) ++ " @ " ++ toString(t2) - | `Normalize(t) => "normalize(" ++ toString(t) ++ ")" - | `LeftTruncate(t, x) => "leftTruncate(" ++ toString(t) ++ ", " ++ string_of_float(x) ++ ")" - | `RightTruncate(t, x) => "rightTruncate(" ++ toString(t) ++ ", " ++ string_of_float(x) ++ ")" - | `Render(t) => toString(t) - } - }; -}; - -let toString = (treeNode: distTree) => DistTree.toString(treeNode) - -let toShape = (sampleCount: int, treeNode: distTree) => - DistTree.toShape(treeNode, sampleCount) //~xSelection=`ByWeight, diff --git a/src/distPlus/symbolic/TreeNode.re b/src/distPlus/symbolic/TreeNode.re new file mode 100644 index 00000000..5f1aece7 --- /dev/null +++ b/src/distPlus/symbolic/TreeNode.re @@ -0,0 +1,414 @@ +/* This module represents a tree node. */ + +/* TreeNodes are either Data (i.e. symbolic or rendered distributions) or Operations. */ +type treeNode = [ + | `DistData(distData) + | `Operation(operation) +] and distData = [ + | `Symbolic(SymbolicDist.dist) + | `RenderedShape(DistTypes.shape) +] and operation = [ + // binary operations + | `StandardOperation(standardOperation, treeNode, treeNode) + | `PointwiseOperation(pointwiseOperation, treeNode, treeNode) + | `ScaleOperation(scaleOperation, treeNode, scaleBy) + // unary operations + | `Render(treeNode) // always evaluates to `DistData(`RenderedShape(...)) + | `Truncate(leftCutoff, rightCutoff, treeNode) + | `Normalize(treeNode) + // direct evaluations of dists (e.g. 
cdf, sample) + | `FloatFromDist(distToFloatOperation, treeNode) +] and standardOperation = [ + | `Add + | `Multiply + | `Subtract + | `Divide + | `Exponentiate +] and pointwiseOperation = [ + | `Add + | `Multiply +] and scaleOperation = [ + | `Multiply + | `Log +] +and scaleBy = treeNode and leftCutoff = option(float) and rightCutoff = option(float) +and distToFloatOperation = [ + | `Pdf(float) + | `Cdf(float) + | `Inv(float) + | `Sample +]; + +module TreeNode = { + type t = treeNode; + type simplifier = treeNode => result(treeNode, string); + + type renderParams = { + operationToDistData: (int, operation) => result(t, string), + sampleCount: int, + } + + let rec renderToShape = (renderParams, t: t): result(DistTypes.shape, string) => { + switch (t) { + | `DistData(`RenderedShape(s)) => Ok(s) // already a rendered shape, we're done here + | `DistData(`Symbolic(d)) => + switch (d) { + | `Float(v) => + Ok(Discrete(Distributions.Discrete.make({xs: [|v|], ys: [|1.0|]}, Some(1.0)))); + | _ => + let xs = SymbolicDist.GenericDistFunctions.interpolateXs(~xSelection=`ByWeight, d, renderParams.sampleCount); + let ys = xs |> E.A.fmap(x => SymbolicDist.GenericDistFunctions.pdf(x, d)); + Ok(Continuous(Distributions.Continuous.make(`Linear, {xs, ys}, Some(1.0)))); + } + | `Operation(op) => E.R.bind(renderParams.operationToDistData(renderParams.sampleCount, op), renderToShape(renderParams)) + }; + }; + + /* The following modules encapsulate everything we can do with + * different kinds of operations. */ + + /* Given two random variables A and B, this returns the distribution + of a new variable that is the result of the operation on A and B. + For instance, normal(0, 1) + normal(1, 1) -> normal(1, 2). + In general, this is implemented via convolution. */ + module StandardOperation = { + let funcFromOp: (standardOperation, float, float) => float = + fun + | `Add => (+.) + | `Subtract => (-.) + | `Multiply => ( *. ) + | `Divide => (/.) 
+ | `Exponentiate => ( ** ); + + module Simplify = { + let tryCombiningFloats: simplifier = + fun + | `Operation( + `StandardOperation( + `Divide, + `DistData(`Symbolic(`Float(v1))), + `DistData(`Symbolic(`Float(0.))), + ), + ) => + Error("Cannot divide $v1 by zero.") + | `Operation( + `StandardOperation( + standardOp, + `DistData(`Symbolic(`Float(v1))), + `DistData(`Symbolic(`Float(v2))), + ), + ) => { + let func = funcFromOp(standardOp); + Ok(`DistData(`Symbolic(`Float(func(v1, v2))))); + } + | t => Ok(t); + + let tryCombiningNormals: simplifier = + fun + | `Operation( + `StandardOperation( + `Add, + `DistData(`Symbolic(`Normal(n1))), + `DistData(`Symbolic(`Normal(n2))), + ), + ) => + Ok(`DistData(`Symbolic(SymbolicDist.Normal.add(n1, n2)))) + | `Operation( + `StandardOperation( + `Subtract, + `DistData(`Symbolic(`Normal(n1))), + `DistData(`Symbolic(`Normal(n2))), + ), + ) => + Ok(`DistData(`Symbolic(SymbolicDist.Normal.subtract(n1, n2)))) + | t => Ok(t); + + let tryCombiningLognormals: simplifier = + fun + | `Operation( + `StandardOperation( + `Multiply, + `DistData(`Symbolic(`Lognormal(l1))), + `DistData(`Symbolic(`Lognormal(l2))), + ), + ) => + Ok(`DistData(`Symbolic(SymbolicDist.Lognormal.multiply(l1, l2)))) + | `Operation( + `StandardOperation( + `Divide, + `DistData(`Symbolic(`Lognormal(l1))), + `DistData(`Symbolic(`Lognormal(l2))), + ), + ) => + Ok(`DistData(`Symbolic(SymbolicDist.Lognormal.divide(l1, l2)))) + | t => Ok(t); + + let attempt = (standardOp, t1: t, t2: t): result(treeNode, string) => { + let originalTreeNode = + `Operation(`StandardOperation((standardOp, t1, t2))); + + originalTreeNode + |> tryCombiningFloats + |> E.R.bind(_, tryCombiningNormals) + |> E.R.bind(_, tryCombiningLognormals); + }; + }; + + let evaluateNumerically = (standardOp, renderParams, t1, t2) => { + let func = funcFromOp(standardOp); + + // TODO: downsample the two shapes + let renderedShape1 = t1 |> renderToShape(renderParams); + let renderedShape2 = t2 |> 
renderToShape(renderParams); + + // This will most likely require a mixed + + switch ((renderedShape1, renderedShape2)) { + | (Error(e1), _) => Error(e1) + | (_, Error(e2)) => Error(e2) + | (Ok(s1), Ok(s2)) => Ok(`DistData(`RenderedShape(Distributions.Shape.convolve(func, s1, s2)))) + }; + }; + + let evaluateToDistData = + (standardOp: standardOperation, renderParams, t1: t, t2: t): result(treeNode, string) => + standardOp + |> Simplify.attempt(_, t1, t2) + |> E.R.bind( + _, + fun + | `DistData(d) => Ok(`DistData(d)) // the analytical simplifaction worked, nice! + | `Operation(_) => // if not, run the convolution + evaluateNumerically(standardOp, renderParams, t1, t2), + ); + }; + + module ScaleOperation = { + let rec mean = (renderParams, t: t): result(float, string) => { + switch (t) { + | `DistData(`RenderedShape(s)) => Ok(Distributions.Shape.T.mean(s)) + | `DistData(`Symbolic(s)) => SymbolicDist.GenericDistFunctions.mean(s) + // evaluating the operation returns result(treeNode(distData)). We then want to make sure + | `Operation(op) => E.R.bind(renderParams.operationToDistData(renderParams.sampleCount, op), mean(renderParams)) + } + }; + + let fnFromOp = + fun + | `Multiply => (*.) + | `Log => ((a, b) => ( log(a) /. log(b) )); + + let knownIntegralSumFnFromOp = + fun + | `Multiply => (a, b) => Some(a *. 
b) + | `Log => ((_, _) => None); + + let evaluateToDistData = (scaleOp, renderParams, t, scaleBy) => { + let fn = fnFromOp(scaleOp); + let knownIntegralSumFn = knownIntegralSumFnFromOp(scaleOp); + let renderedShape = t |> renderToShape(renderParams); + let scaleByMeanValue = mean(renderParams, scaleBy); + + switch ((renderedShape, scaleByMeanValue)) { + | (Error(e1), _) => Error(e1) + | (_, Error(e2)) => Error(e2) + | (Ok(rs), Ok(sm)) => + Ok(`DistData(`RenderedShape(Distributions.Shape.T.mapY(~knownIntegralSumFn=knownIntegralSumFn(sm), fn(sm), rs)))) + } + }; + }; + + module PointwiseOperation = { + let funcFromOp: (pointwiseOperation => ((float, float) => float)) = + fun + | `Add => (+.) + | `Multiply => ( *. ); + + let evaluateToDistData = (pointwiseOp, renderParams, t1, t2) => { + let func = funcFromOp(pointwiseOp); + let renderedShape1 = t1 |> renderToShape(renderParams); + let renderedShape2 = t2 |> renderToShape(renderParams); + + // TODO: figure out integral, diff between pointwiseAdd and pointwiseProduct and other stuff + // Distributions.Shape.reduce(func, renderedShape1, renderedShape2); + + Error("Pointwise operations currently not supported.") + }; + }; + + module Truncate = { + module Simplify = { + let tryTruncatingNothing: simplifier = fun + | `Operation(`Truncate(None, None, `DistData(d))) => Ok(`DistData(d)) + | t => Ok(t); + + let tryTruncatingUniform: simplifier = fun + | `Operation(`Truncate(lc, rc, `DistData(`Symbolic(`Uniform(u))))) => { + // just create a new Uniform distribution + let newLow = max(E.O.default(neg_infinity, lc), u.low); + let newHigh = min(E.O.default(infinity, rc), u.high); + Ok(`DistData(`Symbolic(`Uniform({low: newLow, high: newHigh})))); + } + | t => Ok(t); + + let attempt = (leftCutoff, rightCutoff, t): result(treeNode, string) => { + let originalTreeNode = `Operation(`Truncate(leftCutoff, rightCutoff, t)); + + originalTreeNode + |> tryTruncatingNothing + |> E.R.bind(_, tryTruncatingUniform); + }; + }; + + let 
evaluateNumerically = (leftCutoff, rightCutoff, renderParams, t) => { + // TODO: use named args in renderToShape; if we're lucky we can at least get the tail + // of a distribution we otherwise wouldn't get at all + let renderedShape = t |> renderToShape(renderParams); + + E.R.bind(renderedShape, rs => { + let truncatedShape = rs |> Distributions.Shape.truncate(leftCutoff, rightCutoff); + Ok(`DistData(`RenderedShape(rs))); + }); + }; + + let evaluateToDistData = (leftCutoff: option(float), rightCutoff: option(float), renderParams, t: treeNode): result(treeNode, string) => { + t + |> Simplify.attempt(leftCutoff, rightCutoff) + |> E.R.bind( + _, + fun + | `DistData(d) => Ok(`DistData(d)) // the analytical simplifaction worked, nice! + | `Operation(_) => evaluateNumerically(leftCutoff, rightCutoff, renderParams, t), + ); // if not, run the convolution + }; + }; + + module Normalize = { + let rec evaluateToDistData = (renderParams, t: treeNode): result(treeNode, string) => { + switch (t) { + | `DistData(`Symbolic(_)) => Ok(t) + | `DistData(`RenderedShape(s)) => { + let normalized = Distributions.Shape.normalize(s); + Ok(`DistData(`RenderedShape(normalized))); + } + | `Operation(op) => E.R.bind(renderParams.operationToDistData(renderParams.sampleCount, op), evaluateToDistData(renderParams)) + } + } + }; + + module FloatFromDist = { + let evaluateFromSymbolic = (distToFloatOp: distToFloatOperation, s) => { + let value = switch (distToFloatOp) { + | `Pdf(f) => SymbolicDist.GenericDistFunctions.pdf(f, s) + | `Cdf(f) => 0.0 + | `Inv(f) => SymbolicDist.GenericDistFunctions.inv(f, s) + | `Sample => SymbolicDist.GenericDistFunctions.sample(s) + } + Ok(`DistData(`Symbolic(`Float(value)))); + }; + let evaluateFromRenderedShape = (distToFloatOp: distToFloatOperation, rs: DistTypes.shape): result(treeNode, string) => { + // evaluate the pdf, cdf, get sample, etc. 
from the renderedShape rs + // Should be a float like Ok(`DistData(`Symbolic(Float(0.0)))); + Error("Float from dist is not yet implemented."); + }; + let rec evaluateToDistData = (distToFloatOp: distToFloatOperation, renderParams, t: treeNode): result(treeNode, string) => { + switch (t) { + | `DistData(`Symbolic(s)) => evaluateFromSymbolic(distToFloatOp, s) // we want to evaluate the distToFloatOp on the symbolic dist + | `DistData(`RenderedShape(rs)) => evaluateFromRenderedShape(distToFloatOp, rs) + | `Operation(op) => E.R.bind(renderParams.operationToDistData(renderParams.sampleCount, op), evaluateToDistData(distToFloatOp, renderParams)) + } + } + }; + + module Render = { + let evaluateToRenderedShape = (renderParams, t: treeNode): result(t, string) => { + E.R.bind(renderToShape(renderParams, t), rs => Ok(`DistData(`RenderedShape(rs)))); + } + }; + + let rec operationToDistData = + (sampleCount: int, op: operation): result(t, string) => { + + // the functions that convert the Operation nodes to DistData nodes need to + // have a way to call this function on their children, if their children are themselves Operation nodes. 
+ + let renderParams: renderParams = { + operationToDistData: operationToDistData, + sampleCount: sampleCount, + }; + + switch (op) { + | `StandardOperation(standardOp, t1, t2) => + StandardOperation.evaluateToDistData( + standardOp, renderParams, t1, t2 // we want to give it the option to render or simply leave it as is + ) + | `PointwiseOperation(pointwiseOp, t1, t2) => + PointwiseOperation.evaluateToDistData( + pointwiseOp, + renderParams, + t1, + t2, + ) + | `ScaleOperation(scaleOp, t, scaleBy) => + ScaleOperation.evaluateToDistData(scaleOp, renderParams, t, scaleBy) + | `Truncate(leftCutoff, rightCutoff, t) => Truncate.evaluateToDistData(leftCutoff, rightCutoff, renderParams, t) + | `FloatFromDist(distToFloatOp, t) => FloatFromDist.evaluateToDistData(distToFloatOp, renderParams, t) + | `Normalize(t) => Normalize.evaluateToDistData(renderParams, t) + | `Render(t) => Render.evaluateToRenderedShape(renderParams, t) + }; + }; + + /* This function recursively goes through the nodes of the parse tree, + replacing each Operation node and its subtree with a Data node. + Whenever possible, the replacement produces a new Symbolic Data node, + but most often it will produce a RenderedShape. + This function is used mainly to turn a parse tree into a single RenderedShape + that can then be displayed to the user. 
*/ + let rec toDistData = (treeNode: t, sampleCount: int): result(t, string) => { + switch (treeNode) { + | `DistData(d) => Ok(`DistData(d)) + | `Operation(op) => operationToDistData(sampleCount, op) + }; + }; + + let rec toString = (t: t): string => { + let stringFromStandardOperation = fun + | `Add => " + " + | `Subtract => " - " + | `Multiply => " * " + | `Divide => " / " + | `Exponentiate => "^"; + + let stringFromPointwiseOperation = + fun + | `Add => " .+ " + | `Multiply => " .* "; + + switch (t) { + | `DistData(`Symbolic(d)) => SymbolicDist.GenericDistFunctions.toString(d) + | `DistData(`RenderedShape(s)) => "[shape]" + | `Operation(`StandardOperation(op, t1, t2)) => toString(t1) ++ stringFromStandardOperation(op) ++ toString(t2) + | `Operation(`PointwiseOperation(op, t1, t2)) => toString(t1) ++ stringFromPointwiseOperation(op) ++ toString(t2) + | `Operation(`ScaleOperation(_scaleOp, t, scaleBy)) => toString(t) ++ " @ " ++ toString(scaleBy) + | `Operation(`Normalize(t)) => "normalize(" ++ toString(t) ++ ")" + | `Operation(`Truncate(lc, rc, t)) => "truncate(" ++ toString(t) ++ ", " ++ E.O.dimap(string_of_float, () => "-inf", lc) ++ ", " ++ E.O.dimap(string_of_float, () => "inf", rc) ++ ")" + | `Operation(`Render(t)) => toString(t) + } + }; +}; + +let toShape = (sampleCount: int, treeNode: treeNode) => { + let renderResult = TreeNode.toDistData(`Operation(`Render(treeNode)), sampleCount); + + + switch (renderResult) { + | Ok(`DistData(`RenderedShape(rs))) => { + let continuous = Distributions.Shape.T.toContinuous(rs); + let discrete = Distributions.Shape.T.toDiscrete(rs); + let shape = MixedShapeBuilder.buildSimple(~continuous, ~discrete); + shape |> E.O.toExt(""); + } + | Ok(_) => E.O.toExn("Rendering failed.", None) + | Error(message) => E.O.toExn("No shape found!", None) + } +}; diff --git a/src/distPlus/utility/Jstat.re b/src/distPlus/utility/Jstat.re index 5f1c6c51..0a2cc13f 100644 --- a/src/distPlus/utility/Jstat.re +++ b/src/distPlus/utility/Jstat.re @@ 
-5,6 +5,7 @@ type normal = { [@bs.meth] "cdf": (float, float, float) => float, [@bs.meth] "inv": (float, float, float) => float, [@bs.meth] "sample": (float, float) => float, + [@bs.meth] "mean": (float, float) => float, }; type lognormal = { . @@ -12,6 +13,7 @@ type lognormal = { [@bs.meth] "cdf": (float, float, float) => float, [@bs.meth] "inv": (float, float, float) => float, [@bs.meth] "sample": (float, float) => float, + [@bs.meth] "mean": (float, float) => float, }; type uniform = { . @@ -19,6 +21,7 @@ type uniform = { [@bs.meth] "cdf": (float, float, float) => float, [@bs.meth] "inv": (float, float, float) => float, [@bs.meth] "sample": (float, float) => float, + [@bs.meth] "mean": (float, float) => float, }; type beta = { . @@ -26,6 +29,7 @@ type beta = { [@bs.meth] "cdf": (float, float, float) => float, [@bs.meth] "inv": (float, float, float) => float, [@bs.meth] "sample": (float, float) => float, + [@bs.meth] "mean": (float, float) => float, }; type exponential = { . @@ -33,6 +37,7 @@ type exponential = { [@bs.meth] "cdf": (float, float) => float, [@bs.meth] "inv": (float, float) => float, [@bs.meth] "sample": float => float, + [@bs.meth] "mean": float => float, }; type cauchy = { . @@ -47,6 +52,7 @@ type triangular = { [@bs.meth] "cdf": (float, float, float, float) => float, [@bs.meth] "inv": (float, float, float, float) => float, [@bs.meth] "sample": (float, float, float) => float, + [@bs.meth] "mean": (float, float, float) => float, }; // Pareto doesn't have sample for some reason @@ -61,6 +67,7 @@ type poisson = { [@bs.meth] "pdf": (float, float) => float, [@bs.meth] "cdf": (float, float) => float, [@bs.meth] "sample": float => float, + [@bs.meth] "mean": float => float, }; type weibull = { . @@ -68,6 +75,7 @@ type weibull = { [@bs.meth] "cdf": (float, float, float) => float, [@bs.meth] "inv": (float, float, float) => float, [@bs.meth] "sample": (float, float) => float, + [@bs.meth] "mean": (float, float) => float, }; type binomial = { . 
@@ -101,4 +109,4 @@ external quartiles: (array(float)) => array(float) = "quartiles"; [@bs.module "jstat"] external quantiles: (array(float), array(float)) => array(float) = "quantiles"; [@bs.module "jstat"] -external percentile: (array(float), float, bool) => float = "percentile"; \ No newline at end of file +external percentile: (array(float), float, bool) => float = "percentile"; From dc1ec1bb8651a93bfe23c2f1d8b245792bb4790a Mon Sep 17 00:00:00 2001 From: Sebastian Kosch Date: Fri, 26 Jun 2020 21:29:21 -0700 Subject: [PATCH 09/31] It compiles! --- __tests__/Distributions__Test.re | 87 ++-- src/components/DistBuilder.re | 30 +- src/components/DistBuilder3.re | 4 +- src/distPlus/distribution/Distributions.re | 547 +++++++++++++-------- src/distPlus/renderers/DistPlusRenderer.re | 16 +- src/distPlus/renderers/RenderTypes.re | 8 +- src/distPlus/symbolic/MathJsParser.re | 38 +- src/distPlus/symbolic/TreeNode.re | 485 +++++++++++------- src/interface/FormBuilder.re | 4 +- 9 files changed, 751 insertions(+), 468 deletions(-) diff --git a/__tests__/Distributions__Test.re b/__tests__/Distributions__Test.re index 20c7ce34..c02430fe 100644 --- a/__tests__/Distributions__Test.re +++ b/__tests__/Distributions__Test.re @@ -24,7 +24,7 @@ let makeTestCloseEquality = (~only=false, str, item1, item2, ~digits) => describe("Shape", () => { describe("Continuous", () => { open Distributions.Continuous; - let continuous = make(`Linear, shape); + let continuous = make(`Linear, shape, None); makeTest("minX", T.minX(continuous), 1.0); makeTest("maxX", T.maxX(continuous), 8.0); makeTest( @@ -57,7 +57,7 @@ describe("Shape", () => { ); }); describe("when Stepwise", () => { - let continuous = make(`Stepwise, shape); + let continuous = make(`Stepwise, shape, None); makeTest( "at 4.0", T.xToY(4., continuous), @@ -89,7 +89,7 @@ describe("Shape", () => { "toLinear", { let continuous = - make(`Stepwise, {xs: [|1., 4., 8.|], ys: [|0.1, 5., 1.0|]}); + make(`Stepwise, {xs: [|1., 4., 8.|], ys: [|0.1, 
5., 1.0|]}, None); continuous |> toLinear |> E.O.fmap(getShape); }, Some({ @@ -100,7 +100,7 @@ describe("Shape", () => { makeTest( "toLinear", { - let continuous = make(`Stepwise, {xs: [|0.0|], ys: [|0.3|]}); + let continuous = make(`Stepwise, {xs: [|0.0|], ys: [|0.3|]}, None); continuous |> toLinear |> E.O.fmap(getShape); }, Some({xs: [|0.0|], ys: [|0.3|]}), @@ -123,7 +123,7 @@ describe("Shape", () => { makeTest( "integralEndY", continuous - |> T.scaleToIntegralSum(~intendedSum=1.0) + |> T.normalize //scaleToIntegralSum(~intendedSum=1.0) |> T.Integral.sum(~cache=None), 1.0, ); @@ -135,12 +135,12 @@ describe("Shape", () => { xs: [|1., 4., 8.|], ys: [|0.3, 0.5, 0.2|], }; - let discrete = shape; + let discrete = make(shape, None); makeTest("minX", T.minX(discrete), 1.0); makeTest("maxX", T.maxX(discrete), 8.0); makeTest( "mapY", - T.mapY(r => r *. 2.0, discrete) |> (r => r.ys), + T.mapY(r => r *. 2.0, discrete) |> (r => getShape(r).ys), [|0.6, 1.0, 0.4|], ); makeTest( @@ -160,19 +160,22 @@ describe("Shape", () => { ); makeTest( "scaleBy", - T.scaleBy(~scale=4.0, discrete), - {xs: [|1., 4., 8.|], ys: [|1.2, 2.0, 0.8|]}, + scaleBy(~scale=4.0, discrete), + make({xs: [|1., 4., 8.|], ys: [|1.2, 2.0, 0.8|]}, None), ); makeTest( - "scaleToIntegralSum", - T.scaleToIntegralSum(~intendedSum=4.0, discrete), - {xs: [|1., 4., 8.|], ys: [|1.2, 2.0, 0.8|]}, + "normalize, then scale by 4.0", + discrete + |> T.normalize + |> scaleBy(~scale=4.0), + make({xs: [|1., 4., 8.|], ys: [|1.2, 2.0, 0.8|]}, None), ); makeTest( "scaleToIntegralSum: back and forth", discrete - |> T.scaleToIntegralSum(~intendedSum=4.0) - |> T.scaleToIntegralSum(~intendedSum=1.0), + |> T.normalize + |> scaleBy(~scale=4.0) + |> T.normalize, discrete, ); makeTest( @@ -181,12 +184,13 @@ describe("Shape", () => { Distributions.Continuous.make( `Stepwise, {xs: [|1., 4., 8.|], ys: [|0.3, 0.8, 1.0|]}, + None ), ); makeTest( "integral with 1 element", - T.Integral.get(~cache=None, {xs: [|0.0|], ys: [|1.0|]}), - 
Distributions.Continuous.make(`Stepwise, {xs: [|0.0|], ys: [|1.0|]}), + T.Integral.get(~cache=None, Distributions.Discrete.make({xs: [|0.0|], ys: [|1.0|]}, None)), + Distributions.Continuous.make(`Stepwise, {xs: [|0.0|], ys: [|1.0|]}, None), ); makeTest( "integralXToY", @@ -205,27 +209,22 @@ describe("Shape", () => { describe("Mixed", () => { open Distributions.Mixed; - let discrete: DistTypes.xyShape = { + let discreteShape: DistTypes.xyShape = { xs: [|1., 4., 8.|], ys: [|0.3, 0.5, 0.2|], }; + let discrete = Distributions.Discrete.make(discreteShape, None); let continuous = Distributions.Continuous.make( `Linear, {xs: [|3., 7., 14.|], ys: [|0.058, 0.082, 0.124|]}, + None ) - |> Distributions.Continuous.T.scaleToIntegralSum(~intendedSum=1.0); - let mixed = - MixedShapeBuilder.build( + |> Distributions.Continuous.T.normalize; //scaleToIntegralSum(~intendedSum=1.0); + let mixed = Distributions.Mixed.make( ~continuous, ~discrete, - ~assumptions={ - continuous: ADDS_TO_CORRECT_PROBABILITY, - discrete: ADDS_TO_CORRECT_PROBABILITY, - discreteProbabilityMass: Some(0.5), - }, - ) - |> E.O.toExn(""); + ); makeTest("minX", T.minX(mixed), 1.0); makeTest("maxX", T.maxX(mixed), 14.0); makeTest( @@ -243,9 +242,9 @@ describe("Shape", () => { 0.24775224775224775, |], }, + None ), - ~discrete={xs: [|1., 4., 8.|], ys: [|0.6, 1.0, 0.4|]}, - ~discreteProbabilityMassFraction=0.5, + ~discrete=Distributions.Discrete.make({xs: [|1., 4., 8.|], ys: [|0.6, 1.0, 0.4|]}, None) ), ); makeTest( @@ -266,7 +265,7 @@ describe("Shape", () => { makeTest("integralEndY", T.Integral.sum(~cache=None, mixed), 1.0); makeTest( "scaleBy", - T.scaleBy(~scale=2.0, mixed), + Distributions.Mixed.scaleBy(~scale=2.0, mixed), Distributions.Mixed.make( ~continuous= Distributions.Continuous.make( @@ -279,9 +278,9 @@ describe("Shape", () => { 0.24775224775224775, |], }, + None ), - ~discrete={xs: [|1., 4., 8.|], ys: [|0.6, 1.0, 0.4|]}, - ~discreteProbabilityMassFraction=0.5, + 
~discrete=Distributions.Discrete.make({xs: [|1., 4., 8.|], ys: [|0.6, 1.0, 0.4|]}, None), ), ); makeTest( @@ -302,34 +301,31 @@ describe("Shape", () => { 0.6913122927072927, 1.0, |], - }, + }, + None, ), ); }); describe("Distplus", () => { open Distributions.DistPlus; - let discrete: DistTypes.xyShape = { + let discreteShape: DistTypes.xyShape = { xs: [|1., 4., 8.|], ys: [|0.3, 0.5, 0.2|], }; + let discrete = Distributions.Discrete.make(discreteShape, None); let continuous = Distributions.Continuous.make( `Linear, {xs: [|3., 7., 14.|], ys: [|0.058, 0.082, 0.124|]}, + None ) - |> Distributions.Continuous.T.scaleToIntegralSum(~intendedSum=1.0); + |> Distributions.Continuous.T.normalize; //scaleToIntegralSum(~intendedSum=1.0); let mixed = - MixedShapeBuilder.build( + Distributions.Mixed.make( ~continuous, ~discrete, - ~assumptions={ - continuous: ADDS_TO_CORRECT_PROBABILITY, - discrete: ADDS_TO_CORRECT_PROBABILITY, - discreteProbabilityMass: Some(0.5), - }, - ) - |> E.O.toExn(""); + ); let distPlus = Distributions.DistPlus.make( ~shape=Mixed(mixed), @@ -374,6 +370,7 @@ describe("Shape", () => { 1.0, |], }, + None, ), ), ); @@ -386,9 +383,9 @@ describe("Shape", () => { let numSamples = 10000; open Distributions.Shape; let normal: SymbolicDist.dist = `Normal({mean, stdev}); - let normalShape = TreeNode.toShape(numSamples, normal); + let normalShape = TreeNode.toShape(numSamples, `DistData(`Symbolic(normal))); let lognormal = SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev); - let lognormalShape = TreeNode.toShape(numSamples, lognormal); + let lognormalShape = TreeNode.toShape(numSamples, `DistData(`Symbolic(lognormal))); makeTestCloseEquality( "Mean of a normal", diff --git a/src/components/DistBuilder.re b/src/components/DistBuilder.re index 7f57f9c1..93856fc9 100644 --- a/src/components/DistBuilder.re +++ b/src/components/DistBuilder.re @@ -17,7 +17,7 @@ module FormConfig = [%lenses // sampleCount: string, outputXYPoints: string, - truncateTo: string, + 
downsampleTo: string, kernelWidth: string, } ]; @@ -25,7 +25,7 @@ module FormConfig = [%lenses type options = { sampleCount: int, outputXYPoints: int, - truncateTo: option(int), + downsampleTo: option(int), kernelWidth: option(float), }; @@ -115,7 +115,7 @@ type inputs = { samplingInputs: RenderTypes.ShapeRenderer.Sampling.inputs, guesstimatorString: string, length: int, - shouldTruncateSampledDistribution: int, + shouldDownsampleSampledDistribution: int, }; module DemoDist = { @@ -141,8 +141,8 @@ module DemoDist = { kernelWidth: options.kernelWidth, }, ~distPlusIngredients, - ~shouldTruncate=options.truncateTo |> E.O.isSome, - ~recommendedLength=options.truncateTo |> E.O.default(10000), + ~shouldDownsample=options.downsampleTo |> E.O.isSome, + ~recommendedLength=options.downsampleTo |> E.O.default(10000), (), ); let response = DistPlusRenderer.run(inputs); @@ -182,7 +182,7 @@ let make = () => { unit: "days", sampleCount: "30000", outputXYPoints: "10000", - truncateTo: "1000", + downsampleTo: "1000", kernelWidth: "5", }, (), @@ -210,7 +210,7 @@ let make = () => { let sampleCount = reform.state.values.sampleCount |> Js.Float.fromString; let outputXYPoints = reform.state.values.outputXYPoints |> Js.Float.fromString; - let truncateTo = reform.state.values.truncateTo |> Js.Float.fromString; + let downsampleTo = reform.state.values.downsampleTo |> Js.Float.fromString; let kernelWidth = reform.state.values.kernelWidth |> Js.Float.fromString; let domain = @@ -252,20 +252,20 @@ let make = () => { }; let options = - switch (sampleCount, outputXYPoints, truncateTo) { + switch (sampleCount, outputXYPoints, downsampleTo) { | (_, _, _) when !Js.Float.isNaN(sampleCount) && !Js.Float.isNaN(outputXYPoints) - && !Js.Float.isNaN(truncateTo) + && !Js.Float.isNaN(downsampleTo) && sampleCount > 10. && outputXYPoints > 10. => Some({ sampleCount: sampleCount |> int_of_float, outputXYPoints: outputXYPoints |> int_of_float, - truncateTo: - int_of_float(truncateTo) > 0 - ? 
Some(int_of_float(truncateTo)) : None, + downsampleTo: + int_of_float(downsampleTo) > 0 + ? Some(int_of_float(downsampleTo)) : None, kernelWidth: kernelWidth == 0.0 ? None : Some(kernelWidth), }) | _ => None @@ -287,7 +287,7 @@ let make = () => { reform.state.values.unit, reform.state.values.sampleCount, reform.state.values.outputXYPoints, - reform.state.values.truncateTo, + reform.state.values.downsampleTo, reform.state.values.kernelWidth, reloader |> string_of_int, |], @@ -481,7 +481,7 @@ let make = () => { /> - + @@ -496,4 +496,4 @@ let make = () => {
; -}; \ No newline at end of file +}; diff --git a/src/components/DistBuilder3.re b/src/components/DistBuilder3.re index 86bb1d2a..124aad0f 100644 --- a/src/components/DistBuilder3.re +++ b/src/components/DistBuilder3.re @@ -43,7 +43,7 @@ module DemoDist = { let str = switch (parsed1) { - | Ok(r) => SymbolicDist.toString(r) + | Ok(r) => TreeNode.toString(r) | Error(e) => e }; @@ -58,7 +58,7 @@ module DemoDist = { ~guesstimatorString=None, (), ) - |> Distributions.DistPlus.T.scaleToIntegralSum(~intendedSum=1.0); + |> Distributions.DistPlus.T.normalize; ; }) |> E.O.default(ReasonReact.null); diff --git a/src/distPlus/distribution/Distributions.re b/src/distPlus/distribution/Distributions.re index f5807cd8..9497957d 100644 --- a/src/distPlus/distribution/Distributions.re +++ b/src/distPlus/distribution/Distributions.re @@ -3,7 +3,8 @@ module type dist = { type integral; let minX: t => float; let maxX: t => float; - let mapY: (~knownIntegralSumFn: float => option(float)=?, float => float, t) => t; + let mapY: + (~knownIntegralSumFn: float => option(float)=?, float => float, t) => t; let xToY: (float, t) => DistTypes.mixedPoint; let toShape: t => DistTypes.shape; let toContinuous: t => option(DistTypes.continuousShape); @@ -13,6 +14,7 @@ module type dist = { let normalizedToDiscrete: t => option(DistTypes.discreteShape); let toDiscreteProbabilityMassFraction: t => float; let downsample: (~cache: option(integral)=?, int, t) => t; + let truncate: (option(float), option(float), t) => t; let integral: (~cache: option(integral), t) => integral; let integralEndY: (~cache: option(integral), t) => float; @@ -38,6 +40,7 @@ module Dist = (T: dist) => { let toContinuous = T.toContinuous; let toDiscrete = T.toDiscrete; let normalize = T.normalize; + let truncate = T.truncate; let normalizedToContinuous = T.normalizedToContinuous; let normalizedToDiscrete = T.normalizedToDiscrete; let mean = T.mean; @@ -52,7 +55,22 @@ module Dist = (T: dist) => { }; }; -module Continuous { +module 
Common = { + let combineIntegralSums = + ( + combineFn: (float, float) => option(float), + t1KnownIntegralSum: option(float), + t2KnownIntegralSum: option(float), + ) => { + switch (t1KnownIntegralSum, t2KnownIntegralSum) { + | (None, _) + | (_, None) => None + | (Some(s1), Some(s2)) => combineFn(s1, s2) + }; + }; +}; + +module Continuous = { type t = DistTypes.continuousShape; let getShape = (t: t) => t.xyShape; let interpolation = (t: t) => t.interpolation; @@ -78,17 +96,21 @@ module Continuous { knownIntegralSum: Some(0.0), }; let combine = - (fn, t1: DistTypes.continuousShape, t2: DistTypes.continuousShape) + ( + ~knownIntegralSumsFn, + fn, + t1: DistTypes.continuousShape, + t2: DistTypes.continuousShape, + ) : DistTypes.continuousShape => { - // If we're adding the distributions, and we know the total of each, then we // can just sum them up. Otherwise, all bets are off. let combinedIntegralSum = - switch (fn, t1.knownIntegralSum, t2.knownIntegralSum) { - | (_, None, _) - | (_, _, None) => None - | ((+.), Some(s1), Some(s2)) => Some(s1 +. 
s2) - }; + Common.combineIntegralSums( + knownIntegralSumsFn, + t1.knownIntegralSum, + t2.knownIntegralSum, + ); make( `Linear, @@ -102,7 +124,6 @@ module Continuous { combinedIntegralSum, ); }; - let reduce = (fn, items) => items |> E.A.fold_left(combine(fn), empty); let toLinear = (t: t): option(t) => { switch (t) { @@ -114,7 +135,19 @@ module Continuous { }; }; let shapeFn = (fn, t: t) => t |> getShape |> fn; - let updateKnownIntegralSum = (knownIntegralSum, t: t): t => ({...t, knownIntegralSum}); + let updateKnownIntegralSum = (knownIntegralSum, t: t): t => { + ...t, + knownIntegralSum, + }; + + let reduce = + ( + ~knownIntegralSumsFn: (float, float) => option(float)=(_, _) => None, + fn, + continuousShapes, + ) => + continuousShapes + |> E.A.fold_left(combine(~knownIntegralSumsFn, fn), empty); // Contracts every point in the continuous xyShape into a single dirac-Delta-like point, // using the centerpoints between adjacent xs and the area under each trapezoid. @@ -128,11 +161,18 @@ module Continuous { Belt.Array.set( pointMassesY, x, - (xs[x + 1] -. xs[x]) *. ((ys[x] +. ys[x + 1]) /. 2.)); // = dx * (1/2) * (avgY) + (xs[x + 1] -. xs[x]) *. ((ys[x] +. ys[x + 1]) /. 2.), + ); // = dx * (1/2) * (avgY) (); }; - {xyShape: {xs: xs, ys: pointMassesY}, knownIntegralSum: t.knownIntegralSum}; + { + xyShape: { + xs, + ys: pointMassesY, + }, + knownIntegralSum: t.knownIntegralSum, + }; }; /* Performs a discrete convolution between two continuous distributions A and B. 
@@ -153,18 +193,25 @@ module Continuous { let t1n = t1s |> XYShape.T.length; let t2n = t2s |> XYShape.T.length; - let outXYShapes: array(array((float, float))) = Belt.Array.makeUninitializedUnsafe(t1n); + let outXYShapes: array(array((float, float))) = + Belt.Array.makeUninitializedUnsafe(t1n); for (i in 0 to t1n - 1) { // create a new distribution - let dxyShape: array((float, float)) = Belt.Array.makeUninitializedUnsafe(t2n); + let dxyShape: array((float, float)) = + Belt.Array.makeUninitializedUnsafe(t2n); for (j in 0 to t2n - 1) { - let _ = Belt.Array.set(dxyShape, j, (fn(t1s.xs[i], t2s.xs[j]), t1s.ys[i] *. t2s.ys[j])); + let _ = + Belt.Array.set( + dxyShape, + j, + (fn(t1s.xs[i], t2s.xs[j]), t1s.ys[i] *. t2s.ys[j]), + ); (); - } + }; let _ = Belt.Array.set(outXYShapes, i, dxyShape); (); - } + }; let combinedIntegralSum = switch (t1.knownIntegralSum, t2.knownIntegralSum) { @@ -175,9 +222,9 @@ module Continuous { outXYShapes |> E.A.fmap(s => { - let xyShape = XYShape.T.fromZippedArray(s); - make(`Linear, xyShape, None); - }) + let xyShape = XYShape.T.fromZippedArray(s); + make(`Linear, xyShape, None); + }) |> reduce((+.)) |> updateKnownIntegralSum(combinedIntegralSum); }; @@ -185,35 +232,22 @@ module Continuous { let convolve = (fn, t1: t, t2: t) => convolveWithDiscrete(fn, t1, toDiscretePointMasses(t2)); - let mapY = (~knownIntegralSumFn=(previousKnownIntegralSum => None), fn, t: t) => { + let mapY = (~knownIntegralSumFn=previousKnownIntegralSum => None, fn, t: t) => { let u = E.O.bind(_, knownIntegralSumFn); let yMapFn = shapeMap(XYShape.T.mapY(fn)); t |> yMapFn |> updateKnownIntegralSum(u(t.knownIntegralSum)); }; - let scaleBy = (~scale=1.0, ~knownIntegralSum=None, t: t): t => - t |> mapY((r: float) => r *. 
scale) |> updateKnownIntegralSum(knownIntegralSum); - - let truncate = (leftCutoff: option(float), rightCutoff: option(float), t: t) => { - let truncatedZippedPairs = - t - |> getShape - |> XYShape.T.zip - |> XYShape.Zipped.filterByX(x => x >= E.O.default(neg_infinity, leftCutoff) || x <= E.O.default(infinity, rightCutoff)); - - let eps = (t |> getShape |> XYShape.T.xTotalRange) *. 0.0001; - - let leftNewPoint = leftCutoff |> E.O.dimap(lc => [| (lc -. eps, 0.) |], _ => [||]); - let rightNewPoint = rightCutoff |> E.O.dimap(rc => [| (rc +. eps, 0.) |], _ => [||]); - - let truncatedZippedPairsWithNewPoints = - E.A.concatMany([| leftNewPoint, truncatedZippedPairs, rightNewPoint |]); - let truncatedShape = XYShape.T.fromZippedArray(truncatedZippedPairsWithNewPoints); - - make(`Linear, truncatedShape, None); + let scaleBy = (~scale=1.0, t: t): t => { + t + |> mapY((r: float) => r *. scale) + |> updateKnownIntegralSum( + E.O.bind(t.knownIntegralSum, v => Some(scale *. v)), + ); }; + module T = Dist({ type t = DistTypes.continuousShape; @@ -236,12 +270,31 @@ module Continuous { |> DistTypes.MixedPoint.makeContinuous; }; - // let combineWithFn = (t1: t, t2: t, fn: (float, float) => float) => { - // switch(t1, t2){ - // | ({interpolation: `Stepwise}, {interpolation: `Stepwise}) => 3.0 - // | ({interpolation: `Linear}, {interpolation: `Linear}) => 3.0 - // } - // }; + let truncate = + (leftCutoff: option(float), rightCutoff: option(float), t: t) => { + let truncatedZippedPairs = + t + |> getShape + |> XYShape.T.zip + |> XYShape.Zipped.filterByX(x => + x >= E.O.default(neg_infinity, leftCutoff) + || x <= E.O.default(infinity, rightCutoff) + ); + + let eps = (t |> getShape |> XYShape.T.xTotalRange) *. 0.0001; + + let leftNewPoint = + leftCutoff |> E.O.dimap(lc => [|(lc -. eps, 0.)|], _ => [||]); + let rightNewPoint = + rightCutoff |> E.O.dimap(rc => [|(rc +. 
eps, 0.)|], _ => [||]); + + let truncatedZippedPairsWithNewPoints = + E.A.concatMany([|leftNewPoint, truncatedZippedPairs, rightNewPoint|]); + let truncatedShape = + XYShape.T.fromZippedArray(truncatedZippedPairsWithNewPoints); + + make(`Linear, truncatedShape, None); + }; // TODO: This should work with stepwise plots. let integral = (~cache, t) => @@ -272,9 +325,9 @@ module Continuous { let toDiscrete = _ => None; let normalize = (t: t): t => { - let continuousIntegralSum = integralEndY(~cache=None, t); - - scaleBy(~scale=(1. /. continuousIntegralSum), ~knownIntegralSum=Some(1.0), t); + t + |> scaleBy(~scale=1. /. integralEndY(~cache=None, t)) + |> updateKnownIntegralSum(Some(1.0)); }; let normalizedToContinuous = t => Some(t); // TODO: this should be normalized @@ -316,40 +369,41 @@ module Discrete = { let lastY = (t: t) => t |> getShape |> XYShape.T.lastY; - let combineIntegralSums = (combineFn: ((float, float) => option(float)), t1KnownIntegralSum: option(float), t2KnownIntegralSum: option(float)) => { - switch (t1KnownIntegralSum, t2KnownIntegralSum) { - | (None, _) - | (_, None) => None - | (Some(s1), Some(s2)) => combineFn(s1, s2) - }; - }; - - let combine = (combineIntegralSumsFn, fn, t1: DistTypes.discreteShape, t2: DistTypes.discreteShape) + let combine = + ( + ~knownIntegralSumsFn, + fn, + t1: DistTypes.discreteShape, + t2: DistTypes.discreteShape, + ) : DistTypes.discreteShape => { - - let combinedIntegralSum = combineIntegralSums(combineIntegralSumsFn, t1.knownIntegralSum, t2.knownIntegralSum); + let combinedIntegralSum = + Common.combineIntegralSums( + knownIntegralSumsFn, + t1.knownIntegralSum, + t2.knownIntegralSum, + ); make( XYShape.Combine.combine( ~xsSelection=ALL_XS, ~xToYSelection=XYShape.XtoY.stepwiseIfAtX, - ~fn, // stepwiseIfAtX returns option(float), so this fn needs to handle None, which is what the _default0 wrapper is for + ~fn=((a, b) => fn(E.O.default(0.0, a), E.O.default(0.0, b))), // stepwiseIfAtX returns option(float), so this fn 
needs to handle None t1.xyShape, t2.xyShape, ), combinedIntegralSum, ); }; - let _default0 = (fn, a, b) => - fn(E.O.default(0.0, a), E.O.default(0.0, b)); - let reduce = (fn, items) => - items |> E.A.fold_left(combine((_, _) => None, _default0(fn)), empty); - // a special version of reduce that adds the results (which should be the most common case by far), - // and conveniently also adds the knownIntegralSums. - let reduceAdd = (fn, items) => - items |> E.A.fold_left(combine((s1, s2) => Some(s1 +. s2), _default0((+.))), empty); - let updateKnownIntegralSum = (knownIntegralSum, t: t): t => ({...t, knownIntegralSum}); + let reduce = (~knownIntegralSumsFn=(_, _) => None, fn, discreteShapes): DistTypes.discreteShape => + discreteShapes + |> E.A.fold_left(combine(~knownIntegralSumsFn, fn), empty); + + let updateKnownIntegralSum = (knownIntegralSum, t: t): t => { + ...t, + knownIntegralSum, + }; let convolve = (fn, t1: t, t2: t) => { let t1s = t1 |> getShape; @@ -357,7 +411,12 @@ module Discrete = { let t1n = t1s |> XYShape.T.length; let t2n = t2s |> XYShape.T.length; - let combinedIntegralSum = combineIntegralSums((s1, s2) => Some(s1 *. s2), t1.knownIntegralSum, t2.knownIntegralSum); + let combinedIntegralSum = + Common.combineIntegralSums( + (s1, s2) => Some(s1 *. s2), + t1.knownIntegralSum, + t2.knownIntegralSum, + ); let xToYMap = E.FloatFloatMap.empty(); @@ -368,8 +427,8 @@ module Discrete = { let my = t1s.ys[i] *. t2s.ys[j]; let _ = Belt.MutableMap.set(xToYMap, x, cv +. 
my); (); - } - } + }; + }; let rxys = xToYMap |> E.FloatFloatMap.toArray |> XYShape.Zipped.sortByX; @@ -378,25 +437,19 @@ module Discrete = { make(convolvedShape, combinedIntegralSum); }; - let mapY = (~knownIntegralSumFn=(previousKnownIntegralSum => None), fn, t: t) => { + let mapY = (~knownIntegralSumFn=previousKnownIntegralSum => None, fn, t: t) => { let u = E.O.bind(_, knownIntegralSumFn); let yMapFn = shapeMap(XYShape.T.mapY(fn)); t |> yMapFn |> updateKnownIntegralSum(u(t.knownIntegralSum)); }; - let scaleBy = (~scale=1.0, ~knownIntegralSum=None, t: t): t => - t |> mapY((r: float) => r *. scale) |> updateKnownIntegralSum(knownIntegralSum); - - let truncate = (leftCutoff: option(float), rightCutoff: option(float), t: t) => { - let truncatedShape = - t - |> getShape - |> XYShape.T.zip - |> XYShape.Zipped.filterByX(x => x >= E.O.default(neg_infinity, leftCutoff) || x <= E.O.default(infinity, rightCutoff)) - |> XYShape.T.fromZippedArray; - - make(truncatedShape, None); + let scaleBy = (~scale=1.0, t: t): t => { + t + |> mapY((r: float) => r *. scale) + |> updateKnownIntegralSum( + E.O.bind(t.knownIntegralSum, v => Some(scale *. v)), + ); }; module T = @@ -414,7 +467,8 @@ module Discrete = { ) }; let integralEndY = (~cache, t: t) => - t.knownIntegralSum |> E.O.default(t |> integral(~cache) |> Continuous.lastY); + t.knownIntegralSum + |> E.O.default(t |> integral(~cache) |> Continuous.lastY); let minX = shapeFn(XYShape.T.minX); let maxX = shapeFn(XYShape.T.maxX); let toDiscreteProbabilityMassFraction = _ => 1.0; @@ -424,9 +478,9 @@ module Discrete = { let toDiscrete = t => Some(t); let normalize = (t: t): t => { - let discreteIntegralSum = integralEndY(~cache=None, t); - - scaleBy(~scale=(1. /. discreteIntegralSum), ~knownIntegralSum=Some(1.0), t); + t + |> scaleBy(~scale=1. /. 
integralEndY(~cache=None, t)) + |> updateKnownIntegralSum(Some(1.0)); }; let normalizedToContinuous = _ => None; @@ -448,6 +502,21 @@ module Discrete = { make(clippedShape, None); // if someone needs the sum, they'll have to recompute it }; + let truncate = + (leftCutoff: option(float), rightCutoff: option(float), t: t): t => { + let truncatedShape = + t + |> getShape + |> XYShape.T.zip + |> XYShape.Zipped.filterByX(x => + x >= E.O.default(neg_infinity, leftCutoff) + || x <= E.O.default(infinity, rightCutoff) + ) + |> XYShape.T.fromZippedArray; + + make(truncatedShape, None); + }; + let xToY = (f, t) => t |> getShape @@ -477,53 +546,43 @@ module Discrete = { XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares); }; }); - }; -// TODO: I think this shouldn't assume continuous/discrete are normalized to 1.0, and thus should not need the discreteProbabilityMassFraction being separate. module Mixed = { type t = DistTypes.mixedShape; - let make = (~continuous, ~discrete): t => { - continuous, - discrete, - }; + let make = (~continuous, ~discrete): t => {continuous, discrete}; let totalLength = (t: t): int => { - let continuousLength = t.continuous |> Continuous.getShape |> XYShape.T.length; + let continuousLength = + t.continuous |> Continuous.getShape |> XYShape.T.length; let discreteLength = t.discrete |> Discrete.getShape |> XYShape.T.length; continuousLength + discreteLength; }; - // TODO: Put into scaling module - //let normalizeMixedPoint = (t, f) => f *. discreteProbabilityMassFraction;*/ + let scaleBy = (~scale=1.0, {discrete, continuous}: t): t => { + let scaledDiscrete = Discrete.scaleBy(~scale, discrete); + let scaledContinuous = Continuous.scaleBy(~scale, continuous); + make(~discrete=scaledDiscrete, ~continuous=scaledContinuous); + }; - //TODO: Warning: This currently computes the integral, which is expensive. - /*let scaleContinuousFn = - ({discreteProbabilityMassFraction}: DistTypes.mixedShape, f) => - f *. (1.0 -. 
discreteProbabilityMassFraction); */ + let toContinuous = ({continuous}: t) => Some(continuous); + let toDiscrete = ({discrete}: t) => Some(discrete); - //TODO: Warning: This currently computes the integral, which is expensive. + let combine = (~knownIntegralSumsFn, fn, t1: t, t2: t) => { + let reducedDiscrete = + [|t1, t2|] + |> E.A.fmap(toDiscrete) + |> E.A.O.concatSomes + |> Discrete.reduce(~knownIntegralSumsFn, fn); - // Normalizes to 1.0. - /*let scaleContinuous = ({discreteProbabilityMassFraction}: t, continuous) => - // get only the continuous, and scale it to the respective - continuous - |> Continuous.T.scaleToIntegralSum( - ~intendedSum=1.0 -. discreteProbabilityMassFraction, - ); + let reducedContinuous = + [|t1, t2|] + |> E.A.fmap(toContinuous) + |> E.A.O.concatSomes + |> Continuous.reduce(~knownIntegralSumsFn, fn); - let scaleDiscrete = ({discreteProbabilityMassFraction}: t, disrete) => - disrete - |> Discrete.T.scaleToIntegralSum( - ~intendedSum=discreteProbabilityMassFraction, - );*/ - - let truncate = (leftCutoff: option(float), rightCutoff: option(float), {discrete, continuous}: t) => { - let truncatedDiscrete = Discrete.truncate(leftCutoff, rightCutoff, discrete); - let truncatedContinuous = Continuous.truncate(leftCutoff, rightCutoff, continuous); - - make(~discrete=truncatedDiscrete, ~continuous=truncatedContinuous); + make(~discrete=reducedDiscrete, ~continuous=reducedContinuous); }; module T = @@ -536,19 +595,40 @@ module Mixed = { let maxX = ({continuous, discrete}: t) => max(Continuous.T.maxX(continuous), Discrete.T.maxX(discrete)); let toShape = (t: t): DistTypes.shape => Mixed(t); - let toContinuous = ({continuous}: t) => Some(continuous); - let toDiscrete = ({discrete}: t) => Some(discrete); + + let toContinuous = toContinuous; + let toDiscrete = toDiscrete; + + let truncate = + ( + leftCutoff: option(float), + rightCutoff: option(float), + {discrete, continuous}: t, + ) => { + let truncatedContinuous = Continuous.T.truncate(leftCutoff, 
rightCutoff, continuous); + let truncatedDiscrete = Discrete.T.truncate(leftCutoff, rightCutoff, discrete); + + make(~discrete=truncatedDiscrete, ~continuous=truncatedContinuous); + }; let normalize = (t: t): t => { - let continuousIntegralSum = Continuous.T.Integral.sum(~cache=None, t.continuous); - let discreteIntegralSum = Discrete.T.Integral.sum(~cache=None, t.discrete); + let continuousIntegralSum = + Continuous.T.Integral.sum(~cache=None, t.continuous); + let discreteIntegralSum = + Discrete.T.Integral.sum(~cache=None, t.discrete); let totalIntegralSum = continuousIntegralSum +. discreteIntegralSum; let newContinuousSum = continuousIntegralSum /. totalIntegralSum; let newDiscreteSum = discreteIntegralSum /. totalIntegralSum; - let normalizedContinuous = Continuous.scaleBy(~scale=(1. /. newContinuousSum), ~knownIntegralSum=Some(newContinuousSum), t.continuous); - let normalizedDiscrete = Discrete.scaleBy(~scale=(1. /. newDiscreteSum), ~knownIntegralSum=Some(newDiscreteSum), t.discrete); + let normalizedContinuous = + t.continuous + |> Continuous.scaleBy(~scale=1. /. newContinuousSum) + |> Continuous.updateKnownIntegralSum(Some(newContinuousSum)); + let normalizedDiscrete = + t.discrete + |> Discrete.scaleBy(~scale=1. /. newDiscreteSum) + |> Discrete.updateKnownIntegralSum(Some(newDiscreteSum)); make(~continuous=normalizedContinuous, ~discrete=normalizedDiscrete); }; @@ -563,8 +643,10 @@ module Mixed = { }; let toDiscreteProbabilityMassFraction = ({discrete, continuous}: t) => { - let discreteIntegralSum = Discrete.T.Integral.sum(~cache=None, discrete); - let continuousIntegralSum = Continuous.T.Integral.sum(~cache=None, continuous); + let discreteIntegralSum = + Discrete.T.Integral.sum(~cache=None, discrete); + let continuousIntegralSum = + Continuous.T.Integral.sum(~cache=None, continuous); let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum; discreteIntegralSum /. 
totalIntegralSum; @@ -575,20 +657,25 @@ module Mixed = { // The easiest way to do this is to simply go by the previous probability masses. // The cache really isn't helpful here, because we would need two separate caches - let discreteIntegralSum = Discrete.T.Integral.sum(~cache=None, discrete); - let continuousIntegralSum = Continuous.T.Integral.sum(~cache=None, continuous); + let discreteIntegralSum = + Discrete.T.Integral.sum(~cache=None, discrete); + let continuousIntegralSum = + Continuous.T.Integral.sum(~cache=None, continuous); let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum; let downsampledDiscrete = Discrete.T.downsample( - int_of_float(float_of_int(count) *. (discreteIntegralSum /. totalIntegralSum)), + int_of_float( + float_of_int(count) *. (discreteIntegralSum /. totalIntegralSum), + ), discrete, ); let downsampledContinuous = Continuous.T.downsample( int_of_float( - float_of_int(count) *. (continuousIntegralSum /. totalIntegralSum), + float_of_int(count) + *. (continuousIntegralSum /. totalIntegralSum), ), continuous, ); @@ -596,23 +683,20 @@ module Mixed = { {discrete: downsampledDiscrete, continuous: downsampledContinuous}; }; - let normalizedToContinuous = (t: t) => - Some(normalize(t).continuous); + let normalizedToContinuous = (t: t) => Some(normalize(t).continuous); - let normalizedToDiscrete = ({discrete} as t: t) => - Some(normalize(t).discrete); + let normalizedToDiscrete = ({discrete} as t: t) => + Some(normalize(t).discrete); - let integral = - ( - ~cache, - {continuous, discrete}: t, - ) => { + let integral = (~cache, {continuous, discrete}: t) => { switch (cache) { | Some(cache) => cache - | None => { + | None => // note: if the underlying shapes aren't normalized, then these integrals won't be either! 
- let continuousIntegral = Continuous.T.Integral.get(~cache=None, continuous); - let discreteIntegral = Discrete.T.Integral.get(~cache=None, discrete); + let continuousIntegral = + Continuous.T.Integral.get(~cache=None, continuous); + let discreteIntegral = + Discrete.T.Integral.get(~cache=None, discrete); Continuous.make( `Linear, @@ -623,7 +707,6 @@ module Mixed = { ), None, ); - } }; }; @@ -648,14 +731,26 @@ module Mixed = { // This pipes all ys (continuous and discrete) through fn. // If mapY is a linear operation, we might be able to update the knownIntegralSums as well; // if not, they'll be set to None. - let mapY = (~knownIntegralSumFn=(previousIntegralSum => None), fn, {discrete, continuous}: t): t => { + let mapY = + ( + ~knownIntegralSumFn=previousIntegralSum => None, + fn, + {discrete, continuous}: t, + ) + : t => { let u = E.O.bind(_, knownIntegralSumFn); let yMappedDiscrete = - discrete |> Discrete.T.mapY(fn) |> Discrete.updateKnownIntegralSum(u(discrete.knownIntegralSum)); + discrete + |> Discrete.T.mapY(fn) + |> Discrete.updateKnownIntegralSum(u(discrete.knownIntegralSum)); let yMappedContinuous = - continuous |> Continuous.T.mapY(fn) |> Continuous.updateKnownIntegralSum(u(continuous.knownIntegralSum)); + continuous + |> Continuous.T.mapY(fn) + |> Continuous.updateKnownIntegralSum( + u(continuous.knownIntegralSum), + ); { discrete: yMappedDiscrete, @@ -668,34 +763,55 @@ module Mixed = { let continuousMean = Continuous.T.mean(continuous); // the combined mean is the weighted sum of the two: - let discreteIntegralSum = Discrete.T.Integral.sum(~cache=None, discrete); - let continuousIntegralSum = Continuous.T.Integral.sum(~cache=None, continuous); + let discreteIntegralSum = + Discrete.T.Integral.sum(~cache=None, discrete); + let continuousIntegralSum = + Continuous.T.Integral.sum(~cache=None, continuous); let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum; - (discreteMean *. discreteIntegralSum +. continuousMean *. 
continuousIntegralSum) /. totalIntegralSum; + ( + discreteMean + *. discreteIntegralSum + +. continuousMean + *. continuousIntegralSum + ) + /. totalIntegralSum; }; let variance = ({discrete, continuous} as t: t): float => { // the combined mean is the weighted sum of the two: - let discreteIntegralSum = Discrete.T.Integral.sum(~cache=None, discrete); - let continuousIntegralSum = Continuous.T.Integral.sum(~cache=None, continuous); + let discreteIntegralSum = + Discrete.T.Integral.sum(~cache=None, discrete); + let continuousIntegralSum = + Continuous.T.Integral.sum(~cache=None, continuous); let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum; let getMeanOfSquares = ({discrete, continuous} as t: t) => { - let discreteMean = discrete |> Discrete.shapeMap(XYShape.Analysis.squareXYShape) |> Discrete.T.mean; - let continuousMean = continuous |> XYShape.Analysis.getMeanOfSquaresContinuousShape; - (discreteMean *. discreteIntegralSum +. continuousMean *. continuousIntegralSum) /. totalIntegralSum + let discreteMean = + discrete + |> Discrete.shapeMap(XYShape.Analysis.squareXYShape) + |> Discrete.T.mean; + let continuousMean = + continuous |> XYShape.Analysis.getMeanOfSquaresContinuousShape; + ( + discreteMean + *. discreteIntegralSum + +. continuousMean + *. continuousIntegralSum + ) + /. totalIntegralSum; }; switch (discreteIntegralSum /. totalIntegralSum) { | 1.0 => Discrete.T.variance(discrete) | 0.0 => Continuous.T.variance(continuous) - | _ => XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares) + | _ => + XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares) }; }; }); - let convolve = (fn: ((float, float) => float), t1: t, t2: t): t => { + let convolve = (fn: (float, float) => float, t1: t, t2: t): t => { // Discrete convolution can cause a huge increase in the number of samples, // so we'll first downsample. 
@@ -713,16 +829,21 @@ module Mixed = { // continuous (*) continuous => continuous, but also // discrete (*) continuous => continuous (and vice versa). We have to take care of all combos and then combine them: - let ccConvResult = Continuous.convolve(fn, t1d.continuous, t2d.continuous); - let dcConvResult = Continuous.convolveWithDiscrete(fn, t2d.continuous, t1d.discrete); - let cdConvResult = Continuous.convolveWithDiscrete(fn, t1d.continuous, t2d.discrete); - let continuousConvResult = Continuous.reduce((+.), [|ccConvResult, dcConvResult, cdConvResult|]); + let ccConvResult = + Continuous.convolve(fn, t1d.continuous, t2d.continuous); + let dcConvResult = + Continuous.convolveWithDiscrete(fn, t2d.continuous, t1d.discrete); + let cdConvResult = + Continuous.convolveWithDiscrete(fn, t1d.continuous, t2d.discrete); + let continuousConvResult = + Continuous.reduce((+.), [|ccConvResult, dcConvResult, cdConvResult|]); // ... finally, discrete (*) discrete => discrete, obviously: - let discreteConvResult = Discrete.convolve(fn, t1d.discrete, t2d.discrete); + let discreteConvResult = + Discrete.convolve(fn, t1d.discrete, t2d.discrete); {discrete: discreteConvResult, continuous: continuousConvResult}; - } + }; }; module Shape = { @@ -741,43 +862,31 @@ module Shape = { | Continuous(m) => Continuous(fn3(m)) }; - let toMixed = mapToAll(( - m => m, - d => Mixed.make(~discrete=d, ~continuous=Continuous.empty), - c => Mixed.make(~discrete=Discrete.empty, ~continuous=c), - )); + let toMixed = + mapToAll(( + m => m, + d => Mixed.make(~discrete=d, ~continuous=Continuous.empty), + c => Mixed.make(~discrete=Discrete.empty, ~continuous=c), + )); let convolve = (fn, t1: t, t2: t): t => { Mixed(Mixed.convolve(fn, toMixed(t1), toMixed(t2))); }; - let downsample = (~cache=None, i, t) => - fmap(( - Mixed.T.downsample(i), - Discrete.T.downsample(i), - Continuous.T.downsample(i), - ), t); - - let normalize = - fmap(( - Mixed.T.normalize, - Discrete.T.normalize, - Continuous.T.normalize, - )); 
- - let truncate (leftCutoff, rightCutoff, t): t = - fmap(( - Mixed.truncate(leftCutoff, rightCutoff), - Discrete.truncate(leftCutoff, rightCutoff), - Continuous.truncate(leftCutoff, rightCutoff), - ), t); + let combine = (~knownIntegralSumsFn=(_, _) => None, fn, t1: t, t2: t) => + switch ((t1, t2)) { + | (Continuous(m1), Continuous(m2)) => DistTypes.Continuous(Continuous.combine(~knownIntegralSumsFn, fn, m1, m2)) + | (Discrete(m1), Discrete(m2)) => DistTypes.Discrete(Discrete.combine(~knownIntegralSumsFn, fn, m1, m2)) + | (m1, m2) => { + DistTypes.Mixed(Mixed.combine(~knownIntegralSumsFn, fn, toMixed(m1), toMixed(m2))) + } + }; module T = Dist({ type t = DistTypes.shape; type integral = DistTypes.continuousShape; - let xToY = (f: float) => mapToAll(( Mixed.T.xToY(f), @@ -789,9 +898,31 @@ module Shape = { let toContinuous = t => None; let toDiscrete = t => None; - let downsample = (~cache=None, i, t) => t; - let toDiscreteProbabilityMassFraction = t => 0.0; - let normalize = t => t; + + + let downsample = (~cache=None, i, t) => + fmap( + ( + Mixed.T.downsample(i), + Discrete.T.downsample(i), + Continuous.T.downsample(i), + ), + t, + ); + + let truncate = (leftCutoff, rightCutoff, t): t => + fmap( + ( + Mixed.T.truncate(leftCutoff, rightCutoff), + Discrete.T.truncate(leftCutoff, rightCutoff), + Continuous.T.truncate(leftCutoff, rightCutoff), + ), + t, + ); + + let toDiscreteProbabilityMassFraction = t => 0.0; + let normalize = + fmap((Mixed.T.normalize, Discrete.T.normalize, Continuous.T.normalize)); let toContinuous = mapToAll(( Mixed.T.toContinuous, @@ -853,7 +984,7 @@ module Shape = { )); }; let maxX = mapToAll((Mixed.T.maxX, Discrete.T.maxX, Continuous.T.maxX)); - let mapY = (~knownIntegralSumFn=(previousIntegralSum => None), fn) => + let mapY = (~knownIntegralSumFn=previousIntegralSum => None, fn) => fmap(( Mixed.T.mapY(~knownIntegralSumFn, fn), Discrete.T.mapY(~knownIntegralSumFn, fn), @@ -935,14 +1066,18 @@ module DistPlus = { let toDiscrete = 
shapeFn(Shape.T.toDiscrete); let normalize = (t: t): t => { - let normalizedShape = - t |> toShape |> Shape.T.normalize; - - t |> updateShape(normalizedShape); + let normalizedShape = t |> toShape |> Shape.T.normalize; + t |> updateShape(normalizedShape); // TODO: also adjust for domainIncludedProbabilityMass here. }; + let truncate = (leftCutoff, rightCutoff, t: t): t => { + let truncatedShape = t |> toShape |> Shape.T.truncate(leftCutoff, rightCutoff); + + t |> updateShape(truncatedShape); + }; + // TODO: replace this with let normalizedToContinuous = (t: t) => { t @@ -980,7 +1115,13 @@ module DistPlus = { let downsample = (~cache=None, i, t): t => updateShape(t |> toShape |> Shape.T.downsample(i), t); // todo: adjust for limit, maybe? - let mapY = (~knownIntegralSumFn=(previousIntegralSum => None), fn, {shape, _} as t: t): t => + let mapY = + ( + ~knownIntegralSumFn=previousIntegralSum => None, + fn, + {shape, _} as t: t, + ) + : t => Shape.T.mapY(~knownIntegralSumFn, fn, shape) |> updateShape(_, t); let integralEndY = (~cache as _, t: t) => diff --git a/src/distPlus/renderers/DistPlusRenderer.re b/src/distPlus/renderers/DistPlusRenderer.re index c2bf8360..e141d83c 100644 --- a/src/distPlus/renderers/DistPlusRenderer.re +++ b/src/distPlus/renderers/DistPlusRenderer.re @@ -1,13 +1,13 @@ -let truncateIfShould = +let downsampleIfShould = ( - {recommendedLength, shouldTruncate}: RenderTypes.DistPlusRenderer.inputs, + {recommendedLength, shouldDownsample}: RenderTypes.DistPlusRenderer.inputs, outputs: RenderTypes.ShapeRenderer.Combined.outputs, dist, ) => { - let willTruncate = - shouldTruncate + let willDownsample = + shouldDownsample && RenderTypes.ShapeRenderer.Combined.methodUsed(outputs) == `Sampling; - willTruncate ? dist |> Distributions.DistPlus.T.truncate(recommendedLength) : dist; + willDownsample ? 
dist |> Distributions.DistPlus.T.downsample(recommendedLength) : dist; }; let run = @@ -21,7 +21,7 @@ let run = ~guesstimatorString=Some(inputs.distPlusIngredients.guesstimatorString), (), ) - |> Distributions.DistPlus.T.scaleToIntegralSum(~intendedSum=1.0); + |> Distributions.DistPlus.T.normalize; let outputs = ShapeRenderer.run({ samplingInputs: inputs.samplingInputs, @@ -32,6 +32,6 @@ let run = }); let shape = outputs |> RenderTypes.ShapeRenderer.Combined.getShape; let dist = - shape |> E.O.fmap(toDist) |> E.O.fmap(truncateIfShould(inputs, outputs)); + shape |> E.O.fmap(toDist) |> E.O.fmap(downsampleIfShould(inputs, outputs)); RenderTypes.DistPlusRenderer.Outputs.make(outputs, dist); -}; \ No newline at end of file +}; diff --git a/src/distPlus/renderers/RenderTypes.re b/src/distPlus/renderers/RenderTypes.re index c94ca69a..e091ecad 100644 --- a/src/distPlus/renderers/RenderTypes.re +++ b/src/distPlus/renderers/RenderTypes.re @@ -75,7 +75,7 @@ module ShapeRenderer = { module DistPlusRenderer = { let defaultRecommendedLength = 10000; - let defaultShouldTruncate = true; + let defaultShouldDownsample = true; type ingredients = { guesstimatorString: string, domain: DistTypes.domain, @@ -85,7 +85,7 @@ module DistPlusRenderer = { distPlusIngredients: ingredients, samplingInputs: ShapeRenderer.Sampling.inputs, recommendedLength: int, - shouldTruncate: bool, + shouldDownsample: bool, }; module Ingredients = { let make = @@ -105,7 +105,7 @@ module DistPlusRenderer = { ( ~samplingInputs=ShapeRenderer.Sampling.Inputs.empty, ~recommendedLength=defaultRecommendedLength, - ~shouldTruncate=defaultShouldTruncate, + ~shouldDownsample=defaultShouldDownsample, ~distPlusIngredients, (), ) @@ -113,7 +113,7 @@ module DistPlusRenderer = { distPlusIngredients, samplingInputs, recommendedLength, - shouldTruncate, + shouldDownsample, }; type outputs = { shapeRenderOutputs: ShapeRenderer.Combined.outputs, diff --git a/src/distPlus/symbolic/MathJsParser.re 
b/src/distPlus/symbolic/MathJsParser.re index 5353aba0..d80cf004 100644 --- a/src/distPlus/symbolic/MathJsParser.re +++ b/src/distPlus/symbolic/MathJsParser.re @@ -154,16 +154,17 @@ module MathAdtToDistDst = { weights: option(array(float)), ) => { let weights = weights |> E.O.default([||]); - let dists = + + /*let dists: = args |> E.A.fmap( fun | Ok(a) => a | Error(e) => Error(e) - ); + );*/ - let firstWithError = dists |> Belt.Array.getBy(_, Belt.Result.isError); - let withoutErrors = dists |> E.A.fmap(E.R.toOption) |> E.A.O.concatSomes; + let firstWithError = args |> Belt.Array.getBy(_, Belt.Result.isError); + let withoutErrors = args |> E.A.fmap(E.R.toOption) |> E.A.O.concatSomes; switch (firstWithError) { | Some(Error(e)) => Error(e) @@ -174,16 +175,16 @@ module MathAdtToDistDst = { |> E.A.fmapi((index, t) => { let w = weights |> E.A.get(_, index) |> E.O.default(1.0); - `Operation(`ScaleBy(`Multiply, t, `DistData(`Symbolic(`Float(w))))) + `Operation(`ScaleOperation(`Multiply, t, `DistData(`Symbolic(`Float(w))))) }); let pointwiseSum = components |> Js.Array.sliceFrom(1) |> E.A.fold_left((acc, x) => { - `PointwiseSum(acc, x) + `Operation(`PointwiseOperation(`Add, acc, x)) }, E.A.unsafe_get(components, 0)) - Ok(`Normalize(pointwiseSum)) + Ok(`Operation(`Normalize(pointwiseSum))) } }; }; @@ -254,21 +255,21 @@ module MathAdtToDistDst = { args |> E.A.fmap(functionParser) |> (fun - | [|Ok(l), Ok(r)|] => Ok(`Combination(l, r, `AddOperation)) + | [|Ok(l), Ok(r)|] => Ok(`Operation(`StandardOperation(`Add, l, r))) | _ => Error("Addition needs two operands")) } | Fn({name: "subtract", args}) => { args |> E.A.fmap(functionParser) |> (fun - | [|Ok(l), Ok(r)|] => Ok(`Combination(l, r, `SubtractOperation)) + | [|Ok(l), Ok(r)|] => Ok(`Operation(`StandardOperation(`Subtract, l, r))) | _ => Error("Subtraction needs two operands")) } | Fn({name: "multiply", args}) => { args |> E.A.fmap(functionParser) |> (fun - | [|Ok(l), Ok(r)|] => Ok(`Combination(l, r, `MultiplyOperation)) + | 
[|Ok(l), Ok(r)|] => Ok(`Operation(`StandardOperation(`Multiply, l, r))) | _ => Error("Multiplication needs two operands")) } | Fn({name: "divide", args}) => { @@ -276,28 +277,37 @@ module MathAdtToDistDst = { |> E.A.fmap(functionParser) |> (fun | [|Ok(l), Ok(`DistData(`Symbolic(`Float(0.0))))|] => Error("Division by zero") - | [|Ok(l), Ok(r)|] => Ok(`Combination(l, r, `DivideOperation)) + | [|Ok(l), Ok(r)|] => Ok(`Operation(`StandardOperation(`Divide, l, r))) | _ => Error("Division needs two operands")) } | Fn({name: "pow", args}) => { args |> E.A.fmap(functionParser) |> (fun - | [|Ok(l), Ok(r)|] => Ok(`Combination(l, r, `ExponentiateOperation)) + | [|Ok(l), Ok(r)|] => Ok(`Operation(`StandardOperation(`Exponentiate, l, r))) + | _ => Error("Division needs two operands") | _ => Error("Exponentiations needs two operands")) } | Fn({name: "leftTruncate", args}) => { args |> E.A.fmap(functionParser) |> (fun - | [|Ok(l), Ok(`DistData(`Symbolic(`Float(r))))|] => Ok(`LeftTruncate(l, r)) + | [|Ok(d), Ok(`DistData(`Symbolic(`Float(lc))))|] => Ok(`Operation(`Truncate(Some(lc), None, d))) | _ => Error("leftTruncate needs two arguments: the expression and the cutoff")) } | Fn({name: "rightTruncate", args}) => { args |> E.A.fmap(functionParser) |> (fun - | [|Ok(l), Ok(`DistData(`Symbolic(`Float(r))))|] => Ok(`RightTruncate(l, r)) + | [|Ok(d), Ok(`DistData(`Symbolic(`Float(rc))))|] => Ok(`Operation(`Truncate(None, Some(rc), d))) + | _ => Error("rightTruncate needs two arguments: the expression and the cutoff")) + } + | Fn({name: "truncate", args}) => { + args + |> E.A.fmap(functionParser) + |> (fun + | [|Ok(d), Ok(`DistData(`Symbolic(`Float(lc)))), Ok(`DistData(`Symbolic(`Float(rc))))|] => Ok(`Operation(`Truncate(Some(lc), Some(rc), d))) + // TODO: allow on-the-fly evaluations of FloatFromDists to be used as cutoff arguments here. 
| _ => Error("rightTruncate needs two arguments: the expression and the cutoff")) } | Fn({name}) => Error(name ++ ": function not supported") diff --git a/src/distPlus/symbolic/TreeNode.re b/src/distPlus/symbolic/TreeNode.re index 5f1aece7..4aa645fb 100644 --- a/src/distPlus/symbolic/TreeNode.re +++ b/src/distPlus/symbolic/TreeNode.re @@ -1,69 +1,60 @@ /* This module represents a tree node. */ -/* TreeNodes are either Data (i.e. symbolic or rendered distributions) or Operations. */ -type treeNode = [ - | `DistData(distData) - | `Operation(operation) -] and distData = [ +type distData = [ | `Symbolic(SymbolicDist.dist) | `RenderedShape(DistTypes.shape) -] and operation = [ - // binary operations - | `StandardOperation(standardOperation, treeNode, treeNode) - | `PointwiseOperation(pointwiseOperation, treeNode, treeNode) - | `ScaleOperation(scaleOperation, treeNode, scaleBy) - // unary operations - | `Render(treeNode) // always evaluates to `DistData(`RenderedShape(...)) - | `Truncate(leftCutoff, rightCutoff, treeNode) - | `Normalize(treeNode) - // direct evaluations of dists (e.g. cdf, sample) - | `FloatFromDist(distToFloatOperation, treeNode) -] and standardOperation = [ +]; + +type standardOperation = [ | `Add | `Multiply | `Subtract | `Divide | `Exponentiate -] and pointwiseOperation = [ - | `Add - | `Multiply -] and scaleOperation = [ - | `Multiply - | `Log +]; +type pointwiseOperation = [ | `Add | `Multiply]; +type scaleOperation = [ | `Multiply | `Exponentiate | `Log]; +type distToFloatOperation = [ | `Pdf(float) | `Inv(float) | `Mean | `Sample]; + +/* TreeNodes are either Data (i.e. symbolic or rendered distributions) or Operations. 
*/ +type treeNode = [ + | `DistData(distData) // a leaf node that describes a distribution + | `Operation(operation) // an operation on two child nodes ] -and scaleBy = treeNode and leftCutoff = option(float) and rightCutoff = option(float) -and distToFloatOperation = [ - | `Pdf(float) - | `Cdf(float) - | `Inv(float) - | `Sample +and operation = [ + | // binary operations + `StandardOperation( + standardOperation, + treeNode, + treeNode, + ) + // unary operations + | `PointwiseOperation(pointwiseOperation, treeNode, treeNode) // always evaluates to `DistData(`RenderedShape(...)) + | `ScaleOperation(scaleOperation, treeNode, treeNode) // always evaluates to `DistData(`RenderedShape(...)) + | `Render(treeNode) // always evaluates to `DistData(`RenderedShape(...)) + | `Truncate // always evaluates to `DistData(`RenderedShape(...)) +( + option(float), + option(float), + treeNode, + ) // leftCutoff and rightCutoff + | `Normalize // always evaluates to `DistData(`RenderedShape(...)) + // leftCutoff and rightCutoff +( + treeNode, + ) + | `FloatFromDist // always evaluates to `DistData(`RenderedShape(...)) + // leftCutoff and rightCutoff +( + distToFloatOperation, + treeNode, + ) ]; module TreeNode = { type t = treeNode; type simplifier = treeNode => result(treeNode, string); - type renderParams = { - operationToDistData: (int, operation) => result(t, string), - sampleCount: int, - } - - let rec renderToShape = (renderParams, t: t): result(DistTypes.shape, string) => { - switch (t) { - | `DistData(`RenderedShape(s)) => Ok(s) // already a rendered shape, we're done here - | `DistData(`Symbolic(d)) => - switch (d) { - | `Float(v) => - Ok(Discrete(Distributions.Discrete.make({xs: [|v|], ys: [|1.0|]}, Some(1.0)))); - | _ => - let xs = SymbolicDist.GenericDistFunctions.interpolateXs(~xSelection=`ByWeight, d, renderParams.sampleCount); - let ys = xs |> E.A.fmap(x => SymbolicDist.GenericDistFunctions.pdf(x, d)); - Ok(Continuous(Distributions.Continuous.make(`Linear, {xs, ys}, 
Some(1.0)))); - } - | `Operation(op) => E.R.bind(renderParams.operationToDistData(renderParams.sampleCount, op), renderToShape(renderParams)) - }; - }; - /* The following modules encapsulate everything we can do with * different kinds of operations. */ @@ -154,207 +145,328 @@ module TreeNode = { }; }; - let evaluateNumerically = (standardOp, renderParams, t1, t2) => { + let evaluateNumerically = (standardOp, operationToDistData, t1, t2) => { let func = funcFromOp(standardOp); - // TODO: downsample the two shapes - let renderedShape1 = t1 |> renderToShape(renderParams); - let renderedShape2 = t2 |> renderToShape(renderParams); + // force rendering into shapes + let renderedShape1 = operationToDistData(`Render(t1)); + let renderedShape2 = operationToDistData(`Render(t2)); - // This will most likely require a mixed - - switch ((renderedShape1, renderedShape2)) { - | (Error(e1), _) => Error(e1) - | (_, Error(e2)) => Error(e2) - | (Ok(s1), Ok(s2)) => Ok(`DistData(`RenderedShape(Distributions.Shape.convolve(func, s1, s2)))) + switch (renderedShape1, renderedShape2) { + | ( + Ok(`DistData(`RenderedShape(s1))), + Ok(`DistData(`RenderedShape(s2))), + ) => + Ok( + `DistData( + `RenderedShape(Distributions.Shape.convolve(func, s1, s2)), + ), + ) + | (Error(e1), _) => Error(e1) + | (_, Error(e2)) => Error(e2) + | _ => Error("Could not render shapes.") }; }; let evaluateToDistData = - (standardOp: standardOperation, renderParams, t1: t, t2: t): result(treeNode, string) => + (standardOp: standardOperation, operationToDistData, t1: t, t2: t) + : result(treeNode, string) => standardOp |> Simplify.attempt(_, t1, t2) |> E.R.bind( _, fun | `DistData(d) => Ok(`DistData(d)) // the analytical simplifaction worked, nice! 
- | `Operation(_) => // if not, run the convolution - evaluateNumerically(standardOp, renderParams, t1, t2), + | `Operation(_) => + // if not, run the convolution + evaluateNumerically(standardOp, operationToDistData, t1, t2), ); }; module ScaleOperation = { - let rec mean = (renderParams, t: t): result(float, string) => { - switch (t) { - | `DistData(`RenderedShape(s)) => Ok(Distributions.Shape.T.mean(s)) - | `DistData(`Symbolic(s)) => SymbolicDist.GenericDistFunctions.mean(s) - // evaluating the operation returns result(treeNode(distData)). We then want to make sure - | `Operation(op) => E.R.bind(renderParams.operationToDistData(renderParams.sampleCount, op), mean(renderParams)) - } - }; - let fnFromOp = fun - | `Multiply => (*.) - | `Log => ((a, b) => ( log(a) /. log(b) )); + | `Multiply => ( *. ) + | `Exponentiate => ( ** ) + | `Log => ((a, b) => log(a) /. log(b)); let knownIntegralSumFnFromOp = fun - | `Multiply => (a, b) => Some(a *. b) + | `Multiply => ((a, b) => Some(a *. b)) + | `Exponentiate => ((_, _) => None) | `Log => ((_, _) => None); - let evaluateToDistData = (scaleOp, renderParams, t, scaleBy) => { + let evaluateToDistData = (scaleOp, operationToDistData, t, scaleBy) => { + // scaleBy has to be a single float, otherwise we'll return an error. 
let fn = fnFromOp(scaleOp); let knownIntegralSumFn = knownIntegralSumFnFromOp(scaleOp); - let renderedShape = t |> renderToShape(renderParams); - let scaleByMeanValue = mean(renderParams, scaleBy); - switch ((renderedShape, scaleByMeanValue)) { + let renderedShape = operationToDistData(`Render(t)); + + switch (renderedShape, scaleBy) { | (Error(e1), _) => Error(e1) - | (_, Error(e2)) => Error(e2) - | (Ok(rs), Ok(sm)) => - Ok(`DistData(`RenderedShape(Distributions.Shape.T.mapY(~knownIntegralSumFn=knownIntegralSumFn(sm), fn(sm), rs)))) - } + | ( + Ok(`DistData(`RenderedShape(rs))), + `DistData(`Symbolic(`Float(sm))), + ) => + Ok( + `DistData( + `RenderedShape( + Distributions.Shape.T.mapY( + ~knownIntegralSumFn=knownIntegralSumFn(sm), + fn(sm), + rs, + ), + ), + ), + ) + | (_, _) => Error("Can only scale by float values.") + }; }; }; module PointwiseOperation = { - let funcFromOp: (pointwiseOperation => ((float, float) => float)) = - fun - | `Add => (+.) - | `Multiply => ( *. ); + let pointwiseAdd = (operationToDistData, t1, t2) => { + let renderedShape1 = operationToDistData(`Render(t1)); + let renderedShape2 = operationToDistData(`Render(t2)); - let evaluateToDistData = (pointwiseOp, renderParams, t1, t2) => { - let func = funcFromOp(pointwiseOp); - let renderedShape1 = t1 |> renderToShape(renderParams); - let renderedShape2 = t2 |> renderToShape(renderParams); + switch ((renderedShape1, renderedShape2)) { + | (Error(e1), _) => Error(e1) + | (_, Error(e2)) => Error(e2) + | (Ok(`DistData(`RenderedShape(rs1))), Ok(`DistData(`RenderedShape(rs2)))) => Ok(`DistData(`RenderedShape(Distributions.Shape.combine(~knownIntegralSumsFn=(a, b) => Some(a +. 
b), (+.), rs1, rs2)))) + | _ => Error("Could not perform pointwise addition.") + }; + }; - // TODO: figure out integral, diff between pointwiseAdd and pointwiseProduct and other stuff - // Distributions.Shape.reduce(func, renderedShape1, renderedShape2); + let pointwiseMultiply = (operationToDistData, t1, t2) => { + // TODO: construct a function that we can easily sample from, to construct + // a RenderedShape. Use the xMin and xMax of the rendered shapes to tell the sampling function where to look. + Error("Pointwise multiplication not yet supported."); + }; - Error("Pointwise operations currently not supported.") + let evaluateToDistData = (pointwiseOp, operationToDistData, t1, t2) => { + switch (pointwiseOp) { + | `Add => pointwiseAdd(operationToDistData, t1, t2) + | `Multiply => pointwiseMultiply(operationToDistData, t1, t2) + } }; }; module Truncate = { module Simplify = { - let tryTruncatingNothing: simplifier = fun - | `Operation(`Truncate(None, None, `DistData(d))) => Ok(`DistData(d)) - | t => Ok(t); + let tryTruncatingNothing: simplifier = + fun + | `Operation(`Truncate(None, None, `DistData(d))) => + Ok(`DistData(d)) + | t => Ok(t); - let tryTruncatingUniform: simplifier = fun - | `Operation(`Truncate(lc, rc, `DistData(`Symbolic(`Uniform(u))))) => { - // just create a new Uniform distribution - let newLow = max(E.O.default(neg_infinity, lc), u.low); - let newHigh = min(E.O.default(infinity, rc), u.high); - Ok(`DistData(`Symbolic(`Uniform({low: newLow, high: newHigh})))); - } - | t => Ok(t); + let tryTruncatingUniform: simplifier = + fun + | `Operation(`Truncate(lc, rc, `DistData(`Symbolic(`Uniform(u))))) => { + // just create a new Uniform distribution + let newLow = max(E.O.default(neg_infinity, lc), u.low); + let newHigh = min(E.O.default(infinity, rc), u.high); + Ok( + `DistData(`Symbolic(`Uniform({low: newLow, high: newHigh}))), + ); + } + | t => Ok(t); let attempt = (leftCutoff, rightCutoff, t): result(treeNode, string) => { - let originalTreeNode = 
`Operation(`Truncate(leftCutoff, rightCutoff, t)); + let originalTreeNode = + `Operation(`Truncate((leftCutoff, rightCutoff, t))); - originalTreeNode - |> tryTruncatingNothing - |> E.R.bind(_, tryTruncatingUniform); + originalTreeNode + |> tryTruncatingNothing + |> E.R.bind(_, tryTruncatingUniform); }; }; - let evaluateNumerically = (leftCutoff, rightCutoff, renderParams, t) => { + let evaluateNumerically = + (leftCutoff, rightCutoff, operationToDistData, t) => { // TODO: use named args in renderToShape; if we're lucky we can at least get the tail // of a distribution we otherwise wouldn't get at all - let renderedShape = t |> renderToShape(renderParams); + let renderedShape = operationToDistData(`Render(t)); - E.R.bind(renderedShape, rs => { - let truncatedShape = rs |> Distributions.Shape.truncate(leftCutoff, rightCutoff); + switch (renderedShape) { + | Ok(`DistData(`RenderedShape(rs))) => + let truncatedShape = + rs |> Distributions.Shape.T.truncate(leftCutoff, rightCutoff); Ok(`DistData(`RenderedShape(rs))); - }); + | Error(e1) => Error(e1) + | _ => Error("Could not truncate distribution.") + }; }; - let evaluateToDistData = (leftCutoff: option(float), rightCutoff: option(float), renderParams, t: treeNode): result(treeNode, string) => { + let evaluateToDistData = + ( + leftCutoff: option(float), + rightCutoff: option(float), + operationToDistData, + t: treeNode, + ) + : result(treeNode, string) => { t |> Simplify.attempt(leftCutoff, rightCutoff) |> E.R.bind( _, fun | `DistData(d) => Ok(`DistData(d)) // the analytical simplifaction worked, nice! 
- | `Operation(_) => evaluateNumerically(leftCutoff, rightCutoff, renderParams, t), + | `Operation(_) => + evaluateNumerically( + leftCutoff, + rightCutoff, + operationToDistData, + t, + ), ); // if not, run the convolution - }; + }; }; module Normalize = { - let rec evaluateToDistData = (renderParams, t: treeNode): result(treeNode, string) => { + let rec evaluateToDistData = + (operationToDistData, t: treeNode): result(treeNode, string) => { switch (t) { | `DistData(`Symbolic(_)) => Ok(t) - | `DistData(`RenderedShape(s)) => { - let normalized = Distributions.Shape.normalize(s); + | `DistData(`RenderedShape(s)) => + let normalized = Distributions.Shape.T.normalize(s); Ok(`DistData(`RenderedShape(normalized))); - } - | `Operation(op) => E.R.bind(renderParams.operationToDistData(renderParams.sampleCount, op), evaluateToDistData(renderParams)) - } - } + | `Operation(op) => + E.R.bind( + operationToDistData(op), + evaluateToDistData(operationToDistData), + ) + }; + }; }; module FloatFromDist = { let evaluateFromSymbolic = (distToFloatOp: distToFloatOperation, s) => { - let value = switch (distToFloatOp) { - | `Pdf(f) => SymbolicDist.GenericDistFunctions.pdf(f, s) - | `Cdf(f) => 0.0 - | `Inv(f) => SymbolicDist.GenericDistFunctions.inv(f, s) - | `Sample => SymbolicDist.GenericDistFunctions.sample(s) - } - Ok(`DistData(`Symbolic(`Float(value)))); + let value = + switch (distToFloatOp) { + | `Pdf(f) => Ok(SymbolicDist.GenericDistFunctions.pdf(f, s)) + | `Inv(f) => Ok(SymbolicDist.GenericDistFunctions.inv(f, s)) + | `Sample => Ok(SymbolicDist.GenericDistFunctions.sample(s)) + | `Mean => SymbolicDist.GenericDistFunctions.mean(s) + }; + E.R.bind(value, v => Ok(`DistData(`Symbolic(`Float(v))))); }; - let evaluateFromRenderedShape = (distToFloatOp: distToFloatOperation, rs: DistTypes.shape): result(treeNode, string) => { - // evaluate the pdf, cdf, get sample, etc. 
from the renderedShape rs - // Should be a float like Ok(`DistData(`Symbolic(Float(0.0)))); - Error("Float from dist is not yet implemented."); + let evaluateFromRenderedShape = + (distToFloatOp: distToFloatOperation, rs: DistTypes.shape) + : result(treeNode, string) => { + Ok(`DistData(`Symbolic(`Float(Distributions.Shape.T.mean(rs))))); }; - let rec evaluateToDistData = (distToFloatOp: distToFloatOperation, renderParams, t: treeNode): result(treeNode, string) => { + let rec evaluateToDistData = + ( + distToFloatOp: distToFloatOperation, + operationToDistData, + t: treeNode, + ) + : result(treeNode, string) => { switch (t) { | `DistData(`Symbolic(s)) => evaluateFromSymbolic(distToFloatOp, s) // we want to evaluate the distToFloatOp on the symbolic dist - | `DistData(`RenderedShape(rs)) => evaluateFromRenderedShape(distToFloatOp, rs) - | `Operation(op) => E.R.bind(renderParams.operationToDistData(renderParams.sampleCount, op), evaluateToDistData(distToFloatOp, renderParams)) - } - } + | `DistData(`RenderedShape(rs)) => + evaluateFromRenderedShape(distToFloatOp, rs) + | `Operation(op) => + E.R.bind( + operationToDistData(op), + evaluateToDistData(distToFloatOp, operationToDistData), + ) + }; + }; }; module Render = { - let evaluateToRenderedShape = (renderParams, t: treeNode): result(t, string) => { - E.R.bind(renderToShape(renderParams, t), rs => Ok(`DistData(`RenderedShape(rs)))); - } + let rec evaluateToRenderedShape = + (operationToDistData: operation => result(t, string), sampleCount: int, t: treeNode) + : result(t, string) => { + switch (t) { + | `DistData(`RenderedShape(s)) => Ok(`DistData(`RenderedShape(s))) // already a rendered shape, we're done here + | `DistData(`Symbolic(d)) => + switch (d) { + | `Float(v) => + Ok( + `DistData( + `RenderedShape( + Discrete( + Distributions.Discrete.make( + {xs: [|v|], ys: [|1.0|]}, + Some(1.0), + ), + ), + ), + ), + ) + | _ => + let xs = + SymbolicDist.GenericDistFunctions.interpolateXs( + ~xSelection=`ByWeight, + d, + 
sampleCount, + ); + let ys = + xs |> E.A.fmap(x => SymbolicDist.GenericDistFunctions.pdf(x, d)); + Ok( + `DistData( + `RenderedShape( + Continuous( + Distributions.Continuous.make( + `Linear, + {xs, ys}, + Some(1.0), + ), + ), + ), + ), + ); + } + | `Operation(op) => + E.R.bind( + operationToDistData(op), + evaluateToRenderedShape(operationToDistData, sampleCount), + ) + }; + }; }; let rec operationToDistData = - (sampleCount: int, op: operation): result(t, string) => { - + (sampleCount: int, op: operation): result(t, string) => { // the functions that convert the Operation nodes to DistData nodes need to // have a way to call this function on their children, if their children are themselves Operation nodes. - - let renderParams: renderParams = { - operationToDistData: operationToDistData, - sampleCount: sampleCount, - }; - switch (op) { | `StandardOperation(standardOp, t1, t2) => StandardOperation.evaluateToDistData( - standardOp, renderParams, t1, t2 // we want to give it the option to render or simply leave it as is + standardOp, + operationToDistData(sampleCount), + t1, + t2 // we want to give it the option to render or simply leave it as is ) | `PointwiseOperation(pointwiseOp, t1, t2) => PointwiseOperation.evaluateToDistData( pointwiseOp, - renderParams, + operationToDistData(sampleCount), t1, t2, ) | `ScaleOperation(scaleOp, t, scaleBy) => - ScaleOperation.evaluateToDistData(scaleOp, renderParams, t, scaleBy) - | `Truncate(leftCutoff, rightCutoff, t) => Truncate.evaluateToDistData(leftCutoff, rightCutoff, renderParams, t) - | `FloatFromDist(distToFloatOp, t) => FloatFromDist.evaluateToDistData(distToFloatOp, renderParams, t) - | `Normalize(t) => Normalize.evaluateToDistData(renderParams, t) - | `Render(t) => Render.evaluateToRenderedShape(renderParams, t) + ScaleOperation.evaluateToDistData( + scaleOp, + operationToDistData(sampleCount), + t, + scaleBy, + ) + | `Truncate(leftCutoff, rightCutoff, t) => + Truncate.evaluateToDistData( + leftCutoff, + 
rightCutoff, + operationToDistData(sampleCount), + t, + ) + | `FloatFromDist(distToFloatOp, t) => + FloatFromDist.evaluateToDistData(distToFloatOp, operationToDistData(sampleCount), t) + | `Normalize(t) => Normalize.evaluateToDistData(operationToDistData(sampleCount), t) + | `Render(t) => + Render.evaluateToRenderedShape(operationToDistData(sampleCount), sampleCount, t) }; }; @@ -372,7 +484,8 @@ module TreeNode = { }; let rec toString = (t: t): string => { - let stringFromStandardOperation = fun + let stringFromStandardOperation = + fun | `Add => " + " | `Subtract => " - " | `Multiply => " * " @@ -384,31 +497,53 @@ module TreeNode = { | `Add => " .+ " | `Multiply => " .* "; + let stringFromFloatFromDistOperation = + fun + | `Pdf(f) => "pdf(x=$f, " + | `Inv(f) => "inv(c=$f, " + | `Sample => "sample(" + | `Mean => "mean("; + + switch (t) { - | `DistData(`Symbolic(d)) => SymbolicDist.GenericDistFunctions.toString(d) + | `DistData(`Symbolic(d)) => + SymbolicDist.GenericDistFunctions.toString(d) | `DistData(`RenderedShape(s)) => "[shape]" - | `Operation(`StandardOperation(op, t1, t2)) => toString(t1) ++ stringFromStandardOperation(op) ++ toString(t2) - | `Operation(`PointwiseOperation(op, t1, t2)) => toString(t1) ++ stringFromPointwiseOperation(op) ++ toString(t2) - | `Operation(`ScaleOperation(_scaleOp, t, scaleBy)) => toString(t) ++ " @ " ++ toString(scaleBy) + | `Operation(`StandardOperation(op, t1, t2)) => + toString(t1) ++ stringFromStandardOperation(op) ++ toString(t2) + | `Operation(`PointwiseOperation(op, t1, t2)) => + toString(t1) ++ stringFromPointwiseOperation(op) ++ toString(t2) + | `Operation(`ScaleOperation(_scaleOp, t, scaleBy)) => + toString(t) ++ " @ " ++ toString(scaleBy) | `Operation(`Normalize(t)) => "normalize(" ++ toString(t) ++ ")" - | `Operation(`Truncate(lc, rc, t)) => "truncate(" ++ toString(t) ++ ", " ++ E.O.dimap(string_of_float, () => "-inf", lc) ++ ", " ++ E.O.dimap(string_of_float, () => "inf", rc) ++ ")" + | 
`Operation(`FloatFromDist(floatFromDistOp, t)) => stringFromFloatFromDistOperation(floatFromDistOp) ++ toString(t) ++ ")" + | `Operation(`Truncate(lc, rc, t)) => + "truncate(" + ++ toString(t) + ++ ", " + ++ E.O.dimap(Js.Float.toString, () => "-inf", lc) + ++ ", " + ++ E.O.dimap(Js.Float.toString, () => "inf", rc) + ++ ")" | `Operation(`Render(t)) => toString(t) - } + }; }; }; let toShape = (sampleCount: int, treeNode: treeNode) => { - let renderResult = TreeNode.toDistData(`Operation(`Render(treeNode)), sampleCount); - + let renderResult = + TreeNode.toDistData(`Operation(`Render(treeNode)), sampleCount); switch (renderResult) { - | Ok(`DistData(`RenderedShape(rs))) => { - let continuous = Distributions.Shape.T.toContinuous(rs); - let discrete = Distributions.Shape.T.toDiscrete(rs); - let shape = MixedShapeBuilder.buildSimple(~continuous, ~discrete); - shape |> E.O.toExt(""); - } + | Ok(`DistData(`RenderedShape(rs))) => + let continuous = Distributions.Shape.T.toContinuous(rs); + let discrete = Distributions.Shape.T.toDiscrete(rs); + let shape = MixedShapeBuilder.buildSimple(~continuous, ~discrete); + shape |> E.O.toExt(""); | Ok(_) => E.O.toExn("Rendering failed.", None) - | Error(message) => E.O.toExn("No shape found!", None) - } + | Error(message) => E.O.toExn("No shape found, error: " ++ message, None) + }; }; + +let toString = (treeNode: treeNode) => + TreeNode.toString(treeNode); diff --git a/src/interface/FormBuilder.re b/src/interface/FormBuilder.re index 55c4f071..7556a82f 100644 --- a/src/interface/FormBuilder.re +++ b/src/interface/FormBuilder.re @@ -22,7 +22,7 @@ let propValue = (t: Prop.Value.t) => { RenderTypes.DistPlusRenderer.make( ~distPlusIngredients=r, ~recommendedLength=10000, - ~shouldTruncate=true, + ~shouldDownsample=true, (), ) |> DistPlusRenderer.run @@ -105,4 +105,4 @@ module ModelForm = {
; }; -}; \ No newline at end of file +}; From d2e7e5f9288c1e3fef86c3001c1ab63c980a909b Mon Sep 17 00:00:00 2001 From: Sebastian Kosch Date: Fri, 26 Jun 2020 22:37:24 -0700 Subject: [PATCH 10/31] Fixed some indexing errors in convolution code --- src/components/DistBuilder.re | 9 +- src/distPlus/distribution/Distributions.re | 176 ++++++++++++--------- src/distPlus/distribution/XYShape.re | 2 +- src/distPlus/symbolic/SymbolicDist.re | 4 +- src/distPlus/symbolic/TreeNode.re | 94 +++++------ 5 files changed, 156 insertions(+), 129 deletions(-) diff --git a/src/components/DistBuilder.re b/src/components/DistBuilder.re index 93856fc9..c28ef275 100644 --- a/src/components/DistBuilder.re +++ b/src/components/DistBuilder.re @@ -171,7 +171,8 @@ let make = () => { ~schema, ~onSubmit=({state}) => {None}, ~initialState={ - guesstimatorString: "mm(normal(-10, 2), uniform(18, 25), lognormal({mean: 10, stdev: 8}), triangular(31,40,50))", + //guesstimatorString: "mm(normal(-10, 2), uniform(18, 25), lognormal({mean: 10, stdev: 8}), triangular(31,40,50))", + guesstimatorString: "uniform(0, 1) + normal(1, 2)", domainType: "Complete", xPoint: "50.0", xPoint2: "60.0", @@ -180,9 +181,9 @@ let make = () => { unitType: "UnspecifiedDistribution", zero: MomentRe.momentNow(), unit: "days", - sampleCount: "30000", - outputXYPoints: "10000", - downsampleTo: "1000", + sampleCount: "3000", + outputXYPoints: "100", + downsampleTo: "100", kernelWidth: "5", }, (), diff --git a/src/distPlus/distribution/Distributions.re b/src/distPlus/distribution/Distributions.re index 9497957d..7f0d642f 100644 --- a/src/distPlus/distribution/Distributions.re +++ b/src/distPlus/distribution/Distributions.re @@ -154,6 +154,7 @@ module Continuous = { // This is essentially like integrateWithTriangles, without the accumulation. 
let toDiscretePointMasses = (t: t): DistTypes.discreteShape => { let tl = t |> getShape |> XYShape.T.length; + let pointMassesX: array(float) = Belt.Array.make(tl - 1, 0.0); let pointMassesY: array(float) = Belt.Array.make(tl - 1, 0.0); let {xs, ys}: XYShape.T.t = t |> getShape; for (x in 0 to E.A.length(xs) - 2) { @@ -163,75 +164,24 @@ module Continuous = { x, (xs[x + 1] -. xs[x]) *. ((ys[x] +. ys[x + 1]) /. 2.), ); // = dx * (1/2) * (avgY) + let _ = + Belt.Array.set( + pointMassesX, + x, + (xs[x] +. xs[x + 1]) /. 2., + ); // midpoints (); }; { xyShape: { - xs, + xs: pointMassesX, ys: pointMassesY, }, knownIntegralSum: t.knownIntegralSum, }; }; - /* Performs a discrete convolution between two continuous distributions A and B. - * It is an extremely good idea to downsample the distributions beforehand, - * because the number of samples in the convolution can be up to length(A) * length(B). - * - * Conventional convolution uses fn = (+.), but we also allow other operations to combine the xs. - * - * In practice, the convolution works by multiplying the ys for each possible combo of points of - * the two shapes. This creates a new shape for each point of A. These new shapes are then combined - * linearly. This may not always be the most efficient way, but it is probably the most robust for now. - * - * In the future, it may be possible to use a non-uniform fast Fourier transform instead (although only for addition). 
- */ - let convolveWithDiscrete = (fn, t1: t, t2: DistTypes.discreteShape) => { - let t1s = t1 |> getShape; - let t2s = t2.xyShape; // would like to use Discrete.getShape here, but current file structure doesn't allow for that - let t1n = t1s |> XYShape.T.length; - let t2n = t2s |> XYShape.T.length; - - let outXYShapes: array(array((float, float))) = - Belt.Array.makeUninitializedUnsafe(t1n); - - for (i in 0 to t1n - 1) { - // create a new distribution - let dxyShape: array((float, float)) = - Belt.Array.makeUninitializedUnsafe(t2n); - for (j in 0 to t2n - 1) { - let _ = - Belt.Array.set( - dxyShape, - j, - (fn(t1s.xs[i], t2s.xs[j]), t1s.ys[i] *. t2s.ys[j]), - ); - (); - }; - let _ = Belt.Array.set(outXYShapes, i, dxyShape); - (); - }; - - let combinedIntegralSum = - switch (t1.knownIntegralSum, t2.knownIntegralSum) { - | (None, _) - | (_, None) => None - | (Some(s1), Some(s2)) => Some(s1 *. s2) - }; - - outXYShapes - |> E.A.fmap(s => { - let xyShape = XYShape.T.fromZippedArray(s); - make(`Linear, xyShape, None); - }) - |> reduce((+.)) - |> updateKnownIntegralSum(combinedIntegralSum); - }; - - let convolve = (fn, t1: t, t2: t) => - convolveWithDiscrete(fn, t1, toDiscretePointMasses(t2)); - let mapY = (~knownIntegralSumFn=previousKnownIntegralSum => None, fn, t: t) => { let u = E.O.bind(_, knownIntegralSumFn); let yMapFn = shapeMap(XYShape.T.mapY(fn)); @@ -350,6 +300,69 @@ module Continuous = { XYShape.Analysis.getMeanOfSquaresContinuousShape, ); }); + + + /* Performs a discrete convolution between two continuous distributions A and B. + * It is an extremely good idea to downsample the distributions beforehand, + * because the number of samples in the convolution can be up to length(A) * length(B). + * + * Conventional convolution uses fn = (+.), but we also allow other operations to combine the xs. + * + * In practice, the convolution works by multiplying the ys for each possible combo of points of + * the two shapes. This creates a new shape for each point of A. 
These new shapes are then combined + * linearly. This may not always be the most efficient way, but it is probably the most robust for now. + * + * In the future, it may be possible to use a non-uniform fast Fourier transform instead (although only for addition). + */ + let convolveWithDiscrete = (~downsample=false, fn, t1: t, t2: DistTypes.discreteShape) => { + let t1s = t1 |> getShape; + let t2s = t2.xyShape; // would like to use Discrete.getShape here, but current file structure doesn't allow for that + let t1n = t1s |> XYShape.T.length; + let t2n = t2s |> XYShape.T.length; + + let outXYShapes: array(array((float, float))) = + Belt.Array.makeUninitializedUnsafe(t1n); + + for (i in 0 to t1n - 1) { + // create a new distribution + let dxyShape: array((float, float)) = + Belt.Array.makeUninitializedUnsafe(t2n); + for (j in 0 to t2n - 1) { + let _ = + Belt.Array.set( + dxyShape, + j, + (fn(t1s.xs[i], t2s.xs[j]), t1s.ys[i] *. t2s.ys[j]), + ); + (); + }; + + let _ = Belt.Array.set(outXYShapes, i, dxyShape); + (); + }; + + let combinedIntegralSum = Common.combineIntegralSums((a, b) => Some(a *. b), t1.knownIntegralSum, t2.knownIntegralSum); + + outXYShapes + |> E.A.fmap(s => { + let xyShape = XYShape.T.fromZippedArray(s); + make(`Linear, xyShape, None); + }) + |> reduce((+.)) + |> updateKnownIntegralSum(combinedIntegralSum); + }; + + let convolve = (~downsample=false, fn, t1: t, t2: t) => { + let downsampleIfTooLarge = (t: t) => { + let sqtl = sqrt(float_of_int(t |> getShape |> XYShape.T.length)); + sqtl > 10. && downsample ? T.downsample(int_of_float(sqtl), t) : t; + }; + + let t1d = downsampleIfTooLarge(t1); + let t2d = downsampleIfTooLarge(t2); + + convolveWithDiscrete(~downsample=false, fn, t1, toDiscretePointMasses(t2)); + }; }; module Discrete = { @@ -489,17 +502,23 @@ module Discrete = { let downsample = (~cache=None, i, t: t): t => { // It's not clear how to downsample a set of discrete points in a meaningful way. 
// The best we can do is to clip off the smallest values. - let clippedShape = - t - |> getShape - |> XYShape.T.zip - |> XYShape.Zipped.sortByY - |> Belt.Array.reverse - |> Belt.Array.slice(_, ~offset=0, ~len=i) - |> XYShape.Zipped.sortByX - |> XYShape.T.fromZippedArray; + let currentLength = t |> getShape |> XYShape.T.length; - make(clippedShape, None); // if someone needs the sum, they'll have to recompute it + if (i < currentLength) { + let clippedShape = + t + |> getShape + |> XYShape.T.zip + |> XYShape.Zipped.sortByY + |> Belt.Array.reverse + |> Belt.Array.slice(_, ~offset=0, ~len=i) + |> XYShape.Zipped.sortByX + |> XYShape.T.fromZippedArray; + + make(clippedShape, None); // if someone needs the sum, they'll have to recompute it + } else { + t; + } }; let truncate = @@ -663,6 +682,8 @@ module Mixed = { Continuous.T.Integral.sum(~cache=None, continuous); let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum; + // TODO: figure out what to do when the totalIntegralSum is zero. + let downsampledDiscrete = Discrete.T.downsample( int_of_float( @@ -811,17 +832,16 @@ module Mixed = { }; }); - let convolve = (fn: (float, float) => float, t1: t, t2: t): t => { + let convolve = (~downsample=false, fn: (float, float) => float, t1: t, t2: t): t => { // Discrete convolution can cause a huge increase in the number of samples, // so we'll first downsample. // An alternative (to be explored in the future) may be to first perform the full convolution and then to downsample the result; // to use non-uniform fast Fourier transforms (for addition only), add web workers or gpu.js, etc. ... - // TODO: make this optional or customizable let downsampleIfTooLarge = (t: t) => { let sqtl = sqrt(float_of_int(totalLength(t))); - sqtl > 10. ? T.downsample(int_of_float(sqtl), t) : t; + sqtl > 10. && downsample ? 
T.downsample(int_of_float(sqtl), t) : t; }; let t1d = downsampleIfTooLarge(t1); @@ -830,11 +850,11 @@ module Mixed = { // continuous (*) continuous => continuous, but also // discrete (*) continuous => continuous (and vice versa). We have to take care of all combos and then combine them: let ccConvResult = - Continuous.convolve(fn, t1d.continuous, t2d.continuous); + Continuous.convolve(~downsample=false, fn, t1d.continuous, t2d.continuous); let dcConvResult = - Continuous.convolveWithDiscrete(fn, t2d.continuous, t1d.discrete); + Continuous.convolveWithDiscrete(~downsample=false, fn, t2d.continuous, t1d.discrete); let cdConvResult = - Continuous.convolveWithDiscrete(fn, t1d.continuous, t2d.discrete); + Continuous.convolveWithDiscrete(~downsample=false, fn, t1d.continuous, t2d.discrete); let continuousConvResult = Continuous.reduce((+.), [|ccConvResult, dcConvResult, cdConvResult|]); @@ -870,7 +890,13 @@ module Shape = { )); let convolve = (fn, t1: t, t2: t): t => { - Mixed(Mixed.convolve(fn, toMixed(t1), toMixed(t2))); + switch ((t1, t2)) { + | (Continuous(m1), Continuous(m2)) => DistTypes.Continuous(Continuous.convolve(~downsample=true, fn, m1, m2)) + | (Discrete(m1), Discrete(m2)) => DistTypes.Discrete(Discrete.convolve(fn, m1, m2)) + | (m1, m2) => { + DistTypes.Mixed(Mixed.convolve(~downsample=true, fn, toMixed(m1), toMixed(m2))) + } + }; }; let combine = (~knownIntegralSumsFn=(_, _) => None, fn, t1: t, t2: t) => diff --git a/src/distPlus/distribution/XYShape.re b/src/distPlus/distribution/XYShape.re index 9451fb23..8e684b93 100644 --- a/src/distPlus/distribution/XYShape.re +++ b/src/distPlus/distribution/XYShape.re @@ -9,7 +9,7 @@ let interpolate = }; // TODO: Make sure that shapes cannot be empty. 
-let extImp = E.O.toExt("Should not be possible"); +let extImp = E.O.toExt("Tried to perform an operation on an empty XYShape."); module T = { type t = xyShape; diff --git a/src/distPlus/symbolic/SymbolicDist.re b/src/distPlus/symbolic/SymbolicDist.re index 8cab8227..e5df481c 100644 --- a/src/distPlus/symbolic/SymbolicDist.re +++ b/src/distPlus/symbolic/SymbolicDist.re @@ -283,11 +283,11 @@ module GenericDistFunctions = { (~xSelection: [ | `Linear | `ByWeight]=`Linear, dist: dist, n) => { switch (xSelection, dist) { | (`Linear, _) => E.A.Floats.range(min(dist), max(dist), n) - | (`ByWeight, `Uniform(n)) => +/* | (`ByWeight, `Uniform(n)) => // In `ByWeight mode, uniform distributions get special treatment because we need two x's // on either side for proper rendering (just left and right of the discontinuities). let dx = 0.00001 *. (n.high -. n.low); - [|n.low -. dx, n.low +. dx, n.high -. dx, n.high +. dx|]; + [|n.low -. dx, n.low +. dx, n.high -. dx, n.high +. dx|]; */ | (`ByWeight, _) => let ys = E.A.Floats.range(minCdfValue, maxCdfValue, n); ys |> E.A.fmap(y => inv(y, dist)); diff --git a/src/distPlus/symbolic/TreeNode.re b/src/distPlus/symbolic/TreeNode.re index 4aa645fb..8238d65b 100644 --- a/src/distPlus/symbolic/TreeNode.re +++ b/src/distPlus/symbolic/TreeNode.re @@ -55,6 +55,52 @@ module TreeNode = { type t = treeNode; type simplifier = treeNode => result(treeNode, string); + let rec toString = (t: t): string => { + let stringFromStandardOperation = + fun + | `Add => " + " + | `Subtract => " - " + | `Multiply => " * " + | `Divide => " / " + | `Exponentiate => "^"; + + let stringFromPointwiseOperation = + fun + | `Add => " .+ " + | `Multiply => " .* "; + + let stringFromFloatFromDistOperation = + fun + | `Pdf(f) => "pdf(x=$f, " + | `Inv(f) => "inv(c=$f, " + | `Sample => "sample(" + | `Mean => "mean("; + + + switch (t) { + | `DistData(`Symbolic(d)) => + SymbolicDist.GenericDistFunctions.toString(d) + | `DistData(`RenderedShape(s)) => "[shape]" + | 
`Operation(`StandardOperation(op, t1, t2)) => + toString(t1) ++ stringFromStandardOperation(op) ++ toString(t2) + | `Operation(`PointwiseOperation(op, t1, t2)) => + toString(t1) ++ stringFromPointwiseOperation(op) ++ toString(t2) + | `Operation(`ScaleOperation(_scaleOp, t, scaleBy)) => + toString(t) ++ " @ " ++ toString(scaleBy) + | `Operation(`Normalize(t)) => "normalize(" ++ toString(t) ++ ")" + | `Operation(`FloatFromDist(floatFromDistOp, t)) => stringFromFloatFromDistOperation(floatFromDistOp) ++ toString(t) ++ ")" + | `Operation(`Truncate(lc, rc, t)) => + "truncate(" + ++ toString(t) + ++ ", " + ++ E.O.dimap(Js.Float.toString, () => "-inf", lc) + ++ ", " + ++ E.O.dimap(Js.Float.toString, () => "inf", rc) + ++ ")" + | `Operation(`Render(t)) => toString(t) + }; + }; + /* The following modules encapsulate everything we can do with * different kinds of operations. */ @@ -482,52 +528,6 @@ module TreeNode = { | `Operation(op) => operationToDistData(sampleCount, op) }; }; - - let rec toString = (t: t): string => { - let stringFromStandardOperation = - fun - | `Add => " + " - | `Subtract => " - " - | `Multiply => " * " - | `Divide => " / " - | `Exponentiate => "^"; - - let stringFromPointwiseOperation = - fun - | `Add => " .+ " - | `Multiply => " .* "; - - let stringFromFloatFromDistOperation = - fun - | `Pdf(f) => "pdf(x=$f, " - | `Inv(f) => "inv(c=$f, " - | `Sample => "sample(" - | `Mean => "mean("; - - - switch (t) { - | `DistData(`Symbolic(d)) => - SymbolicDist.GenericDistFunctions.toString(d) - | `DistData(`RenderedShape(s)) => "[shape]" - | `Operation(`StandardOperation(op, t1, t2)) => - toString(t1) ++ stringFromStandardOperation(op) ++ toString(t2) - | `Operation(`PointwiseOperation(op, t1, t2)) => - toString(t1) ++ stringFromPointwiseOperation(op) ++ toString(t2) - | `Operation(`ScaleOperation(_scaleOp, t, scaleBy)) => - toString(t) ++ " @ " ++ toString(scaleBy) - | `Operation(`Normalize(t)) => "normalize(" ++ toString(t) ++ ")" - | 
`Operation(`FloatFromDist(floatFromDistOp, t)) => stringFromFloatFromDistOperation(floatFromDistOp) ++ toString(t) ++ ")" - | `Operation(`Truncate(lc, rc, t)) => - "truncate(" - ++ toString(t) - ++ ", " - ++ E.O.dimap(Js.Float.toString, () => "-inf", lc) - ++ ", " - ++ E.O.dimap(Js.Float.toString, () => "inf", rc) - ++ ")" - | `Operation(`Render(t)) => toString(t) - }; - }; }; let toShape = (sampleCount: int, treeNode: treeNode) => { @@ -539,7 +539,7 @@ let toShape = (sampleCount: int, treeNode: treeNode) => { let continuous = Distributions.Shape.T.toContinuous(rs); let discrete = Distributions.Shape.T.toDiscrete(rs); let shape = MixedShapeBuilder.buildSimple(~continuous, ~discrete); - shape |> E.O.toExt(""); + shape |> E.O.toExt("Could not build final shape."); | Ok(_) => E.O.toExn("Rendering failed.", None) | Error(message) => E.O.toExn("No shape found, error: " ++ message, None) }; From b368c2a8eeb051ab01d65ed33f431b7132063225 Mon Sep 17 00:00:00 2001 From: Sebastian Kosch Date: Fri, 26 Jun 2020 23:16:37 -0700 Subject: [PATCH 11/31] Testing convolutions --- src/components/DistBuilder.re | 2 +- src/distPlus/distribution/Distributions.re | 21 ++++++++++++--------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/components/DistBuilder.re b/src/components/DistBuilder.re index c28ef275..227a8801 100644 --- a/src/components/DistBuilder.re +++ b/src/components/DistBuilder.re @@ -172,7 +172,7 @@ let make = () => { ~onSubmit=({state}) => {None}, ~initialState={ //guesstimatorString: "mm(normal(-10, 2), uniform(18, 25), lognormal({mean: 10, stdev: 8}), triangular(31,40,50))", - guesstimatorString: "uniform(0, 1) + normal(1, 2)", + guesstimatorString: "uniform(0, 1) * normal(1, 2)", domainType: "Complete", xPoint: "50.0", xPoint2: "60.0", diff --git a/src/distPlus/distribution/Distributions.re b/src/distPlus/distribution/Distributions.re index 7f0d642f..37823fa3 100644 --- a/src/distPlus/distribution/Distributions.re +++ 
b/src/distPlus/distribution/Distributions.re @@ -173,6 +173,10 @@ module Continuous = { (); }; + Js.log2("Previous xyshape:", t |> getShape); + Js.log2("new pointmasssex", pointMassesX); + Js.log2("new pointmasssey", pointMassesY); + { xyShape: { xs: pointMassesX, @@ -321,23 +325,22 @@ module Continuous = { let t2n = t2s |> XYShape.T.length; let outXYShapes: array(array((float, float))) = - Belt.Array.makeUninitializedUnsafe(t1n); + Belt.Array.makeUninitializedUnsafe(t2n); - for (i in 0 to t1n - 1) { - // create a new distribution - let dxyShape: array((float, float)) = - Belt.Array.makeUninitializedUnsafe(t2n); - for (j in 0 to t2n - 1) { + for (j in 0 to t2n - 1) { // for each one of the discrete points + // create a new distribution, as long as the original continuous one + let dxyShape: array((float, float)) = Belt.Array.makeUninitializedUnsafe(t1n); + for (i in 0 to t1n - 1) { let _ = Belt.Array.set( dxyShape, - j, + i, (fn(t1s.xs[i], t2s.xs[j]), t1s.ys[i] *. t2s.ys[j]), ); (); }; - let _ = Belt.Array.set(outXYShapes, i, dxyShape); + let _ = Belt.Array.set(outXYShapes, j, dxyShape); (); }; @@ -355,7 +358,7 @@ module Continuous = { let convolve = (~downsample=false, fn, t1: t, t2: t) => { let downsampleIfTooLarge = (t: t) => { let sqtl = sqrt(float_of_int(t |> getShape |> XYShape.T.length)); - sqtl > 10. && downsample ? T.downsample(int_of_float(sqtl), t) : t; + sqtl > 10. && downsample && false ? 
T.downsample(int_of_float(sqtl), t) : t; }; let t1d = downsampleIfTooLarge(t1); From f5ce4354abb4645f7c0b9995bb495be4073e616c Mon Sep 17 00:00:00 2001 From: Sebastian Kosch Date: Fri, 26 Jun 2020 23:48:54 -0700 Subject: [PATCH 12/31] Move to point-mass-based convolution for the continuous-continuous case --- src/distPlus/distribution/Distributions.re | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/distPlus/distribution/Distributions.re b/src/distPlus/distribution/Distributions.re index 37823fa3..81b16cd8 100644 --- a/src/distPlus/distribution/Distributions.re +++ b/src/distPlus/distribution/Distributions.re @@ -173,10 +173,6 @@ module Continuous = { (); }; - Js.log2("Previous xyshape:", t |> getShape); - Js.log2("new pointmasssex", pointMassesX); - Js.log2("new pointmasssey", pointMassesY); - { xyShape: { xs: pointMassesX, @@ -364,7 +360,14 @@ module Continuous = { let t1d = downsampleIfTooLarge(t1); let t2d = downsampleIfTooLarge(t2); - convolveWithDiscrete(~downsample=false, fn, t1, toDiscretePointMasses(t2)); + let t1m = toDiscretePointMasses(t1); + let t2m = toDiscretePointMasses(t2); + + // then convolve the two as discrete distributions + let c = Discrete.convolve(fn, t1m, t2m); + + // then convert back to an approximate pdf + // TODO: find an efficient way to do this (kernel densities? trapezoids?) 
}; }; From f1e2458bcaa2840bfe5f083fadd18727021c32b6 Mon Sep 17 00:00:00 2001 From: Sebastian Kosch Date: Sat, 27 Jun 2020 23:50:53 -0700 Subject: [PATCH 13/31] WIP: continuous/continuous convolution --- src/distPlus/distribution/Distributions.re | 114 +++++++++++++++++++-- 1 file changed, 108 insertions(+), 6 deletions(-) diff --git a/src/distPlus/distribution/Distributions.re b/src/distPlus/distribution/Distributions.re index 81b16cd8..30dd16c5 100644 --- a/src/distPlus/distribution/Distributions.re +++ b/src/distPlus/distribution/Distributions.re @@ -351,6 +351,83 @@ module Continuous = { |> updateKnownIntegralSum(combinedIntegralSum); }; + /* This function takes a continuous distribution and efficiently approximates it as + point masses that have variances associated with them. + We estimate the means and variances from overlapping triangular distributions which we imagine are making up the + XYShape. + We can then use the algebra of random variables to "convolve" the point masses and their variances, + and finally reconstruct a new distribution from them, e.g. using a Fast Gauss Transform or Raykar et al. (2007). */ + type pointMassesWithMoments = { + n: int, + masses: array(float), + means: array(float), + variances: array(float) + }; + let toDiscretePointMassesFromTriangulars = (~inverse=False, t: t): pointMassesWithMoments => { + // TODO: what if there is only one point in the distribution? 
+ let s = t |> getShape; + let n = s |> XYShape.T.length; + // first, double up the leftmost and rightmost points: + let {xs, ys}: XYShape.T.t = s; + let _ = Js.Array.unshift(xs[0], xs); + let _ = Js.Array.unshift(ys[0], ys); + let _ = Js.Array.push(xs[n - 1], xs); + let _ = Js.Array.push(ys[n - 1], ys); + let n = E.A.length(xs); + // squares and neighbourly products of the xs + let xsSq: array(float) = Belt.Array.makeUninitializedUnsafe(n); + let xsProdN1: array(float) = Belt.Array.makeUninitializedUnsafe(n - 1); + let xsProdN2: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2); + for (i in 0 to n - 1) { + let _ = Belt.Array.set(xsSq, i, xs[i] *. xs[i]); (); + }; + for (i in 0 to n - 2) { + let _ = Belt.Array.set(xsProdN1, i, xs[i] *. xs[i + 1]); (); + }; + for (i in 0 to n - 3) { + let _ = Belt.Array.set(xsProdN2, i, xs[i] *. xs[i + 2]); (); + }; + // means and variances + let masses: array(float) = Belt.Array.makeUninitializedUnsafe(n); + let means: array(float) = Belt.Array.makeUninitializedUnsafe(n); + let variances: array(float) = Belt.Array.makeUninitializedUnsafe(n); + + if (inverse) { + for (i in 1 to n - 2) { + let _ = Belt.Array.set(masses, i - 1, (xs[i + 1] -. xs[i - 1]) *. ys[i] /. 2.); + + // this only works when the whole triange is either on the left or on the right of zero + let a = xs[i - 1]; + let c = xs[i]; + let b = xs[i + 1]; + + // These are the moments of the reciprocal of a triangular distribution, as symbolically integrated by Mathematica. + // They're probably pretty close to invMean ~ 1/mean = 3/(a+b+c) and invVar. But I haven't worked out + // the worst case error, so for now let's use these monster equations + let inverseMean = 2. *. ((a *. log(a/.c) /. (a-.c)) +. ((b *. log(c/.b))/.(b-.c))) /. (a -. b); + let inverseVar = 2. *. ((log(c/.a) /. (a-.c)) +. ((b *. log(b/.c))/.(b-.c))) /. (a -. 
b) - inverseMean ** 2.; + + let _ = Belt.Array.set(means, i - 1, inverseMean); + + let _ = Belt.Array.set(variances, i - 1, inverseVar); + (); + }; + + {n, masses, means, variances}; + } else { + for (i in 1 to n - 2) { + let _ = Belt.Array.set(masses, i - 1, (xs[i + 1] -. xs[i - 1]) *. ys[i] /. 2.); + let _ = Belt.Array.set(means, i - 1, (xs[i - 1] +. xs[i] +. xs[i + 1]) /. 3.); + + let _ = Belt.Array.set(variances, i - 1, + (xsSq[i-1] +. xsSq[i] +. xsSq[i+1] -. xsProdN1[i-1] -. xsProdN1[i] -. xsProdN2[i-1]) /. 18.); + (); + }; + {n, masses, means, variances}; + }; + + }; + let convolve = (~downsample=false, fn, t1: t, t2: t) => { let downsampleIfTooLarge = (t: t) => { let sqtl = sqrt(float_of_int(t |> getShape |> XYShape.T.length)); @@ -360,14 +437,39 @@ module Continuous = { let t1d = downsampleIfTooLarge(t1); let t2d = downsampleIfTooLarge(t2); - let t1m = toDiscretePointMasses(t1); - let t2m = toDiscretePointMasses(t2); + // if we add the two distributions, we should probably use normal filters. + // if we multiply the two distributions, we should probably use lognormal filters. + let t1m = toDiscretePointMassesFromTriangulars(t1); + let t2m = toDiscretePointMassesFromTriangulars(t2); - // then convolve the two as discrete distributions - let c = Discrete.convolve(fn, t1m, t2m); + let convolveMeansFn = (TreeNode.standardOp) => fun + | `Add => (m1, m2) => m1 +. m2 + | `Subtract => (m1, m2) => m1 -. m2 + | `Multiply => (m1, m2) => m1 *. m2 + | `Divide => (m1, mInv2) => m1 *. mInv2; // note: here, mInv2 = mean(1 / t2) + + // converts the variances and means of the two inputs into the variance of the output + let convolveVariancesFn = (TreeNode.standardOp) => fun + | `Add => (v1, v2, m1, m2) => v1 +. v2 + | `Subtract => (v1, v2, m1, m2) => v1 +. v2 + | `Multiply => (v1, v2, m1, m2) => (v1 *. v2) +. (v1 *. m1**2.) +. (v2 *. m1**2.) + | `Divide => (v1, vInv2, m1, mInv2) => (v1 *. vInv2) +. (v1 *. mInv2**2.) +. (vInv2 *. 
m1**2.); + + let masses: array(float) = Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n); + let means: array(float) = Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n); + let variances: array(float) = Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n); + // then convolve the two sets of pointMassesWithMoments + for (i in 0 to t1m.n - 1) { + for (j in 0 to t2m.n - 1) { + let k = i * t2m.n + j; + let _ = Belt.Array.set(masses, k, t1m.masses[i] *. t2m.masses[j]); + let _ = Belt.Array.set(means, k, convolveMeansFn(t1m.means[i], t2m.means[j])); + let _ = Belt.Array.set(variances, k, convolveMeansFn(t1m.variances[i], t2m.variances[j], t1m.means[i], t2m.means[j])); + }; + }; + + // now, run a Fast Gauss transform to estimate the new distribution: - // then convert back to an approximate pdf - // TODO: find an efficient way to do this (kernel densities? trapezoids?) }; }; From 502481e345a542943591c6a2299b45bbd11ae50a Mon Sep 17 00:00:00 2001 From: Sebastian Kosch Date: Mon, 29 Jun 2020 22:29:15 -0700 Subject: [PATCH 14/31] First working prototype of algebraic combinations --- src/components/DistBuilder.re | 2 +- .../distribution/AlgebraicCombinations.re | 161 ++++++++++ src/distPlus/distribution/Distributions.re | 286 ++++++------------ src/distPlus/distribution/XYShape.re | 23 +- src/distPlus/symbolic/MathJsParser.re | 26 +- src/distPlus/symbolic/TreeNode.re | 120 ++++---- 6 files changed, 331 insertions(+), 287 deletions(-) create mode 100644 src/distPlus/distribution/AlgebraicCombinations.re diff --git a/src/components/DistBuilder.re b/src/components/DistBuilder.re index 227a8801..77c64aab 100644 --- a/src/components/DistBuilder.re +++ b/src/components/DistBuilder.re @@ -172,7 +172,7 @@ let make = () => { ~onSubmit=({state}) => {None}, ~initialState={ //guesstimatorString: "mm(normal(-10, 2), uniform(18, 25), lognormal({mean: 10, stdev: 8}), triangular(31,40,50))", - guesstimatorString: "uniform(0, 1) * normal(1, 2)", + guesstimatorString: "uniform(0, 1) * normal(1, 2) 
- 1", domainType: "Complete", xPoint: "50.0", xPoint2: "60.0", diff --git a/src/distPlus/distribution/AlgebraicCombinations.re b/src/distPlus/distribution/AlgebraicCombinations.re new file mode 100644 index 00000000..54d1fadc --- /dev/null +++ b/src/distPlus/distribution/AlgebraicCombinations.re @@ -0,0 +1,161 @@ +type algebraicOperation = [ + | `Add + | `Multiply + | `Subtract + | `Divide +]; + +type pointMassesWithMoments = { +n: int, +masses: array(float), +means: array(float), +variances: array(float) +}; + +let operationToFn: (algebraicOperation, float, float) => float = + fun + | `Add => (+.) + | `Subtract => (-.) + | `Multiply => ( *. ) + | `Divide => (/.); + + + /* This function takes a continuous distribution and efficiently approximates it as + point masses that have variances associated with them. + We estimate the means and variances from overlapping triangular distributions which we imagine are making up the + XYShape. + We can then use the algebra of random variables to "convolve" the point masses and their variances, + and finally reconstruct a new distribution from them, e.g. using a Fast Gauss Transform or Raykar et al. (2007). */ +let toDiscretePointMassesFromTriangulars = (~inverse=false, s: XYShape.T.t): pointMassesWithMoments => { +// TODO: what if there is only one point in the distribution? +let n = s |> XYShape.T.length; +// first, double up the leftmost and rightmost points: +let {xs, ys}: XYShape.T.t = s; +let _ = Js.Array.unshift(xs[0], xs); +let _ = Js.Array.unshift(ys[0], ys); +let _ = Js.Array.push(xs[n - 1], xs); +let _ = Js.Array.push(ys[n - 1], ys); +let n = E.A.length(xs); +// squares and neighbourly products of the xs +let xsSq: array(float) = Belt.Array.makeUninitializedUnsafe(n); +let xsProdN1: array(float) = Belt.Array.makeUninitializedUnsafe(n - 1); +let xsProdN2: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2); +for (i in 0 to n - 1) { + let _ = Belt.Array.set(xsSq, i, xs[i] *. 
xs[i]); (); +}; +for (i in 0 to n - 2) { + let _ = Belt.Array.set(xsProdN1, i, xs[i] *. xs[i + 1]); (); +}; +for (i in 0 to n - 3) { + let _ = Belt.Array.set(xsProdN2, i, xs[i] *. xs[i + 2]); (); +}; +// means and variances +let masses: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2); // doesn't include the fake first and last points +let means: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2); +let variances: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2); + +if (inverse) { + for (i in 1 to n - 2) { + let _ = Belt.Array.set(masses, i - 1, (xs[i + 1] -. xs[i - 1]) *. ys[i] /. 2.); + + // this only works when the whole triange is either on the left or on the right of zero + let a = xs[i - 1]; + let c = xs[i]; + let b = xs[i + 1]; + + // These are the moments of the reciprocal of a triangular distribution, as symbolically integrated by Mathematica. + // They're probably pretty close to invMean ~ 1/mean = 3/(a+b+c) and invVar. But I haven't worked out + // the worst case error, so for now let's use these monster equations + let inverseMean = 2. *. ((a *. log(a/.c) /. (a-.c)) +. ((b *. log(c/.b))/.(b-.c))) /. (a -. b); + let inverseVar = 2. *. ((log(c/.a) /. (a-.c)) +. ((b *. log(b/.c))/.(b-.c))) /. (a -. b) -. inverseMean ** 2.; + + let _ = Belt.Array.set(means, i - 1, inverseMean); + + let _ = Belt.Array.set(variances, i - 1, inverseVar); + (); + }; + + {n: n - 2, masses, means, variances}; +} else { + for (i in 1 to n - 2) { + let _ = Belt.Array.set(masses, i - 1, (xs[i + 1] -. xs[i - 1]) *. ys[i] /. 2.); + let _ = Belt.Array.set(means, i - 1, (xs[i - 1] +. xs[i] +. xs[i + 1]) /. 3.); + + let _ = Belt.Array.set(variances, i - 1, + (xsSq[i-1] +. xsSq[i] +. xsSq[i+1] -. xsProdN1[i-1] -. xsProdN1[i] -. xsProdN2[i-1]) /. 
18.); + (); + }; + {n: n - 2, masses, means, variances}; + }; +}; + + +let combineShapesContinuousContinuous = (op: algebraicOperation, s1: DistTypes.xyShape, s2: DistTypes.xyShape): DistTypes.xyShape => { + let t1n = s1 |> XYShape.T.length; + let t2n = s2 |> XYShape.T.length; + + // if we add the two distributions, we should probably use normal filters. + // if we multiply the two distributions, we should probably use lognormal filters. + let t1m = toDiscretePointMassesFromTriangulars(s1); + let t2m = toDiscretePointMassesFromTriangulars(s2); + + let combineMeansFn = switch (op) { + | `Add => (m1, m2) => m1 +. m2 + | `Subtract => (m1, m2) => m1 -. m2 + | `Multiply => (m1, m2) => m1 *. m2 + | `Divide => (m1, mInv2) => m1 *. mInv2 + }; // note: here, mInv2 = mean(1 / t2) ~= 1 / mean(t2) + + // converts the variances and means of the two inputs into the variance of the output + let combineVariancesFn = switch (op) { + | `Add => (v1, v2, m1, m2) => v1 +. v2 + | `Subtract => (v1, v2, m1, m2) => v1 +. v2 + | `Multiply => (v1, v2, m1, m2) => (v1 *. v2) +. (v1 *. m1**2.) +. (v2 *. m1**2.) + | `Divide => (v1, vInv2, m1, mInv2) => (v1 *. vInv2) +. (v1 *. mInv2**2.) +. (vInv2 *. m1**2.) + }; + + let outputMinX: ref(float) = ref(infinity); + let outputMaxX: ref(float) = ref(neg_infinity); + let masses: array(float) = Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n); + let means: array(float) = Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n); + let variances: array(float) = Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n); + // then convolve the two sets of pointMassesWithMoments + for (i in 0 to t1m.n - 1) { + for (j in 0 to t2m.n - 1) { + let k = i * t2m.n + j; + let _ = Belt.Array.set(masses, k, t1m.masses[i] *. 
t2m.masses[j]); + + let mean = combineMeansFn(t1m.means[i], t2m.means[j]); + let variance = combineVariancesFn(t1m.variances[i], t2m.variances[j], t1m.means[i], t2m.means[j]); + let _ = Belt.Array.set(means, k, mean); + let _ = Belt.Array.set(variances, k, variance); + // update bounds + let minX = mean -. variance *. 1.644854; + let maxX = mean +. variance *. 1.644854; + if (minX < outputMinX^) { + outputMinX := minX; + } + if (maxX > outputMaxX^) { + outputMaxX := maxX; + } + }; + }; + + // we now want to create a set of target points. For now, let's just evenly distribute 200 points between + // between the outputMinX and outputMaxX + let outputXs: array(float) = E.A.Floats.range(outputMinX^, outputMaxX^, 200); + let outputYs: array(float) = Belt.Array.make(200, 0.0); + // now, for each of the outputYs, accumulate from a Gaussian kernel over each input point. + for (i in 0 to E.A.length(outputXs) - 1) { + let x = outputXs[i]; + for (j in 0 to E.A.length(masses) - 1) { + let dx = outputXs[i] -. means[j]; + let contribution = masses[j] *. exp(-.(dx**2.) /. (2. *. variances[j])); + let _ = Belt.Array.set(outputYs, i, outputYs[i] +. 
contribution); + (); + }; + (); + }; + + {xs: outputXs, ys: outputYs}; +}; diff --git a/src/distPlus/distribution/Distributions.re b/src/distPlus/distribution/Distributions.re index 30dd16c5..82a4cd6c 100644 --- a/src/distPlus/distribution/Distributions.re +++ b/src/distPlus/distribution/Distributions.re @@ -95,7 +95,7 @@ module Continuous = { interpolation: `Linear, knownIntegralSum: Some(0.0), }; - let combine = + let combinePointwise = ( ~knownIntegralSumsFn, fn, @@ -114,7 +114,7 @@ module Continuous = { make( `Linear, - XYShape.Combine.combine( + XYShape.PointwiseCombination.combine( ~xsSelection=ALL_XS, ~xToYSelection=XYShape.XtoY.linear, ~fn, @@ -147,42 +147,9 @@ module Continuous = { continuousShapes, ) => continuousShapes - |> E.A.fold_left(combine(~knownIntegralSumsFn, fn), empty); + |> E.A.fold_left(combinePointwise(~knownIntegralSumsFn, fn), empty); - // Contracts every point in the continuous xyShape into a single dirac-Delta-like point, - // using the centerpoints between adjacent xs and the area under each trapezoid. - // This is essentially like integrateWithTriangles, without the accumulation. - let toDiscretePointMasses = (t: t): DistTypes.discreteShape => { - let tl = t |> getShape |> XYShape.T.length; - let pointMassesX: array(float) = Belt.Array.make(tl - 1, 0.0); - let pointMassesY: array(float) = Belt.Array.make(tl - 1, 0.0); - let {xs, ys}: XYShape.T.t = t |> getShape; - for (x in 0 to E.A.length(xs) - 2) { - let _ = - Belt.Array.set( - pointMassesY, - x, - (xs[x + 1] -. xs[x]) *. ((ys[x] +. ys[x + 1]) /. 2.), - ); // = dx * (1/2) * (avgY) - let _ = - Belt.Array.set( - pointMassesX, - x, - (xs[x] +. xs[x + 1]) /. 
2., - ); // midpoints - (); - }; - - { - xyShape: { - xs: pointMassesX, - ys: pointMassesY, - }, - knownIntegralSum: t.knownIntegralSum, - }; - }; - - let mapY = (~knownIntegralSumFn=previousKnownIntegralSum => None, fn, t: t) => { + let mapY = (~knownIntegralSumFn=(_ => None), fn, t: t) => { let u = E.O.bind(_, knownIntegralSumFn); let yMapFn = shapeMap(XYShape.T.mapY(fn)); @@ -247,7 +214,9 @@ module Continuous = { }; // TODO: This should work with stepwise plots. - let integral = (~cache, t) => + let integral = (~cache, t) => { + + if ((t |> getShape |> XYShape.T.length) > 0) { switch (cache) { | Some(cache) => cache | None => @@ -257,6 +226,11 @@ module Continuous = { |> E.O.toExt("This should not have happened") |> make(`Linear, _, None) }; + } else { + make(`Linear, {xs: [|neg_infinity|], ys: [|0.0|]}, None); + } + }; + let downsample = (~cache=None, length, t): t => t |> shapeMap( @@ -287,6 +261,7 @@ module Continuous = { let indefiniteIntegralStepwise = (p, h1) => h1 *. p ** 2.0 /. 2.0; let indefiniteIntegralLinear = (p, a, b) => a *. p ** 2.0 /. 2.0 +. b *. p ** 3.0 /. 3.0; + XYShape.Analysis.integrateContinuousShape( ~indefiniteIntegralStepwise, ~indefiniteIntegralLinear, @@ -302,24 +277,16 @@ module Continuous = { }); - /* Performs a discrete convolution between two continuous distributions A and B. - * It is an extremely good idea to downsample the distributions beforehand, - * because the number of samples in the convolution can be up to length(A) * length(B). - * - * Conventional convolution uses fn = (+.), but we also allow other operations to combine the xs. - * - * In practice, the convolution works by multiplying the ys for each possible combo of points of - * the two shapes. This creates a new shape for each point of A. These new shapes are then combined - * linearly. This may not always be the most efficient way, but it is probably the most robust for now. 
- * - * In the future, it may be possible to use a non-uniform fast Fourier transform instead (although only for addition). - */ - let convolveWithDiscrete = (~downsample=false, fn, t1: t, t2: DistTypes.discreteShape) => { + /* This simply creates multiple copies of the continuous distribution, scaled and shifted according to + each discrete data point, and then adds them all together. */ + let combineAlgebraicallyWithDiscrete = (~downsample=false, op: AlgebraicCombinations.algebraicOperation, t1: t, t2: DistTypes.discreteShape) => { let t1s = t1 |> getShape; let t2s = t2.xyShape; // would like to use Discrete.getShape here, but current file structure doesn't allow for that let t1n = t1s |> XYShape.T.length; let t2n = t2s |> XYShape.T.length; + let fn = AlgebraicCombinations.operationToFn(op); + let outXYShapes: array(array((float, float))) = Belt.Array.makeUninitializedUnsafe(t2n); @@ -351,125 +318,19 @@ module Continuous = { |> updateKnownIntegralSum(combinedIntegralSum); }; - /* This function takes a continuous distribution and efficiently approximates it as - point masses that have variances associated with them. - We estimate the means and variances from overlapping triangular distributions which we imagine are making up the - XYShape. - We can then use the algebra of random variables to "convolve" the point masses and their variances, - and finally reconstruct a new distribution from them, e.g. using a Fast Gauss Transform or Raykar et al. (2007). */ - type pointMassesWithMoments = { - n: int, - masses: array(float), - means: array(float), - variances: array(float) - }; - let toDiscretePointMassesFromTriangulars = (~inverse=False, t: t): pointMassesWithMoments => { - // TODO: what if there is only one point in the distribution? 
- let s = t |> getShape; - let n = s |> XYShape.T.length; - // first, double up the leftmost and rightmost points: - let {xs, ys}: XYShape.T.t = s; - let _ = Js.Array.unshift(xs[0], xs); - let _ = Js.Array.unshift(ys[0], ys); - let _ = Js.Array.push(xs[n - 1], xs); - let _ = Js.Array.push(ys[n - 1], ys); - let n = E.A.length(xs); - // squares and neighbourly products of the xs - let xsSq: array(float) = Belt.Array.makeUninitializedUnsafe(n); - let xsProdN1: array(float) = Belt.Array.makeUninitializedUnsafe(n - 1); - let xsProdN2: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2); - for (i in 0 to n - 1) { - let _ = Belt.Array.set(xsSq, i, xs[i] *. xs[i]); (); - }; - for (i in 0 to n - 2) { - let _ = Belt.Array.set(xsProdN1, i, xs[i] *. xs[i + 1]); (); - }; - for (i in 0 to n - 3) { - let _ = Belt.Array.set(xsProdN2, i, xs[i] *. xs[i + 2]); (); - }; - // means and variances - let masses: array(float) = Belt.Array.makeUninitializedUnsafe(n); - let means: array(float) = Belt.Array.makeUninitializedUnsafe(n); - let variances: array(float) = Belt.Array.makeUninitializedUnsafe(n); - - if (inverse) { - for (i in 1 to n - 2) { - let _ = Belt.Array.set(masses, i - 1, (xs[i + 1] -. xs[i - 1]) *. ys[i] /. 2.); - - // this only works when the whole triange is either on the left or on the right of zero - let a = xs[i - 1]; - let c = xs[i]; - let b = xs[i + 1]; - - // These are the moments of the reciprocal of a triangular distribution, as symbolically integrated by Mathematica. - // They're probably pretty close to invMean ~ 1/mean = 3/(a+b+c) and invVar. But I haven't worked out - // the worst case error, so for now let's use these monster equations - let inverseMean = 2. *. ((a *. log(a/.c) /. (a-.c)) +. ((b *. log(c/.b))/.(b-.c))) /. (a -. b); - let inverseVar = 2. *. ((log(c/.a) /. (a-.c)) +. ((b *. log(b/.c))/.(b-.c))) /. (a -. 
b) - inverseMean ** 2.; - - let _ = Belt.Array.set(means, i - 1, inverseMean); - - let _ = Belt.Array.set(variances, i - 1, inverseVar); - (); - }; - - {n, masses, means, variances}; + let combineAlgebraically = (~downsample=false, op: AlgebraicCombinations.algebraicOperation, t1: t, t2: t) => { + let s1 = t1 |> getShape; + let s2 = t2 |> getShape; + let t1n = s1 |> XYShape.T.length; + let t2n = s2 |> XYShape.T.length; + if (t1n == 0 || t2n == 0) { + empty; } else { - for (i in 1 to n - 2) { - let _ = Belt.Array.set(masses, i - 1, (xs[i + 1] -. xs[i - 1]) *. ys[i] /. 2.); - let _ = Belt.Array.set(means, i - 1, (xs[i - 1] +. xs[i] +. xs[i + 1]) /. 3.); - - let _ = Belt.Array.set(variances, i - 1, - (xsSq[i-1] +. xsSq[i] +. xsSq[i+1] -. xsProdN1[i-1] -. xsProdN1[i] -. xsProdN2[i-1]) /. 18.); - (); - }; - {n, masses, means, variances}; + let combinedShape = AlgebraicCombinations.combineShapesContinuousContinuous(op, s1, s2); + let combinedIntegralSum = Common.combineIntegralSums((a, b) => Some(a *. b), t1.knownIntegralSum, t2.knownIntegralSum); + // return a new Continuous distribution + make(`Linear, combinedShape, combinedIntegralSum); }; - - }; - - let convolve = (~downsample=false, fn, t1: t, t2: t) => { - let downsampleIfTooLarge = (t: t) => { - let sqtl = sqrt(float_of_int(t |> getShape |> XYShape.T.length)); - sqtl > 10. && downsample && false ? T.downsample(int_of_float(sqtl), t) : t; - }; - - let t1d = downsampleIfTooLarge(t1); - let t2d = downsampleIfTooLarge(t2); - - // if we add the two distributions, we should probably use normal filters. - // if we multiply the two distributions, we should probably use lognormal filters. - let t1m = toDiscretePointMassesFromTriangulars(t1); - let t2m = toDiscretePointMassesFromTriangulars(t2); - - let convolveMeansFn = (TreeNode.standardOp) => fun - | `Add => (m1, m2) => m1 +. m2 - | `Subtract => (m1, m2) => m1 -. m2 - | `Multiply => (m1, m2) => m1 *. m2 - | `Divide => (m1, mInv2) => m1 *. 
mInv2; // note: here, mInv2 = mean(1 / t2) - - // converts the variances and means of the two inputs into the variance of the output - let convolveVariancesFn = (TreeNode.standardOp) => fun - | `Add => (v1, v2, m1, m2) => v1 +. v2 - | `Subtract => (v1, v2, m1, m2) => v1 +. v2 - | `Multiply => (v1, v2, m1, m2) => (v1 *. v2) +. (v1 *. m1**2.) +. (v2 *. m1**2.) - | `Divide => (v1, vInv2, m1, mInv2) => (v1 *. vInv2) +. (v1 *. mInv2**2.) +. (vInv2 *. m1**2.); - - let masses: array(float) = Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n); - let means: array(float) = Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n); - let variances: array(float) = Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n); - // then convolve the two sets of pointMassesWithMoments - for (i in 0 to t1m.n - 1) { - for (j in 0 to t2m.n - 1) { - let k = i * t2m.n + j; - let _ = Belt.Array.set(masses, k, t1m.masses[i] *. t2m.masses[j]); - let _ = Belt.Array.set(means, k, convolveMeansFn(t1m.means[i], t2m.means[j])); - let _ = Belt.Array.set(variances, k, convolveMeansFn(t1m.variances[i], t2m.variances[j], t1m.means[i], t2m.means[j])); - }; - }; - - // now, run a Fast Gauss transform to estimate the new distribution: - }; }; @@ -490,7 +351,7 @@ module Discrete = { let lastY = (t: t) => t |> getShape |> XYShape.T.lastY; - let combine = + let combinePointwise = ( ~knownIntegralSumsFn, fn, @@ -506,7 +367,7 @@ module Discrete = { ); make( - XYShape.Combine.combine( + XYShape.PointwiseCombination.combine( ~xsSelection=ALL_XS, ~xToYSelection=XYShape.XtoY.stepwiseIfAtX, ~fn=((a, b) => fn(E.O.default(0.0, a), E.O.default(0.0, b))), // stepwiseIfAtX returns option(float), so this fn needs to handle None @@ -519,14 +380,16 @@ module Discrete = { let reduce = (~knownIntegralSumsFn=(_, _) => None, fn, discreteShapes): DistTypes.discreteShape => discreteShapes - |> E.A.fold_left(combine(~knownIntegralSumsFn, fn), empty); + |> E.A.fold_left(combinePointwise(~knownIntegralSumsFn, fn), empty); let 
updateKnownIntegralSum = (knownIntegralSum, t: t): t => { ...t, knownIntegralSum, }; - let convolve = (fn, t1: t, t2: t) => { + /* This multiples all of the data points together and creates a new discrete distribution from the results. + Data points at the same xs get added together. It may be a good idea to downsample t1 and t2 before and/or the result after. */ + let combineAlgebraically = (op: AlgebraicCombinations.algebraicOperation, t1: t, t2: t) => { let t1s = t1 |> getShape; let t2s = t2 |> getShape; let t1n = t1s |> XYShape.T.length; @@ -539,6 +402,7 @@ module Discrete = { t2.knownIntegralSum, ); + let fn = AlgebraicCombinations.operationToFn(op); let xToYMap = E.FloatFloatMap.empty(); for (i in 0 to t1n - 1) { @@ -553,9 +417,9 @@ module Discrete = { let rxys = xToYMap |> E.FloatFloatMap.toArray |> XYShape.Zipped.sortByX; - let convolvedShape = XYShape.T.fromZippedArray(rxys); + let combinedShape = XYShape.T.fromZippedArray(rxys); - make(convolvedShape, combinedIntegralSum); + make(combinedShape, combinedIntegralSum); }; let mapY = (~knownIntegralSumFn=previousKnownIntegralSum => None, fn, t: t) => { @@ -577,16 +441,21 @@ module Discrete = { Dist({ type t = DistTypes.discreteShape; type integral = DistTypes.continuousShape; - let integral = (~cache, t) => - switch (cache) { - | Some(c) => c - | None => - Continuous.make( - `Stepwise, - XYShape.T.accumulateYs((+.), getShape(t)), - None, - ) - }; + let integral = (~cache, t) => { + if ((t |> getShape |> XYShape.T.length) > 0) { + switch (cache) { + | Some(c) => c + | None => + Continuous.make( + `Stepwise, + XYShape.T.accumulateYs((+.), getShape(t)), + None, + ) + }; + } else { + Continuous.make(`Stepwise, {xs: [|neg_infinity|], ys: [|0.0|]}, None); + }}; + let integralEndY = (~cache, t: t) => t.knownIntegralSum |> E.O.default(t |> integral(~cache) |> Continuous.lastY); @@ -612,7 +481,7 @@ module Discrete = { // The best we can do is to clip off the smallest values. 
let currentLength = t |> getShape |> XYShape.T.length; - if (i < currentLength) { + if (i < currentLength && i >= 1 && currentLength > 1) { let clippedShape = t |> getShape @@ -696,7 +565,7 @@ module Mixed = { let toContinuous = ({continuous}: t) => Some(continuous); let toDiscrete = ({discrete}: t) => Some(discrete); - let combine = (~knownIntegralSumsFn, fn, t1: t, t2: t) => { + let combinePointwise = (~knownIntegralSumsFn, fn, t1: t, t2: t) => { let reducedDiscrete = [|t1, t2|] |> E.A.fmap(toDiscrete) @@ -829,7 +698,7 @@ module Mixed = { Continuous.make( `Linear, - XYShape.Combine.combineLinear( + XYShape.PointwiseCombination.combineLinear( ~fn=(+.), Continuous.getShape(continuousIntegral), Continuous.getShape(discreteIntegral), @@ -940,7 +809,7 @@ module Mixed = { }; }); - let convolve = (~downsample=false, fn: (float, float) => float, t1: t, t2: t): t => { + let combineAlgebraically = (~downsample=false, op: AlgebraicCombinations.algebraicOperation, t1: t, t2: t): t => { // Discrete convolution can cause a huge increase in the number of samples, // so we'll first downsample. @@ -958,17 +827,17 @@ module Mixed = { // continuous (*) continuous => continuous, but also // discrete (*) continuous => continuous (and vice versa). 
We have to take care of all combos and then combine them: let ccConvResult = - Continuous.convolve(~downsample=false, fn, t1d.continuous, t2d.continuous); + Continuous.combineAlgebraically(~downsample=false, op, t1d.continuous, t2d.continuous); let dcConvResult = - Continuous.convolveWithDiscrete(~downsample=false, fn, t2d.continuous, t1d.discrete); + Continuous.combineAlgebraicallyWithDiscrete(~downsample=false, op, t2d.continuous, t1d.discrete); let cdConvResult = - Continuous.convolveWithDiscrete(~downsample=false, fn, t1d.continuous, t2d.discrete); + Continuous.combineAlgebraicallyWithDiscrete(~downsample=false, op, t1d.continuous, t2d.discrete); let continuousConvResult = Continuous.reduce((+.), [|ccConvResult, dcConvResult, cdConvResult|]); // ... finally, discrete (*) discrete => discrete, obviously: let discreteConvResult = - Discrete.convolve(fn, t1d.discrete, t2d.discrete); + Discrete.combineAlgebraically(op, t1d.discrete, t2d.discrete); {discrete: discreteConvResult, continuous: continuousConvResult}; }; @@ -997,25 +866,38 @@ module Shape = { c => Mixed.make(~discrete=Discrete.empty, ~continuous=c), )); - let convolve = (fn, t1: t, t2: t): t => { + let combineAlgebraically = (op: AlgebraicCombinations.algebraicOperation, t1: t, t2: t): t => { switch ((t1, t2)) { - | (Continuous(m1), Continuous(m2)) => DistTypes.Continuous(Continuous.convolve(~downsample=true, fn, m1, m2)) - | (Discrete(m1), Discrete(m2)) => DistTypes.Discrete(Discrete.convolve(fn, m1, m2)) + | (Continuous(m1), Continuous(m2)) => DistTypes.Continuous(Continuous.combineAlgebraically(~downsample=true, op, m1, m2)) + | (Discrete(m1), Discrete(m2)) => DistTypes.Discrete(Discrete.combineAlgebraically(op, m1, m2)) | (m1, m2) => { - DistTypes.Mixed(Mixed.convolve(~downsample=true, fn, toMixed(m1), toMixed(m2))) + DistTypes.Mixed(Mixed.combineAlgebraically(~downsample=true, op, toMixed(m1), toMixed(m2))) } }; }; - let combine = (~knownIntegralSumsFn=(_, _) => None, fn, t1: t, t2: t) => + let 
combinePointwise = (~knownIntegralSumsFn=(_, _) => None, fn, t1: t, t2: t) => switch ((t1, t2)) { - | (Continuous(m1), Continuous(m2)) => DistTypes.Continuous(Continuous.combine(~knownIntegralSumsFn, fn, m1, m2)) - | (Discrete(m1), Discrete(m2)) => DistTypes.Discrete(Discrete.combine(~knownIntegralSumsFn, fn, m1, m2)) + | (Continuous(m1), Continuous(m2)) => DistTypes.Continuous(Continuous.combinePointwise(~knownIntegralSumsFn, fn, m1, m2)) + | (Discrete(m1), Discrete(m2)) => DistTypes.Discrete(Discrete.combinePointwise(~knownIntegralSumsFn, fn, m1, m2)) | (m1, m2) => { - DistTypes.Mixed(Mixed.combine(~knownIntegralSumsFn, fn, toMixed(m1), toMixed(m2))) + DistTypes.Mixed(Mixed.combinePointwise(~knownIntegralSumsFn, fn, toMixed(m1), toMixed(m2))) } }; + // TODO: implement these functions + let pdf = (f: float, t: t): float => { + 0.0; + }; + + let inv = (f: float, t: t): float => { + 0.0; + }; + + let sample = (t: t): float => { + 0.0; + }; + module T = Dist({ type t = DistTypes.shape; @@ -1271,7 +1153,9 @@ module DistPlus = { let integralYtoX = (~cache as _, f, t: t) => { Shape.T.Integral.yToX(~cache=Some(t.integralCache), f, toShape(t)); }; - let mean = (t: t) => Shape.T.mean(t.shape); + let mean = (t: t) => { + Shape.T.mean(t.shape); + }; let variance = (t: t) => Shape.T.variance(t.shape); }); }; diff --git a/src/distPlus/distribution/XYShape.re b/src/distPlus/distribution/XYShape.re index 8e684b93..7bea8b06 100644 --- a/src/distPlus/distribution/XYShape.re +++ b/src/distPlus/distribution/XYShape.re @@ -170,7 +170,7 @@ module Zipped = { let filterByX = (testFn: (float => bool), t: zipped) => t |> E.A.filter(((x, _)) => testFn(x)); }; -module Combine = { +module PointwiseCombination = { type xsSelection = | ALL_XS | XS_EVENLY_DIVIDED(int); @@ -278,7 +278,7 @@ module Range = { items |> Belt.Array.map(_, rangePointAssumingSteps) |> T.fromZippedArray - |> Combine.intersperse(t |> T.mapX(e => e +. diff)), + |> PointwiseCombination.intersperse(t |> T.mapX(e => e +. 
diff)), ) | _ => Some(t) }; @@ -300,7 +300,7 @@ let pointLogScore = (prediction, answer) => }; let logScorePoint = (sampleCount, t1, t2) => - Combine.combine( + PointwiseCombination.combine( ~xsSelection=XS_EVENLY_DIVIDED(sampleCount), ~xToYSelection=XtoY.linear, ~fn=pointLogScore, @@ -328,6 +328,7 @@ module Analysis = { 0.0, (acc, _x, i) => { let areaUnderIntegral = + // TODO Take this switch statement out of the loop body switch (t.interpolation, i) { | (_, 0) => 0.0 | (`Stepwise, _) => @@ -336,12 +337,16 @@ module Analysis = { | (`Linear, _) => let x1 = xs[i - 1]; let x2 = xs[i]; - let h1 = ys[i - 1]; - let h2 = ys[i]; - let b = (h1 -. h2) /. (x1 -. x2); - let a = h1 -. b *. x1; - indefiniteIntegralLinear(x2, a, b) - -. indefiniteIntegralLinear(x1, a, b); + if (x1 == x2) { + 0.0 + } else { + let h1 = ys[i - 1]; + let h2 = ys[i]; + let b = (h1 -. h2) /. (x1 -. x2); + let a = h1 -. b *. x1; + indefiniteIntegralLinear(x2, a, b) + -. indefiniteIntegralLinear(x1, a, b); + }; }; acc +. areaUnderIntegral; }, diff --git a/src/distPlus/symbolic/MathJsParser.re b/src/distPlus/symbolic/MathJsParser.re index d80cf004..087aa14d 100644 --- a/src/distPlus/symbolic/MathJsParser.re +++ b/src/distPlus/symbolic/MathJsParser.re @@ -175,13 +175,13 @@ module MathAdtToDistDst = { |> E.A.fmapi((index, t) => { let w = weights |> E.A.get(_, index) |> E.O.default(1.0); - `Operation(`ScaleOperation(`Multiply, t, `DistData(`Symbolic(`Float(w))))) + `Operation(`VerticalScaling(`Multiply, t, `DistData(`Symbolic(`Float(w))))) }); let pointwiseSum = components |> Js.Array.sliceFrom(1) |> E.A.fold_left((acc, x) => { - `Operation(`PointwiseOperation(`Add, acc, x)) + `Operation(`PointwiseCombination(`Add, acc, x)) }, E.A.unsafe_get(components, 0)) Ok(`Operation(`Normalize(pointwiseSum))) @@ -251,25 +251,31 @@ module MathAdtToDistDst = { multiModal(dists, weights); } + // TODO: wire up these FloatFromDist operations + | Fn({name: "mean", args}) => Error("mean(...) 
not yet implemented.") + | Fn({name: "inv", args}) => Error("inv(...) not yet implemented.") + | Fn({name: "sample", args}) => Error("sample(...) not yet implemented.") + | Fn({name: "pdf", args}) => Error("pdf(...) not yet implemented.") + | Fn({name: "add", args}) => { args |> E.A.fmap(functionParser) |> (fun - | [|Ok(l), Ok(r)|] => Ok(`Operation(`StandardOperation(`Add, l, r))) + | [|Ok(l), Ok(r)|] => Ok(`Operation(`AlgebraicCombination(`Add, l, r))) | _ => Error("Addition needs two operands")) } | Fn({name: "subtract", args}) => { args |> E.A.fmap(functionParser) |> (fun - | [|Ok(l), Ok(r)|] => Ok(`Operation(`StandardOperation(`Subtract, l, r))) + | [|Ok(l), Ok(r)|] => Ok(`Operation(`AlgebraicCombination(`Subtract, l, r))) | _ => Error("Subtraction needs two operands")) } | Fn({name: "multiply", args}) => { args |> E.A.fmap(functionParser) |> (fun - | [|Ok(l), Ok(r)|] => Ok(`Operation(`StandardOperation(`Multiply, l, r))) + | [|Ok(l), Ok(r)|] => Ok(`Operation(`AlgebraicCombination(`Multiply, l, r))) | _ => Error("Multiplication needs two operands")) } | Fn({name: "divide", args}) => { @@ -277,16 +283,18 @@ module MathAdtToDistDst = { |> E.A.fmap(functionParser) |> (fun | [|Ok(l), Ok(`DistData(`Symbolic(`Float(0.0))))|] => Error("Division by zero") - | [|Ok(l), Ok(r)|] => Ok(`Operation(`StandardOperation(`Divide, l, r))) + | [|Ok(l), Ok(r)|] => Ok(`Operation(`AlgebraicCombination(`Divide, l, r))) | _ => Error("Division needs two operands")) } + // TODO: Figure out how to implement meaningful exponentiation | Fn({name: "pow", args}) => { args |> E.A.fmap(functionParser) |> (fun - | [|Ok(l), Ok(r)|] => Ok(`Operation(`StandardOperation(`Exponentiate, l, r))) - | _ => Error("Division needs two operands") - | _ => Error("Exponentiations needs two operands")) + //| [|Ok(l), Ok(r)|] => Ok(`Operation(`AlgebraicCombination(`Exponentiate, l, r))) + //| _ => Error("Exponentiations needs two operands")) + | _ => Error("Exponentiation is not yet supported.") + ) } | 
Fn({name: "leftTruncate", args}) => { args diff --git a/src/distPlus/symbolic/TreeNode.re b/src/distPlus/symbolic/TreeNode.re index 8238d65b..0e195018 100644 --- a/src/distPlus/symbolic/TreeNode.re +++ b/src/distPlus/symbolic/TreeNode.re @@ -5,13 +5,6 @@ type distData = [ | `RenderedShape(DistTypes.shape) ]; -type standardOperation = [ - | `Add - | `Multiply - | `Subtract - | `Divide - | `Exponentiate -]; type pointwiseOperation = [ | `Add | `Multiply]; type scaleOperation = [ | `Multiply | `Exponentiate | `Log]; type distToFloatOperation = [ | `Pdf(float) | `Inv(float) | `Mean | `Sample]; @@ -23,14 +16,14 @@ type treeNode = [ ] and operation = [ | // binary operations - `StandardOperation( - standardOperation, + `AlgebraicCombination( + AlgebraicCombinations.algebraicOperation, treeNode, treeNode, ) // unary operations - | `PointwiseOperation(pointwiseOperation, treeNode, treeNode) // always evaluates to `DistData(`RenderedShape(...)) - | `ScaleOperation(scaleOperation, treeNode, treeNode) // always evaluates to `DistData(`RenderedShape(...)) + | `PointwiseCombination(pointwiseOperation, treeNode, treeNode) // always evaluates to `DistData(`RenderedShape(...)) + | `VerticalScaling(scaleOperation, treeNode, treeNode) // always evaluates to `DistData(`RenderedShape(...)) | `Render(treeNode) // always evaluates to `DistData(`RenderedShape(...)) | `Truncate // always evaluates to `DistData(`RenderedShape(...)) ( @@ -56,15 +49,14 @@ module TreeNode = { type simplifier = treeNode => result(treeNode, string); let rec toString = (t: t): string => { - let stringFromStandardOperation = + let stringFromAlgebraicCombination = fun | `Add => " + " | `Subtract => " - " | `Multiply => " * " | `Divide => " / " - | `Exponentiate => "^"; - let stringFromPointwiseOperation = + let stringFromPointwiseCombination = fun | `Add => " .+ " | `Multiply => " .* "; @@ -81,11 +73,11 @@ module TreeNode = { | `DistData(`Symbolic(d)) => SymbolicDist.GenericDistFunctions.toString(d) | 
`DistData(`RenderedShape(s)) => "[shape]" - | `Operation(`StandardOperation(op, t1, t2)) => - toString(t1) ++ stringFromStandardOperation(op) ++ toString(t2) - | `Operation(`PointwiseOperation(op, t1, t2)) => - toString(t1) ++ stringFromPointwiseOperation(op) ++ toString(t2) - | `Operation(`ScaleOperation(_scaleOp, t, scaleBy)) => + | `Operation(`AlgebraicCombination(op, t1, t2)) => + toString(t1) ++ stringFromAlgebraicCombination(op) ++ toString(t2) + | `Operation(`PointwiseCombination(op, t1, t2)) => + toString(t1) ++ stringFromPointwiseCombination(op) ++ toString(t2) + | `Operation(`VerticalScaling(_scaleOp, t, scaleBy)) => toString(t) ++ " @ " ++ toString(scaleBy) | `Operation(`Normalize(t)) => "normalize(" ++ toString(t) ++ ")" | `Operation(`FloatFromDist(floatFromDistOp, t)) => stringFromFloatFromDistOperation(floatFromDistOp) ++ toString(t) ++ ")" @@ -108,20 +100,12 @@ module TreeNode = { of a new variable that is the result of the operation on A and B. For instance, normal(0, 1) + normal(1, 1) -> normal(1, 2). In general, this is implemented via convolution. */ - module StandardOperation = { - let funcFromOp: (standardOperation, float, float) => float = - fun - | `Add => (+.) - | `Subtract => (-.) - | `Multiply => ( *. ) - | `Divide => (/.) 
- | `Exponentiate => ( ** ); - - module Simplify = { + module AlgebraicCombination = { + let simplify = (algebraicOp, t1: t, t2: t): result(treeNode, string) => { let tryCombiningFloats: simplifier = fun | `Operation( - `StandardOperation( + `AlgebraicCombination( `Divide, `DistData(`Symbolic(`Float(v1))), `DistData(`Symbolic(`Float(0.))), @@ -129,13 +113,13 @@ module TreeNode = { ) => Error("Cannot divide $v1 by zero.") | `Operation( - `StandardOperation( - standardOp, + `AlgebraicCombination( + algebraicOp, `DistData(`Symbolic(`Float(v1))), `DistData(`Symbolic(`Float(v2))), ), ) => { - let func = funcFromOp(standardOp); + let func = AlgebraicCombinations.operationToFn(algebraicOp); Ok(`DistData(`Symbolic(`Float(func(v1, v2))))); } | t => Ok(t); @@ -143,7 +127,7 @@ module TreeNode = { let tryCombiningNormals: simplifier = fun | `Operation( - `StandardOperation( + `AlgebraicCombination( `Add, `DistData(`Symbolic(`Normal(n1))), `DistData(`Symbolic(`Normal(n2))), @@ -151,7 +135,7 @@ module TreeNode = { ) => Ok(`DistData(`Symbolic(SymbolicDist.Normal.add(n1, n2)))) | `Operation( - `StandardOperation( + `AlgebraicCombination( `Subtract, `DistData(`Symbolic(`Normal(n1))), `DistData(`Symbolic(`Normal(n2))), @@ -163,7 +147,7 @@ module TreeNode = { let tryCombiningLognormals: simplifier = fun | `Operation( - `StandardOperation( + `AlgebraicCombination( `Multiply, `DistData(`Symbolic(`Lognormal(l1))), `DistData(`Symbolic(`Lognormal(l2))), @@ -171,7 +155,7 @@ module TreeNode = { ) => Ok(`DistData(`Symbolic(SymbolicDist.Lognormal.multiply(l1, l2)))) | `Operation( - `StandardOperation( + `AlgebraicCombination( `Divide, `DistData(`Symbolic(`Lognormal(l1))), `DistData(`Symbolic(`Lognormal(l2))), @@ -180,20 +164,16 @@ module TreeNode = { Ok(`DistData(`Symbolic(SymbolicDist.Lognormal.divide(l1, l2)))) | t => Ok(t); - let attempt = (standardOp, t1: t, t2: t): result(treeNode, string) => { - let originalTreeNode = - `Operation(`StandardOperation((standardOp, t1, t2))); + let 
originalTreeNode = + `Operation(`AlgebraicCombination((algebraicOp, t1, t2))); - originalTreeNode - |> tryCombiningFloats - |> E.R.bind(_, tryCombiningNormals) - |> E.R.bind(_, tryCombiningLognormals); - }; + originalTreeNode + |> tryCombiningFloats + |> E.R.bind(_, tryCombiningNormals) + |> E.R.bind(_, tryCombiningLognormals); }; - let evaluateNumerically = (standardOp, operationToDistData, t1, t2) => { - let func = funcFromOp(standardOp); - + let evaluateNumerically = (algebraicOp, operationToDistData, t1, t2) => { // force rendering into shapes let renderedShape1 = operationToDistData(`Render(t1)); let renderedShape2 = operationToDistData(`Render(t2)); @@ -205,7 +185,7 @@ module TreeNode = { ) => Ok( `DistData( - `RenderedShape(Distributions.Shape.convolve(func, s1, s2)), + `RenderedShape(Distributions.Shape.combineAlgebraically(algebraicOp, s1, s2)), ), ) | (Error(e1), _) => Error(e1) @@ -215,21 +195,21 @@ module TreeNode = { }; let evaluateToDistData = - (standardOp: standardOperation, operationToDistData, t1: t, t2: t) + (algebraicOp: AlgebraicCombinations.algebraicOperation, operationToDistData, t1: t, t2: t) : result(treeNode, string) => - standardOp - |> Simplify.attempt(_, t1, t2) + algebraicOp + |> simplify(_, t1, t2) |> E.R.bind( _, fun | `DistData(d) => Ok(`DistData(d)) // the analytical simplifaction worked, nice! | `Operation(_) => // if not, run the convolution - evaluateNumerically(standardOp, operationToDistData, t1, t2), + evaluateNumerically(algebraicOp, operationToDistData, t1, t2), ); }; - module ScaleOperation = { + module VerticalScaling = { let fnFromOp = fun | `Multiply => ( *. 
) @@ -271,7 +251,7 @@ module TreeNode = { }; }; - module PointwiseOperation = { + module PointwiseCombination = { let pointwiseAdd = (operationToDistData, t1, t2) => { let renderedShape1 = operationToDistData(`Render(t1)); let renderedShape2 = operationToDistData(`Render(t2)); @@ -279,7 +259,8 @@ module TreeNode = { switch ((renderedShape1, renderedShape2)) { | (Error(e1), _) => Error(e1) | (_, Error(e2)) => Error(e2) - | (Ok(`DistData(`RenderedShape(rs1))), Ok(`DistData(`RenderedShape(rs2)))) => Ok(`DistData(`RenderedShape(Distributions.Shape.combine(~knownIntegralSumsFn=(a, b) => Some(a +. b), (+.), rs1, rs2)))) + | (Ok(`DistData(`RenderedShape(rs1))), Ok(`DistData(`RenderedShape(rs2)))) => + Ok(`DistData(`RenderedShape(Distributions.Shape.combinePointwise(~knownIntegralSumsFn=(a, b) => Some(a +. b), (+.), rs1, rs2)))) | _ => Error("Could not perform pointwise addition.") }; }; @@ -397,10 +378,15 @@ module TreeNode = { }; E.R.bind(value, v => Ok(`DistData(`Symbolic(`Float(v))))); }; - let evaluateFromRenderedShape = - (distToFloatOp: distToFloatOperation, rs: DistTypes.shape) - : result(treeNode, string) => { - Ok(`DistData(`Symbolic(`Float(Distributions.Shape.T.mean(rs))))); + let evaluateFromRenderedShape = (distToFloatOp: distToFloatOperation, rs: DistTypes.shape) : result(treeNode, string) => { + let value = + switch (distToFloatOp) { + | `Pdf(f) => Ok(Distributions.Shape.pdf(f, rs)) + | `Inv(f) => Ok(Distributions.Shape.inv(f, rs)) // TODO: this is tricky for discrete distributions, because they have a stepwise CDF + | `Sample => Ok(Distributions.Shape.sample(rs)) + | `Mean => Ok(Distributions.Shape.T.mean(rs)) + }; + E.R.bind(value, v => Ok(`DistData(`Symbolic(`Float(v))))); }; let rec evaluateToDistData = ( @@ -480,22 +466,22 @@ module TreeNode = { // the functions that convert the Operation nodes to DistData nodes need to // have a way to call this function on their children, if their children are themselves Operation nodes. 
switch (op) { - | `StandardOperation(standardOp, t1, t2) => - StandardOperation.evaluateToDistData( - standardOp, + | `AlgebraicCombination(algebraicOp, t1, t2) => + AlgebraicCombination.evaluateToDistData( + algebraicOp, operationToDistData(sampleCount), t1, t2 // we want to give it the option to render or simply leave it as is ) - | `PointwiseOperation(pointwiseOp, t1, t2) => - PointwiseOperation.evaluateToDistData( + | `PointwiseCombination(pointwiseOp, t1, t2) => + PointwiseCombination.evaluateToDistData( pointwiseOp, operationToDistData(sampleCount), t1, t2, ) - | `ScaleOperation(scaleOp, t, scaleBy) => - ScaleOperation.evaluateToDistData( + | `VerticalScaling(scaleOp, t, scaleBy) => + VerticalScaling.evaluateToDistData( scaleOp, operationToDistData(sampleCount), t, From acdd3dfe7a8aac50adfb0174c2e39ecf46120197 Mon Sep 17 00:00:00 2001 From: Ozzie Gooen Date: Wed, 1 Jul 2020 20:26:39 +0100 Subject: [PATCH 15/31] Minor formatting and name changes --- .../distribution/AlgebraicCombinations.re | 334 ++++++++++-------- src/distPlus/distribution/Distributions.re | 4 +- .../distribution/MixedShapeBuilder.re | 66 +--- src/distPlus/symbolic/SymbolicDist.re | 29 +- src/distPlus/symbolic/TreeNode.re | 26 +- 5 files changed, 227 insertions(+), 232 deletions(-) diff --git a/src/distPlus/distribution/AlgebraicCombinations.re b/src/distPlus/distribution/AlgebraicCombinations.re index 54d1fadc..0bb4660f 100644 --- a/src/distPlus/distribution/AlgebraicCombinations.re +++ b/src/distPlus/distribution/AlgebraicCombinations.re @@ -1,161 +1,219 @@ -type algebraicOperation = [ - | `Add - | `Multiply - | `Subtract - | `Divide -]; +type algebraicOperation = [ | `Add | `Multiply | `Subtract | `Divide]; type pointMassesWithMoments = { -n: int, -masses: array(float), -means: array(float), -variances: array(float) + n: int, + masses: array(float), + means: array(float), + variances: array(float), }; -let operationToFn: (algebraicOperation, float, float) => float = +module Operation = { + 
type t = algebraicOperation; + let toFn: (t, float, float) => float = fun | `Add => (+.) | `Subtract => (-.) | `Multiply => ( *. ) | `Divide => (/.); - - /* This function takes a continuous distribution and efficiently approximates it as - point masses that have variances associated with them. - We estimate the means and variances from overlapping triangular distributions which we imagine are making up the - XYShape. - We can then use the algebra of random variables to "convolve" the point masses and their variances, - and finally reconstruct a new distribution from them, e.g. using a Fast Gauss Transform or Raykar et al. (2007). */ -let toDiscretePointMassesFromTriangulars = (~inverse=false, s: XYShape.T.t): pointMassesWithMoments => { -// TODO: what if there is only one point in the distribution? -let n = s |> XYShape.T.length; -// first, double up the leftmost and rightmost points: -let {xs, ys}: XYShape.T.t = s; -let _ = Js.Array.unshift(xs[0], xs); -let _ = Js.Array.unshift(ys[0], ys); -let _ = Js.Array.push(xs[n - 1], xs); -let _ = Js.Array.push(ys[n - 1], ys); -let n = E.A.length(xs); -// squares and neighbourly products of the xs -let xsSq: array(float) = Belt.Array.makeUninitializedUnsafe(n); -let xsProdN1: array(float) = Belt.Array.makeUninitializedUnsafe(n - 1); -let xsProdN2: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2); -for (i in 0 to n - 1) { - let _ = Belt.Array.set(xsSq, i, xs[i] *. xs[i]); (); + let toString = + fun + | `Add => " + " + | `Subtract => " - " + | `Multiply => " * " + | `Divide => " / "; }; -for (i in 0 to n - 2) { - let _ = Belt.Array.set(xsProdN1, i, xs[i] *. xs[i + 1]); (); -}; -for (i in 0 to n - 3) { - let _ = Belt.Array.set(xsProdN2, i, xs[i] *. 
xs[i + 2]); (); -}; -// means and variances -let masses: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2); // doesn't include the fake first and last points -let means: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2); -let variances: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2); -if (inverse) { - for (i in 1 to n - 2) { - let _ = Belt.Array.set(masses, i - 1, (xs[i + 1] -. xs[i - 1]) *. ys[i] /. 2.); - - // this only works when the whole triange is either on the left or on the right of zero - let a = xs[i - 1]; - let c = xs[i]; - let b = xs[i + 1]; - - // These are the moments of the reciprocal of a triangular distribution, as symbolically integrated by Mathematica. - // They're probably pretty close to invMean ~ 1/mean = 3/(a+b+c) and invVar. But I haven't worked out - // the worst case error, so for now let's use these monster equations - let inverseMean = 2. *. ((a *. log(a/.c) /. (a-.c)) +. ((b *. log(c/.b))/.(b-.c))) /. (a -. b); - let inverseVar = 2. *. ((log(c/.a) /. (a-.c)) +. ((b *. log(b/.c))/.(b-.c))) /. (a -. b) -. inverseMean ** 2.; - - let _ = Belt.Array.set(means, i - 1, inverseMean); - - let _ = Belt.Array.set(variances, i - 1, inverseVar); +/* This function takes a continuous distribution and efficiently approximates it as + point masses that have variances associated with them. + We estimate the means and variances from overlapping triangular distributions which we imagine are making up the + XYShape. + We can then use the algebra of random variables to "convolve" the point masses and their variances, + and finally reconstruct a new distribution from them, e.g. using a Fast Gauss Transform or Raykar et al. (2007). */ +let toDiscretePointMassesFromTriangulars = + (~inverse=false, s: XYShape.T.t): pointMassesWithMoments => { + // TODO: what if there is only one point in the distribution? 
+ let n = s |> XYShape.T.length; + // first, double up the leftmost and rightmost points: + let {xs, ys}: XYShape.T.t = s; + let _ = Js.Array.unshift(xs[0], xs); + let _ = Js.Array.unshift(ys[0], ys); + let _ = Js.Array.push(xs[n - 1], xs); + let _ = Js.Array.push(ys[n - 1], ys); + let n = E.A.length(xs); + // squares and neighbourly products of the xs + let xsSq: array(float) = Belt.Array.makeUninitializedUnsafe(n); + let xsProdN1: array(float) = Belt.Array.makeUninitializedUnsafe(n - 1); + let xsProdN2: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2); + for (i in 0 to n - 1) { + let _ = Belt.Array.set(xsSq, i, xs[i] *. xs[i]); (); - }; - - {n: n - 2, masses, means, variances}; -} else { - for (i in 1 to n - 2) { - let _ = Belt.Array.set(masses, i - 1, (xs[i + 1] -. xs[i - 1]) *. ys[i] /. 2.); - let _ = Belt.Array.set(means, i - 1, (xs[i - 1] +. xs[i] +. xs[i + 1]) /. 3.); - - let _ = Belt.Array.set(variances, i - 1, - (xsSq[i-1] +. xsSq[i] +. xsSq[i+1] -. xsProdN1[i-1] -. xsProdN1[i] -. xsProdN2[i-1]) /. 18.); + }; + for (i in 0 to n - 2) { + let _ = Belt.Array.set(xsProdN1, i, xs[i] *. xs[i + 1]); (); - }; - {n: n - 2, masses, means, variances}; - }; -}; + }; + for (i in 0 to n - 3) { + let _ = Belt.Array.set(xsProdN2, i, xs[i] *. xs[i + 2]); + (); + }; + // means and variances + let masses: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2); // doesn't include the fake first and last points + let means: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2); + let variances: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2); + if (inverse) { + for (i in 1 to n - 2) { + let _ = + Belt.Array.set( + masses, + i - 1, + (xs[i + 1] -. xs[i - 1]) *. ys[i] /. 
2., + ); -let combineShapesContinuousContinuous = (op: algebraicOperation, s1: DistTypes.xyShape, s2: DistTypes.xyShape): DistTypes.xyShape => { - let t1n = s1 |> XYShape.T.length; - let t2n = s2 |> XYShape.T.length; + // this only works when the whole triange is either on the left or on the right of zero + let a = xs[i - 1]; + let c = xs[i]; + let b = xs[i + 1]; - // if we add the two distributions, we should probably use normal filters. - // if we multiply the two distributions, we should probably use lognormal filters. - let t1m = toDiscretePointMassesFromTriangulars(s1); - let t2m = toDiscretePointMassesFromTriangulars(s2); + // These are the moments of the reciprocal of a triangular distribution, as symbolically integrated by Mathematica. + // They're probably pretty close to invMean ~ 1/mean = 3/(a+b+c) and invVar. But I haven't worked out + // the worst case error, so for now let's use these monster equations + let inverseMean = + 2. + *. (a *. log(a /. c) /. (a -. c) +. b *. log(c /. b) /. (b -. c)) + /. (a -. b); + let inverseVar = + 2. + *. (log(c /. a) /. (a -. c) +. b *. log(b /. c) /. (b -. c)) + /. (a -. b) + -. inverseMean + ** 2.; - let combineMeansFn = switch (op) { - | `Add => (m1, m2) => m1 +. m2 - | `Subtract => (m1, m2) => m1 -. m2 - | `Multiply => (m1, m2) => m1 *. m2 - | `Divide => (m1, mInv2) => m1 *. mInv2 - }; // note: here, mInv2 = mean(1 / t2) ~= 1 / mean(t2) + let _ = Belt.Array.set(means, i - 1, inverseMean); - // converts the variances and means of the two inputs into the variance of the output - let combineVariancesFn = switch (op) { - | `Add => (v1, v2, m1, m2) => v1 +. v2 - | `Subtract => (v1, v2, m1, m2) => v1 +. v2 - | `Multiply => (v1, v2, m1, m2) => (v1 *. v2) +. (v1 *. m1**2.) +. (v2 *. m1**2.) - | `Divide => (v1, vInv2, m1, mInv2) => (v1 *. vInv2) +. (v1 *. mInv2**2.) +. (vInv2 *. m1**2.) 
- }; - - let outputMinX: ref(float) = ref(infinity); - let outputMaxX: ref(float) = ref(neg_infinity); - let masses: array(float) = Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n); - let means: array(float) = Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n); - let variances: array(float) = Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n); - // then convolve the two sets of pointMassesWithMoments - for (i in 0 to t1m.n - 1) { - for (j in 0 to t2m.n - 1) { - let k = i * t2m.n + j; - let _ = Belt.Array.set(masses, k, t1m.masses[i] *. t2m.masses[j]); - - let mean = combineMeansFn(t1m.means[i], t2m.means[j]); - let variance = combineVariancesFn(t1m.variances[i], t2m.variances[j], t1m.means[i], t2m.means[j]); - let _ = Belt.Array.set(means, k, mean); - let _ = Belt.Array.set(variances, k, variance); - // update bounds - let minX = mean -. variance *. 1.644854; - let maxX = mean +. variance *. 1.644854; - if (minX < outputMinX^) { - outputMinX := minX; - } - if (maxX > outputMaxX^) { - outputMaxX := maxX; - } - }; - }; - - // we now want to create a set of target points. For now, let's just evenly distribute 200 points between - // between the outputMinX and outputMaxX - let outputXs: array(float) = E.A.Floats.range(outputMinX^, outputMaxX^, 200); - let outputYs: array(float) = Belt.Array.make(200, 0.0); - // now, for each of the outputYs, accumulate from a Gaussian kernel over each input point. - for (i in 0 to E.A.length(outputXs) - 1) { - let x = outputXs[i]; - for (j in 0 to E.A.length(masses) - 1) { - let dx = outputXs[i] -. means[j]; - let contribution = masses[j] *. exp(-.(dx**2.) /. (2. *. variances[j])); - let _ = Belt.Array.set(outputYs, i, outputYs[i] +. contribution); - (); - }; + let _ = Belt.Array.set(variances, i - 1, inverseVar); (); }; - {xs: outputXs, ys: outputYs}; + {n: n - 2, masses, means, variances}; + } else { + for (i in 1 to n - 2) { + let _ = + Belt.Array.set( + masses, + i - 1, + (xs[i + 1] -. xs[i - 1]) *. ys[i] /. 
2., + ); + let _ = + Belt.Array.set(means, i - 1, (xs[i - 1] +. xs[i] +. xs[i + 1]) /. 3.); + + let _ = + Belt.Array.set( + variances, + i - 1, + ( + xsSq[i - 1] + +. xsSq[i] + +. xsSq[i + 1] + -. xsProdN1[i - 1] + -. xsProdN1[i] + -. xsProdN2[i - 1] + ) + /. 18., + ); + (); + }; + {n: n - 2, masses, means, variances}; + }; +}; + +let combineShapesContinuousContinuous = + (op: algebraicOperation, s1: DistTypes.xyShape, s2: DistTypes.xyShape) + : DistTypes.xyShape => { + let t1n = s1 |> XYShape.T.length; + let t2n = s2 |> XYShape.T.length; + + // if we add the two distributions, we should probably use normal filters. + // if we multiply the two distributions, we should probably use lognormal filters. + let t1m = toDiscretePointMassesFromTriangulars(s1); + let t2m = toDiscretePointMassesFromTriangulars(s2); + + let combineMeansFn = + switch (op) { + | `Add => ((m1, m2) => m1 +. m2) + | `Subtract => ((m1, m2) => m1 -. m2) + | `Multiply => ((m1, m2) => m1 *. m2) + | `Divide => ((m1, mInv2) => m1 *. mInv2) + }; // note: here, mInv2 = mean(1 / t2) ~= 1 / mean(t2) + + // converts the variances and means of the two inputs into the variance of the output + let combineVariancesFn = + switch (op) { + | `Add => ((v1, v2, m1, m2) => v1 +. v2) + | `Subtract => ((v1, v2, m1, m2) => v1 +. v2) + | `Multiply => ( + (v1, v2, m1, m2) => v1 *. v2 +. v1 *. m1 ** 2. +. v2 *. m1 ** 2. + ) + | `Divide => ( + (v1, vInv2, m1, mInv2) => + v1 *. vInv2 +. v1 *. mInv2 ** 2. +. vInv2 *. m1 ** 2. 
+ ) + }; + + let outputMinX: ref(float) = ref(infinity); + let outputMaxX: ref(float) = ref(neg_infinity); + let masses: array(float) = + Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n); + let means: array(float) = + Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n); + let variances: array(float) = + Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n); + // then convolve the two sets of pointMassesWithMoments + for (i in 0 to t1m.n - 1) { + for (j in 0 to t2m.n - 1) { + let k = i * t2m.n + j; + let _ = Belt.Array.set(masses, k, t1m.masses[i] *. t2m.masses[j]); + + let mean = combineMeansFn(t1m.means[i], t2m.means[j]); + let variance = + combineVariancesFn( + t1m.variances[i], + t2m.variances[j], + t1m.means[i], + t2m.means[j], + ); + let _ = Belt.Array.set(means, k, mean); + let _ = Belt.Array.set(variances, k, variance); + // update bounds + let minX = mean -. variance *. 1.644854; + let maxX = mean +. variance *. 1.644854; + if (minX < outputMinX^) { + outputMinX := minX; + }; + if (maxX > outputMaxX^) { + outputMaxX := maxX; + }; + }; + }; + + // we now want to create a set of target points. For now, let's just evenly distribute 200 points between + // between the outputMinX and outputMaxX + let outputXs: array(float) = + E.A.Floats.range(outputMinX^, outputMaxX^, 200); + let outputYs: array(float) = Belt.Array.make(200, 0.0); + // now, for each of the outputYs, accumulate from a Gaussian kernel over each input point. + for (i in 0 to E.A.length(outputXs) - 1) { + let x = outputXs[i]; + for (j in 0 to E.A.length(masses) - 1) { + let dx = outputXs[i] -. means[j]; + let contribution = + masses[j] *. exp(-. (dx ** 2.) /. (2. *. variances[j])); + let _ = Belt.Array.set(outputYs, i, outputYs[i] +. 
contribution); + (); + }; + (); + }; + + {xs: outputXs, ys: outputYs}; }; diff --git a/src/distPlus/distribution/Distributions.re b/src/distPlus/distribution/Distributions.re index 82a4cd6c..69ec8ba2 100644 --- a/src/distPlus/distribution/Distributions.re +++ b/src/distPlus/distribution/Distributions.re @@ -285,7 +285,7 @@ module Continuous = { let t1n = t1s |> XYShape.T.length; let t2n = t2s |> XYShape.T.length; - let fn = AlgebraicCombinations.operationToFn(op); + let fn = AlgebraicCombinations.Operation.toFn(op); let outXYShapes: array(array((float, float))) = Belt.Array.makeUninitializedUnsafe(t2n); @@ -402,7 +402,7 @@ module Discrete = { t2.knownIntegralSum, ); - let fn = AlgebraicCombinations.operationToFn(op); + let fn = AlgebraicCombinations.Operation.toFn(op); let xToYMap = E.FloatFloatMap.empty(); for (i in 0 to t1n - 1) { diff --git a/src/distPlus/distribution/MixedShapeBuilder.re b/src/distPlus/distribution/MixedShapeBuilder.re index 496e298c..9689c1c4 100644 --- a/src/distPlus/distribution/MixedShapeBuilder.re +++ b/src/distPlus/distribution/MixedShapeBuilder.re @@ -33,68 +33,4 @@ let buildSimple = (~continuous: option(DistTypes.continuousShape), ~discrete: op ); Some(Mixed(mixedDist)); }; -}; - - -// TODO: Delete, only being used in tests -/*let build = (~continuous, ~discrete, ~assumptions) => - switch (assumptions) { - | { - continuous: ADDS_TO_CORRECT_PROBABILITY, - discrete: ADDS_TO_CORRECT_PROBABILITY, - discreteProbabilityMass: Some(r), - } => - // TODO: Fix this, it's wrong :( - Some( - Distributions.Mixed.make( - ~continuous, - ~discrete, - ~discreteProbabilityMassFraction=r, - ), - ) - - | { - continuous: ADDS_TO_1, - discrete: ADDS_TO_1, - discreteProbabilityMass: Some(r), - } => - Some( - Distributions.Mixed.make( - ~continuous, - ~discrete, - ~discreteProbabilityMassFraction=r, - ), - ) - - | { - continuous: ADDS_TO_1, - discrete: ADDS_TO_1, - discreteProbabilityMass: None, - } => - None - - | { - continuous: ADDS_TO_CORRECT_PROBABILITY, - 
discrete: ADDS_TO_1, - discreteProbabilityMass: None, - } => - None - - | { - continuous: ADDS_TO_1, - discrete: ADDS_TO_CORRECT_PROBABILITY, - discreteProbabilityMass: None, - } => - let discreteProbabilityMassFraction = - Distributions.Discrete.T.Integral.sum(~cache=None, discrete); - let discrete = - Distributions.Discrete.T.scaleToIntegralSum(~intendedSum=1.0, discrete); - Some( - Distributions.Mixed.make( - ~continuous, - ~discrete, - ~discreteProbabilityMassFraction, - ), - ); - | _ => None - };*/ +}; \ No newline at end of file diff --git a/src/distPlus/symbolic/SymbolicDist.re b/src/distPlus/symbolic/SymbolicDist.re index e5df481c..9765ed04 100644 --- a/src/distPlus/symbolic/SymbolicDist.re +++ b/src/distPlus/symbolic/SymbolicDist.re @@ -36,7 +36,6 @@ type continuousShape = { cdf: DistTypes.continuousShape, }; - type dist = [ | `Normal(normal) | `Beta(beta) @@ -54,6 +53,7 @@ module ContinuousShape = { let make = (pdf, cdf): t => {pdf, cdf}; let pdf = (x, t: t) => Distributions.Continuous.T.xToY(x, t.pdf).continuous; + // TODO: pdf and inv are currently the same, this seems broken. let inv = (p, t: t) => Distributions.Continuous.T.xToY(p, t.pdf).continuous; // TODO: Fix the sampling, to have it work correctly. @@ -77,7 +77,7 @@ module Cauchy = { let pdf = (x, t: t) => Jstat.cauchy##pdf(x, t.local, t.scale); let inv = (p, t: t) => Jstat.cauchy##inv(p, t.local, t.scale); let sample = (t: t) => Jstat.cauchy##sample(t.local, t.scale); - let mean = (t: t) => Error("Cauchy distributions have no mean value.") + let mean = (_: t) => Error("Cauchy distributions have no mean value."); let toString = ({local, scale}: t) => {j|Cauchy($local, $scale)|j}; }; @@ -117,8 +117,10 @@ module Normal = { // TODO: is this useful here at all? would need the integral as well ... let pointwiseProduct = (n1: t, n2: t) => { - let mean = (n1.mean *. n2.stdev**2. +. n2.mean *. n1.stdev**2.) /. (n1.stdev**2. +. n2.stdev**2.); - let stdev = 1. /. ((1. /. n1.stdev**2.) +. (1. /. 
n2.stdev**2.)); + let mean = + (n1.mean *. n2.stdev ** 2. +. n2.mean *. n1.stdev ** 2.) + /. (n1.stdev ** 2. +. n2.stdev ** 2.); + let stdev = 1. /. (1. /. n1.stdev ** 2. +. 1. /. n2.stdev ** 2.); `Normal({mean, stdev}); }; }; @@ -162,12 +164,12 @@ module Lognormal = { let multiply = (l1, l2) => { let mu = l1.mu +. l2.mu; let sigma = l1.sigma +. l2.sigma; - `Lognormal({mu, sigma}) + `Lognormal({mu, sigma}); }; let divide = (l1, l2) => { let mu = l1.mu -. l2.mu; let sigma = l1.sigma +. l2.sigma; - `Lognormal({mu, sigma}) + `Lognormal({mu, sigma}); }; }; @@ -277,21 +279,20 @@ module GenericDistFunctions = { | `Beta(n) => Beta.mean(n) | `ContinuousShape(n) => ContinuousShape.mean(n) | `Uniform(n) => Uniform.mean(n) - | `Float(n) => Float.mean(n) + | `Float(n) => Float.mean(n); let interpolateXs = - (~xSelection: [ | `Linear | `ByWeight]=`Linear, dist: dist, n) => { + (~xSelection: [ | `Linear | `ByWeight]=`Linear, dist: dist, n) => { switch (xSelection, dist) { | (`Linear, _) => E.A.Floats.range(min(dist), max(dist), n) -/* | (`ByWeight, `Uniform(n)) => - // In `ByWeight mode, uniform distributions get special treatment because we need two x's - // on either side for proper rendering (just left and right of the discontinuities). - let dx = 0.00001 *. (n.high -. n.low); - [|n.low -. dx, n.low +. dx, n.high -. dx, n.high +. dx|]; */ + /* | (`ByWeight, `Uniform(n)) => + // In `ByWeight mode, uniform distributions get special treatment because we need two x's + // on either side for proper rendering (just left and right of the discontinuities). + let dx = 0.00001 *. (n.high -. n.low); + [|n.low -. dx, n.low +. dx, n.high -. dx, n.high +. 
dx|]; */ | (`ByWeight, _) => let ys = E.A.Floats.range(minCdfValue, maxCdfValue, n); ys |> E.A.fmap(y => inv(y, dist)); }; }; }; - diff --git a/src/distPlus/symbolic/TreeNode.re b/src/distPlus/symbolic/TreeNode.re index 0e195018..c3601873 100644 --- a/src/distPlus/symbolic/TreeNode.re +++ b/src/distPlus/symbolic/TreeNode.re @@ -1,5 +1,6 @@ /* This module represents a tree node. */ +// todo: Symbolic already has an arbitrary continuousShape option. It seems messy to have both. type distData = [ | `Symbolic(SymbolicDist.dist) | `RenderedShape(DistTypes.shape) @@ -46,7 +47,7 @@ and operation = [ module TreeNode = { type t = treeNode; - type simplifier = treeNode => result(treeNode, string); + type tResult = treeNode => result(treeNode, string); let rec toString = (t: t): string => { let stringFromAlgebraicCombination = @@ -63,16 +64,15 @@ module TreeNode = { let stringFromFloatFromDistOperation = fun - | `Pdf(f) => "pdf(x=$f, " - | `Inv(f) => "inv(c=$f, " + | `Pdf(f) => {j|pdf(x=$f, |j} + | `Inv(f) => {j|inv(x=$f, |j} | `Sample => "sample(" | `Mean => "mean("; - switch (t) { | `DistData(`Symbolic(d)) => SymbolicDist.GenericDistFunctions.toString(d) - | `DistData(`RenderedShape(s)) => "[shape]" + | `DistData(`RenderedShape(_)) => "[shape]" | `Operation(`AlgebraicCombination(op, t1, t2)) => toString(t1) ++ stringFromAlgebraicCombination(op) ++ toString(t2) | `Operation(`PointwiseCombination(op, t1, t2)) => @@ -102,12 +102,12 @@ module TreeNode = { In general, this is implemented via convolution. 
*/ module AlgebraicCombination = { let simplify = (algebraicOp, t1: t, t2: t): result(treeNode, string) => { - let tryCombiningFloats: simplifier = + let tryCombiningFloats: tResult = fun | `Operation( `AlgebraicCombination( `Divide, - `DistData(`Symbolic(`Float(v1))), + `DistData(`Symbolic(`Float(_))), `DistData(`Symbolic(`Float(0.))), ), ) => @@ -119,12 +119,12 @@ module TreeNode = { `DistData(`Symbolic(`Float(v2))), ), ) => { - let func = AlgebraicCombinations.operationToFn(algebraicOp); + let func = AlgebraicCombinations.Operation.toFn(algebraicOp); Ok(`DistData(`Symbolic(`Float(func(v1, v2))))); } | t => Ok(t); - let tryCombiningNormals: simplifier = + let tryCombiningNormals: tResult = fun | `Operation( `AlgebraicCombination( @@ -144,7 +144,7 @@ module TreeNode = { Ok(`DistData(`Symbolic(SymbolicDist.Normal.subtract(n1, n2)))) | t => Ok(t); - let tryCombiningLognormals: simplifier = + let tryCombiningLognormals: tResult = fun | `Operation( `AlgebraicCombination( @@ -281,13 +281,13 @@ module TreeNode = { module Truncate = { module Simplify = { - let tryTruncatingNothing: simplifier = + let tryTruncatingNothing: tResult = fun | `Operation(`Truncate(None, None, `DistData(d))) => Ok(`DistData(d)) | t => Ok(t); - let tryTruncatingUniform: simplifier = + let tryTruncatingUniform: tResult = fun | `Operation(`Truncate(lc, rc, `DistData(`Symbolic(`Uniform(u))))) => { // just create a new Uniform distribution @@ -508,7 +508,7 @@ module TreeNode = { but most often it will produce a RenderedShape. This function is used mainly to turn a parse tree into a single RenderedShape that can then be displayed to the user. 
*/ - let rec toDistData = (treeNode: t, sampleCount: int): result(t, string) => { + let toDistData = (treeNode: t, sampleCount: int): result(t, string) => { switch (treeNode) { | `DistData(d) => Ok(`DistData(d)) | `Operation(op) => operationToDistData(sampleCount, op) From baaff197500a06d9a437fdcfdb88e52e5ec68962 Mon Sep 17 00:00:00 2001 From: Ozzie Gooen Date: Wed, 1 Jul 2020 22:01:58 +0100 Subject: [PATCH 16/31] Moving operations functionality into new SymbolicTypes.re file --- .../distribution/AlgebraicCombinations.re | 23 +- src/distPlus/distribution/Distributions.re | 221 +++++++++++----- src/distPlus/symbolic/SymbolicDist.re | 11 + src/distPlus/symbolic/SymbolicTypes.re | 67 +++++ src/distPlus/symbolic/TreeNode.re | 235 ++++++++---------- 5 files changed, 330 insertions(+), 227 deletions(-) create mode 100644 src/distPlus/symbolic/SymbolicTypes.re diff --git a/src/distPlus/distribution/AlgebraicCombinations.re b/src/distPlus/distribution/AlgebraicCombinations.re index 0bb4660f..683ab2ba 100644 --- a/src/distPlus/distribution/AlgebraicCombinations.re +++ b/src/distPlus/distribution/AlgebraicCombinations.re @@ -1,5 +1,3 @@ -type algebraicOperation = [ | `Add | `Multiply | `Subtract | `Divide]; - type pointMassesWithMoments = { n: int, masses: array(float), @@ -7,23 +5,6 @@ type pointMassesWithMoments = { variances: array(float), }; -module Operation = { - type t = algebraicOperation; - let toFn: (t, float, float) => float = - fun - | `Add => (+.) - | `Subtract => (-.) - | `Multiply => ( *. ) - | `Divide => (/.); - - let toString = - fun - | `Add => " + " - | `Subtract => " - " - | `Multiply => " * " - | `Divide => " / "; -}; - /* This function takes a continuous distribution and efficiently approximates it as point masses that have variances associated with them. 
We estimate the means and variances from overlapping triangular distributions which we imagine are making up the @@ -129,7 +110,7 @@ let toDiscretePointMassesFromTriangulars = }; let combineShapesContinuousContinuous = - (op: algebraicOperation, s1: DistTypes.xyShape, s2: DistTypes.xyShape) + (op: SymbolicTypes.algebraicOperation, s1: DistTypes.xyShape, s2: DistTypes.xyShape) : DistTypes.xyShape => { let t1n = s1 |> XYShape.T.length; let t2n = s2 |> XYShape.T.length; @@ -216,4 +197,4 @@ let combineShapesContinuousContinuous = }; {xs: outputXs, ys: outputYs}; -}; +}; \ No newline at end of file diff --git a/src/distPlus/distribution/Distributions.re b/src/distPlus/distribution/Distributions.re index 69ec8ba2..968c275f 100644 --- a/src/distPlus/distribution/Distributions.re +++ b/src/distPlus/distribution/Distributions.re @@ -149,7 +149,7 @@ module Continuous = { continuousShapes |> E.A.fold_left(combinePointwise(~knownIntegralSumsFn, fn), empty); - let mapY = (~knownIntegralSumFn=(_ => None), fn, t: t) => { + let mapY = (~knownIntegralSumFn=_ => None, fn, t: t) => { let u = E.O.bind(_, knownIntegralSumFn); let yMapFn = shapeMap(XYShape.T.mapY(fn)); @@ -164,7 +164,6 @@ module Continuous = { ); }; - module T = Dist({ type t = DistTypes.continuousShape; @@ -194,9 +193,9 @@ module Continuous = { |> getShape |> XYShape.T.zip |> XYShape.Zipped.filterByX(x => - x >= E.O.default(neg_infinity, leftCutoff) - || x <= E.O.default(infinity, rightCutoff) - ); + x >= E.O.default(neg_infinity, leftCutoff) + || x <= E.O.default(infinity, rightCutoff) + ); let eps = (t |> getShape |> XYShape.T.xTotalRange) *. 0.0001; @@ -206,7 +205,11 @@ module Continuous = { rightCutoff |> E.O.dimap(rc => [|(rc +. 
eps, 0.)|], _ => [||]); let truncatedZippedPairsWithNewPoints = - E.A.concatMany([|leftNewPoint, truncatedZippedPairs, rightNewPoint|]); + E.A.concatMany([| + leftNewPoint, + truncatedZippedPairs, + rightNewPoint, + |]); let truncatedShape = XYShape.T.fromZippedArray(truncatedZippedPairsWithNewPoints); @@ -214,22 +217,20 @@ module Continuous = { }; // TODO: This should work with stepwise plots. - let integral = (~cache, t) => { - - if ((t |> getShape |> XYShape.T.length) > 0) { - switch (cache) { - | Some(cache) => cache - | None => - t - |> getShape - |> XYShape.Range.integrateWithTriangles - |> E.O.toExt("This should not have happened") - |> make(`Linear, _, None) - }; + let integral = (~cache, t) => + if (t |> getShape |> XYShape.T.length > 0) { + switch (cache) { + | Some(cache) => cache + | None => + t + |> getShape + |> XYShape.Range.integrateWithTriangles + |> E.O.toExt("This should not have happened") + |> make(`Linear, _, None) + }; } else { make(`Linear, {xs: [|neg_infinity|], ys: [|0.0|]}, None); - } - }; + }; let downsample = (~cache=None, length, t): t => t @@ -276,23 +277,31 @@ module Continuous = { ); }); - /* This simply creates multiple copies of the continuous distribution, scaled and shifted according to each discrete data point, and then adds them all together. 
*/ - let combineAlgebraicallyWithDiscrete = (~downsample=false, op: AlgebraicCombinations.algebraicOperation, t1: t, t2: DistTypes.discreteShape) => { + let combineAlgebraicallyWithDiscrete = + ( + ~downsample=false, + op: SymbolicTypes.algebraicOperation, + t1: t, + t2: DistTypes.discreteShape, + ) => { let t1s = t1 |> getShape; let t2s = t2.xyShape; // would like to use Discrete.getShape here, but current file structure doesn't allow for that let t1n = t1s |> XYShape.T.length; let t2n = t2s |> XYShape.T.length; - let fn = AlgebraicCombinations.Operation.toFn(op); + let fn = SymbolicTypes.Algebraic.toFn(op); let outXYShapes: array(array((float, float))) = Belt.Array.makeUninitializedUnsafe(t2n); - for (j in 0 to t2n - 1) { // for each one of the discrete points + for (j in 0 to t2n - 1) { + // for each one of the discrete points // create a new distribution, as long as the original continuous one - let dxyShape: array((float, float)) = Belt.Array.makeUninitializedUnsafe(t1n); + + let dxyShape: array((float, float)) = + Belt.Array.makeUninitializedUnsafe(t1n); for (i in 0 to t1n - 1) { let _ = Belt.Array.set( @@ -307,7 +316,12 @@ module Continuous = { (); }; - let combinedIntegralSum = Common.combineIntegralSums((a, b) => Some(a *. b), t1.knownIntegralSum, t2.knownIntegralSum); + let combinedIntegralSum = + Common.combineIntegralSums( + (a, b) => Some(a *. 
b), + t1.knownIntegralSum, + t2.knownIntegralSum, + ); outXYShapes |> E.A.fmap(s => { @@ -318,7 +332,13 @@ module Continuous = { |> updateKnownIntegralSum(combinedIntegralSum); }; - let combineAlgebraically = (~downsample=false, op: AlgebraicCombinations.algebraicOperation, t1: t, t2: t) => { + let combineAlgebraically = + ( + ~downsample=false, + op: SymbolicTypes.algebraicOperation, + t1: t, + t2: t, + ) => { let s1 = t1 |> getShape; let s2 = t2 |> getShape; let t1n = s1 |> XYShape.T.length; @@ -326,8 +346,14 @@ module Continuous = { if (t1n == 0 || t2n == 0) { empty; } else { - let combinedShape = AlgebraicCombinations.combineShapesContinuousContinuous(op, s1, s2); - let combinedIntegralSum = Common.combineIntegralSums((a, b) => Some(a *. b), t1.knownIntegralSum, t2.knownIntegralSum); + let combinedShape = + AlgebraicCombinations.combineShapesContinuousContinuous(op, s1, s2); + let combinedIntegralSum = + Common.combineIntegralSums( + (a, b) => Some(a *. b), + t1.knownIntegralSum, + t2.knownIntegralSum, + ); // return a new Continuous distribution make(`Linear, combinedShape, combinedIntegralSum); }; @@ -370,7 +396,7 @@ module Discrete = { XYShape.PointwiseCombination.combine( ~xsSelection=ALL_XS, ~xToYSelection=XYShape.XtoY.stepwiseIfAtX, - ~fn=((a, b) => fn(E.O.default(0.0, a), E.O.default(0.0, b))), // stepwiseIfAtX returns option(float), so this fn needs to handle None + ~fn=(a, b) => fn(E.O.default(0.0, a), E.O.default(0.0, b)), // stepwiseIfAtX returns option(float), so this fn needs to handle None t1.xyShape, t2.xyShape, ), @@ -378,7 +404,9 @@ module Discrete = { ); }; - let reduce = (~knownIntegralSumsFn=(_, _) => None, fn, discreteShapes): DistTypes.discreteShape => + let reduce = + (~knownIntegralSumsFn=(_, _) => None, fn, discreteShapes) + : DistTypes.discreteShape => discreteShapes |> E.A.fold_left(combinePointwise(~knownIntegralSumsFn, fn), empty); @@ -389,7 +417,8 @@ module Discrete = { /* This multiples all of the data points together and creates 
a new discrete distribution from the results. Data points at the same xs get added together. It may be a good idea to downsample t1 and t2 before and/or the result after. */ - let combineAlgebraically = (op: AlgebraicCombinations.algebraicOperation, t1: t, t2: t) => { + let combineAlgebraically = + (op: SymbolicTypes.algebraicOperation, t1: t, t2: t) => { let t1s = t1 |> getShape; let t2s = t2 |> getShape; let t1n = t1s |> XYShape.T.length; @@ -402,7 +431,7 @@ module Discrete = { t2.knownIntegralSum, ); - let fn = AlgebraicCombinations.Operation.toFn(op); + let fn = SymbolicTypes.Algebraic.toFn(op); let xToYMap = E.FloatFloatMap.empty(); for (i in 0 to t1n - 1) { @@ -441,8 +470,8 @@ module Discrete = { Dist({ type t = DistTypes.discreteShape; type integral = DistTypes.continuousShape; - let integral = (~cache, t) => { - if ((t |> getShape |> XYShape.T.length) > 0) { + let integral = (~cache, t) => + if (t |> getShape |> XYShape.T.length > 0) { switch (cache) { | Some(c) => c | None => @@ -453,9 +482,13 @@ module Discrete = { ) }; } else { - Continuous.make(`Stepwise, {xs: [|neg_infinity|], ys: [|0.0|]}, None); - }}; - + Continuous.make( + `Stepwise, + {xs: [|neg_infinity|], ys: [|0.0|]}, + None, + ); + }; + let integralEndY = (~cache, t: t) => t.knownIntegralSum |> E.O.default(t |> integral(~cache) |> Continuous.lastY); @@ -495,7 +528,7 @@ module Discrete = { make(clippedShape, None); // if someone needs the sum, they'll have to recompute it } else { t; - } + }; }; let truncate = @@ -505,9 +538,9 @@ module Discrete = { |> getShape |> XYShape.T.zip |> XYShape.Zipped.filterByX(x => - x >= E.O.default(neg_infinity, leftCutoff) - || x <= E.O.default(infinity, rightCutoff) - ) + x >= E.O.default(neg_infinity, leftCutoff) + || x <= E.O.default(infinity, rightCutoff) + ) |> XYShape.T.fromZippedArray; make(truncatedShape, None); @@ -601,8 +634,10 @@ module Mixed = { rightCutoff: option(float), {discrete, continuous}: t, ) => { - let truncatedContinuous = 
Continuous.T.truncate(leftCutoff, rightCutoff, continuous); - let truncatedDiscrete = Discrete.T.truncate(leftCutoff, rightCutoff, discrete); + let truncatedContinuous = + Continuous.T.truncate(leftCutoff, rightCutoff, continuous); + let truncatedDiscrete = + Discrete.T.truncate(leftCutoff, rightCutoff, discrete); make(~discrete=truncatedDiscrete, ~continuous=truncatedContinuous); }; @@ -809,7 +844,14 @@ module Mixed = { }; }); - let combineAlgebraically = (~downsample=false, op: AlgebraicCombinations.algebraicOperation, t1: t, t2: t): t => { + let combineAlgebraically = + ( + ~downsample=false, + op: SymbolicTypes.algebraicOperation, + t1: t, + t2: t, + ) + : t => { // Discrete convolution can cause a huge increase in the number of samples, // so we'll first downsample. @@ -827,11 +869,26 @@ module Mixed = { // continuous (*) continuous => continuous, but also // discrete (*) continuous => continuous (and vice versa). We have to take care of all combos and then combine them: let ccConvResult = - Continuous.combineAlgebraically(~downsample=false, op, t1d.continuous, t2d.continuous); + Continuous.combineAlgebraically( + ~downsample=false, + op, + t1d.continuous, + t2d.continuous, + ); let dcConvResult = - Continuous.combineAlgebraicallyWithDiscrete(~downsample=false, op, t2d.continuous, t1d.discrete); + Continuous.combineAlgebraicallyWithDiscrete( + ~downsample=false, + op, + t2d.continuous, + t1d.discrete, + ); let cdConvResult = - Continuous.combineAlgebraicallyWithDiscrete(~downsample=false, op, t1d.continuous, t2d.discrete); + Continuous.combineAlgebraicallyWithDiscrete( + ~downsample=false, + op, + t1d.continuous, + t2d.discrete, + ); let continuousConvResult = Continuous.reduce((+.), [|ccConvResult, dcConvResult, cdConvResult|]); @@ -866,23 +923,47 @@ module Shape = { c => Mixed.make(~discrete=Discrete.empty, ~continuous=c), )); - let combineAlgebraically = (op: AlgebraicCombinations.algebraicOperation, t1: t, t2: t): t => { - switch ((t1, t2)) { - | 
(Continuous(m1), Continuous(m2)) => DistTypes.Continuous(Continuous.combineAlgebraically(~downsample=true, op, m1, m2)) - | (Discrete(m1), Discrete(m2)) => DistTypes.Discrete(Discrete.combineAlgebraically(op, m1, m2)) - | (m1, m2) => { - DistTypes.Mixed(Mixed.combineAlgebraically(~downsample=true, op, toMixed(m1), toMixed(m2))) - } + let combineAlgebraically = + (op: SymbolicTypes.algebraicOperation, t1: t, t2: t): t => { + switch (t1, t2) { + | (Continuous(m1), Continuous(m2)) => + DistTypes.Continuous( + Continuous.combineAlgebraically(~downsample=true, op, m1, m2), + ) + | (Discrete(m1), Discrete(m2)) => + DistTypes.Discrete(Discrete.combineAlgebraically(op, m1, m2)) + | (m1, m2) => + DistTypes.Mixed( + Mixed.combineAlgebraically( + ~downsample=true, + op, + toMixed(m1), + toMixed(m2), + ), + ) }; }; - let combinePointwise = (~knownIntegralSumsFn=(_, _) => None, fn, t1: t, t2: t) => - switch ((t1, t2)) { - | (Continuous(m1), Continuous(m2)) => DistTypes.Continuous(Continuous.combinePointwise(~knownIntegralSumsFn, fn, m1, m2)) - | (Discrete(m1), Discrete(m2)) => DistTypes.Discrete(Discrete.combinePointwise(~knownIntegralSumsFn, fn, m1, m2)) - | (m1, m2) => { - DistTypes.Mixed(Mixed.combinePointwise(~knownIntegralSumsFn, fn, toMixed(m1), toMixed(m2))) - } + let combinePointwise = + (~knownIntegralSumsFn=(_, _) => None, fn, t1: t, t2: t) => + switch (t1, t2) { + | (Continuous(m1), Continuous(m2)) => + DistTypes.Continuous( + Continuous.combinePointwise(~knownIntegralSumsFn, fn, m1, m2), + ) + | (Discrete(m1), Discrete(m2)) => + DistTypes.Discrete( + Discrete.combinePointwise(~knownIntegralSumsFn, fn, m1, m2), + ) + | (m1, m2) => + DistTypes.Mixed( + Mixed.combinePointwise( + ~knownIntegralSumsFn, + fn, + toMixed(m1), + toMixed(m2), + ), + ) }; // TODO: implement these functions @@ -915,7 +996,6 @@ module Shape = { let toContinuous = t => None; let toDiscrete = t => None; - let downsample = (~cache=None, i, t) => fmap( ( @@ -938,7 +1018,11 @@ module Shape = { let 
toDiscreteProbabilityMassFraction = t => 0.0; let normalize = - fmap((Mixed.T.normalize, Discrete.T.normalize, Continuous.T.normalize)); + fmap(( + Mixed.T.normalize, + Discrete.T.normalize, + Continuous.T.normalize, + )); let toContinuous = mapToAll(( Mixed.T.toContinuous, @@ -1089,7 +1173,8 @@ module DistPlus = { }; let truncate = (leftCutoff, rightCutoff, t: t): t => { - let truncatedShape = t |> toShape |> Shape.T.truncate(leftCutoff, rightCutoff); + let truncatedShape = + t |> toShape |> Shape.T.truncate(leftCutoff, rightCutoff); t |> updateShape(truncatedShape); }; @@ -1153,9 +1238,9 @@ module DistPlus = { let integralYtoX = (~cache as _, f, t: t) => { Shape.T.Integral.yToX(~cache=Some(t.integralCache), f, toShape(t)); }; - let mean = (t: t) => { - Shape.T.mean(t.shape); - }; + let mean = (t: t) => { + Shape.T.mean(t.shape); + }; let variance = (t: t) => Shape.T.variance(t.shape); }); }; diff --git a/src/distPlus/symbolic/SymbolicDist.re b/src/distPlus/symbolic/SymbolicDist.re index 9765ed04..28130a98 100644 --- a/src/distPlus/symbolic/SymbolicDist.re +++ b/src/distPlus/symbolic/SymbolicDist.re @@ -123,6 +123,12 @@ module Normal = { let stdev = 1. /. (1. /. n1.stdev ** 2. +. 1. /. n2.stdev ** 2.); `Normal({mean, stdev}); }; + + let operate = (operation: SymbolicTypes.Algebraic.t, n1: t, n2: t) => switch(operation){ + | `Add => Some(add(n1, n2)) + | `Subtract => Some(subtract(n1, n2)) + | _ => None + } }; module Beta = { @@ -171,6 +177,11 @@ module Lognormal = { let sigma = l1.sigma +. 
l2.sigma; `Lognormal({mu, sigma}); }; + let operate = (operation: SymbolicTypes.Algebraic.t, n1: t, n2: t) => switch(operation){ + | `Multiply => Some(multiply(n1, n2)) + | `Divide => Some(divide(n1, n2)) + | _ => None + } }; module Uniform = { diff --git a/src/distPlus/symbolic/SymbolicTypes.re b/src/distPlus/symbolic/SymbolicTypes.re new file mode 100644 index 00000000..47a8598e --- /dev/null +++ b/src/distPlus/symbolic/SymbolicTypes.re @@ -0,0 +1,67 @@ +type pointwiseOperation = [ | `Add | `Multiply]; +type scaleOperation = [ | `Multiply | `Exponentiate | `Log]; +type distToFloatOperation = [ | `Pdf(float) | `Inv(float) | `Mean | `Sample]; +type algebraicOperation = [ | `Add | `Multiply | `Subtract | `Divide]; + +module Algebraic = { + type t = algebraicOperation; + let toFn: (t, float, float) => float = + fun + | `Add => (+.) + | `Subtract => (-.) + | `Multiply => ( *. ) + | `Divide => (/.); + + let applyFn = (t, f1, f2) => { + switch (t, f1, f2) { + | (`Divide, _, 0.) => Error("Cannot divide $v1 by zero.") + | _ => Ok(toFn(t, f1, f2)) + }; + }; + + let toString = + fun + | `Add => "+" + | `Subtract => "-" + | `Multiply => "*" + | `Divide => "/"; + + let format = (a, b, c) => b ++ " " ++ toString(a) ++ " " ++ c; +}; + +module Pointwise = { + type t = pointwiseOperation; + let toString = + fun + | `Add => "+" + | `Multiply => "*"; + + let format = (a, b, c) => b ++ " " ++ toString(a) ++ " " ++ c; +}; + +module DistToFloat = { + type t = distToFloatOperation; + + let stringFromFloatFromDistOperation = + fun + | `Pdf(f) => {j|pdf(x=$f, |j} + | `Inv(f) => {j|inv(x=$f, |j} + | `Sample => "sample(" + | `Mean => "mean("; + let format = (a, b) => stringFromFloatFromDistOperation(a) ++ b ++ ")"; +}; + +module Scale = { + type t = scaleOperation; + let toFn = + fun + | `Multiply => ( *. ) + | `Exponentiate => ( ** ) + | `Log => ((a, b) => log(a) /. log(b)); + + let toKnownIntegralSumFn = + fun + | `Multiply => ((a, b) => Some(a *. 
b)) + | `Exponentiate => ((_, _) => None) + | `Log => ((_, _) => None); +} \ No newline at end of file diff --git a/src/distPlus/symbolic/TreeNode.re b/src/distPlus/symbolic/TreeNode.re index c3601873..d76a26c5 100644 --- a/src/distPlus/symbolic/TreeNode.re +++ b/src/distPlus/symbolic/TreeNode.re @@ -1,86 +1,41 @@ /* This module represents a tree node. */ +open SymbolicTypes; // todo: Symbolic already has an arbitrary continuousShape option. It seems messy to have both. type distData = [ | `Symbolic(SymbolicDist.dist) | `RenderedShape(DistTypes.shape) ]; - -type pointwiseOperation = [ | `Add | `Multiply]; -type scaleOperation = [ | `Multiply | `Exponentiate | `Log]; -type distToFloatOperation = [ | `Pdf(float) | `Inv(float) | `Mean | `Sample]; - -/* TreeNodes are either Data (i.e. symbolic or rendered distributions) or Operations. */ -type treeNode = [ - | `DistData(distData) // a leaf node that describes a distribution - | `Operation(operation) // an operation on two child nodes -] +/* TreeNodes are either Data (i.e. symbolic or rendered distributions) or Operations. 
Operations always refer to two child nodes.*/ +type treeNode = [ | `DistData(distData) | `Operation(operation)] and operation = [ - | // binary operations - `AlgebraicCombination( - AlgebraicCombinations.algebraicOperation, - treeNode, - treeNode, - ) - // unary operations - | `PointwiseCombination(pointwiseOperation, treeNode, treeNode) // always evaluates to `DistData(`RenderedShape(...)) - | `VerticalScaling(scaleOperation, treeNode, treeNode) // always evaluates to `DistData(`RenderedShape(...)) - | `Render(treeNode) // always evaluates to `DistData(`RenderedShape(...)) - | `Truncate // always evaluates to `DistData(`RenderedShape(...)) -( - option(float), - option(float), - treeNode, - ) // leftCutoff and rightCutoff - | `Normalize // always evaluates to `DistData(`RenderedShape(...)) - // leftCutoff and rightCutoff -( - treeNode, - ) - | `FloatFromDist // always evaluates to `DistData(`RenderedShape(...)) - // leftCutoff and rightCutoff -( - distToFloatOperation, - treeNode, - ) + | `AlgebraicCombination(algebraicOperation, treeNode, treeNode) + | `PointwiseCombination(pointwiseOperation, treeNode, treeNode) + | `VerticalScaling(scaleOperation, treeNode, treeNode) + | `Render(treeNode) + | `Truncate(option(float), option(float), treeNode) + | `Normalize(treeNode) + | `FloatFromDist(distToFloatOperation, treeNode) ]; module TreeNode = { type t = treeNode; type tResult = treeNode => result(treeNode, string); - let rec toString = (t: t): string => { - let stringFromAlgebraicCombination = - fun - | `Add => " + " - | `Subtract => " - " - | `Multiply => " * " - | `Divide => " / " - - let stringFromPointwiseCombination = - fun - | `Add => " .+ " - | `Multiply => " .* "; - - let stringFromFloatFromDistOperation = - fun - | `Pdf(f) => {j|pdf(x=$f, |j} - | `Inv(f) => {j|inv(x=$f, |j} - | `Sample => "sample(" - | `Mean => "mean("; - - switch (t) { + let rec toString = + fun | `DistData(`Symbolic(d)) => SymbolicDist.GenericDistFunctions.toString(d) | 
`DistData(`RenderedShape(_)) => "[shape]" | `Operation(`AlgebraicCombination(op, t1, t2)) => - toString(t1) ++ stringFromAlgebraicCombination(op) ++ toString(t2) + SymbolicTypes.Algebraic.format(op, toString(t1), toString(t2)) | `Operation(`PointwiseCombination(op, t1, t2)) => - toString(t1) ++ stringFromPointwiseCombination(op) ++ toString(t2) + SymbolicTypes.Pointwise.format(op, toString(t1), toString(t2)) | `Operation(`VerticalScaling(_scaleOp, t, scaleBy)) => toString(t) ++ " @ " ++ toString(scaleBy) | `Operation(`Normalize(t)) => "normalize(" ++ toString(t) ++ ")" - | `Operation(`FloatFromDist(floatFromDistOp, t)) => stringFromFloatFromDistOperation(floatFromDistOp) ++ toString(t) ++ ")" + | `Operation(`FloatFromDist(floatFromDistOp, t)) => + SymbolicTypes.DistToFloat.format(floatFromDistOp, toString(t)) | `Operation(`Truncate(lc, rc, t)) => "truncate(" ++ toString(t) @@ -89,9 +44,7 @@ module TreeNode = { ++ ", " ++ E.O.dimap(Js.Float.toString, () => "inf", rc) ++ ")" - | `Operation(`Render(t)) => toString(t) - }; - }; + | `Operation(`Render(t)) => toString(t); /* The following modules encapsulate everything we can do with * different kinds of operations. 
*/ @@ -104,88 +57,72 @@ module TreeNode = { let simplify = (algebraicOp, t1: t, t2: t): result(treeNode, string) => { let tryCombiningFloats: tResult = fun - | `Operation( - `AlgebraicCombination( - `Divide, - `DistData(`Symbolic(`Float(_))), - `DistData(`Symbolic(`Float(0.))), - ), - ) => - Error("Cannot divide $v1 by zero.") | `Operation( `AlgebraicCombination( algebraicOp, `DistData(`Symbolic(`Float(v1))), `DistData(`Symbolic(`Float(v2))), ), - ) => { - let func = AlgebraicCombinations.Operation.toFn(algebraicOp); - Ok(`DistData(`Symbolic(`Float(func(v1, v2))))); - } + ) => + SymbolicTypes.Algebraic.applyFn(algebraicOp, v1, v2) + |> E.R.fmap(r => `DistData(`Symbolic(`Float(r)))) | t => Ok(t); + let optionToSymbolicResult = (t, o) => + o + |> E.O.dimap(r => `DistData(`Symbolic(r)), () => t) + |> (r => Ok(r)); + let tryCombiningNormals: tResult = fun | `Operation( `AlgebraicCombination( - `Add, + operation, `DistData(`Symbolic(`Normal(n1))), `DistData(`Symbolic(`Normal(n2))), ), - ) => - Ok(`DistData(`Symbolic(SymbolicDist.Normal.add(n1, n2)))) - | `Operation( - `AlgebraicCombination( - `Subtract, - `DistData(`Symbolic(`Normal(n1))), - `DistData(`Symbolic(`Normal(n2))), - ), - ) => - Ok(`DistData(`Symbolic(SymbolicDist.Normal.subtract(n1, n2)))) + ) as t => + SymbolicDist.Normal.operate(operation, n1, n2) + |> optionToSymbolicResult(t) | t => Ok(t); let tryCombiningLognormals: tResult = fun | `Operation( `AlgebraicCombination( - `Multiply, - `DistData(`Symbolic(`Lognormal(l1))), - `DistData(`Symbolic(`Lognormal(l2))), + operation, + `DistData(`Symbolic(`Lognormal(n1))), + `DistData(`Symbolic(`Lognormal(n2))), ), - ) => - Ok(`DistData(`Symbolic(SymbolicDist.Lognormal.multiply(l1, l2)))) - | `Operation( - `AlgebraicCombination( - `Divide, - `DistData(`Symbolic(`Lognormal(l1))), - `DistData(`Symbolic(`Lognormal(l2))), - ), - ) => - Ok(`DistData(`Symbolic(SymbolicDist.Lognormal.divide(l1, l2)))) + ) as t => + SymbolicDist.Lognormal.operate(operation, n1, n2) + |> 
optionToSymbolicResult(t) | t => Ok(t); let originalTreeNode = - `Operation(`AlgebraicCombination((algebraicOp, t1, t2))); + `Operation(`AlgebraicCombination((algebraicOp, t1, t2))); + // Feedback: I like this pattern, kudos originalTreeNode |> tryCombiningFloats |> E.R.bind(_, tryCombiningNormals) |> E.R.bind(_, tryCombiningLognormals); }; + // todo: I don't like the name evaluateNumerically that much, if this renders and does it algebraically. It's tricky. let evaluateNumerically = (algebraicOp, operationToDistData, t1, t2) => { // force rendering into shapes - let renderedShape1 = operationToDistData(`Render(t1)); - let renderedShape2 = operationToDistData(`Render(t2)); - - switch (renderedShape1, renderedShape2) { + let renderShape = r => operationToDistData(`Render(r)); + switch (renderShape(t1), renderShape(t2)) { | ( Ok(`DistData(`RenderedShape(s1))), Ok(`DistData(`RenderedShape(s2))), ) => Ok( `DistData( - `RenderedShape(Distributions.Shape.combineAlgebraically(algebraicOp, s1, s2)), + `RenderedShape( + Distributions.Shape.combineAlgebraically(algebraicOp, s1, s2), + ), ), ) | (Error(e1), _) => Error(e1) @@ -195,7 +132,12 @@ module TreeNode = { }; let evaluateToDistData = - (algebraicOp: AlgebraicCombinations.algebraicOperation, operationToDistData, t1: t, t2: t) + ( + algebraicOp: SymbolicTypes.algebraicOperation, + operationToDistData, + t1: t, + t2: t, + ) : result(treeNode, string) => algebraicOp |> simplify(_, t1, t2) @@ -210,27 +152,13 @@ module TreeNode = { }; module VerticalScaling = { - let fnFromOp = - fun - | `Multiply => ( *. ) - | `Exponentiate => ( ** ) - | `Log => ((a, b) => log(a) /. log(b)); - - let knownIntegralSumFnFromOp = - fun - | `Multiply => ((a, b) => Some(a *. b)) - | `Exponentiate => ((_, _) => None) - | `Log => ((_, _) => None); - let evaluateToDistData = (scaleOp, operationToDistData, t, scaleBy) => { // scaleBy has to be a single float, otherwise we'll return an error. 
- let fn = fnFromOp(scaleOp); - let knownIntegralSumFn = knownIntegralSumFnFromOp(scaleOp); - + let fn = SymbolicTypes.Scale.toFn(scaleOp); + let knownIntegralSumFn = SymbolicTypes.Scale.toKnownIntegralSumFn(scaleOp); let renderedShape = operationToDistData(`Render(t)); switch (renderedShape, scaleBy) { - | (Error(e1), _) => Error(e1) | ( Ok(`DistData(`RenderedShape(rs))), `DistData(`Symbolic(`Float(sm))), @@ -246,6 +174,7 @@ module TreeNode = { ), ), ) + | (Error(e1), _) => Error(e1) | (_, _) => Error("Can only scale by float values.") }; }; @@ -253,14 +182,28 @@ module TreeNode = { module PointwiseCombination = { let pointwiseAdd = (operationToDistData, t1, t2) => { - let renderedShape1 = operationToDistData(`Render(t1)); - let renderedShape2 = operationToDistData(`Render(t2)); + let renderedShape1 = operationToDistData(`Render(t1)); + let renderedShape2 = operationToDistData(`Render(t2)); - switch ((renderedShape1, renderedShape2)) { + switch (renderedShape1, renderedShape2) { + | ( + Ok(`DistData(`RenderedShape(rs1))), + Ok(`DistData(`RenderedShape(rs2))), + ) => + Ok( + `DistData( + `RenderedShape( + Distributions.Shape.combinePointwise( + ~knownIntegralSumsFn=(a, b) => Some(a +. b), + (+.), + rs1, + rs2, + ), + ), + ), + ) | (Error(e1), _) => Error(e1) | (_, Error(e2)) => Error(e2) - | (Ok(`DistData(`RenderedShape(rs1))), Ok(`DistData(`RenderedShape(rs2)))) => - Ok(`DistData(`RenderedShape(Distributions.Shape.combinePointwise(~knownIntegralSumsFn=(a, b) => Some(a +. b), (+.), rs1, rs2)))) | _ => Error("Could not perform pointwise addition.") }; }; @@ -268,14 +211,16 @@ module TreeNode = { let pointwiseMultiply = (operationToDistData, t1, t2) => { // TODO: construct a function that we can easily sample from, to construct // a RenderedShape. Use the xMin and xMax of the rendered shapes to tell the sampling function where to look. 
- Error("Pointwise multiplication not yet supported."); + Error( + "Pointwise multiplication not yet supported.", + ); }; let evaluateToDistData = (pointwiseOp, operationToDistData, t1, t2) => { switch (pointwiseOp) { | `Add => pointwiseAdd(operationToDistData, t1, t2) | `Multiply => pointwiseMultiply(operationToDistData, t1, t2) - } + }; }; }; @@ -378,7 +323,9 @@ module TreeNode = { }; E.R.bind(value, v => Ok(`DistData(`Symbolic(`Float(v))))); }; - let evaluateFromRenderedShape = (distToFloatOp: distToFloatOperation, rs: DistTypes.shape) : result(treeNode, string) => { + let evaluateFromRenderedShape = + (distToFloatOp: distToFloatOperation, rs: DistTypes.shape) + : result(treeNode, string) => { let value = switch (distToFloatOp) { | `Pdf(f) => Ok(Distributions.Shape.pdf(f, rs)) @@ -410,8 +357,12 @@ module TreeNode = { module Render = { let rec evaluateToRenderedShape = - (operationToDistData: operation => result(t, string), sampleCount: int, t: treeNode) - : result(t, string) => { + ( + operationToDistData: operation => result(t, string), + sampleCount: int, + t: treeNode, + ) + : result(t, string) => { switch (t) { | `DistData(`RenderedShape(s)) => Ok(`DistData(`RenderedShape(s))) // already a rendered shape, we're done here | `DistData(`Symbolic(d)) => @@ -495,10 +446,19 @@ module TreeNode = { t, ) | `FloatFromDist(distToFloatOp, t) => - FloatFromDist.evaluateToDistData(distToFloatOp, operationToDistData(sampleCount), t) - | `Normalize(t) => Normalize.evaluateToDistData(operationToDistData(sampleCount), t) + FloatFromDist.evaluateToDistData( + distToFloatOp, + operationToDistData(sampleCount), + t, + ) + | `Normalize(t) => + Normalize.evaluateToDistData(operationToDistData(sampleCount), t) | `Render(t) => - Render.evaluateToRenderedShape(operationToDistData(sampleCount), sampleCount, t) + Render.evaluateToRenderedShape( + operationToDistData(sampleCount), + sampleCount, + t, + ) }; }; @@ -531,5 +491,4 @@ let toShape = (sampleCount: int, treeNode: treeNode) => { 
}; }; -let toString = (treeNode: treeNode) => - TreeNode.toString(treeNode); +let toString = (treeNode: treeNode) => TreeNode.toString(treeNode); From 05097bf28aa449ece7e4d97dc1365fdf5091a5f9 Mon Sep 17 00:00:00 2001 From: Ozzie Gooen Date: Wed, 1 Jul 2020 23:05:35 +0100 Subject: [PATCH 17/31] Simplifications for MathJsParser --- .../distribution/AlgebraicCombinations.re | 1 - src/distPlus/symbolic/MathJsParser.re | 274 ++++++++++-------- 2 files changed, 148 insertions(+), 127 deletions(-) diff --git a/src/distPlus/distribution/AlgebraicCombinations.re b/src/distPlus/distribution/AlgebraicCombinations.re index 683ab2ba..17e1a1c0 100644 --- a/src/distPlus/distribution/AlgebraicCombinations.re +++ b/src/distPlus/distribution/AlgebraicCombinations.re @@ -185,7 +185,6 @@ let combineShapesContinuousContinuous = let outputYs: array(float) = Belt.Array.make(200, 0.0); // now, for each of the outputYs, accumulate from a Gaussian kernel over each input point. for (i in 0 to E.A.length(outputXs) - 1) { - let x = outputXs[i]; for (j in 0 to E.A.length(masses) - 1) { let dx = outputXs[i] -. 
means[j]; let contribution = diff --git a/src/distPlus/symbolic/MathJsParser.re b/src/distPlus/symbolic/MathJsParser.re index 087aa14d..c51026ca 100644 --- a/src/distPlus/symbolic/MathJsParser.re +++ b/src/distPlus/symbolic/MathJsParser.re @@ -96,12 +96,19 @@ module MathAdtToDistDst = { let lognormal: array(arg) => result(TreeNode.treeNode, string) = fun - | [|Value(mu), Value(sigma)|] => Ok(`DistData(`Symbolic(`Lognormal({mu, sigma})))) + | [|Value(mu), Value(sigma)|] => + Ok(`DistData(`Symbolic(`Lognormal({mu, sigma})))) | [|Object(o)|] => { let g = Js.Dict.get(o); switch (g("mean"), g("stdev"), g("mu"), g("sigma")) { | (Some(Value(mean)), Some(Value(stdev)), _, _) => - Ok(`DistData(`Symbolic(SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev)))) + Ok( + `DistData( + `Symbolic( + SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev), + ), + ), + ) | (_, _, Some(Value(mu)), Some(Value(sigma))) => Ok(`DistData(`Symbolic(`Lognormal({mu, sigma})))) | _ => Error("Lognormal distribution would need mean and stdev") @@ -111,11 +118,19 @@ module MathAdtToDistDst = { let to_: array(arg) => result(TreeNode.treeNode, string) = fun - | [|Value(low), Value(high)|] when low <= 0.0 && low < high=> { - Ok(`DistData(`Symbolic(SymbolicDist.Normal.from90PercentCI(low, high)))); + | [|Value(low), Value(high)|] when low <= 0.0 && low < high => { + Ok( + `DistData( + `Symbolic(SymbolicDist.Normal.from90PercentCI(low, high)), + ), + ); } | [|Value(low), Value(high)|] when low < high => { - Ok(`DistData(`Symbolic(SymbolicDist.Lognormal.from90PercentCI(low, high)))); + Ok( + `DistData( + `Symbolic(SymbolicDist.Lognormal.from90PercentCI(low, high)), + ), + ); } | [|Value(_), Value(_)|] => Error("Low value must be less than high value.") @@ -123,17 +138,20 @@ module MathAdtToDistDst = { let uniform: array(arg) => result(TreeNode.treeNode, string) = fun - | [|Value(low), Value(high)|] => Ok(`DistData(`Symbolic(`Uniform({low, high})))) + | [|Value(low), Value(high)|] => + 
Ok(`DistData(`Symbolic(`Uniform({low, high})))) | _ => Error("Wrong number of variables in lognormal distribution"); let beta: array(arg) => result(TreeNode.treeNode, string) = fun - | [|Value(alpha), Value(beta)|] => Ok(`DistData(`Symbolic(`Beta({alpha, beta})))) + | [|Value(alpha), Value(beta)|] => + Ok(`DistData(`Symbolic(`Beta({alpha, beta})))) | _ => Error("Wrong number of variables in lognormal distribution"); let exponential: array(arg) => result(TreeNode.treeNode, string) = fun - | [|Value(rate)|] => Ok(`DistData(`Symbolic(`Exponential({rate: rate})))) + | [|Value(rate)|] => + Ok(`DistData(`Symbolic(`Exponential({rate: rate})))) | _ => Error("Wrong number of variables in Exponential distribution"); let cauchy: array(arg) => result(TreeNode.treeNode, string) = @@ -167,62 +185,133 @@ module MathAdtToDistDst = { let withoutErrors = args |> E.A.fmap(E.R.toOption) |> E.A.O.concatSomes; switch (firstWithError) { - | Some(Error(e)) => Error(e) - | None when withoutErrors |> E.A.length == 0 => - Error("Multimodals need at least one input") - | _ => { - let components = withoutErrors + | Some(Error(e)) => Error(e) + | None when withoutErrors |> E.A.length == 0 => + Error("Multimodals need at least one input") + | _ => + let components = + withoutErrors |> E.A.fmapi((index, t) => { - let w = weights |> E.A.get(_, index) |> E.O.default(1.0); + let w = weights |> E.A.get(_, index) |> E.O.default(1.0); - `Operation(`VerticalScaling(`Multiply, t, `DistData(`Symbolic(`Float(w))))) - }); + `Operation( + `VerticalScaling(( + `Multiply, + t, + `DistData(`Symbolic(`Float(w))), + )), + ); + }); - let pointwiseSum = components + let pointwiseSum = + components |> Js.Array.sliceFrom(1) - |> E.A.fold_left((acc, x) => { - `Operation(`PointwiseCombination(`Add, acc, x)) - }, E.A.unsafe_get(components, 0)) + |> E.A.fold_left( + (acc, x) => { + `Operation(`PointwiseCombination((`Add, acc, x))) + }, + E.A.unsafe_get(components, 0), + ); - Ok(`Operation(`Normalize(pointwiseSum))) - } + 
Ok(`Operation(`Normalize(pointwiseSum))); }; }; - let arrayParser = (args:array(arg)):result(TreeNode.treeNode, string) => { - let samples = args - |> E.A.fmap( - fun - | Value(n) => Some(n) - | _ => None - ) - |> E.A.O.concatSomes + let arrayParser = (args: array(arg)): result(TreeNode.treeNode, string) => { + let samples = + args + |> E.A.fmap( + fun + | Value(n) => Some(n) + | _ => None, + ) + |> E.A.O.concatSomes; let outputs = Samples.T.fromSamples(samples); - let pdf = outputs.shape |> E.O.bind(_,Distributions.Shape.T.toContinuous); - let shape = pdf |> E.O.fmap(pdf => { - let _pdf = Distributions.Continuous.T.normalize(pdf); - let cdf = Distributions.Continuous.T.integral(~cache=None, _pdf); - SymbolicDist.ContinuousShape.make(_pdf, cdf) - }); - switch(shape){ - | Some(s) => Ok(`DistData(`Symbolic(`ContinuousShape(s)))) - | None => Error("Rendering did not work") - } - } + let pdf = + outputs.shape |> E.O.bind(_, Distributions.Shape.T.toContinuous); + let shape = + pdf + |> E.O.fmap(pdf => { + let _pdf = Distributions.Continuous.T.normalize(pdf); + let cdf = Distributions.Continuous.T.integral(~cache=None, _pdf); + SymbolicDist.ContinuousShape.make(_pdf, cdf); + }); + switch (shape) { + | Some(s) => Ok(`DistData(`Symbolic(`ContinuousShape(s)))) + | None => Error("Rendering did not work") + }; + }; + let toCombination = r => Ok(`Operation(`AlgebraicCombination(r))); + let operationParser = + (name: string, args: array(result(TreeNode.treeNode, string))) => + switch (name, args) { + | ("add", [|Ok(l), Ok(r)|]) => toCombination((`Add, l, r)) + | ("add", _) => Error("Addition needs two operands") + | ("subtract", [|Ok(l), Ok(r)|]) => toCombination((`Subtract, l, r)) + | ("subtract", _) => Error("Subtraction needs two operands") + | ("multiply", [|Ok(l), Ok(r)|]) => toCombination((`Multiply, l, r)) + | ("multiply", _) => Error("Multiplication needs two operands") + | ("divide", [|Ok(_), Ok(`DistData(`Symbolic(`Float(0.0))))|]) => + Error("Division by zero") + | 
("divide", [|Ok(l), Ok(r)|]) => toCombination((`Divide, l, r)) + | ("divide", _) => Error("Division needs two operands") + | ("pow", _) => Error("Exponentiation is not yet supported.") + | ("leftTruncate", [|Ok(d), Ok(`DistData(`Symbolic(`Float(lc))))|]) => + Ok(`Operation(`Truncate((Some(lc), None, d)))) + | ("leftTruncate", _) => + Error("leftTruncate needs two arguments: the expression and the cutoff") + | ("rightTruncate", [|Ok(d), Ok(`DistData(`Symbolic(`Float(rc))))|]) => + Ok(`Operation(`Truncate((None, Some(rc), d)))) + | ("rightTruncate", _) => + Error( + "rightTruncate needs two arguments: the expression and the cutoff", + ) + | ( + "truncate", + [| + Ok(d), + Ok(`DistData(`Symbolic(`Float(lc)))), + Ok(`DistData(`Symbolic(`Float(rc)))), + |], + ) => + Ok(`Operation(`Truncate((Some(lc), Some(rc), d)))) + | ("truncate", _) => + Error("truncate needs three arguments: the expression and both cutoffs") + | _ => Error("This type not currently supported") + }; + + let rec functionParser = (r): result(TreeNode.treeNode, string) => { + let parseFunction = (name, args) => { + let parseArgs = () => args |> E.A.fmap(functionParser); + switch (name) { + | "normal" => normal(args) + | "lognormal" => lognormal(args) + | "uniform" => uniform(args) + | "beta" => beta(args) + | "to" => to_(args) + | "exponential" => exponential(args) + | "cauchy" => cauchy(args) + | "triangular" => triangular(args) + | "mean" => Error("mean(...) not yet implemented.") + | "inv" => Error("inv(...) not yet implemented.") + | "sample" => Error("sample(...) not yet implemented.") + | "pdf" => Error("pdf(...) 
not yet implemented.") + | "add" + | "subtract" + | "multiply" + | "divide" + | "pow" + | "leftTruncate" + | "rightTruncate" + | "truncate" => operationParser(name, parseArgs()) + | n => Error(n ++ " is not currently supported") + }; + }; - let rec functionParser = (r): result(TreeNode.treeNode, string) => r |> ( fun - | Fn({name: "normal", args}) => normal(args) - | Fn({name: "lognormal", args}) => lognormal(args) - | Fn({name: "uniform", args}) => uniform(args) - | Fn({name: "beta", args}) => beta(args) - | Fn({name: "to", args}) => to_(args) - | Fn({name: "exponential", args}) => exponential(args) - | Fn({name: "cauchy", args}) => cauchy(args) - | Fn({name: "triangular", args}) => triangular(args) | Value(f) => Ok(`DistData(`Symbolic(`Float(f)))) | Fn({name: "mm", args}) => { let weights = @@ -250,79 +339,12 @@ module MathAdtToDistDst = { let dists = possibleDists |> E.A.fmap(functionParser); multiModal(dists, weights); } - - // TODO: wire up these FloatFromDist operations - | Fn({name: "mean", args}) => Error("mean(...) not yet implemented.") - | Fn({name: "inv", args}) => Error("inv(...) not yet implemented.") - | Fn({name: "sample", args}) => Error("sample(...) not yet implemented.") - | Fn({name: "pdf", args}) => Error("pdf(...) 
not yet implemented.") - - | Fn({name: "add", args}) => { - args - |> E.A.fmap(functionParser) - |> (fun - | [|Ok(l), Ok(r)|] => Ok(`Operation(`AlgebraicCombination(`Add, l, r))) - | _ => Error("Addition needs two operands")) - } - | Fn({name: "subtract", args}) => { - args - |> E.A.fmap(functionParser) - |> (fun - | [|Ok(l), Ok(r)|] => Ok(`Operation(`AlgebraicCombination(`Subtract, l, r))) - | _ => Error("Subtraction needs two operands")) - } - | Fn({name: "multiply", args}) => { - args - |> E.A.fmap(functionParser) - |> (fun - | [|Ok(l), Ok(r)|] => Ok(`Operation(`AlgebraicCombination(`Multiply, l, r))) - | _ => Error("Multiplication needs two operands")) - } - | Fn({name: "divide", args}) => { - args - |> E.A.fmap(functionParser) - |> (fun - | [|Ok(l), Ok(`DistData(`Symbolic(`Float(0.0))))|] => Error("Division by zero") - | [|Ok(l), Ok(r)|] => Ok(`Operation(`AlgebraicCombination(`Divide, l, r))) - | _ => Error("Division needs two operands")) - } - // TODO: Figure out how to implement meaningful exponentiation - | Fn({name: "pow", args}) => { - args - |> E.A.fmap(functionParser) - |> (fun - //| [|Ok(l), Ok(r)|] => Ok(`Operation(`AlgebraicCombination(`Exponentiate, l, r))) - //| _ => Error("Exponentiations needs two operands")) - | _ => Error("Exponentiation is not yet supported.") - ) - } - | Fn({name: "leftTruncate", args}) => { - args - |> E.A.fmap(functionParser) - |> (fun - | [|Ok(d), Ok(`DistData(`Symbolic(`Float(lc))))|] => Ok(`Operation(`Truncate(Some(lc), None, d))) - | _ => Error("leftTruncate needs two arguments: the expression and the cutoff")) - } - | Fn({name: "rightTruncate", args}) => { - args - |> E.A.fmap(functionParser) - |> (fun - | [|Ok(d), Ok(`DistData(`Symbolic(`Float(rc))))|] => Ok(`Operation(`Truncate(None, Some(rc), d))) - | _ => Error("rightTruncate needs two arguments: the expression and the cutoff")) - } - | Fn({name: "truncate", args}) => { - args - |> E.A.fmap(functionParser) - |> (fun - | [|Ok(d), 
Ok(`DistData(`Symbolic(`Float(lc)))), Ok(`DistData(`Symbolic(`Float(rc))))|] => Ok(`Operation(`Truncate(Some(lc), Some(rc), d))) - // TODO: allow on-the-fly evaluations of FloatFromDists to be used as cutoff arguments here. - | _ => Error("rightTruncate needs two arguments: the expression and the cutoff")) - } - | Fn({name}) => Error(name ++ ": function not supported") + | Fn({name: n, args}) => parseFunction(n, args) | _ => { Error("This type not currently supported"); } ); + }; let topLevel = (r): result(TreeNode.treeNode, string) => r @@ -341,13 +363,13 @@ module MathAdtToDistDst = { let fromString = str => { /* We feed the user-typed string into Mathjs.parseMath, - which returns a JSON with (hopefully) a single-element array. - This array element is the top-level node of a nested-object tree - representing the functions/arguments/values/etc. in the string. + which returns a JSON with (hopefully) a single-element array. + This array element is the top-level node of a nested-object tree + representing the functions/arguments/values/etc. in the string. - The function MathJsonToMathJsAdt then recursively unpacks this JSON into a typed data structure we can use. - Inside of this function, MathAdtToDistDst is called whenever a distribution function is encountered. - */ + The function MathJsonToMathJsAdt then recursively unpacks this JSON into a typed data structure we can use. + Inside of this function, MathAdtToDistDst is called whenever a distribution function is encountered. 
+ */ let mathJsToJson = Mathjs.parseMath(str); let mathJsParse = E.R.bind(mathJsToJson, r => { @@ -355,7 +377,7 @@ let fromString = str => { | Some(r) => Ok(r) | None => Error("MathJsParse Error") } -}); + }); let value = E.R.bind(mathJsParse, MathAdtToDistDst.run); value; From 43ba49dd120e40368fe7d0e274cd3c7ee03e60ac Mon Sep 17 00:00:00 2001 From: Ozzie Gooen Date: Wed, 1 Jul 2020 23:47:49 +0100 Subject: [PATCH 18/31] Minor MathJsParser cleanup --- src/distPlus/symbolic/MathJsParser.re | 164 ++++++++++++-------------- 1 file changed, 77 insertions(+), 87 deletions(-) diff --git a/src/distPlus/symbolic/MathJsParser.re b/src/distPlus/symbolic/MathJsParser.re index c51026ca..b83713a2 100644 --- a/src/distPlus/symbolic/MathJsParser.re +++ b/src/distPlus/symbolic/MathJsParser.re @@ -1,5 +1,3 @@ -// todo: rename to SymbolicParser - module MathJsonToMathJsAdt = { type arg = | Symbol(string) @@ -242,27 +240,26 @@ module MathAdtToDistDst = { }; }; - let toCombination = r => Ok(`Operation(`AlgebraicCombination(r))); let operationParser = - (name: string, args: array(result(TreeNode.treeNode, string))) => + (name: string, args: array(result(TreeNode.treeNode, string))) => { + let toOkAlgebraic = r => Ok(`Operation(`AlgebraicCombination(r))); + let toOkTrunctate = r => Ok(`Operation(`Truncate(r))); switch (name, args) { - | ("add", [|Ok(l), Ok(r)|]) => toCombination((`Add, l, r)) + | ("add", [|Ok(l), Ok(r)|]) => toOkAlgebraic((`Add, l, r)) | ("add", _) => Error("Addition needs two operands") - | ("subtract", [|Ok(l), Ok(r)|]) => toCombination((`Subtract, l, r)) + | ("subtract", [|Ok(l), Ok(r)|]) => toOkAlgebraic((`Subtract, l, r)) | ("subtract", _) => Error("Subtraction needs two operands") - | ("multiply", [|Ok(l), Ok(r)|]) => toCombination((`Multiply, l, r)) + | ("multiply", [|Ok(l), Ok(r)|]) => toOkAlgebraic((`Multiply, l, r)) | ("multiply", _) => Error("Multiplication needs two operands") - | ("divide", [|Ok(_), Ok(`DistData(`Symbolic(`Float(0.0))))|]) => - 
Error("Division by zero") - | ("divide", [|Ok(l), Ok(r)|]) => toCombination((`Divide, l, r)) + | ("divide", [|Ok(l), Ok(r)|]) => toOkAlgebraic((`Divide, l, r)) | ("divide", _) => Error("Division needs two operands") | ("pow", _) => Error("Exponentiation is not yet supported.") | ("leftTruncate", [|Ok(d), Ok(`DistData(`Symbolic(`Float(lc))))|]) => - Ok(`Operation(`Truncate((Some(lc), None, d)))) + toOkTrunctate((Some(lc), None, d)) | ("leftTruncate", _) => Error("leftTruncate needs two arguments: the expression and the cutoff") | ("rightTruncate", [|Ok(d), Ok(`DistData(`Symbolic(`Float(rc))))|]) => - Ok(`Operation(`Truncate((None, Some(rc), d)))) + toOkTrunctate((None, Some(rc), d)) | ("rightTruncate", _) => Error( "rightTruncate needs two arguments: the expression and the cutoff", @@ -275,87 +272,80 @@ module MathAdtToDistDst = { Ok(`DistData(`Symbolic(`Float(rc)))), |], ) => - Ok(`Operation(`Truncate((Some(lc), Some(rc), d)))) + toOkTrunctate((Some(lc), Some(rc), d)) | ("truncate", _) => Error("truncate needs three arguments: the expression and both cutoffs") | _ => Error("This type not currently supported") }; - - let rec functionParser = (r): result(TreeNode.treeNode, string) => { - let parseFunction = (name, args) => { - let parseArgs = () => args |> E.A.fmap(functionParser); - switch (name) { - | "normal" => normal(args) - | "lognormal" => lognormal(args) - | "uniform" => uniform(args) - | "beta" => beta(args) - | "to" => to_(args) - | "exponential" => exponential(args) - | "cauchy" => cauchy(args) - | "triangular" => triangular(args) - | "mean" => Error("mean(...) not yet implemented.") - | "inv" => Error("inv(...) not yet implemented.") - | "sample" => Error("sample(...) not yet implemented.") - | "pdf" => Error("pdf(...) 
not yet implemented.") - | "add" - | "subtract" - | "multiply" - | "divide" - | "pow" - | "leftTruncate" - | "rightTruncate" - | "truncate" => operationParser(name, parseArgs()) - | n => Error(n ++ " is not currently supported") - }; - }; - - r - |> ( - fun - | Value(f) => Ok(`DistData(`Symbolic(`Float(f)))) - | Fn({name: "mm", args}) => { - let weights = - args - |> E.A.last - |> E.O.bind( - _, - fun - | Array(values) => Some(values) - | _ => None, - ) - |> E.O.fmap(o => - o - |> E.A.fmap( - fun - | Value(r) => Some(r) - | _ => None, - ) - |> E.A.O.concatSomes - ); - let possibleDists = - E.O.isSome(weights) - ? Belt.Array.slice(args, ~offset=0, ~len=E.A.length(args) - 1) - : args; - let dists = possibleDists |> E.A.fmap(functionParser); - multiModal(dists, weights); - } - | Fn({name: n, args}) => parseFunction(n, args) - | _ => { - Error("This type not currently supported"); - } - ); }; - let topLevel = (r): result(TreeNode.treeNode, string) => - r - |> ( - fun - | Fn(_) => functionParser(r) - | Value(r) => Ok(`DistData(`Symbolic(`Float(r)))) - | Array(r) => arrayParser(r) - | Symbol(_) => Error("Symbol not valid as top level") - | Object(_) => Error("Object not valid as top level") - ); + let functionParser = (nodeParser, name, args) => { + let parseArgs = () => args |> E.A.fmap(nodeParser); + switch (name) { + | "normal" => normal(args) + | "lognormal" => lognormal(args) + | "uniform" => uniform(args) + | "beta" => beta(args) + | "to" => to_(args) + | "exponential" => exponential(args) + | "cauchy" => cauchy(args) + | "triangular" => triangular(args) + | "mm" => + let weights = + args + |> E.A.last + |> E.O.bind( + _, + fun + | Array(values) => Some(values) + | _ => None, + ) + |> E.O.fmap(o => + o + |> E.A.fmap( + fun + | Value(r) => Some(r) + | _ => None, + ) + |> E.A.O.concatSomes + ); + let possibleDists = + E.O.isSome(weights) + ? 
Belt.Array.slice(args, ~offset=0, ~len=E.A.length(args) - 1) + : args; + let dists = possibleDists |> E.A.fmap(nodeParser); + multiModal(dists, weights); + | "add" + | "subtract" + | "multiply" + | "divide" + | "pow" + | "leftTruncate" + | "rightTruncate" + | "truncate" => operationParser(name, parseArgs()) + | "mean" as n + | "inv" as n + | "sample" as n + | "pdf" as n + | n => Error(n ++ "(...) is not currently supported") + }; + }; + + let rec nodeParser = + fun + | Value(f) => Ok(`DistData(`Symbolic(`Float(f)))) + | Fn({name, args}) => functionParser(nodeParser, name, args) + | _ => { + Error("This type not currently supported"); + }; + + let topLevel = + fun + | Array(r) => arrayParser(r) + | Value(_) as r => nodeParser(r) + | Fn(_) as r => nodeParser(r) + | Symbol(_) => Error("Symbol not valid as top level") + | Object(_) => Error("Object not valid as top level"); let run = (r): result(TreeNode.treeNode, string) => r |> MathAdtCleaner.run |> topLevel; From 491ac15f7b1fa3005c336d80fbbfcd03efc771f8 Mon Sep 17 00:00:00 2001 From: Ozzie Gooen Date: Thu, 2 Jul 2020 10:40:21 +0100 Subject: [PATCH 19/31] Separated Operation toString --- src/distPlus/symbolic/SymbolicTypes.re | 38 ++++++++++---------- src/distPlus/symbolic/TreeNode.re | 49 +++++++++++++++----------- 2 files changed, 48 insertions(+), 39 deletions(-) diff --git a/src/distPlus/symbolic/SymbolicTypes.re b/src/distPlus/symbolic/SymbolicTypes.re index 47a8598e..07ae5703 100644 --- a/src/distPlus/symbolic/SymbolicTypes.re +++ b/src/distPlus/symbolic/SymbolicTypes.re @@ -42,26 +42,26 @@ module Pointwise = { module DistToFloat = { type t = distToFloatOperation; - let stringFromFloatFromDistOperation = - fun - | `Pdf(f) => {j|pdf(x=$f, |j} - | `Inv(f) => {j|inv(x=$f, |j} - | `Sample => "sample(" - | `Mean => "mean("; - let format = (a, b) => stringFromFloatFromDistOperation(a) ++ b ++ ")"; + let format = (operation, value) => + switch (operation) { + | `Pdf(f) => {j|pdf(x=$f,$value) |j} + | `Inv(f) => 
{j|inv(x=$f,$value) |j} + | `Sample => "sample($value)" + | `Mean => "mean($value)" + }; }; module Scale = { - type t = scaleOperation; - let toFn = - fun - | `Multiply => ( *. ) - | `Exponentiate => ( ** ) - | `Log => ((a, b) => log(a) /. log(b)); + type t = scaleOperation; + let toFn = + fun + | `Multiply => ( *. ) + | `Exponentiate => ( ** ) + | `Log => ((a, b) => log(a) /. log(b)); - let toKnownIntegralSumFn = - fun - | `Multiply => ((a, b) => Some(a *. b)) - | `Exponentiate => ((_, _) => None) - | `Log => ((_, _) => None); -} \ No newline at end of file + let toKnownIntegralSumFn = + fun + | `Multiply => ((a, b) => Some(a *. b)) + | `Exponentiate => ((_, _) => None) + | `Log => ((_, _) => None); +}; diff --git a/src/distPlus/symbolic/TreeNode.re b/src/distPlus/symbolic/TreeNode.re index d76a26c5..92836c0d 100644 --- a/src/distPlus/symbolic/TreeNode.re +++ b/src/distPlus/symbolic/TreeNode.re @@ -18,6 +18,30 @@ and operation = [ | `FloatFromDist(distToFloatOperation, treeNode) ]; +module Operation = { + type t = operation; + let toString = nodeToString => + fun + | `AlgebraicCombination(op, t1, t2) => + SymbolicTypes.Algebraic.format(op, nodeToString(t1), nodeToString(t2)) + | `PointwiseCombination(op, t1, t2) => + SymbolicTypes.Pointwise.format(op, nodeToString(t1), nodeToString(t2)) + | `VerticalScaling(_scaleOp, t, scaleBy) => + nodeToString(t) ++ " @ " ++ nodeToString(scaleBy) + | `Normalize(t) => "normalize(" ++ nodeToString(t) ++ ")" + | `FloatFromDist(floatFromDistOp, t) => + SymbolicTypes.DistToFloat.format(floatFromDistOp, nodeToString(t)) + | `Truncate(lc, rc, t) => + "truncate(" + ++ nodeToString(t) + ++ ", " + ++ E.O.dimap(Js.Float.toString, () => "-inf", lc) + ++ ", " + ++ E.O.dimap(Js.Float.toString, () => "inf", rc) + ++ ")" + | `Render(t) => nodeToString(t); +}; + module TreeNode = { type t = treeNode; type tResult = treeNode => result(treeNode, string); @@ -27,24 +51,7 @@ module TreeNode = { | `DistData(`Symbolic(d)) => 
SymbolicDist.GenericDistFunctions.toString(d) | `DistData(`RenderedShape(_)) => "[shape]" - | `Operation(`AlgebraicCombination(op, t1, t2)) => - SymbolicTypes.Algebraic.format(op, toString(t1), toString(t2)) - | `Operation(`PointwiseCombination(op, t1, t2)) => - SymbolicTypes.Pointwise.format(op, toString(t1), toString(t2)) - | `Operation(`VerticalScaling(_scaleOp, t, scaleBy)) => - toString(t) ++ " @ " ++ toString(scaleBy) - | `Operation(`Normalize(t)) => "normalize(" ++ toString(t) ++ ")" - | `Operation(`FloatFromDist(floatFromDistOp, t)) => - SymbolicTypes.DistToFloat.format(floatFromDistOp, toString(t)) - | `Operation(`Truncate(lc, rc, t)) => - "truncate(" - ++ toString(t) - ++ ", " - ++ E.O.dimap(Js.Float.toString, () => "-inf", lc) - ++ ", " - ++ E.O.dimap(Js.Float.toString, () => "inf", rc) - ++ ")" - | `Operation(`Render(t)) => toString(t); + | `Operation(op) => Operation.toString(toString, op); /* The following modules encapsulate everything we can do with * different kinds of operations. */ @@ -109,7 +116,7 @@ module TreeNode = { |> E.R.bind(_, tryCombiningLognormals); }; - // todo: I don't like the name evaluateNumerically that much, if this renders and does it algebraically. It's tricky. + // todo: I don't like the name evaluateNumerically that much, if this renders and does it algebraically. It's tricky. let evaluateNumerically = (algebraicOp, operationToDistData, t1, t2) => { // force rendering into shapes let renderShape = r => operationToDistData(`Render(r)); @@ -155,7 +162,8 @@ module TreeNode = { let evaluateToDistData = (scaleOp, operationToDistData, t, scaleBy) => { // scaleBy has to be a single float, otherwise we'll return an error. 
let fn = SymbolicTypes.Scale.toFn(scaleOp); - let knownIntegralSumFn = SymbolicTypes.Scale.toKnownIntegralSumFn(scaleOp); + let knownIntegralSumFn = + SymbolicTypes.Scale.toKnownIntegralSumFn(scaleOp); let renderedShape = operationToDistData(`Render(t)); switch (renderedShape, scaleBy) { @@ -366,6 +374,7 @@ module TreeNode = { switch (t) { | `DistData(`RenderedShape(s)) => Ok(`DistData(`RenderedShape(s))) // already a rendered shape, we're done here | `DistData(`Symbolic(d)) => + // todo: move to dist switch (d) { | `Float(v) => Ok( From 101824e500b4e7d982b06e5d192051c4aa8a5cb7 Mon Sep 17 00:00:00 2001 From: Ozzie Gooen Date: Thu, 2 Jul 2020 12:14:16 +0100 Subject: [PATCH 20/31] Minor renames, and moved attemptAlgebraicOperation to SymbolicDist --- __tests__/Distributions__Test.re | 6 +- src/components/Drawer.re | 8 +- src/distPlus/symbolic/MathJsParser.re | 42 ++-- src/distPlus/symbolic/SymbolicDist.re | 106 ++++----- src/distPlus/symbolic/SymbolicTypes.re | 52 ++++- src/distPlus/symbolic/TreeNode.re | 289 ++++++++++--------------- 6 files changed, 241 insertions(+), 262 deletions(-) diff --git a/__tests__/Distributions__Test.re b/__tests__/Distributions__Test.re index c02430fe..4e16bb80 100644 --- a/__tests__/Distributions__Test.re +++ b/__tests__/Distributions__Test.re @@ -382,10 +382,10 @@ describe("Shape", () => { let variance = stdev ** 2.0; let numSamples = 10000; open Distributions.Shape; - let normal: SymbolicDist.dist = `Normal({mean, stdev}); - let normalShape = TreeNode.toShape(numSamples, `DistData(`Symbolic(normal))); + let normal: SymbolicTypes.symbolicDist = `Normal({mean, stdev}); + let normalShape = TreeNode.toShape(numSamples, `Leaf(`SymbolicDist(normal))); let lognormal = SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev); - let lognormalShape = TreeNode.toShape(numSamples, `DistData(`Symbolic(lognormal))); + let lognormalShape = TreeNode.toShape(numSamples, `Leaf(`SymbolicDist(lognormal))); makeTestCloseEquality( "Mean of a normal", diff 
--git a/src/components/Drawer.re b/src/components/Drawer.re index 8dc0c7db..f39cd3ad 100644 --- a/src/components/Drawer.re +++ b/src/components/Drawer.re @@ -388,8 +388,8 @@ module Draw = { let stdev = 15.0; let numSamples = 3000; - let normal: SymbolicDist.dist = `Normal({mean, stdev}); - let normalShape = TreeNode.toShape(numSamples, `DistData(`Symbolic(normal))); + let normal: SymbolicTypes.symbolicDist = `Normal({mean, stdev}); + let normalShape = TreeNode.toShape(numSamples, `Leaf(`SymbolicDist(normal))); let xyShape: Types.xyShape = switch (normalShape) { | Mixed(_) => {xs: [||], ys: [||]} @@ -398,9 +398,9 @@ module Draw = { }; /* // To use a lognormal instead: - let lognormal = SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev); + let lognormal = SymbolicTypes.Lognormal.fromMeanAndStdev(mean, stdev); let lognormalShape = - SymbolicDist.GenericSimple.toShape(lognormal, numSamples); + SymbolicTypes.GenericSimple.toShape(lognormal, numSamples); let lognormalXYShape: Types.xyShape = switch (lognormalShape) { | Mixed(_) => {xs: [||], ys: [||]} diff --git a/src/distPlus/symbolic/MathJsParser.re b/src/distPlus/symbolic/MathJsParser.re index b83713a2..3874fda4 100644 --- a/src/distPlus/symbolic/MathJsParser.re +++ b/src/distPlus/symbolic/MathJsParser.re @@ -89,26 +89,26 @@ module MathAdtToDistDst = { let normal: array(arg) => result(TreeNode.treeNode, string) = fun | [|Value(mean), Value(stdev)|] => - Ok(`DistData(`Symbolic(`Normal({mean, stdev})))) + Ok(`Leaf(`SymbolicDist(`Normal({mean, stdev})))) | _ => Error("Wrong number of variables in normal distribution"); let lognormal: array(arg) => result(TreeNode.treeNode, string) = fun | [|Value(mu), Value(sigma)|] => - Ok(`DistData(`Symbolic(`Lognormal({mu, sigma})))) + Ok(`Leaf(`SymbolicDist(`Lognormal({mu, sigma})))) | [|Object(o)|] => { let g = Js.Dict.get(o); switch (g("mean"), g("stdev"), g("mu"), g("sigma")) { | (Some(Value(mean)), Some(Value(stdev)), _, _) => Ok( - `DistData( - `Symbolic( + `Leaf( + 
`SymbolicDist( SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev), ), ), ) | (_, _, Some(Value(mu)), Some(Value(sigma))) => - Ok(`DistData(`Symbolic(`Lognormal({mu, sigma})))) + Ok(`Leaf(`SymbolicDist(`Lognormal({mu, sigma})))) | _ => Error("Lognormal distribution would need mean and stdev") }; } @@ -118,15 +118,15 @@ module MathAdtToDistDst = { fun | [|Value(low), Value(high)|] when low <= 0.0 && low < high => { Ok( - `DistData( - `Symbolic(SymbolicDist.Normal.from90PercentCI(low, high)), + `Leaf( + `SymbolicDist(SymbolicDist.Normal.from90PercentCI(low, high)), ), ); } | [|Value(low), Value(high)|] when low < high => { Ok( - `DistData( - `Symbolic(SymbolicDist.Lognormal.from90PercentCI(low, high)), + `Leaf( + `SymbolicDist(SymbolicDist.Lognormal.from90PercentCI(low, high)), ), ); } @@ -137,31 +137,31 @@ module MathAdtToDistDst = { let uniform: array(arg) => result(TreeNode.treeNode, string) = fun | [|Value(low), Value(high)|] => - Ok(`DistData(`Symbolic(`Uniform({low, high})))) + Ok(`Leaf(`SymbolicDist(`Uniform({low, high})))) | _ => Error("Wrong number of variables in lognormal distribution"); let beta: array(arg) => result(TreeNode.treeNode, string) = fun | [|Value(alpha), Value(beta)|] => - Ok(`DistData(`Symbolic(`Beta({alpha, beta})))) + Ok(`Leaf(`SymbolicDist(`Beta({alpha, beta})))) | _ => Error("Wrong number of variables in lognormal distribution"); let exponential: array(arg) => result(TreeNode.treeNode, string) = fun | [|Value(rate)|] => - Ok(`DistData(`Symbolic(`Exponential({rate: rate})))) + Ok(`Leaf(`SymbolicDist(`Exponential({rate: rate})))) | _ => Error("Wrong number of variables in Exponential distribution"); let cauchy: array(arg) => result(TreeNode.treeNode, string) = fun | [|Value(local), Value(scale)|] => - Ok(`DistData(`Symbolic(`Cauchy({local, scale})))) + Ok(`Leaf(`SymbolicDist(`Cauchy({local, scale})))) | _ => Error("Wrong number of variables in cauchy distribution"); let triangular: array(arg) => result(TreeNode.treeNode, string) = fun | 
[|Value(low), Value(medium), Value(high)|] => - Ok(`DistData(`Symbolic(`Triangular({low, medium, high})))) + Ok(`Leaf(`SymbolicDist(`Triangular({low, medium, high})))) | _ => Error("Wrong number of variables in triangle distribution"); let multiModal = @@ -196,7 +196,7 @@ module MathAdtToDistDst = { `VerticalScaling(( `Multiply, t, - `DistData(`Symbolic(`Float(w))), + `Leaf(`SymbolicDist(`Float(w))), )), ); }); @@ -235,7 +235,7 @@ module MathAdtToDistDst = { SymbolicDist.ContinuousShape.make(_pdf, cdf); }); switch (shape) { - | Some(s) => Ok(`DistData(`Symbolic(`ContinuousShape(s)))) + | Some(s) => Ok(`Leaf(`SymbolicDist(`ContinuousShape(s)))) | None => Error("Rendering did not work") }; }; @@ -254,11 +254,11 @@ module MathAdtToDistDst = { | ("divide", [|Ok(l), Ok(r)|]) => toOkAlgebraic((`Divide, l, r)) | ("divide", _) => Error("Division needs two operands") | ("pow", _) => Error("Exponentiation is not yet supported.") - | ("leftTruncate", [|Ok(d), Ok(`DistData(`Symbolic(`Float(lc))))|]) => + | ("leftTruncate", [|Ok(d), Ok(`Leaf(`SymbolicDist(`Float(lc))))|]) => toOkTrunctate((Some(lc), None, d)) | ("leftTruncate", _) => Error("leftTruncate needs two arguments: the expression and the cutoff") - | ("rightTruncate", [|Ok(d), Ok(`DistData(`Symbolic(`Float(rc))))|]) => + | ("rightTruncate", [|Ok(d), Ok(`Leaf(`SymbolicDist(`Float(rc))))|]) => toOkTrunctate((None, Some(rc), d)) | ("rightTruncate", _) => Error( @@ -268,8 +268,8 @@ module MathAdtToDistDst = { "truncate", [| Ok(d), - Ok(`DistData(`Symbolic(`Float(lc)))), - Ok(`DistData(`Symbolic(`Float(rc)))), + Ok(`Leaf(`SymbolicDist(`Float(lc)))), + Ok(`Leaf(`SymbolicDist(`Float(rc)))), |], ) => toOkTrunctate((Some(lc), Some(rc), d)) @@ -333,7 +333,7 @@ module MathAdtToDistDst = { let rec nodeParser = fun - | Value(f) => Ok(`DistData(`Symbolic(`Float(f)))) + | Value(f) => Ok(`Leaf(`SymbolicDist(`Float(f)))) | Fn({name, args}) => functionParser(nodeParser, name, args) | _ => { Error("This type not currently supported"); 
diff --git a/src/distPlus/symbolic/SymbolicDist.re b/src/distPlus/symbolic/SymbolicDist.re index 28130a98..58402b38 100644 --- a/src/distPlus/symbolic/SymbolicDist.re +++ b/src/distPlus/symbolic/SymbolicDist.re @@ -1,52 +1,4 @@ -type normal = { - mean: float, - stdev: float, -}; - -type lognormal = { - mu: float, - sigma: float, -}; - -type uniform = { - low: float, - high: float, -}; - -type beta = { - alpha: float, - beta: float, -}; - -type exponential = {rate: float}; - -type cauchy = { - local: float, - scale: float, -}; - -type triangular = { - low: float, - medium: float, - high: float, -}; - -type continuousShape = { - pdf: DistTypes.continuousShape, - cdf: DistTypes.continuousShape, -}; - -type dist = [ - | `Normal(normal) - | `Beta(beta) - | `Lognormal(lognormal) - | `Uniform(uniform) - | `Exponential(exponential) - | `Cauchy(cauchy) - | `Triangular(triangular) - | `ContinuousShape(continuousShape) - | `Float(float) // Dirac delta at x. Practically useful only in the context of multimodals. -]; +open SymbolicTypes; module ContinuousShape = { type t = continuousShape; @@ -124,11 +76,12 @@ module Normal = { `Normal({mean, stdev}); }; - let operate = (operation: SymbolicTypes.Algebraic.t, n1: t, n2: t) => switch(operation){ + let operate = (operation: SymbolicTypes.Algebraic.t, n1: t, n2: t) => + switch (operation) { | `Add => Some(add(n1, n2)) | `Subtract => Some(subtract(n1, n2)) | _ => None - } + }; }; module Beta = { @@ -177,11 +130,12 @@ module Lognormal = { let sigma = l1.sigma +. 
l2.sigma; `Lognormal({mu, sigma}); }; - let operate = (operation: SymbolicTypes.Algebraic.t, n1: t, n2: t) => switch(operation){ + let operate = (operation: SymbolicTypes.Algebraic.t, n1: t, n2: t) => + switch (operation) { | `Multiply => Some(multiply(n1, n2)) | `Divide => Some(divide(n1, n2)) | _ => None - } + }; }; module Uniform = { @@ -202,7 +156,7 @@ module Float = { let toString = Js.Float.toString; }; -module GenericDistFunctions = { +module T = { let minCdfValue = 0.0001; let maxCdfValue = 0.9999; @@ -232,7 +186,7 @@ module GenericDistFunctions = { | `ContinuousShape(n) => ContinuousShape.inv(x, n) }; - let sample: dist => float = + let sample: symbolicDist => float = fun | `Normal(n) => Normal.sample(n) | `Triangular(n) => Triangular.sample(n) @@ -244,7 +198,7 @@ module GenericDistFunctions = { | `Float(n) => Float.sample(n) | `ContinuousShape(n) => ContinuousShape.sample(n); - let toString: dist => string = + let toString: symbolicDist => string = fun | `Triangular(n) => Triangular.toString(n) | `Exponential(n) => Exponential.toString(n) @@ -256,7 +210,7 @@ module GenericDistFunctions = { | `Float(n) => Float.toString(n) | `ContinuousShape(n) => ContinuousShape.toString(n); - let min: dist => float = + let min: symbolicDist => float = fun | `Triangular({low}) => low | `Exponential(n) => Exponential.inv(minCdfValue, n) @@ -268,7 +222,7 @@ module GenericDistFunctions = { | `ContinuousShape(n) => ContinuousShape.inv(minCdfValue, n) | `Float(n) => n; - let max: dist => float = + let max: symbolicDist => float = fun | `Triangular(n) => n.high | `Exponential(n) => Exponential.inv(maxCdfValue, n) @@ -280,7 +234,7 @@ module GenericDistFunctions = { | `Uniform({high}) => high | `Float(n) => n; - let mean: dist => result(float, string) = + let mean: symbolicDist => result(float, string) = fun | `Triangular(n) => Triangular.mean(n) | `Exponential(n) => Exponential.mean(n) @@ -293,7 +247,7 @@ module GenericDistFunctions = { | `Float(n) => Float.mean(n); let 
interpolateXs = - (~xSelection: [ | `Linear | `ByWeight]=`Linear, dist: dist, n) => { + (~xSelection: [ | `Linear | `ByWeight]=`Linear, dist: symbolicDist, n) => { switch (xSelection, dist) { | (`Linear, _) => E.A.Floats.range(min(dist), max(dist), n) /* | (`ByWeight, `Uniform(n)) => @@ -306,4 +260,36 @@ module GenericDistFunctions = { ys |> E.A.fmap(y => inv(y, dist)); }; }; + + /* This returns an optional that wraps a result. If the optional is None, + there is no valid analytic solution. If it Some, it + can still return an error if there is a serious problem, + like in the casea of a divide by 0. + */ + type analyticalSolutionAttempt = [ + | `AnalyticalSolution(SymbolicTypes.symbolicDist) + | `Error(string) + | `NoSolution + ]; + let attemptAlgebraicOperation = + ( + d1: symbolicDist, + d2: symbolicDist, + op: SymbolicTypes.algebraicOperation, + ) + : analyticalSolutionAttempt => + switch (d1, d2) { + | (`Float(v1), `Float(v2)) => + switch (SymbolicTypes.Algebraic.applyFn(op, v1, v2)) { + | Ok(r) => `AnalyticalSolution(`Float(r)) + | Error(n) => `Error(n) + } + | (`Normal(v1), `Normal(v2)) => + Normal.operate(op, v1, v2) + |> E.O.dimap(r => `AnalyticalSolution(r), () => `NoSolution) + | (`Lognormal(v1), `Lognormal(v2)) => + Lognormal.operate(op, v1, v2) + |> E.O.dimap(r => `AnalyticalSolution(r), () => `NoSolution) + | _ => `NoSolution + }; }; diff --git a/src/distPlus/symbolic/SymbolicTypes.re b/src/distPlus/symbolic/SymbolicTypes.re index 07ae5703..ccc2ecba 100644 --- a/src/distPlus/symbolic/SymbolicTypes.re +++ b/src/distPlus/symbolic/SymbolicTypes.re @@ -1,7 +1,57 @@ +type normal = { + mean: float, + stdev: float, +}; + +type lognormal = { + mu: float, + sigma: float, +}; + +type uniform = { + low: float, + high: float, +}; + +type beta = { + alpha: float, + beta: float, +}; + +type exponential = {rate: float}; + +type cauchy = { + local: float, + scale: float, +}; + +type triangular = { + low: float, + medium: float, + high: float, +}; + +type 
continuousShape = { + pdf: DistTypes.continuousShape, + cdf: DistTypes.continuousShape, +}; + +type symbolicDist = [ + | `Normal(normal) + | `Beta(beta) + | `Lognormal(lognormal) + | `Uniform(uniform) + | `Exponential(exponential) + | `Cauchy(cauchy) + | `Triangular(triangular) + | `ContinuousShape(continuousShape) + | `Float(float) // Dirac delta at x. Practically useful only in the context of multimodals. +]; + +type algebraicOperation = [ | `Add | `Multiply | `Subtract | `Divide]; type pointwiseOperation = [ | `Add | `Multiply]; type scaleOperation = [ | `Multiply | `Exponentiate | `Log]; type distToFloatOperation = [ | `Pdf(float) | `Inv(float) | `Mean | `Sample]; -type algebraicOperation = [ | `Add | `Multiply | `Subtract | `Divide]; module Algebraic = { type t = algebraicOperation; diff --git a/src/distPlus/symbolic/TreeNode.re b/src/distPlus/symbolic/TreeNode.re index 92836c0d..de68e433 100644 --- a/src/distPlus/symbolic/TreeNode.re +++ b/src/distPlus/symbolic/TreeNode.re @@ -1,13 +1,12 @@ /* This module represents a tree node. */ open SymbolicTypes; -// todo: Symbolic already has an arbitrary continuousShape option. It seems messy to have both. -type distData = [ - | `Symbolic(SymbolicDist.dist) - | `RenderedShape(DistTypes.shape) +type leaf = [ + | `SymbolicDist(SymbolicTypes.symbolicDist) + | `RenderedDist(DistTypes.shape) ]; /* TreeNodes are either Data (i.e. symbolic or rendered distributions) or Operations. 
Operations always refer to two child nodes.*/ -type treeNode = [ | `DistData(distData) | `Operation(operation)] +type treeNode = [ | `Leaf(leaf) | `Operation(operation)] and operation = [ | `AlgebraicCombination(algebraicOperation, treeNode, treeNode) | `PointwiseCombination(pointwiseOperation, treeNode, treeNode) @@ -48,9 +47,8 @@ module TreeNode = { let rec toString = fun - | `DistData(`Symbolic(d)) => - SymbolicDist.GenericDistFunctions.toString(d) - | `DistData(`RenderedShape(_)) => "[shape]" + | `Leaf(`SymbolicDist(d)) => SymbolicDist.T.toString(d) + | `Leaf(`RenderedDist(_)) => "[shape]" | `Operation(op) => Operation.toString(toString, op); /* The following modules encapsulate everything we can do with @@ -61,73 +59,34 @@ module TreeNode = { For instance, normal(0, 1) + normal(1, 1) -> normal(1, 2). In general, this is implemented via convolution. */ module AlgebraicCombination = { - let simplify = (algebraicOp, t1: t, t2: t): result(treeNode, string) => { - let tryCombiningFloats: tResult = - fun - | `Operation( - `AlgebraicCombination( - algebraicOp, - `DistData(`Symbolic(`Float(v1))), - `DistData(`Symbolic(`Float(v2))), - ), - ) => - SymbolicTypes.Algebraic.applyFn(algebraicOp, v1, v2) - |> E.R.fmap(r => `DistData(`Symbolic(`Float(r)))) - | t => Ok(t); - - let optionToSymbolicResult = (t, o) => - o - |> E.O.dimap(r => `DistData(`Symbolic(r)), () => t) - |> (r => Ok(r)); - - let tryCombiningNormals: tResult = - fun - | `Operation( - `AlgebraicCombination( - operation, - `DistData(`Symbolic(`Normal(n1))), - `DistData(`Symbolic(`Normal(n2))), - ), - ) as t => - SymbolicDist.Normal.operate(operation, n1, n2) - |> optionToSymbolicResult(t) - | t => Ok(t); - - let tryCombiningLognormals: tResult = - fun - | `Operation( - `AlgebraicCombination( - operation, - `DistData(`Symbolic(`Lognormal(n1))), - `DistData(`Symbolic(`Lognormal(n2))), - ), - ) as t => - SymbolicDist.Lognormal.operate(operation, n1, n2) - |> optionToSymbolicResult(t) - | t => Ok(t); - - let 
originalTreeNode = - `Operation(`AlgebraicCombination((algebraicOp, t1, t2))); - - // Feedback: I like this pattern, kudos - originalTreeNode - |> tryCombiningFloats - |> E.R.bind(_, tryCombiningNormals) - |> E.R.bind(_, tryCombiningLognormals); - }; + let toTreeNode = (op, t1, t2) => + `Operation(`AlgebraicCombination((op, t1, t2))); + let tryAnalyticalSolution = + fun + | `Operation( + `AlgebraicCombination( + operation, + `Leaf(`SymbolicDist(d1)), + `Leaf(`SymbolicDist(d2)), + ), + ) as t => + switch (SymbolicDist.T.attemptAlgebraicOperation(d1, d2, operation)) { + | `AnalyticalSolution(symbolicDist) => + Ok(`Leaf(`SymbolicDist(symbolicDist))) + | `Error(er) => Error(er) + | `NoSolution => Ok(t) + } + | t => Ok(t); // todo: I don't like the name evaluateNumerically that much, if this renders and does it algebraically. It's tricky. - let evaluateNumerically = (algebraicOp, operationToDistData, t1, t2) => { + let evaluateNumerically = (algebraicOp, operationToLeaf, t1, t2) => { // force rendering into shapes - let renderShape = r => operationToDistData(`Render(r)); + let renderShape = r => operationToLeaf(`Render(r)); switch (renderShape(t1), renderShape(t2)) { - | ( - Ok(`DistData(`RenderedShape(s1))), - Ok(`DistData(`RenderedShape(s2))), - ) => + | (Ok(`Leaf(`RenderedDist(s1))), Ok(`Leaf(`RenderedDist(s2)))) => Ok( - `DistData( - `RenderedShape( + `Leaf( + `RenderedDist( Distributions.Shape.combineAlgebraically(algebraicOp, s1, s2), ), ), @@ -138,42 +97,40 @@ module TreeNode = { }; }; - let evaluateToDistData = + let evaluateToLeaf = ( algebraicOp: SymbolicTypes.algebraicOperation, - operationToDistData, + operationToLeaf, t1: t, t2: t, ) : result(treeNode, string) => algebraicOp - |> simplify(_, t1, t2) + |> toTreeNode(_, t1, t2) + |> tryAnalyticalSolution |> E.R.bind( _, fun - | `DistData(d) => Ok(`DistData(d)) // the analytical simplifaction worked, nice! + | `Leaf(d) => Ok(`Leaf(d)) // the analytical simplifaction worked, nice! 
| `Operation(_) => // if not, run the convolution - evaluateNumerically(algebraicOp, operationToDistData, t1, t2), + evaluateNumerically(algebraicOp, operationToLeaf, t1, t2), ); }; module VerticalScaling = { - let evaluateToDistData = (scaleOp, operationToDistData, t, scaleBy) => { + let evaluateToLeaf = (scaleOp, operationToLeaf, t, scaleBy) => { // scaleBy has to be a single float, otherwise we'll return an error. let fn = SymbolicTypes.Scale.toFn(scaleOp); let knownIntegralSumFn = SymbolicTypes.Scale.toKnownIntegralSumFn(scaleOp); - let renderedShape = operationToDistData(`Render(t)); + let renderedShape = operationToLeaf(`Render(t)); switch (renderedShape, scaleBy) { - | ( - Ok(`DistData(`RenderedShape(rs))), - `DistData(`Symbolic(`Float(sm))), - ) => + | (Ok(`Leaf(`RenderedDist(rs))), `Leaf(`SymbolicDist(`Float(sm)))) => Ok( - `DistData( - `RenderedShape( + `Leaf( + `RenderedDist( Distributions.Shape.T.mapY( ~knownIntegralSumFn=knownIntegralSumFn(sm), fn(sm), @@ -189,18 +146,15 @@ module TreeNode = { }; module PointwiseCombination = { - let pointwiseAdd = (operationToDistData, t1, t2) => { - let renderedShape1 = operationToDistData(`Render(t1)); - let renderedShape2 = operationToDistData(`Render(t2)); + let pointwiseAdd = (operationToLeaf, t1, t2) => { + let renderedShape1 = operationToLeaf(`Render(t1)); + let renderedShape2 = operationToLeaf(`Render(t2)); switch (renderedShape1, renderedShape2) { - | ( - Ok(`DistData(`RenderedShape(rs1))), - Ok(`DistData(`RenderedShape(rs2))), - ) => + | (Ok(`Leaf(`RenderedDist(rs1))), Ok(`Leaf(`RenderedDist(rs2)))) => Ok( - `DistData( - `RenderedShape( + `Leaf( + `RenderedDist( Distributions.Shape.combinePointwise( ~knownIntegralSumsFn=(a, b) => Some(a +. b), (+.), @@ -216,18 +170,18 @@ module TreeNode = { }; }; - let pointwiseMultiply = (operationToDistData, t1, t2) => { + let pointwiseMultiply = (operationToLeaf, t1, t2) => { // TODO: construct a function that we can easily sample from, to construct - // a RenderedShape. 
Use the xMin and xMax of the rendered shapes to tell the sampling function where to look. + // a RenderedDist. Use the xMin and xMax of the rendered shapes to tell the sampling function where to look. Error( "Pointwise multiplication not yet supported.", ); }; - let evaluateToDistData = (pointwiseOp, operationToDistData, t1, t2) => { + let evaluateToLeaf = (pointwiseOp, operationToLeaf, t1, t2) => { switch (pointwiseOp) { - | `Add => pointwiseAdd(operationToDistData, t1, t2) - | `Multiply => pointwiseMultiply(operationToDistData, t1, t2) + | `Add => pointwiseAdd(operationToLeaf, t1, t2) + | `Multiply => pointwiseMultiply(operationToLeaf, t1, t2) }; }; }; @@ -236,18 +190,17 @@ module TreeNode = { module Simplify = { let tryTruncatingNothing: tResult = fun - | `Operation(`Truncate(None, None, `DistData(d))) => - Ok(`DistData(d)) + | `Operation(`Truncate(None, None, `Leaf(d))) => Ok(`Leaf(d)) | t => Ok(t); let tryTruncatingUniform: tResult = fun - | `Operation(`Truncate(lc, rc, `DistData(`Symbolic(`Uniform(u))))) => { + | `Operation(`Truncate(lc, rc, `Leaf(`SymbolicDist(`Uniform(u))))) => { // just create a new Uniform distribution let newLow = max(E.O.default(neg_infinity, lc), u.low); let newHigh = min(E.O.default(infinity, rc), u.high); Ok( - `DistData(`Symbolic(`Uniform({low: newLow, high: newHigh}))), + `Leaf(`SymbolicDist(`Uniform({low: newLow, high: newHigh}))), ); } | t => Ok(t); @@ -262,27 +215,26 @@ module TreeNode = { }; }; - let evaluateNumerically = - (leftCutoff, rightCutoff, operationToDistData, t) => { + let evaluateNumerically = (leftCutoff, rightCutoff, operationToLeaf, t) => { // TODO: use named args in renderToShape; if we're lucky we can at least get the tail // of a distribution we otherwise wouldn't get at all - let renderedShape = operationToDistData(`Render(t)); + let renderedShape = operationToLeaf(`Render(t)); switch (renderedShape) { - | Ok(`DistData(`RenderedShape(rs))) => + | Ok(`Leaf(`RenderedDist(rs))) => let truncatedShape = rs |> 
Distributions.Shape.T.truncate(leftCutoff, rightCutoff); - Ok(`DistData(`RenderedShape(rs))); + Ok(`Leaf(`RenderedDist(rs))); | Error(e1) => Error(e1) | _ => Error("Could not truncate distribution.") }; }; - let evaluateToDistData = + let evaluateToLeaf = ( leftCutoff: option(float), rightCutoff: option(float), - operationToDistData, + operationToLeaf, t: treeNode, ) : result(treeNode, string) => { @@ -291,31 +243,23 @@ module TreeNode = { |> E.R.bind( _, fun - | `DistData(d) => Ok(`DistData(d)) // the analytical simplifaction worked, nice! + | `Leaf(d) => Ok(`Leaf(d)) // the analytical simplifaction worked, nice! | `Operation(_) => - evaluateNumerically( - leftCutoff, - rightCutoff, - operationToDistData, - t, - ), + evaluateNumerically(leftCutoff, rightCutoff, operationToLeaf, t), ); // if not, run the convolution }; }; module Normalize = { - let rec evaluateToDistData = - (operationToDistData, t: treeNode): result(treeNode, string) => { + let rec evaluateToLeaf = + (operationToLeaf, t: treeNode): result(treeNode, string) => { switch (t) { - | `DistData(`Symbolic(_)) => Ok(t) - | `DistData(`RenderedShape(s)) => + | `Leaf(`SymbolicDist(_)) => Ok(t) + | `Leaf(`RenderedDist(s)) => let normalized = Distributions.Shape.T.normalize(s); - Ok(`DistData(`RenderedShape(normalized))); + Ok(`Leaf(`RenderedDist(normalized))); | `Operation(op) => - E.R.bind( - operationToDistData(op), - evaluateToDistData(operationToDistData), - ) + E.R.bind(operationToLeaf(op), evaluateToLeaf(operationToLeaf)) }; }; }; @@ -324,14 +268,14 @@ module TreeNode = { let evaluateFromSymbolic = (distToFloatOp: distToFloatOperation, s) => { let value = switch (distToFloatOp) { - | `Pdf(f) => Ok(SymbolicDist.GenericDistFunctions.pdf(f, s)) - | `Inv(f) => Ok(SymbolicDist.GenericDistFunctions.inv(f, s)) - | `Sample => Ok(SymbolicDist.GenericDistFunctions.sample(s)) - | `Mean => SymbolicDist.GenericDistFunctions.mean(s) + | `Pdf(f) => Ok(SymbolicDist.T.pdf(f, s)) + | `Inv(f) => Ok(SymbolicDist.T.inv(f, 
s)) + | `Sample => Ok(SymbolicDist.T.sample(s)) + | `Mean => SymbolicDist.T.mean(s) }; - E.R.bind(value, v => Ok(`DistData(`Symbolic(`Float(v))))); + E.R.bind(value, v => Ok(`Leaf(`SymbolicDist(`Float(v))))); }; - let evaluateFromRenderedShape = + let evaluateFromRenderedDist = (distToFloatOp: distToFloatOperation, rs: DistTypes.shape) : result(treeNode, string) => { let value = @@ -341,45 +285,45 @@ module TreeNode = { | `Sample => Ok(Distributions.Shape.sample(rs)) | `Mean => Ok(Distributions.Shape.T.mean(rs)) }; - E.R.bind(value, v => Ok(`DistData(`Symbolic(`Float(v))))); + E.R.bind(value, v => Ok(`Leaf(`SymbolicDist(`Float(v))))); }; - let rec evaluateToDistData = + let rec evaluateToLeaf = ( distToFloatOp: distToFloatOperation, - operationToDistData, + operationToLeaf, t: treeNode, ) : result(treeNode, string) => { switch (t) { - | `DistData(`Symbolic(s)) => evaluateFromSymbolic(distToFloatOp, s) // we want to evaluate the distToFloatOp on the symbolic dist - | `DistData(`RenderedShape(rs)) => - evaluateFromRenderedShape(distToFloatOp, rs) + | `Leaf(`SymbolicDist(s)) => evaluateFromSymbolic(distToFloatOp, s) // we want to evaluate the distToFloatOp on the symbolic dist + | `Leaf(`RenderedDist(rs)) => + evaluateFromRenderedDist(distToFloatOp, rs) | `Operation(op) => E.R.bind( - operationToDistData(op), - evaluateToDistData(distToFloatOp, operationToDistData), + operationToLeaf(op), + evaluateToLeaf(distToFloatOp, operationToLeaf), ) }; }; }; module Render = { - let rec evaluateToRenderedShape = + let rec evaluateToRenderedDist = ( - operationToDistData: operation => result(t, string), + operationToLeaf: operation => result(t, string), sampleCount: int, t: treeNode, ) : result(t, string) => { switch (t) { - | `DistData(`RenderedShape(s)) => Ok(`DistData(`RenderedShape(s))) // already a rendered shape, we're done here - | `DistData(`Symbolic(d)) => + | `Leaf(`RenderedDist(s)) => Ok(`Leaf(`RenderedDist(s))) // already a rendered shape, we're done here + | 
`Leaf(`SymbolicDist(d)) => // todo: move to dist switch (d) { | `Float(v) => Ok( - `DistData( - `RenderedShape( + `Leaf( + `RenderedDist( Discrete( Distributions.Discrete.make( {xs: [|v|], ys: [|1.0|]}, @@ -391,16 +335,15 @@ module TreeNode = { ) | _ => let xs = - SymbolicDist.GenericDistFunctions.interpolateXs( + SymbolicDist.T.interpolateXs( ~xSelection=`ByWeight, d, sampleCount, ); - let ys = - xs |> E.A.fmap(x => SymbolicDist.GenericDistFunctions.pdf(x, d)); + let ys = xs |> E.A.fmap(x => SymbolicDist.T.pdf(x, d)); Ok( - `DistData( - `RenderedShape( + `Leaf( + `RenderedDist( Continuous( Distributions.Continuous.make( `Linear, @@ -414,57 +357,57 @@ module TreeNode = { } | `Operation(op) => E.R.bind( - operationToDistData(op), - evaluateToRenderedShape(operationToDistData, sampleCount), + operationToLeaf(op), + evaluateToRenderedDist(operationToLeaf, sampleCount), ) }; }; }; - let rec operationToDistData = + let rec operationToLeaf = (sampleCount: int, op: operation): result(t, string) => { - // the functions that convert the Operation nodes to DistData nodes need to + // the functions that convert the Operation nodes to Leaf nodes need to // have a way to call this function on their children, if their children are themselves Operation nodes. 
switch (op) { | `AlgebraicCombination(algebraicOp, t1, t2) => - AlgebraicCombination.evaluateToDistData( + AlgebraicCombination.evaluateToLeaf( algebraicOp, - operationToDistData(sampleCount), + operationToLeaf(sampleCount), t1, t2 // we want to give it the option to render or simply leave it as is ) | `PointwiseCombination(pointwiseOp, t1, t2) => - PointwiseCombination.evaluateToDistData( + PointwiseCombination.evaluateToLeaf( pointwiseOp, - operationToDistData(sampleCount), + operationToLeaf(sampleCount), t1, t2, ) | `VerticalScaling(scaleOp, t, scaleBy) => - VerticalScaling.evaluateToDistData( + VerticalScaling.evaluateToLeaf( scaleOp, - operationToDistData(sampleCount), + operationToLeaf(sampleCount), t, scaleBy, ) | `Truncate(leftCutoff, rightCutoff, t) => - Truncate.evaluateToDistData( + Truncate.evaluateToLeaf( leftCutoff, rightCutoff, - operationToDistData(sampleCount), + operationToLeaf(sampleCount), t, ) | `FloatFromDist(distToFloatOp, t) => - FloatFromDist.evaluateToDistData( + FloatFromDist.evaluateToLeaf( distToFloatOp, - operationToDistData(sampleCount), + operationToLeaf(sampleCount), t, ) | `Normalize(t) => - Normalize.evaluateToDistData(operationToDistData(sampleCount), t) + Normalize.evaluateToLeaf(operationToLeaf(sampleCount), t) | `Render(t) => - Render.evaluateToRenderedShape( - operationToDistData(sampleCount), + Render.evaluateToRenderedDist( + operationToLeaf(sampleCount), sampleCount, t, ) @@ -474,23 +417,23 @@ module TreeNode = { /* This function recursively goes through the nodes of the parse tree, replacing each Operation node and its subtree with a Data node. Whenever possible, the replacement produces a new Symbolic Data node, - but most often it will produce a RenderedShape. - This function is used mainly to turn a parse tree into a single RenderedShape + but most often it will produce a RenderedDist. + This function is used mainly to turn a parse tree into a single RenderedDist that can then be displayed to the user. 
*/ - let toDistData = (treeNode: t, sampleCount: int): result(t, string) => { + let toLeaf = (treeNode: t, sampleCount: int): result(t, string) => { switch (treeNode) { - | `DistData(d) => Ok(`DistData(d)) - | `Operation(op) => operationToDistData(sampleCount, op) + | `Leaf(d) => Ok(`Leaf(d)) + | `Operation(op) => operationToLeaf(sampleCount, op) }; }; }; let toShape = (sampleCount: int, treeNode: treeNode) => { let renderResult = - TreeNode.toDistData(`Operation(`Render(treeNode)), sampleCount); + TreeNode.toLeaf(`Operation(`Render(treeNode)), sampleCount); switch (renderResult) { - | Ok(`DistData(`RenderedShape(rs))) => + | Ok(`Leaf(`RenderedDist(rs))) => let continuous = Distributions.Shape.T.toContinuous(rs); let discrete = Distributions.Shape.T.toDiscrete(rs); let shape = MixedShapeBuilder.buildSimple(~continuous, ~discrete); From 4663700f672357f2c2158ee7558ade0eb3f30302 Mon Sep 17 00:00:00 2001 From: Ozzie Gooen Date: Thu, 2 Jul 2020 12:25:04 +0100 Subject: [PATCH 21/31] Moved out distToFloatOps to better places --- src/distPlus/distribution/Distributions.re | 22 ++++++++++------------ src/distPlus/symbolic/SymbolicDist.re | 8 ++++++++ src/distPlus/symbolic/SymbolicTypes.re | 1 + src/distPlus/symbolic/TreeNode.re | 20 ++++---------------- 4 files changed, 23 insertions(+), 28 deletions(-) diff --git a/src/distPlus/distribution/Distributions.re b/src/distPlus/distribution/Distributions.re index 968c275f..2e4856b9 100644 --- a/src/distPlus/distribution/Distributions.re +++ b/src/distPlus/distribution/Distributions.re @@ -333,12 +333,7 @@ module Continuous = { }; let combineAlgebraically = - ( - ~downsample=false, - op: SymbolicTypes.algebraicOperation, - t1: t, - t2: t, - ) => { + (~downsample=false, op: SymbolicTypes.algebraicOperation, t1: t, t2: t) => { let s1 = t1 |> getShape; let s2 = t2 |> getShape; let t1n = s1 |> XYShape.T.length; @@ -845,12 +840,7 @@ module Mixed = { }); let combineAlgebraically = - ( - ~downsample=false, - op: 
SymbolicTypes.algebraicOperation, - t1: t, - t2: t, - ) + (~downsample=false, op: SymbolicTypes.algebraicOperation, t1: t, t2: t) : t => { // Discrete convolution can cause a huge increase in the number of samples, // so we'll first downsample. @@ -1105,6 +1095,14 @@ module Shape = { | Continuous(m) => Continuous.T.variance(m) }; }); + + let operate = (distToFloatOp: SymbolicTypes.distToFloatOperation, s) => + switch (distToFloatOp) { + | `Pdf(f) => pdf(f, s) + | `Inv(f) => inv(f, s) + | `Sample => sample(s) + | `Mean => T.mean(s) + }; }; module DistPlus = { diff --git a/src/distPlus/symbolic/SymbolicDist.re b/src/distPlus/symbolic/SymbolicDist.re index 58402b38..15ce2861 100644 --- a/src/distPlus/symbolic/SymbolicDist.re +++ b/src/distPlus/symbolic/SymbolicDist.re @@ -246,6 +246,14 @@ module T = { | `Uniform(n) => Uniform.mean(n) | `Float(n) => Float.mean(n); + let operate = (distToFloatOp: distToFloatOperation, s) => + switch (distToFloatOp) { + | `Pdf(f) => Ok(pdf(f, s)) + | `Inv(f) => Ok(inv(f, s)) + | `Sample => Ok(sample(s)) + | `Mean => mean(s) + }; + let interpolateXs = (~xSelection: [ | `Linear | `ByWeight]=`Linear, dist: symbolicDist, n) => { switch (xSelection, dist) { diff --git a/src/distPlus/symbolic/SymbolicTypes.re b/src/distPlus/symbolic/SymbolicTypes.re index ccc2ecba..b0d92d41 100644 --- a/src/distPlus/symbolic/SymbolicTypes.re +++ b/src/distPlus/symbolic/SymbolicTypes.re @@ -48,6 +48,7 @@ type symbolicDist = [ | `Float(float) // Dirac delta at x. Practically useful only in the context of multimodals. 
]; +// todo: These operations are really applicable for all dists type algebraicOperation = [ | `Add | `Multiply | `Subtract | `Divide]; type pointwiseOperation = [ | `Add | `Multiply]; type scaleOperation = [ | `Multiply | `Exponentiate | `Log]; diff --git a/src/distPlus/symbolic/TreeNode.re b/src/distPlus/symbolic/TreeNode.re index de68e433..c29505e5 100644 --- a/src/distPlus/symbolic/TreeNode.re +++ b/src/distPlus/symbolic/TreeNode.re @@ -266,26 +266,14 @@ module TreeNode = { module FloatFromDist = { let evaluateFromSymbolic = (distToFloatOp: distToFloatOperation, s) => { - let value = - switch (distToFloatOp) { - | `Pdf(f) => Ok(SymbolicDist.T.pdf(f, s)) - | `Inv(f) => Ok(SymbolicDist.T.inv(f, s)) - | `Sample => Ok(SymbolicDist.T.sample(s)) - | `Mean => SymbolicDist.T.mean(s) - }; - E.R.bind(value, v => Ok(`Leaf(`SymbolicDist(`Float(v))))); + SymbolicDist.T.operate(distToFloatOp, s) + |> E.R.bind(_, v => Ok(`Leaf(`SymbolicDist(`Float(v))))); }; let evaluateFromRenderedDist = (distToFloatOp: distToFloatOperation, rs: DistTypes.shape) : result(treeNode, string) => { - let value = - switch (distToFloatOp) { - | `Pdf(f) => Ok(Distributions.Shape.pdf(f, rs)) - | `Inv(f) => Ok(Distributions.Shape.inv(f, rs)) // TODO: this is tricky for discrete distributions, because they have a stepwise CDF - | `Sample => Ok(Distributions.Shape.sample(rs)) - | `Mean => Ok(Distributions.Shape.T.mean(rs)) - }; - E.R.bind(value, v => Ok(`Leaf(`SymbolicDist(`Float(v))))); + Distributions.Shape.operate(distToFloatOp, rs) + |> (v => Ok(`Leaf(`SymbolicDist(`Float(v))))); }; let rec evaluateToLeaf = ( From 99c0803953986dccaf1410c9b5707f555c0773fc Mon Sep 17 00:00:00 2001 From: Ozzie Gooen Date: Thu, 2 Jul 2020 13:24:49 +0100 Subject: [PATCH 22/31] Moved truncateString work to separate section --- src/distPlus/symbolic/SymbolicDist.re | 2 +- src/distPlus/symbolic/SymbolicTypes.re | 11 +++++++++-- src/distPlus/symbolic/TreeNode.re | 27 +++++++++++++++----------- 3 files changed, 26 
insertions(+), 14 deletions(-) diff --git a/src/distPlus/symbolic/SymbolicDist.re b/src/distPlus/symbolic/SymbolicDist.re index 15ce2861..bfe4cc6e 100644 --- a/src/distPlus/symbolic/SymbolicDist.re +++ b/src/distPlus/symbolic/SymbolicDist.re @@ -279,7 +279,7 @@ module T = { | `Error(string) | `NoSolution ]; - let attemptAlgebraicOperation = + let attemptAnalyticalOperation = ( d1: symbolicDist, d2: symbolicDist, diff --git a/src/distPlus/symbolic/SymbolicTypes.re b/src/distPlus/symbolic/SymbolicTypes.re index b0d92d41..2dbb95db 100644 --- a/src/distPlus/symbolic/SymbolicTypes.re +++ b/src/distPlus/symbolic/SymbolicTypes.re @@ -95,8 +95,8 @@ module DistToFloat = { let format = (operation, value) => switch (operation) { - | `Pdf(f) => {j|pdf(x=$f,$value) |j} - | `Inv(f) => {j|inv(x=$f,$value) |j} + | `Pdf(f) => {j|pdf(x=$f,$value)|j} + | `Inv(f) => {j|inv(x=$f,$value)|j} | `Sample => "sample($value)" | `Mean => "mean($value)" }; @@ -110,6 +110,13 @@ module Scale = { | `Exponentiate => ( ** ) | `Log => ((a, b) => log(a) /. log(b)); + let format = (operation:t, value, scaleBy) => + switch (operation) { + | `Multiply => {j|scaleMultiply($value, $scaleBy) |j} + | `Exponentiate => {j|ScaleExponentiate($value, $scaleBy) |j} + | `Log => {j|ScaleLog($value, $scaleBy) |j} + }; + let toKnownIntegralSumFn = fun | `Multiply => ((a, b) => Some(a *. b)) diff --git a/src/distPlus/symbolic/TreeNode.re b/src/distPlus/symbolic/TreeNode.re index c29505e5..155b9169 100644 --- a/src/distPlus/symbolic/TreeNode.re +++ b/src/distPlus/symbolic/TreeNode.re @@ -5,6 +5,7 @@ type leaf = [ | `SymbolicDist(SymbolicTypes.symbolicDist) | `RenderedDist(DistTypes.shape) ]; + /* TreeNodes are either Data (i.e. symbolic or rendered distributions) or Operations. 
Operations always refer to two child nodes.*/ type treeNode = [ | `Leaf(leaf) | `Operation(operation)] and operation = [ @@ -19,25 +20,29 @@ and operation = [ module Operation = { type t = operation; + let truncateToString = + (left: option(float), right: option(float), nodeToString) => { + let left = left |> E.O.dimap(Js.Float.toString, () => "-inf"); + let right = right |> E.O.dimap(Js.Float.toString, () => "inf"); + {j|truncate($nodeToString, $left, $right)|j}; + }; + let toString = nodeToString => fun | `AlgebraicCombination(op, t1, t2) => SymbolicTypes.Algebraic.format(op, nodeToString(t1), nodeToString(t2)) | `PointwiseCombination(op, t1, t2) => SymbolicTypes.Pointwise.format(op, nodeToString(t1), nodeToString(t2)) - | `VerticalScaling(_scaleOp, t, scaleBy) => - nodeToString(t) ++ " @ " ++ nodeToString(scaleBy) + | `VerticalScaling(scaleOp, t, scaleBy) => + SymbolicTypes.Scale.format( + scaleOp, + nodeToString(t), + nodeToString(scaleBy), + ) | `Normalize(t) => "normalize(" ++ nodeToString(t) ++ ")" | `FloatFromDist(floatFromDistOp, t) => SymbolicTypes.DistToFloat.format(floatFromDistOp, nodeToString(t)) - | `Truncate(lc, rc, t) => - "truncate(" - ++ nodeToString(t) - ++ ", " - ++ E.O.dimap(Js.Float.toString, () => "-inf", lc) - ++ ", " - ++ E.O.dimap(Js.Float.toString, () => "inf", rc) - ++ ")" + | `Truncate(lc, rc, t) => truncateToString(lc, rc, nodeToString(t)) | `Render(t) => nodeToString(t); }; @@ -70,7 +75,7 @@ module TreeNode = { `Leaf(`SymbolicDist(d2)), ), ) as t => - switch (SymbolicDist.T.attemptAlgebraicOperation(d1, d2, operation)) { + switch (SymbolicDist.T.attemptAnalyticalOperation(d1, d2, operation)) { | `AnalyticalSolution(symbolicDist) => Ok(`Leaf(`SymbolicDist(symbolicDist))) | `Error(er) => Error(er) From 96df9ced8598fab099fbd554171e2c58788ad8bc Mon Sep 17 00:00:00 2001 From: Ozzie Gooen Date: Thu, 2 Jul 2020 14:30:01 +0100 Subject: [PATCH 23/31] Moved rendering code from TreeNode to SymbolicDist --- src/distPlus/symbolic/SymbolicDist.re 
| 14 ++++ src/distPlus/symbolic/TreeNode.re | 102 ++++++++------------------ 2 files changed, 43 insertions(+), 73 deletions(-) diff --git a/src/distPlus/symbolic/SymbolicDist.re b/src/distPlus/symbolic/SymbolicDist.re index bfe4cc6e..18142e74 100644 --- a/src/distPlus/symbolic/SymbolicDist.re +++ b/src/distPlus/symbolic/SymbolicDist.re @@ -300,4 +300,18 @@ module T = { |> E.O.dimap(r => `AnalyticalSolution(r), () => `NoSolution) | _ => `NoSolution }; + + let toShape = (sampleCount, d: symbolicDist): DistTypes.shape => + switch (d) { + | `Float(v) => + Discrete( + Distributions.Discrete.make({xs: [|v|], ys: [|1.0|]}, Some(1.0)), + ) + | _ => + let xs = interpolateXs(~xSelection=`ByWeight, d, sampleCount); + let ys = xs |> E.A.fmap(x => pdf(x, d)); + Continuous( + Distributions.Continuous.make(`Linear, {xs, ys}, Some(1.0)), + ); + }; }; diff --git a/src/distPlus/symbolic/TreeNode.re b/src/distPlus/symbolic/TreeNode.re index 155b9169..ba15712e 100644 --- a/src/distPlus/symbolic/TreeNode.re +++ b/src/distPlus/symbolic/TreeNode.re @@ -102,16 +102,15 @@ module TreeNode = { }; }; - let evaluateToLeaf = + let toLeaf = ( - algebraicOp: SymbolicTypes.algebraicOperation, operationToLeaf, + algebraicOp: SymbolicTypes.algebraicOperation, t1: t, t2: t, ) : result(treeNode, string) => - algebraicOp - |> toTreeNode(_, t1, t2) + toTreeNode(algebraicOp, t1, t2) |> tryAnalyticalSolution |> E.R.bind( _, @@ -124,7 +123,7 @@ module TreeNode = { }; module VerticalScaling = { - let evaluateToLeaf = (scaleOp, operationToLeaf, t, scaleBy) => { + let toLeaf = (operationToLeaf,scaleOp, t, scaleBy) => { // scaleBy has to be a single float, otherwise we'll return an error. 
let fn = SymbolicTypes.Scale.toFn(scaleOp); let knownIntegralSumFn = @@ -183,7 +182,7 @@ module TreeNode = { ); }; - let evaluateToLeaf = (pointwiseOp, operationToLeaf, t1, t2) => { + let toLeaf = (operationToLeaf,pointwiseOp, t1, t2) => { switch (pointwiseOp) { | `Add => pointwiseAdd(operationToLeaf, t1, t2) | `Multiply => pointwiseMultiply(operationToLeaf, t1, t2) @@ -235,11 +234,11 @@ module TreeNode = { }; }; - let evaluateToLeaf = + let toLeaf = ( + operationToLeaf, leftCutoff: option(float), rightCutoff: option(float), - operationToLeaf, t: treeNode, ) : result(treeNode, string) => { @@ -256,15 +255,13 @@ module TreeNode = { }; module Normalize = { - let rec evaluateToLeaf = - (operationToLeaf, t: treeNode): result(treeNode, string) => { + let rec toLeaf = (operationToLeaf, t: treeNode): result(treeNode, string) => { switch (t) { - | `Leaf(`SymbolicDist(_)) => Ok(t) | `Leaf(`RenderedDist(s)) => - let normalized = Distributions.Shape.T.normalize(s); - Ok(`Leaf(`RenderedDist(normalized))); + Ok(`Leaf(`RenderedDist(Distributions.Shape.T.normalize(s)))) + | `Leaf(`SymbolicDist(_)) => Ok(t) | `Operation(op) => - E.R.bind(operationToLeaf(op), evaluateToLeaf(operationToLeaf)) + operationToLeaf(op) |> E.R.bind(_, toLeaf(operationToLeaf)) }; }; }; @@ -280,10 +277,10 @@ module TreeNode = { Distributions.Shape.operate(distToFloatOp, rs) |> (v => Ok(`Leaf(`SymbolicDist(`Float(v))))); }; - let rec evaluateToLeaf = + let rec toLeaf = ( - distToFloatOp: distToFloatOperation, operationToLeaf, + distToFloatOp: distToFloatOperation, t: treeNode, ) : result(treeNode, string) => { @@ -294,14 +291,14 @@ module TreeNode = { | `Operation(op) => E.R.bind( operationToLeaf(op), - evaluateToLeaf(distToFloatOp, operationToLeaf), + toLeaf(operationToLeaf,distToFloatOp), ) }; }; }; module Render = { - let rec evaluateToRenderedDist = + let rec toLeaf = ( operationToLeaf: operation => result(t, string), sampleCount: int, @@ -309,49 +306,13 @@ module TreeNode = { ) : result(t, string) => { 
switch (t) { - | `Leaf(`RenderedDist(s)) => Ok(`Leaf(`RenderedDist(s))) // already a rendered shape, we're done here | `Leaf(`SymbolicDist(d)) => - // todo: move to dist - switch (d) { - | `Float(v) => - Ok( - `Leaf( - `RenderedDist( - Discrete( - Distributions.Discrete.make( - {xs: [|v|], ys: [|1.0|]}, - Some(1.0), - ), - ), - ), - ), - ) - | _ => - let xs = - SymbolicDist.T.interpolateXs( - ~xSelection=`ByWeight, - d, - sampleCount, - ); - let ys = xs |> E.A.fmap(x => SymbolicDist.T.pdf(x, d)); - Ok( - `Leaf( - `RenderedDist( - Continuous( - Distributions.Continuous.make( - `Linear, - {xs, ys}, - Some(1.0), - ), - ), - ), - ), - ); - } + Ok(`Leaf(`RenderedDist(SymbolicDist.T.toShape(sampleCount, d)))) + | `Leaf(`RenderedDist(_)) as t => Ok(t) // already a rendered shape, we're done here | `Operation(op) => E.R.bind( operationToLeaf(op), - evaluateToRenderedDist(operationToLeaf, sampleCount), + toLeaf(operationToLeaf, sampleCount), ) }; }; @@ -363,43 +324,38 @@ module TreeNode = { // have a way to call this function on their children, if their children are themselves Operation nodes. 
switch (op) { | `AlgebraicCombination(algebraicOp, t1, t2) => - AlgebraicCombination.evaluateToLeaf( - algebraicOp, + AlgebraicCombination.toLeaf( operationToLeaf(sampleCount), + algebraicOp, t1, t2 // we want to give it the option to render or simply leave it as is ) | `PointwiseCombination(pointwiseOp, t1, t2) => - PointwiseCombination.evaluateToLeaf( - pointwiseOp, + PointwiseCombination.toLeaf( operationToLeaf(sampleCount), + pointwiseOp, t1, t2, ) | `VerticalScaling(scaleOp, t, scaleBy) => - VerticalScaling.evaluateToLeaf( - scaleOp, + VerticalScaling.toLeaf( operationToLeaf(sampleCount), + scaleOp, t, scaleBy, ) | `Truncate(leftCutoff, rightCutoff, t) => - Truncate.evaluateToLeaf( + Truncate.toLeaf( + operationToLeaf(sampleCount), leftCutoff, rightCutoff, - operationToLeaf(sampleCount), t, ) | `FloatFromDist(distToFloatOp, t) => - FloatFromDist.evaluateToLeaf( - distToFloatOp, - operationToLeaf(sampleCount), - t, - ) - | `Normalize(t) => - Normalize.evaluateToLeaf(operationToLeaf(sampleCount), t) + FloatFromDist.toLeaf(operationToLeaf(sampleCount),distToFloatOp, t) + | `Normalize(t) => Normalize.toLeaf(operationToLeaf(sampleCount), t) | `Render(t) => - Render.evaluateToRenderedDist( + Render.toLeaf( operationToLeaf(sampleCount), sampleCount, t, From 19e9eaee83855d4daf7ed45d7beb71c7e0719813 Mon Sep 17 00:00:00 2001 From: Ozzie Gooen Date: Thu, 2 Jul 2020 14:33:41 +0100 Subject: [PATCH 24/31] Minor renaming --- src/distPlus/symbolic/TreeNode.re | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/src/distPlus/symbolic/TreeNode.re b/src/distPlus/symbolic/TreeNode.re index ba15712e..b6f83801 100644 --- a/src/distPlus/symbolic/TreeNode.re +++ b/src/distPlus/symbolic/TreeNode.re @@ -123,7 +123,7 @@ module TreeNode = { }; module VerticalScaling = { - let toLeaf = (operationToLeaf,scaleOp, t, scaleBy) => { + let toLeaf = (operationToLeaf, scaleOp, t, scaleBy) => { // scaleBy has to be a single float, otherwise we'll return 
an error. let fn = SymbolicTypes.Scale.toFn(scaleOp); let knownIntegralSumFn = @@ -182,7 +182,7 @@ module TreeNode = { ); }; - let toLeaf = (operationToLeaf,pointwiseOp, t1, t2) => { + let toLeaf = (operationToLeaf, pointwiseOp, t1, t2) => { switch (pointwiseOp) { | `Add => pointwiseAdd(operationToLeaf, t1, t2) | `Multiply => pointwiseMultiply(operationToLeaf, t1, t2) @@ -267,11 +267,11 @@ module TreeNode = { }; module FloatFromDist = { - let evaluateFromSymbolic = (distToFloatOp: distToFloatOperation, s) => { + let symbolicToLeaf = (distToFloatOp: distToFloatOperation, s) => { SymbolicDist.T.operate(distToFloatOp, s) |> E.R.bind(_, v => Ok(`Leaf(`SymbolicDist(`Float(v))))); }; - let evaluateFromRenderedDist = + let renderedToLeaf = (distToFloatOp: distToFloatOperation, rs: DistTypes.shape) : result(treeNode, string) => { Distributions.Shape.operate(distToFloatOp, rs) @@ -285,13 +285,12 @@ module TreeNode = { ) : result(treeNode, string) => { switch (t) { - | `Leaf(`SymbolicDist(s)) => evaluateFromSymbolic(distToFloatOp, s) // we want to evaluate the distToFloatOp on the symbolic dist - | `Leaf(`RenderedDist(rs)) => - evaluateFromRenderedDist(distToFloatOp, rs) + | `Leaf(`SymbolicDist(s)) => symbolicToLeaf(distToFloatOp, s) // we want to evaluate the distToFloatOp on the symbolic dist + | `Leaf(`RenderedDist(rs)) => renderedToLeaf(distToFloatOp, rs) | `Operation(op) => E.R.bind( operationToLeaf(op), - toLeaf(operationToLeaf,distToFloatOp), + toLeaf(operationToLeaf, distToFloatOp), ) }; }; @@ -310,10 +309,7 @@ module TreeNode = { Ok(`Leaf(`RenderedDist(SymbolicDist.T.toShape(sampleCount, d)))) | `Leaf(`RenderedDist(_)) as t => Ok(t) // already a rendered shape, we're done here | `Operation(op) => - E.R.bind( - operationToLeaf(op), - toLeaf(operationToLeaf, sampleCount), - ) + E.R.bind(operationToLeaf(op), toLeaf(operationToLeaf, sampleCount)) }; }; }; @@ -352,14 +348,10 @@ module TreeNode = { t, ) | `FloatFromDist(distToFloatOp, t) => - 
FloatFromDist.toLeaf(operationToLeaf(sampleCount),distToFloatOp, t) + FloatFromDist.toLeaf(operationToLeaf(sampleCount), distToFloatOp, t) | `Normalize(t) => Normalize.toLeaf(operationToLeaf(sampleCount), t) | `Render(t) => - Render.toLeaf( - operationToLeaf(sampleCount), - sampleCount, - t, - ) + Render.toLeaf(operationToLeaf(sampleCount), sampleCount, t) }; }; From 41eca03618c24a64466d77c10b0433f98253b1c5 Mon Sep 17 00:00:00 2001 From: Ozzie Gooen Date: Thu, 2 Jul 2020 18:12:03 +0100 Subject: [PATCH 25/31] Set up new expressionTree directory --- __tests__/Distributions__Test.re | 4 +- src/components/DistBuilder3.re | 4 +- src/components/Drawer.re | 2 +- .../distribution/AlgebraicCombinations.re | 2 +- src/distPlus/distribution/Distributions.re | 16 +- src/distPlus/expressionTree/ExpressionTree.re | 22 + .../expressionTree/ExpressionTreeEvaluator.re | 294 +++++++++++++ .../expressionTree/ExpressionTypes.re | 24 ++ .../MathJsParser.re | 24 +- .../{symbolic => expressionTree}/Mathjs.re | 0 src/distPlus/expressionTree/Operation.re | 93 +++++ src/distPlus/renderers/RenderTypes.re | 2 +- src/distPlus/renderers/ShapeRenderer.re | 2 +- src/distPlus/symbolic/MathjsWrapper.js | 8 - src/distPlus/symbolic/SymbolicDist.re | 10 +- src/distPlus/symbolic/SymbolicTypes.re | 78 +--- src/distPlus/symbolic/TreeNode.re | 387 ------------------ 17 files changed, 467 insertions(+), 505 deletions(-) create mode 100644 src/distPlus/expressionTree/ExpressionTree.re create mode 100644 src/distPlus/expressionTree/ExpressionTreeEvaluator.re create mode 100644 src/distPlus/expressionTree/ExpressionTypes.re rename src/distPlus/{symbolic => expressionTree}/MathJsParser.re (91%) rename src/distPlus/{symbolic => expressionTree}/Mathjs.re (100%) create mode 100644 src/distPlus/expressionTree/Operation.re delete mode 100644 src/distPlus/symbolic/MathjsWrapper.js delete mode 100644 src/distPlus/symbolic/TreeNode.re diff --git a/__tests__/Distributions__Test.re b/__tests__/Distributions__Test.re 
index 4e16bb80..341ef8a4 100644 --- a/__tests__/Distributions__Test.re +++ b/__tests__/Distributions__Test.re @@ -383,9 +383,9 @@ describe("Shape", () => { let numSamples = 10000; open Distributions.Shape; let normal: SymbolicTypes.symbolicDist = `Normal({mean, stdev}); - let normalShape = TreeNode.toShape(numSamples, `Leaf(`SymbolicDist(normal))); + let normalShape = ExpressionTree.toShape(numSamples, `Leaf(`SymbolicDist(normal))); let lognormal = SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev); - let lognormalShape = TreeNode.toShape(numSamples, `Leaf(`SymbolicDist(lognormal))); + let lognormalShape = ExpressionTree.toShape(numSamples, `Leaf(`SymbolicDist(lognormal))); makeTestCloseEquality( "Mean of a normal", diff --git a/src/components/DistBuilder3.re b/src/components/DistBuilder3.re index 124aad0f..c0a5aac3 100644 --- a/src/components/DistBuilder3.re +++ b/src/components/DistBuilder3.re @@ -37,13 +37,13 @@ module DemoDist = { let parsed1 = MathJsParser.fromString(guesstimatorString); let shape = switch (parsed1) { - | Ok(r) => Some(TreeNode.toShape(10000, r)) + | Ok(r) => Some(ExpressionTree.toShape(10000, r)) | _ => None }; let str = switch (parsed1) { - | Ok(r) => TreeNode.toString(r) + | Ok(r) => ExpressionTree.toString(r) | Error(e) => e }; diff --git a/src/components/Drawer.re b/src/components/Drawer.re index f39cd3ad..8a0f2cfa 100644 --- a/src/components/Drawer.re +++ b/src/components/Drawer.re @@ -389,7 +389,7 @@ module Draw = { let numSamples = 3000; let normal: SymbolicTypes.symbolicDist = `Normal({mean, stdev}); - let normalShape = TreeNode.toShape(numSamples, `Leaf(`SymbolicDist(normal))); + let normalShape = ExpressionTree.toShape(numSamples, `Leaf(`SymbolicDist(normal))); let xyShape: Types.xyShape = switch (normalShape) { | Mixed(_) => {xs: [||], ys: [||]} diff --git a/src/distPlus/distribution/AlgebraicCombinations.re b/src/distPlus/distribution/AlgebraicCombinations.re index 17e1a1c0..538a5117 100644 --- 
a/src/distPlus/distribution/AlgebraicCombinations.re +++ b/src/distPlus/distribution/AlgebraicCombinations.re @@ -110,7 +110,7 @@ let toDiscretePointMassesFromTriangulars = }; let combineShapesContinuousContinuous = - (op: SymbolicTypes.algebraicOperation, s1: DistTypes.xyShape, s2: DistTypes.xyShape) + (op: ExpressionTypes.algebraicOperation, s1: DistTypes.xyShape, s2: DistTypes.xyShape) : DistTypes.xyShape => { let t1n = s1 |> XYShape.T.length; let t2n = s2 |> XYShape.T.length; diff --git a/src/distPlus/distribution/Distributions.re b/src/distPlus/distribution/Distributions.re index 2e4856b9..24053418 100644 --- a/src/distPlus/distribution/Distributions.re +++ b/src/distPlus/distribution/Distributions.re @@ -282,7 +282,7 @@ module Continuous = { let combineAlgebraicallyWithDiscrete = ( ~downsample=false, - op: SymbolicTypes.algebraicOperation, + op: ExpressionTypes.algebraicOperation, t1: t, t2: DistTypes.discreteShape, ) => { @@ -291,7 +291,7 @@ module Continuous = { let t1n = t1s |> XYShape.T.length; let t2n = t2s |> XYShape.T.length; - let fn = SymbolicTypes.Algebraic.toFn(op); + let fn = Operation.Algebraic.toFn(op); let outXYShapes: array(array((float, float))) = Belt.Array.makeUninitializedUnsafe(t2n); @@ -333,7 +333,7 @@ module Continuous = { }; let combineAlgebraically = - (~downsample=false, op: SymbolicTypes.algebraicOperation, t1: t, t2: t) => { + (~downsample=false, op: ExpressionTypes.algebraicOperation, t1: t, t2: t) => { let s1 = t1 |> getShape; let s2 = t2 |> getShape; let t1n = s1 |> XYShape.T.length; @@ -413,7 +413,7 @@ module Discrete = { /* This multiples all of the data points together and creates a new discrete distribution from the results. Data points at the same xs get added together. It may be a good idea to downsample t1 and t2 before and/or the result after. 
*/ let combineAlgebraically = - (op: SymbolicTypes.algebraicOperation, t1: t, t2: t) => { + (op: ExpressionTypes.algebraicOperation, t1: t, t2: t) => { let t1s = t1 |> getShape; let t2s = t2 |> getShape; let t1n = t1s |> XYShape.T.length; @@ -426,7 +426,7 @@ module Discrete = { t2.knownIntegralSum, ); - let fn = SymbolicTypes.Algebraic.toFn(op); + let fn = Operation.Algebraic.toFn(op); let xToYMap = E.FloatFloatMap.empty(); for (i in 0 to t1n - 1) { @@ -840,7 +840,7 @@ module Mixed = { }); let combineAlgebraically = - (~downsample=false, op: SymbolicTypes.algebraicOperation, t1: t, t2: t) + (~downsample=false, op: ExpressionTypes.algebraicOperation, t1: t, t2: t) : t => { // Discrete convolution can cause a huge increase in the number of samples, // so we'll first downsample. @@ -914,7 +914,7 @@ module Shape = { )); let combineAlgebraically = - (op: SymbolicTypes.algebraicOperation, t1: t, t2: t): t => { + (op: ExpressionTypes.algebraicOperation, t1: t, t2: t): t => { switch (t1, t2) { | (Continuous(m1), Continuous(m2)) => DistTypes.Continuous( @@ -1096,7 +1096,7 @@ module Shape = { }; }); - let operate = (distToFloatOp: SymbolicTypes.distToFloatOperation, s) => + let operate = (distToFloatOp: ExpressionTypes.distToFloatOperation, s) => switch (distToFloatOp) { | `Pdf(f) => pdf(f, s) | `Inv(f) => inv(f, s) diff --git a/src/distPlus/expressionTree/ExpressionTree.re b/src/distPlus/expressionTree/ExpressionTree.re new file mode 100644 index 00000000..2ceb783b --- /dev/null +++ b/src/distPlus/expressionTree/ExpressionTree.re @@ -0,0 +1,22 @@ +open ExpressionTypes.ExpressionTree; + +let toShape = (sampleCount: int, node: node) => { + let renderResult = + ExpressionTreeEvaluator.toLeaf(`Operation(`Render(node)), sampleCount); + + switch (renderResult) { + | Ok(`Leaf(`RenderedDist(rs))) => + let continuous = Distributions.Shape.T.toContinuous(rs); + let discrete = Distributions.Shape.T.toDiscrete(rs); + let shape = MixedShapeBuilder.buildSimple(~continuous, ~discrete); + 
shape |> E.O.toExt("Could not build final shape."); + | Ok(_) => E.O.toExn("Rendering failed.", None) + | Error(message) => E.O.toExn("No shape found, error: " ++ message, None) + }; +}; + +let rec toString = + fun + | `Leaf(`SymbolicDist(d)) => SymbolicDist.T.toString(d) + | `Leaf(`RenderedDist(_)) => "[shape]" + | `Operation(op) => Operation.T.toString(toString, op); diff --git a/src/distPlus/expressionTree/ExpressionTreeEvaluator.re b/src/distPlus/expressionTree/ExpressionTreeEvaluator.re new file mode 100644 index 00000000..348f91ef --- /dev/null +++ b/src/distPlus/expressionTree/ExpressionTreeEvaluator.re @@ -0,0 +1,294 @@ +/* This module represents a tree node. */ +open ExpressionTypes; +open ExpressionTypes.ExpressionTree; + +type t = node; +type tResult = node => result(node, string); + +/* Given two random variables A and B, this returns the distribution + of a new variable that is the result of the operation on A and B. + For instance, normal(0, 1) + normal(1, 1) -> normal(1, 2). + In general, this is implemented via convolution. */ +module AlgebraicCombination = { + let toTreeNode = (op, t1, t2) => + `Operation(`AlgebraicCombination((op, t1, t2))); + let tryAnalyticalSolution = + fun + | `Operation( + `AlgebraicCombination( + operation, + `Leaf(`SymbolicDist(d1)), + `Leaf(`SymbolicDist(d2)), + ), + ) as t => + switch (SymbolicDist.T.attemptAnalyticalOperation(d1, d2, operation)) { + | `AnalyticalSolution(symbolicDist) => + Ok(`Leaf(`SymbolicDist(symbolicDist))) + | `Error(er) => Error(er) + | `NoSolution => Ok(t) + } + | t => Ok(t); + + // todo: I don't like the name evaluateNumerically that much, if this renders and does it algebraically. It's tricky. 
+ let evaluateNumerically = (algebraicOp, operationToLeaf, t1, t2) => { + // force rendering into shapes + let renderShape = r => operationToLeaf(`Render(r)); + switch (renderShape(t1), renderShape(t2)) { + | (Ok(`Leaf(`RenderedDist(s1))), Ok(`Leaf(`RenderedDist(s2)))) => + Ok( + `Leaf( + `RenderedDist( + Distributions.Shape.combineAlgebraically(algebraicOp, s1, s2), + ), + ), + ) + | (Error(e1), _) => Error(e1) + | (_, Error(e2)) => Error(e2) + | _ => Error("Could not render shapes.") + }; + }; + + let toLeaf = + ( + operationToLeaf, + algebraicOp: ExpressionTypes.algebraicOperation, + t1: t, + t2: t, + ) + : result(node, string) => + toTreeNode(algebraicOp, t1, t2) + |> tryAnalyticalSolution + |> E.R.bind( + _, + fun + | `Leaf(d) => Ok(`Leaf(d)) // the analytical simplifaction worked, nice! + | `Operation(_) => + // if not, run the convolution + evaluateNumerically(algebraicOp, operationToLeaf, t1, t2), + ); +}; + +module VerticalScaling = { + let toLeaf = (operationToLeaf, scaleOp, t, scaleBy) => { + // scaleBy has to be a single float, otherwise we'll return an error. 
+ let fn = Operation.Scale.toFn(scaleOp); + let knownIntegralSumFn = Operation.Scale.toKnownIntegralSumFn(scaleOp); + let renderedShape = operationToLeaf(`Render(t)); + + switch (renderedShape, scaleBy) { + | (Ok(`Leaf(`RenderedDist(rs))), `Leaf(`SymbolicDist(`Float(sm)))) => + Ok( + `Leaf( + `RenderedDist( + Distributions.Shape.T.mapY( + ~knownIntegralSumFn=knownIntegralSumFn(sm), + fn(sm), + rs, + ), + ), + ), + ) + | (Error(e1), _) => Error(e1) + | (_, _) => Error("Can only scale by float values.") + }; + }; +}; + +module PointwiseCombination = { + let pointwiseAdd = (operationToLeaf, t1, t2) => { + let renderedShape1 = operationToLeaf(`Render(t1)); + let renderedShape2 = operationToLeaf(`Render(t2)); + + switch (renderedShape1, renderedShape2) { + | (Ok(`Leaf(`RenderedDist(rs1))), Ok(`Leaf(`RenderedDist(rs2)))) => + Ok( + `Leaf( + `RenderedDist( + Distributions.Shape.combinePointwise( + ~knownIntegralSumsFn=(a, b) => Some(a +. b), + (+.), + rs1, + rs2, + ), + ), + ), + ) + | (Error(e1), _) => Error(e1) + | (_, Error(e2)) => Error(e2) + | _ => Error("Could not perform pointwise addition.") + }; + }; + + let pointwiseMultiply = (operationToLeaf, t1, t2) => { + // TODO: construct a function that we can easily sample from, to construct + // a RenderedDist. Use the xMin and xMax of the rendered shapes to tell the sampling function where to look. 
+ Error( + "Pointwise multiplication not yet supported.", + ); + }; + + let toLeaf = (operationToLeaf, pointwiseOp, t1, t2) => { + switch (pointwiseOp) { + | `Add => pointwiseAdd(operationToLeaf, t1, t2) + | `Multiply => pointwiseMultiply(operationToLeaf, t1, t2) + }; + }; +}; + +module Truncate = { + module Simplify = { + let tryTruncatingNothing: tResult = + fun + | `Operation(`Truncate(None, None, `Leaf(d))) => Ok(`Leaf(d)) + | t => Ok(t); + + let tryTruncatingUniform: tResult = + fun + | `Operation(`Truncate(lc, rc, `Leaf(`SymbolicDist(`Uniform(u))))) => { + // just create a new Uniform distribution + let newLow = max(E.O.default(neg_infinity, lc), u.low); + let newHigh = min(E.O.default(infinity, rc), u.high); + Ok(`Leaf(`SymbolicDist(`Uniform({low: newLow, high: newHigh})))); + } + | t => Ok(t); + + let attempt = (leftCutoff, rightCutoff, t): result(node, string) => { + let originalTreeNode = + `Operation(`Truncate((leftCutoff, rightCutoff, t))); + + originalTreeNode + |> tryTruncatingNothing + |> E.R.bind(_, tryTruncatingUniform); + }; + }; + + let evaluateNumerically = (leftCutoff, rightCutoff, operationToLeaf, t) => { + // TODO: use named args in renderToShape; if we're lucky we can at least get the tail + // of a distribution we otherwise wouldn't get at all + let renderedShape = operationToLeaf(`Render(t)); + + switch (renderedShape) { + | Ok(`Leaf(`RenderedDist(rs))) => + let truncatedShape = + rs |> Distributions.Shape.T.truncate(leftCutoff, rightCutoff); + Ok(`Leaf(`RenderedDist(rs))); + | Error(e1) => Error(e1) + | _ => Error("Could not truncate distribution.") + }; + }; + + let toLeaf = + ( + operationToLeaf, + leftCutoff: option(float), + rightCutoff: option(float), + t: node, + ) + : result(node, string) => { + t + |> Simplify.attempt(leftCutoff, rightCutoff) + |> E.R.bind( + _, + fun + | `Leaf(d) => Ok(`Leaf(d)) // the analytical simplifaction worked, nice! 
+ | `Operation(_) => + evaluateNumerically(leftCutoff, rightCutoff, operationToLeaf, t), + ); // if not, run the convolution + }; +}; + +module Normalize = { + let rec toLeaf = (operationToLeaf, t: node): result(node, string) => { + switch (t) { + | `Leaf(`RenderedDist(s)) => + Ok(`Leaf(`RenderedDist(Distributions.Shape.T.normalize(s)))) + | `Leaf(`SymbolicDist(_)) => Ok(t) + | `Operation(op) => + operationToLeaf(op) |> E.R.bind(_, toLeaf(operationToLeaf)) + }; + }; +}; + +module FloatFromDist = { + let symbolicToLeaf = (distToFloatOp: distToFloatOperation, s) => { + SymbolicDist.T.operate(distToFloatOp, s) + |> E.R.bind(_, v => Ok(`Leaf(`SymbolicDist(`Float(v))))); + }; + let renderedToLeaf = + (distToFloatOp: distToFloatOperation, rs: DistTypes.shape) + : result(node, string) => { + Distributions.Shape.operate(distToFloatOp, rs) + |> (v => Ok(`Leaf(`SymbolicDist(`Float(v))))); + }; + let rec toLeaf = + (operationToLeaf, distToFloatOp: distToFloatOperation, t: node) + : result(node, string) => { + switch (t) { + | `Leaf(`SymbolicDist(s)) => symbolicToLeaf(distToFloatOp, s) // we want to evaluate the distToFloatOp on the symbolic dist + | `Leaf(`RenderedDist(rs)) => renderedToLeaf(distToFloatOp, rs) + | `Operation(op) => + E.R.bind(operationToLeaf(op), toLeaf(operationToLeaf, distToFloatOp)) + }; + }; +}; + +module Render = { + let rec toLeaf = + ( + operationToLeaf: operation => result(t, string), + sampleCount: int, + t: node, + ) + : result(t, string) => { + switch (t) { + | `Leaf(`SymbolicDist(d)) => + Ok(`Leaf(`RenderedDist(SymbolicDist.T.toShape(sampleCount, d)))) + | `Leaf(`RenderedDist(_)) as t => Ok(t) // already a rendered shape, we're done here + | `Operation(op) => + E.R.bind(operationToLeaf(op), toLeaf(operationToLeaf, sampleCount)) + }; + }; +}; + +let rec operationToLeaf = + (sampleCount: int, op: operation): result(t, string) => { + // the functions that convert the Operation nodes to Leaf nodes need to + // have a way to call this function on their 
children, if their children are themselves Operation nodes. + switch (op) { + | `AlgebraicCombination(algebraicOp, t1, t2) => + AlgebraicCombination.toLeaf( + operationToLeaf(sampleCount), + algebraicOp, + t1, + t2 // we want to give it the option to render or simply leave it as is + ) + | `PointwiseCombination(pointwiseOp, t1, t2) => + PointwiseCombination.toLeaf( + operationToLeaf(sampleCount), + pointwiseOp, + t1, + t2, + ) + | `VerticalScaling(scaleOp, t, scaleBy) => + VerticalScaling.toLeaf(operationToLeaf(sampleCount), scaleOp, t, scaleBy) + | `Truncate(leftCutoff, rightCutoff, t) => + Truncate.toLeaf(operationToLeaf(sampleCount), leftCutoff, rightCutoff, t) + | `FloatFromDist(distToFloatOp, t) => + FloatFromDist.toLeaf(operationToLeaf(sampleCount), distToFloatOp, t) + | `Normalize(t) => Normalize.toLeaf(operationToLeaf(sampleCount), t) + | `Render(t) => Render.toLeaf(operationToLeaf(sampleCount), sampleCount, t) + }; +}; + +/* This function recursively goes through the nodes of the parse tree, + replacing each Operation node and its subtree with a Data node. + Whenever possible, the replacement produces a new Symbolic Data node, + but most often it will produce a RenderedDist. + This function is used mainly to turn a parse tree into a single RenderedDist + that can then be displayed to the user. 
*/ +let toLeaf = (node: t, sampleCount: int): result(t, string) => { + switch (node) { + | `Leaf(d) => Ok(`Leaf(d)) + | `Operation(op) => operationToLeaf(sampleCount, op) + }; +}; diff --git a/src/distPlus/expressionTree/ExpressionTypes.re b/src/distPlus/expressionTree/ExpressionTypes.re new file mode 100644 index 00000000..730a228b --- /dev/null +++ b/src/distPlus/expressionTree/ExpressionTypes.re @@ -0,0 +1,24 @@ +type algebraicOperation = [ | `Add | `Multiply | `Subtract | `Divide]; +type pointwiseOperation = [ | `Add | `Multiply]; +type scaleOperation = [ | `Multiply | `Exponentiate | `Log]; +type distToFloatOperation = [ | `Pdf(float) | `Inv(float) | `Mean | `Sample]; + +type abstractOperation('a) = [ + | `AlgebraicCombination(algebraicOperation, 'a, 'a) + | `PointwiseCombination(pointwiseOperation, 'a, 'a) + | `VerticalScaling(scaleOperation, 'a, 'a) + | `Render('a) + | `Truncate(option(float), option(float), 'a) + | `Normalize('a) + | `FloatFromDist(distToFloatOperation, 'a) +]; + +module ExpressionTree = { + type leaf = [ + | `SymbolicDist(SymbolicTypes.symbolicDist) + | `RenderedDist(DistTypes.shape) + ]; + + type node = [ | `Leaf(leaf) | `Operation(operation)] + and operation = abstractOperation(node); +}; diff --git a/src/distPlus/symbolic/MathJsParser.re b/src/distPlus/expressionTree/MathJsParser.re similarity index 91% rename from src/distPlus/symbolic/MathJsParser.re rename to src/distPlus/expressionTree/MathJsParser.re index 3874fda4..92227736 100644 --- a/src/distPlus/symbolic/MathJsParser.re +++ b/src/distPlus/expressionTree/MathJsParser.re @@ -86,13 +86,13 @@ module MathAdtToDistDst = { ); }; - let normal: array(arg) => result(TreeNode.treeNode, string) = + let normal: array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = fun | [|Value(mean), Value(stdev)|] => Ok(`Leaf(`SymbolicDist(`Normal({mean, stdev})))) | _ => Error("Wrong number of variables in normal distribution"); - let lognormal: array(arg) => result(TreeNode.treeNode, 
string) = + let lognormal: array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = fun | [|Value(mu), Value(sigma)|] => Ok(`Leaf(`SymbolicDist(`Lognormal({mu, sigma})))) @@ -114,7 +114,7 @@ module MathAdtToDistDst = { } | _ => Error("Wrong number of variables in lognormal distribution"); - let to_: array(arg) => result(TreeNode.treeNode, string) = + let to_: array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = fun | [|Value(low), Value(high)|] when low <= 0.0 && low < high => { Ok( @@ -134,31 +134,31 @@ module MathAdtToDistDst = { Error("Low value must be less than high value.") | _ => Error("Wrong number of variables in lognormal distribution"); - let uniform: array(arg) => result(TreeNode.treeNode, string) = + let uniform: array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = fun | [|Value(low), Value(high)|] => Ok(`Leaf(`SymbolicDist(`Uniform({low, high})))) | _ => Error("Wrong number of variables in lognormal distribution"); - let beta: array(arg) => result(TreeNode.treeNode, string) = + let beta: array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = fun | [|Value(alpha), Value(beta)|] => Ok(`Leaf(`SymbolicDist(`Beta({alpha, beta})))) | _ => Error("Wrong number of variables in lognormal distribution"); - let exponential: array(arg) => result(TreeNode.treeNode, string) = + let exponential: array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = fun | [|Value(rate)|] => Ok(`Leaf(`SymbolicDist(`Exponential({rate: rate})))) | _ => Error("Wrong number of variables in Exponential distribution"); - let cauchy: array(arg) => result(TreeNode.treeNode, string) = + let cauchy: array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = fun | [|Value(local), Value(scale)|] => Ok(`Leaf(`SymbolicDist(`Cauchy({local, scale})))) | _ => Error("Wrong number of variables in cauchy distribution"); - let triangular: array(arg) => result(TreeNode.treeNode, string) = + let triangular: array(arg) => 
result(ExpressionTypes.ExpressionTree.node, string) = fun | [|Value(low), Value(medium), Value(high)|] => Ok(`Leaf(`SymbolicDist(`Triangular({low, medium, high})))) @@ -166,7 +166,7 @@ module MathAdtToDistDst = { let multiModal = ( - args: array(result(TreeNode.treeNode, string)), + args: array(result(ExpressionTypes.ExpressionTree.node, string)), weights: option(array(float)), ) => { let weights = weights |> E.O.default([||]); @@ -215,7 +215,7 @@ module MathAdtToDistDst = { }; }; - let arrayParser = (args: array(arg)): result(TreeNode.treeNode, string) => { + let arrayParser = (args: array(arg)): result(ExpressionTypes.ExpressionTree.node, string) => { let samples = args |> E.A.fmap( @@ -241,7 +241,7 @@ module MathAdtToDistDst = { }; let operationParser = - (name: string, args: array(result(TreeNode.treeNode, string))) => { + (name: string, args: array(result(ExpressionTypes.ExpressionTree.node, string))) => { let toOkAlgebraic = r => Ok(`Operation(`AlgebraicCombination(r))); let toOkTrunctate = r => Ok(`Operation(`Truncate(r))); switch (name, args) { @@ -347,7 +347,7 @@ module MathAdtToDistDst = { | Symbol(_) => Error("Symbol not valid as top level") | Object(_) => Error("Object not valid as top level"); - let run = (r): result(TreeNode.treeNode, string) => + let run = (r): result(ExpressionTypes.ExpressionTree.node, string) => r |> MathAdtCleaner.run |> topLevel; }; diff --git a/src/distPlus/symbolic/Mathjs.re b/src/distPlus/expressionTree/Mathjs.re similarity index 100% rename from src/distPlus/symbolic/Mathjs.re rename to src/distPlus/expressionTree/Mathjs.re diff --git a/src/distPlus/expressionTree/Operation.re b/src/distPlus/expressionTree/Operation.re new file mode 100644 index 00000000..112ca17e --- /dev/null +++ b/src/distPlus/expressionTree/Operation.re @@ -0,0 +1,93 @@ +open ExpressionTypes; + +module Algebraic = { + type t = algebraicOperation; + let toFn: (t, float, float) => float = + fun + | `Add => (+.) + | `Subtract => (-.) + | `Multiply => ( *. 
) + | `Divide => (/.); + + let applyFn = (t, f1, f2) => { + switch (t, f1, f2) { + | (`Divide, _, 0.) => Error("Cannot divide $v1 by zero.") + | _ => Ok(toFn(t, f1, f2)) + }; + }; + + let toString = + fun + | `Add => "+" + | `Subtract => "-" + | `Multiply => "*" + | `Divide => "/"; + + let format = (a, b, c) => b ++ " " ++ toString(a) ++ " " ++ c; +}; + +module Pointwise = { + type t = pointwiseOperation; + let toString = + fun + | `Add => "+" + | `Multiply => "*"; + + let format = (a, b, c) => b ++ " " ++ toString(a) ++ " " ++ c; +}; + +module DistToFloat = { + type t = distToFloatOperation; + + let format = (operation, value) => + switch (operation) { + | `Pdf(f) => {j|pdf(x=$f,$value)|j} + | `Inv(f) => {j|inv(x=$f,$value)|j} + | `Sample => "sample($value)" + | `Mean => "mean($value)" + }; +}; + +module Scale = { + type t = scaleOperation; + let toFn = + fun + | `Multiply => ( *. ) + | `Exponentiate => ( ** ) + | `Log => ((a, b) => log(a) /. log(b)); + + let format = (operation: t, value, scaleBy) => + switch (operation) { + | `Multiply => {j|scaleMultiply($value, $scaleBy) |j} + | `Exponentiate => {j|ScaleExponentiate($value, $scaleBy) |j} + | `Log => {j|ScaleLog($value, $scaleBy) |j} + }; + + let toKnownIntegralSumFn = + fun + | `Multiply => ((a, b) => Some(a *. 
b)) + | `Exponentiate => ((_, _) => None) + | `Log => ((_, _) => None); +}; + +module T = { + let truncateToString = + (left: option(float), right: option(float), nodeToString) => { + let left = left |> E.O.dimap(Js.Float.toString, () => "-inf"); + let right = right |> E.O.dimap(Js.Float.toString, () => "inf"); + {j|truncate($nodeToString, $left, $right)|j}; + }; + let toString = nodeToString => + fun + | `AlgebraicCombination(op, t1, t2) => + Algebraic.format(op, nodeToString(t1), nodeToString(t2)) + | `PointwiseCombination(op, t1, t2) => + Pointwise.format(op, nodeToString(t1), nodeToString(t2)) + | `VerticalScaling(scaleOp, t, scaleBy) => + Scale.format(scaleOp, nodeToString(t), nodeToString(scaleBy)) + | `Normalize(t) => "normalize(" ++ nodeToString(t) ++ ")" + | `FloatFromDist(floatFromDistOp, t) => + DistToFloat.format(floatFromDistOp, nodeToString(t)) + | `Truncate(lc, rc, t) => truncateToString(lc, rc, nodeToString(t)) + | `Render(t) => nodeToString(t); +}; diff --git a/src/distPlus/renderers/RenderTypes.re b/src/distPlus/renderers/RenderTypes.re index e091ecad..9b37503f 100644 --- a/src/distPlus/renderers/RenderTypes.re +++ b/src/distPlus/renderers/RenderTypes.re @@ -43,7 +43,7 @@ module ShapeRenderer = { module Symbolic = { type inputs = {length: int}; type outputs = { - graph: TreeNode.treeNode, + graph: ExpressionTypes.ExpressionTree.node, shape: DistTypes.shape, }; let make = (graph, shape) => {graph, shape}; diff --git a/src/distPlus/renderers/ShapeRenderer.re b/src/distPlus/renderers/ShapeRenderer.re index 8542ba4a..b439240b 100644 --- a/src/distPlus/renderers/ShapeRenderer.re +++ b/src/distPlus/renderers/ShapeRenderer.re @@ -21,7 +21,7 @@ let runSymbolic = (guesstimatorString, length) => { |> E.R.fmap(g => RenderTypes.ShapeRenderer.Symbolic.make( g, - TreeNode.toShape(length, g), + ExpressionTree.toShape(length, g), ) ); }; diff --git a/src/distPlus/symbolic/MathjsWrapper.js b/src/distPlus/symbolic/MathjsWrapper.js deleted file mode 100644 index 
01fd4994..00000000 --- a/src/distPlus/symbolic/MathjsWrapper.js +++ /dev/null @@ -1,8 +0,0 @@ - -const math = require("mathjs"); - -function parseMath(f){ return JSON.parse(JSON.stringify(math.parse(f))) }; - -module.exports = { - parseMath, -}; diff --git a/src/distPlus/symbolic/SymbolicDist.re b/src/distPlus/symbolic/SymbolicDist.re index 18142e74..94e513d6 100644 --- a/src/distPlus/symbolic/SymbolicDist.re +++ b/src/distPlus/symbolic/SymbolicDist.re @@ -76,7 +76,7 @@ module Normal = { `Normal({mean, stdev}); }; - let operate = (operation: SymbolicTypes.Algebraic.t, n1: t, n2: t) => + let operate = (operation: Operation.Algebraic.t, n1: t, n2: t) => switch (operation) { | `Add => Some(add(n1, n2)) | `Subtract => Some(subtract(n1, n2)) @@ -130,7 +130,7 @@ module Lognormal = { let sigma = l1.sigma +. l2.sigma; `Lognormal({mu, sigma}); }; - let operate = (operation: SymbolicTypes.Algebraic.t, n1: t, n2: t) => + let operate = (operation: Operation.Algebraic.t, n1: t, n2: t) => switch (operation) { | `Multiply => Some(multiply(n1, n2)) | `Divide => Some(divide(n1, n2)) @@ -246,7 +246,7 @@ module T = { | `Uniform(n) => Uniform.mean(n) | `Float(n) => Float.mean(n); - let operate = (distToFloatOp: distToFloatOperation, s) => + let operate = (distToFloatOp: ExpressionTypes.distToFloatOperation, s) => switch (distToFloatOp) { | `Pdf(f) => Ok(pdf(f, s)) | `Inv(f) => Ok(inv(f, s)) @@ -283,12 +283,12 @@ module T = { ( d1: symbolicDist, d2: symbolicDist, - op: SymbolicTypes.algebraicOperation, + op: ExpressionTypes.algebraicOperation, ) : analyticalSolutionAttempt => switch (d1, d2) { | (`Float(v1), `Float(v2)) => - switch (SymbolicTypes.Algebraic.applyFn(op, v1, v2)) { + switch (Operation.Algebraic.applyFn(op, v1, v2)) { | Ok(r) => `AnalyticalSolution(`Float(r)) | Error(n) => `Error(n) } diff --git a/src/distPlus/symbolic/SymbolicTypes.re b/src/distPlus/symbolic/SymbolicTypes.re index 2dbb95db..b372a00f 100644 --- a/src/distPlus/symbolic/SymbolicTypes.re +++ 
b/src/distPlus/symbolic/SymbolicTypes.re @@ -46,80 +46,4 @@ type symbolicDist = [ | `Triangular(triangular) | `ContinuousShape(continuousShape) | `Float(float) // Dirac delta at x. Practically useful only in the context of multimodals. -]; - -// todo: These operations are really applicable for all dists -type algebraicOperation = [ | `Add | `Multiply | `Subtract | `Divide]; -type pointwiseOperation = [ | `Add | `Multiply]; -type scaleOperation = [ | `Multiply | `Exponentiate | `Log]; -type distToFloatOperation = [ | `Pdf(float) | `Inv(float) | `Mean | `Sample]; - -module Algebraic = { - type t = algebraicOperation; - let toFn: (t, float, float) => float = - fun - | `Add => (+.) - | `Subtract => (-.) - | `Multiply => ( *. ) - | `Divide => (/.); - - let applyFn = (t, f1, f2) => { - switch (t, f1, f2) { - | (`Divide, _, 0.) => Error("Cannot divide $v1 by zero.") - | _ => Ok(toFn(t, f1, f2)) - }; - }; - - let toString = - fun - | `Add => "+" - | `Subtract => "-" - | `Multiply => "*" - | `Divide => "/"; - - let format = (a, b, c) => b ++ " " ++ toString(a) ++ " " ++ c; -}; - -module Pointwise = { - type t = pointwiseOperation; - let toString = - fun - | `Add => "+" - | `Multiply => "*"; - - let format = (a, b, c) => b ++ " " ++ toString(a) ++ " " ++ c; -}; - -module DistToFloat = { - type t = distToFloatOperation; - - let format = (operation, value) => - switch (operation) { - | `Pdf(f) => {j|pdf(x=$f,$value)|j} - | `Inv(f) => {j|inv(x=$f,$value)|j} - | `Sample => "sample($value)" - | `Mean => "mean($value)" - }; -}; - -module Scale = { - type t = scaleOperation; - let toFn = - fun - | `Multiply => ( *. ) - | `Exponentiate => ( ** ) - | `Log => ((a, b) => log(a) /. 
log(b)); - - let format = (operation:t, value, scaleBy) => - switch (operation) { - | `Multiply => {j|scaleMultiply($value, $scaleBy) |j} - | `Exponentiate => {j|ScaleExponentiate($value, $scaleBy) |j} - | `Log => {j|ScaleLog($value, $scaleBy) |j} - }; - - let toKnownIntegralSumFn = - fun - | `Multiply => ((a, b) => Some(a *. b)) - | `Exponentiate => ((_, _) => None) - | `Log => ((_, _) => None); -}; +]; \ No newline at end of file diff --git a/src/distPlus/symbolic/TreeNode.re b/src/distPlus/symbolic/TreeNode.re deleted file mode 100644 index b6f83801..00000000 --- a/src/distPlus/symbolic/TreeNode.re +++ /dev/null @@ -1,387 +0,0 @@ -/* This module represents a tree node. */ -open SymbolicTypes; - -type leaf = [ - | `SymbolicDist(SymbolicTypes.symbolicDist) - | `RenderedDist(DistTypes.shape) -]; - -/* TreeNodes are either Data (i.e. symbolic or rendered distributions) or Operations. Operations always refer to two child nodes.*/ -type treeNode = [ | `Leaf(leaf) | `Operation(operation)] -and operation = [ - | `AlgebraicCombination(algebraicOperation, treeNode, treeNode) - | `PointwiseCombination(pointwiseOperation, treeNode, treeNode) - | `VerticalScaling(scaleOperation, treeNode, treeNode) - | `Render(treeNode) - | `Truncate(option(float), option(float), treeNode) - | `Normalize(treeNode) - | `FloatFromDist(distToFloatOperation, treeNode) -]; - -module Operation = { - type t = operation; - let truncateToString = - (left: option(float), right: option(float), nodeToString) => { - let left = left |> E.O.dimap(Js.Float.toString, () => "-inf"); - let right = right |> E.O.dimap(Js.Float.toString, () => "inf"); - {j|truncate($nodeToString, $left, $right)|j}; - }; - - let toString = nodeToString => - fun - | `AlgebraicCombination(op, t1, t2) => - SymbolicTypes.Algebraic.format(op, nodeToString(t1), nodeToString(t2)) - | `PointwiseCombination(op, t1, t2) => - SymbolicTypes.Pointwise.format(op, nodeToString(t1), nodeToString(t2)) - | `VerticalScaling(scaleOp, t, scaleBy) => - 
SymbolicTypes.Scale.format( - scaleOp, - nodeToString(t), - nodeToString(scaleBy), - ) - | `Normalize(t) => "normalize(" ++ nodeToString(t) ++ ")" - | `FloatFromDist(floatFromDistOp, t) => - SymbolicTypes.DistToFloat.format(floatFromDistOp, nodeToString(t)) - | `Truncate(lc, rc, t) => truncateToString(lc, rc, nodeToString(t)) - | `Render(t) => nodeToString(t); -}; - -module TreeNode = { - type t = treeNode; - type tResult = treeNode => result(treeNode, string); - - let rec toString = - fun - | `Leaf(`SymbolicDist(d)) => SymbolicDist.T.toString(d) - | `Leaf(`RenderedDist(_)) => "[shape]" - | `Operation(op) => Operation.toString(toString, op); - - /* The following modules encapsulate everything we can do with - * different kinds of operations. */ - - /* Given two random variables A and B, this returns the distribution - of a new variable that is the result of the operation on A and B. - For instance, normal(0, 1) + normal(1, 1) -> normal(1, 2). - In general, this is implemented via convolution. */ - module AlgebraicCombination = { - let toTreeNode = (op, t1, t2) => - `Operation(`AlgebraicCombination((op, t1, t2))); - let tryAnalyticalSolution = - fun - | `Operation( - `AlgebraicCombination( - operation, - `Leaf(`SymbolicDist(d1)), - `Leaf(`SymbolicDist(d2)), - ), - ) as t => - switch (SymbolicDist.T.attemptAnalyticalOperation(d1, d2, operation)) { - | `AnalyticalSolution(symbolicDist) => - Ok(`Leaf(`SymbolicDist(symbolicDist))) - | `Error(er) => Error(er) - | `NoSolution => Ok(t) - } - | t => Ok(t); - - // todo: I don't like the name evaluateNumerically that much, if this renders and does it algebraically. It's tricky. 
- let evaluateNumerically = (algebraicOp, operationToLeaf, t1, t2) => { - // force rendering into shapes - let renderShape = r => operationToLeaf(`Render(r)); - switch (renderShape(t1), renderShape(t2)) { - | (Ok(`Leaf(`RenderedDist(s1))), Ok(`Leaf(`RenderedDist(s2)))) => - Ok( - `Leaf( - `RenderedDist( - Distributions.Shape.combineAlgebraically(algebraicOp, s1, s2), - ), - ), - ) - | (Error(e1), _) => Error(e1) - | (_, Error(e2)) => Error(e2) - | _ => Error("Could not render shapes.") - }; - }; - - let toLeaf = - ( - operationToLeaf, - algebraicOp: SymbolicTypes.algebraicOperation, - t1: t, - t2: t, - ) - : result(treeNode, string) => - toTreeNode(algebraicOp, t1, t2) - |> tryAnalyticalSolution - |> E.R.bind( - _, - fun - | `Leaf(d) => Ok(`Leaf(d)) // the analytical simplifaction worked, nice! - | `Operation(_) => - // if not, run the convolution - evaluateNumerically(algebraicOp, operationToLeaf, t1, t2), - ); - }; - - module VerticalScaling = { - let toLeaf = (operationToLeaf, scaleOp, t, scaleBy) => { - // scaleBy has to be a single float, otherwise we'll return an error. 
- let fn = SymbolicTypes.Scale.toFn(scaleOp); - let knownIntegralSumFn = - SymbolicTypes.Scale.toKnownIntegralSumFn(scaleOp); - let renderedShape = operationToLeaf(`Render(t)); - - switch (renderedShape, scaleBy) { - | (Ok(`Leaf(`RenderedDist(rs))), `Leaf(`SymbolicDist(`Float(sm)))) => - Ok( - `Leaf( - `RenderedDist( - Distributions.Shape.T.mapY( - ~knownIntegralSumFn=knownIntegralSumFn(sm), - fn(sm), - rs, - ), - ), - ), - ) - | (Error(e1), _) => Error(e1) - | (_, _) => Error("Can only scale by float values.") - }; - }; - }; - - module PointwiseCombination = { - let pointwiseAdd = (operationToLeaf, t1, t2) => { - let renderedShape1 = operationToLeaf(`Render(t1)); - let renderedShape2 = operationToLeaf(`Render(t2)); - - switch (renderedShape1, renderedShape2) { - | (Ok(`Leaf(`RenderedDist(rs1))), Ok(`Leaf(`RenderedDist(rs2)))) => - Ok( - `Leaf( - `RenderedDist( - Distributions.Shape.combinePointwise( - ~knownIntegralSumsFn=(a, b) => Some(a +. b), - (+.), - rs1, - rs2, - ), - ), - ), - ) - | (Error(e1), _) => Error(e1) - | (_, Error(e2)) => Error(e2) - | _ => Error("Could not perform pointwise addition.") - }; - }; - - let pointwiseMultiply = (operationToLeaf, t1, t2) => { - // TODO: construct a function that we can easily sample from, to construct - // a RenderedDist. Use the xMin and xMax of the rendered shapes to tell the sampling function where to look. 
- Error( - "Pointwise multiplication not yet supported.", - ); - }; - - let toLeaf = (operationToLeaf, pointwiseOp, t1, t2) => { - switch (pointwiseOp) { - | `Add => pointwiseAdd(operationToLeaf, t1, t2) - | `Multiply => pointwiseMultiply(operationToLeaf, t1, t2) - }; - }; - }; - - module Truncate = { - module Simplify = { - let tryTruncatingNothing: tResult = - fun - | `Operation(`Truncate(None, None, `Leaf(d))) => Ok(`Leaf(d)) - | t => Ok(t); - - let tryTruncatingUniform: tResult = - fun - | `Operation(`Truncate(lc, rc, `Leaf(`SymbolicDist(`Uniform(u))))) => { - // just create a new Uniform distribution - let newLow = max(E.O.default(neg_infinity, lc), u.low); - let newHigh = min(E.O.default(infinity, rc), u.high); - Ok( - `Leaf(`SymbolicDist(`Uniform({low: newLow, high: newHigh}))), - ); - } - | t => Ok(t); - - let attempt = (leftCutoff, rightCutoff, t): result(treeNode, string) => { - let originalTreeNode = - `Operation(`Truncate((leftCutoff, rightCutoff, t))); - - originalTreeNode - |> tryTruncatingNothing - |> E.R.bind(_, tryTruncatingUniform); - }; - }; - - let evaluateNumerically = (leftCutoff, rightCutoff, operationToLeaf, t) => { - // TODO: use named args in renderToShape; if we're lucky we can at least get the tail - // of a distribution we otherwise wouldn't get at all - let renderedShape = operationToLeaf(`Render(t)); - - switch (renderedShape) { - | Ok(`Leaf(`RenderedDist(rs))) => - let truncatedShape = - rs |> Distributions.Shape.T.truncate(leftCutoff, rightCutoff); - Ok(`Leaf(`RenderedDist(rs))); - | Error(e1) => Error(e1) - | _ => Error("Could not truncate distribution.") - }; - }; - - let toLeaf = - ( - operationToLeaf, - leftCutoff: option(float), - rightCutoff: option(float), - t: treeNode, - ) - : result(treeNode, string) => { - t - |> Simplify.attempt(leftCutoff, rightCutoff) - |> E.R.bind( - _, - fun - | `Leaf(d) => Ok(`Leaf(d)) // the analytical simplifaction worked, nice! 
- | `Operation(_) => - evaluateNumerically(leftCutoff, rightCutoff, operationToLeaf, t), - ); // if not, run the convolution - }; - }; - - module Normalize = { - let rec toLeaf = (operationToLeaf, t: treeNode): result(treeNode, string) => { - switch (t) { - | `Leaf(`RenderedDist(s)) => - Ok(`Leaf(`RenderedDist(Distributions.Shape.T.normalize(s)))) - | `Leaf(`SymbolicDist(_)) => Ok(t) - | `Operation(op) => - operationToLeaf(op) |> E.R.bind(_, toLeaf(operationToLeaf)) - }; - }; - }; - - module FloatFromDist = { - let symbolicToLeaf = (distToFloatOp: distToFloatOperation, s) => { - SymbolicDist.T.operate(distToFloatOp, s) - |> E.R.bind(_, v => Ok(`Leaf(`SymbolicDist(`Float(v))))); - }; - let renderedToLeaf = - (distToFloatOp: distToFloatOperation, rs: DistTypes.shape) - : result(treeNode, string) => { - Distributions.Shape.operate(distToFloatOp, rs) - |> (v => Ok(`Leaf(`SymbolicDist(`Float(v))))); - }; - let rec toLeaf = - ( - operationToLeaf, - distToFloatOp: distToFloatOperation, - t: treeNode, - ) - : result(treeNode, string) => { - switch (t) { - | `Leaf(`SymbolicDist(s)) => symbolicToLeaf(distToFloatOp, s) // we want to evaluate the distToFloatOp on the symbolic dist - | `Leaf(`RenderedDist(rs)) => renderedToLeaf(distToFloatOp, rs) - | `Operation(op) => - E.R.bind( - operationToLeaf(op), - toLeaf(operationToLeaf, distToFloatOp), - ) - }; - }; - }; - - module Render = { - let rec toLeaf = - ( - operationToLeaf: operation => result(t, string), - sampleCount: int, - t: treeNode, - ) - : result(t, string) => { - switch (t) { - | `Leaf(`SymbolicDist(d)) => - Ok(`Leaf(`RenderedDist(SymbolicDist.T.toShape(sampleCount, d)))) - | `Leaf(`RenderedDist(_)) as t => Ok(t) // already a rendered shape, we're done here - | `Operation(op) => - E.R.bind(operationToLeaf(op), toLeaf(operationToLeaf, sampleCount)) - }; - }; - }; - - let rec operationToLeaf = - (sampleCount: int, op: operation): result(t, string) => { - // the functions that convert the Operation nodes to Leaf nodes 
need to - // have a way to call this function on their children, if their children are themselves Operation nodes. - switch (op) { - | `AlgebraicCombination(algebraicOp, t1, t2) => - AlgebraicCombination.toLeaf( - operationToLeaf(sampleCount), - algebraicOp, - t1, - t2 // we want to give it the option to render or simply leave it as is - ) - | `PointwiseCombination(pointwiseOp, t1, t2) => - PointwiseCombination.toLeaf( - operationToLeaf(sampleCount), - pointwiseOp, - t1, - t2, - ) - | `VerticalScaling(scaleOp, t, scaleBy) => - VerticalScaling.toLeaf( - operationToLeaf(sampleCount), - scaleOp, - t, - scaleBy, - ) - | `Truncate(leftCutoff, rightCutoff, t) => - Truncate.toLeaf( - operationToLeaf(sampleCount), - leftCutoff, - rightCutoff, - t, - ) - | `FloatFromDist(distToFloatOp, t) => - FloatFromDist.toLeaf(operationToLeaf(sampleCount), distToFloatOp, t) - | `Normalize(t) => Normalize.toLeaf(operationToLeaf(sampleCount), t) - | `Render(t) => - Render.toLeaf(operationToLeaf(sampleCount), sampleCount, t) - }; - }; - - /* This function recursively goes through the nodes of the parse tree, - replacing each Operation node and its subtree with a Data node. - Whenever possible, the replacement produces a new Symbolic Data node, - but most often it will produce a RenderedDist. - This function is used mainly to turn a parse tree into a single RenderedDist - that can then be displayed to the user. 
*/ - let toLeaf = (treeNode: t, sampleCount: int): result(t, string) => { - switch (treeNode) { - | `Leaf(d) => Ok(`Leaf(d)) - | `Operation(op) => operationToLeaf(sampleCount, op) - }; - }; -}; - -let toShape = (sampleCount: int, treeNode: treeNode) => { - let renderResult = - TreeNode.toLeaf(`Operation(`Render(treeNode)), sampleCount); - - switch (renderResult) { - | Ok(`Leaf(`RenderedDist(rs))) => - let continuous = Distributions.Shape.T.toContinuous(rs); - let discrete = Distributions.Shape.T.toDiscrete(rs); - let shape = MixedShapeBuilder.buildSimple(~continuous, ~discrete); - shape |> E.O.toExt("Could not build final shape."); - | Ok(_) => E.O.toExn("Rendering failed.", None) - | Error(message) => E.O.toExn("No shape found, error: " ++ message, None) - }; -}; - -let toString = (treeNode: treeNode) => TreeNode.toString(treeNode); From 9f30b6d75c006d97301fdd18d2cf2cb621f246e7 Mon Sep 17 00:00:00 2001 From: Ozzie Gooen Date: Thu, 2 Jul 2020 18:29:04 +0100 Subject: [PATCH 26/31] Added back MathjsWrapper, which I accidentally lost --- src/distPlus/expressionTree/MathjsWrapper.js | 9 +++++++++ src/distPlus/expressionTree/Operation.re | 6 +++--- 2 files changed, 12 insertions(+), 3 deletions(-) create mode 100644 src/distPlus/expressionTree/MathjsWrapper.js diff --git a/src/distPlus/expressionTree/MathjsWrapper.js b/src/distPlus/expressionTree/MathjsWrapper.js new file mode 100644 index 00000000..3546ba42 --- /dev/null +++ b/src/distPlus/expressionTree/MathjsWrapper.js @@ -0,0 +1,9 @@ +const math = require("mathjs"); + +function parseMath(f) { + return JSON.parse(JSON.stringify(math.parse(f))) +}; + +module.exports = { + parseMath, +}; \ No newline at end of file diff --git a/src/distPlus/expressionTree/Operation.re b/src/distPlus/expressionTree/Operation.re index 112ca17e..b2214c1e 100644 --- a/src/distPlus/expressionTree/Operation.re +++ b/src/distPlus/expressionTree/Operation.re @@ -58,9 +58,9 @@ module Scale = { let format = (operation: t, value, scaleBy) => 
switch (operation) { - | `Multiply => {j|scaleMultiply($value, $scaleBy) |j} - | `Exponentiate => {j|ScaleExponentiate($value, $scaleBy) |j} - | `Log => {j|ScaleLog($value, $scaleBy) |j} + | `Multiply => {j|verticalMultiply($value, $scaleBy) |j} + | `Exponentiate => {j|verticalExponentiate($value, $scaleBy) |j} + | `Log => {j|verticalLog($value, $scaleBy) |j} }; let toKnownIntegralSumFn = From a681135f3d404d7f2070aff2e08a781cdd368c36 Mon Sep 17 00:00:00 2001 From: Ozzie Gooen Date: Thu, 2 Jul 2020 19:01:20 +0100 Subject: [PATCH 27/31] AlgebraicCombinations -> ShapeConvolution --- src/distPlus/distribution/Distributions.re | 2 +- .../{AlgebraicCombinations.re => ShapeConvolution.re} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename src/distPlus/distribution/{AlgebraicCombinations.re => ShapeConvolution.re} (100%) diff --git a/src/distPlus/distribution/Distributions.re b/src/distPlus/distribution/Distributions.re index 24053418..9d7fe31c 100644 --- a/src/distPlus/distribution/Distributions.re +++ b/src/distPlus/distribution/Distributions.re @@ -342,7 +342,7 @@ module Continuous = { empty; } else { let combinedShape = - AlgebraicCombinations.combineShapesContinuousContinuous(op, s1, s2); + ShapeConvolution.combineShapesContinuousContinuous(op, s1, s2); let combinedIntegralSum = Common.combineIntegralSums( (a, b) => Some(a *. 
b), diff --git a/src/distPlus/distribution/AlgebraicCombinations.re b/src/distPlus/distribution/ShapeConvolution.re similarity index 100% rename from src/distPlus/distribution/AlgebraicCombinations.re rename to src/distPlus/distribution/ShapeConvolution.re From a649a6bca2020490bcff75ab36ffa127fd82dbb7 Mon Sep 17 00:00:00 2001 From: Ozzie Gooen Date: Thu, 2 Jul 2020 23:52:58 +0100 Subject: [PATCH 28/31] Added ExpressionTreeExamples in showcase --- showcase/Entries.re | 2 +- showcase/entries/Continuous.re | 2 +- showcase/entries/ExpressionTreeExamples.re | 71 ++++++++++++++++++++++ src/distPlus/expressionTree/Operation.re | 2 +- 4 files changed, 74 insertions(+), 3 deletions(-) create mode 100644 showcase/entries/ExpressionTreeExamples.re diff --git a/showcase/Entries.re b/showcase/Entries.re index ae4cef64..f3e96e75 100644 --- a/showcase/Entries.re +++ b/showcase/Entries.re @@ -1 +1 @@ -let entries = EntryTypes.[Continuous.entry]; \ No newline at end of file +let entries = EntryTypes.[Continuous.entry,ExpressionTreeExamples.entry]; \ No newline at end of file diff --git a/showcase/entries/Continuous.re b/showcase/entries/Continuous.re index 237b1081..86823fc0 100644 --- a/showcase/entries/Continuous.re +++ b/showcase/entries/Continuous.re @@ -84,4 +84,4 @@ let distributions = () => ; -let entry = EntryTypes.(entry(~title="Pdf", ~render=distributions)); \ No newline at end of file +let entry = EntryTypes.(entry(~title="Mixed Distributions", ~render=distributions)); \ No newline at end of file diff --git a/showcase/entries/ExpressionTreeExamples.re b/showcase/entries/ExpressionTreeExamples.re new file mode 100644 index 00000000..ef29cdaf --- /dev/null +++ b/showcase/entries/ExpressionTreeExamples.re @@ -0,0 +1,71 @@ +let setup = dist => + RenderTypes.DistPlusRenderer.make(~distPlusIngredients=dist, ()) + |> DistPlusRenderer.run + |> RenderTypes.DistPlusRenderer.Outputs.distplus + |> R.O.fmapOrNull(distPlus => ); + +let simpleExample = (guesstimatorString, ~problem="", 
()) => + <> +

{guesstimatorString |> ReasonReact.string}

+

{problem |> (e => "problem: " ++ e) |> ReasonReact.string}

+ {setup( + RenderTypes.DistPlusRenderer.Ingredients.make(~guesstimatorString, ()), + )} + ; + +let distributions = () => +
+
+

+ {"Initial Section" |> ReasonReact.string} +

+ {simpleExample( + "normal(-1, 1) + normal(5, 2)", + ~problem="Tails look too flat", + (), + )} + {simpleExample( + "mm(normal(4,2), normal(10,1))", + ~problem="Tails look too flat", + (), + )} + {simpleExample( + "normal(-1, 1) * normal(5, 2)", + ~problem="This looks really weird", + (), + )} + {simpleExample( + "normal(1,2) * normal(2,2) * normal(3,1)", + ~problem="Seems like important parts are cut off", + (), + )} + {simpleExample( + "mm(uniform(0, 1) , normal(3,2))", + ~problem="Uniform distribution seems to break multimodal", + (), + )} + {simpleExample( + "truncate(mm(1 to 10, 10 to 30), 10, 20)", + ~problem="Truncate seems to have no effect", + (), + )} + {simpleExample( + "normal(5,2)*(10^3)", + ~problem="Multiplied items should be evaluated.", + (), + )} + {simpleExample( + "normal(5,10*3)", + ~problem="At least simple operations in the distributions should be evaluated.", + (), + )} + {simpleExample( + "normal(5,10)^3", + ~problem="Exponentiation not yet supported", + (), + )} +
+
; + +let entry = + EntryTypes.(entry(~title="ExpressionTree", ~render=distributions)); diff --git a/src/distPlus/expressionTree/Operation.re b/src/distPlus/expressionTree/Operation.re index b2214c1e..29cee28b 100644 --- a/src/distPlus/expressionTree/Operation.re +++ b/src/distPlus/expressionTree/Operation.re @@ -85,7 +85,7 @@ module T = { Pointwise.format(op, nodeToString(t1), nodeToString(t2)) | `VerticalScaling(scaleOp, t, scaleBy) => Scale.format(scaleOp, nodeToString(t), nodeToString(scaleBy)) - | `Normalize(t) => "normalize(" ++ nodeToString(t) ++ ")" + | `Normalize(t) => "normalize(k" ++ nodeToString(t) ++ ")" | `FloatFromDist(floatFromDistOp, t) => DistToFloat.format(floatFromDistOp, nodeToString(t)) | `Truncate(lc, rc, t) => truncateToString(lc, rc, nodeToString(t)) From ca9f725ae75556928fef0efebbca3ade299d4fbe Mon Sep 17 00:00:00 2001 From: Sebastian Kosch Date: Fri, 3 Jul 2020 14:55:27 -0700 Subject: [PATCH 29/31] Remove Leaf and Operation wrapper types --- __tests__/Distributions__Test.re | 4 +- src/components/Drawer.re | 2 +- src/distPlus/expressionTree/ExpressionTree.re | 11 +- .../expressionTree/ExpressionTreeEvaluator.re | 276 ++++++++---------- .../expressionTree/ExpressionTypes.re | 24 +- src/distPlus/expressionTree/MathJsParser.re | 90 +++--- src/distPlus/expressionTree/Operation.re | 3 +- src/distPlus/symbolic/SymbolicDist.re | 12 +- 8 files changed, 193 insertions(+), 229 deletions(-) diff --git a/__tests__/Distributions__Test.re b/__tests__/Distributions__Test.re index 341ef8a4..0b2e30e6 100644 --- a/__tests__/Distributions__Test.re +++ b/__tests__/Distributions__Test.re @@ -383,9 +383,9 @@ describe("Shape", () => { let numSamples = 10000; open Distributions.Shape; let normal: SymbolicTypes.symbolicDist = `Normal({mean, stdev}); - let normalShape = ExpressionTree.toShape(numSamples, `Leaf(`SymbolicDist(normal))); + let normalShape = ExpressionTree.toShape(numSamples, `SymbolicDist(normal)); let lognormal = 
SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev); - let lognormalShape = ExpressionTree.toShape(numSamples, `Leaf(`SymbolicDist(lognormal))); + let lognormalShape = ExpressionTree.toShape(numSamples, `SymbolicDist(lognormal)); makeTestCloseEquality( "Mean of a normal", diff --git a/src/components/Drawer.re b/src/components/Drawer.re index 8a0f2cfa..f9ae5ddb 100644 --- a/src/components/Drawer.re +++ b/src/components/Drawer.re @@ -389,7 +389,7 @@ module Draw = { let numSamples = 3000; let normal: SymbolicTypes.symbolicDist = `Normal({mean, stdev}); - let normalShape = ExpressionTree.toShape(numSamples, `Leaf(`SymbolicDist(normal))); + let normalShape = ExpressionTree.toShape(numSamples, `SymbolicDist(normal)); let xyShape: Types.xyShape = switch (normalShape) { | Mixed(_) => {xs: [||], ys: [||]} diff --git a/src/distPlus/expressionTree/ExpressionTree.re b/src/distPlus/expressionTree/ExpressionTree.re index 2ceb783b..bd162bbf 100644 --- a/src/distPlus/expressionTree/ExpressionTree.re +++ b/src/distPlus/expressionTree/ExpressionTree.re @@ -2,10 +2,11 @@ open ExpressionTypes.ExpressionTree; let toShape = (sampleCount: int, node: node) => { let renderResult = - ExpressionTreeEvaluator.toLeaf(`Operation(`Render(node)), sampleCount); + `Render(`Normalize(node)) + |> ExpressionTreeEvaluator.toLeaf({sampleCount: sampleCount}); switch (renderResult) { - | Ok(`Leaf(`RenderedDist(rs))) => + | Ok(`RenderedDist(rs)) => let continuous = Distributions.Shape.T.toContinuous(rs); let discrete = Distributions.Shape.T.toDiscrete(rs); let shape = MixedShapeBuilder.buildSimple(~continuous, ~discrete); @@ -17,6 +18,6 @@ let toShape = (sampleCount: int, node: node) => { let rec toString = fun - | `Leaf(`SymbolicDist(d)) => SymbolicDist.T.toString(d) - | `Leaf(`RenderedDist(_)) => "[shape]" - | `Operation(op) => Operation.T.toString(toString, op); + | `SymbolicDist(d) => SymbolicDist.T.toString(d) + | `RenderedDist(_) => "[shape]" + | op => Operation.T.toString(toString, op); diff --git 
a/src/distPlus/expressionTree/ExpressionTreeEvaluator.re b/src/distPlus/expressionTree/ExpressionTreeEvaluator.re index 348f91ef..6c5210f8 100644 --- a/src/distPlus/expressionTree/ExpressionTreeEvaluator.re +++ b/src/distPlus/expressionTree/ExpressionTreeEvaluator.re @@ -1,91 +1,83 @@ -/* This module represents a tree node. */ open ExpressionTypes; open ExpressionTypes.ExpressionTree; type t = node; type tResult = node => result(node, string); +type renderParams = { + sampleCount: int, +}; + /* Given two random variables A and B, this returns the distribution of a new variable that is the result of the operation on A and B. For instance, normal(0, 1) + normal(1, 1) -> normal(1, 2). In general, this is implemented via convolution. */ module AlgebraicCombination = { - let toTreeNode = (op, t1, t2) => - `Operation(`AlgebraicCombination((op, t1, t2))); - let tryAnalyticalSolution = - fun - | `Operation( - `AlgebraicCombination( - operation, - `Leaf(`SymbolicDist(d1)), - `Leaf(`SymbolicDist(d2)), - ), - ) as t => - switch (SymbolicDist.T.attemptAnalyticalOperation(d1, d2, operation)) { - | `AnalyticalSolution(symbolicDist) => - Ok(`Leaf(`SymbolicDist(symbolicDist))) + let tryAnalyticalSimplification = (operation, t1: t, t2: t) => + switch (operation, t1, t2) { + | (operation, + `SymbolicDist(d1), + `SymbolicDist(d2), + ) => + switch (SymbolicDist.T.tryAnalyticalSimplification(d1, d2, operation)) { + | `AnalyticalSolution(symbolicDist) => Ok(`SymbolicDist(symbolicDist)) | `Error(er) => Error(er) - | `NoSolution => Ok(t) + | `NoSolution => Ok(`AlgebraicCombination(operation, t1, t2)) } - | t => Ok(t); + | _ => Ok(`AlgebraicCombination(operation, t1, t2)) + }; - // todo: I don't like the name evaluateNumerically that much, if this renders and does it algebraically. It's tricky. 
- let evaluateNumerically = (algebraicOp, operationToLeaf, t1, t2) => { - // force rendering into shapes - let renderShape = r => operationToLeaf(`Render(r)); + let combineAsShapes = (toLeaf, renderParams, algebraicOp, t1, t2) => { + let renderShape = r => toLeaf(renderParams, `Render(r)); switch (renderShape(t1), renderShape(t2)) { - | (Ok(`Leaf(`RenderedDist(s1))), Ok(`Leaf(`RenderedDist(s2)))) => + | (Ok(`RenderedDist(s1)), Ok(`RenderedDist(s2))) => Ok( - `Leaf( `RenderedDist( Distributions.Shape.combineAlgebraically(algebraicOp, s1, s2), ), - ), ) | (Error(e1), _) => Error(e1) | (_, Error(e2)) => Error(e2) - | _ => Error("Could not render shapes.") + | _ => Error("Algebraic combination: rendering failed.") }; }; - let toLeaf = + let operationToLeaf = ( - operationToLeaf, + toLeaf, + renderParams: renderParams, algebraicOp: ExpressionTypes.algebraicOperation, t1: t, t2: t, ) : result(node, string) => - toTreeNode(algebraicOp, t1, t2) - |> tryAnalyticalSolution + + algebraicOp + |> tryAnalyticalSimplification(_, t1, t2) |> E.R.bind( _, fun - | `Leaf(d) => Ok(`Leaf(d)) // the analytical simplifaction worked, nice! - | `Operation(_) => - // if not, run the convolution - evaluateNumerically(algebraicOp, operationToLeaf, t1, t2), + | `SymbolicDist(d) as t => Ok(t) + | _ => combineAsShapes(toLeaf, renderParams, algebraicOp, t1, t2) ); }; module VerticalScaling = { - let toLeaf = (operationToLeaf, scaleOp, t, scaleBy) => { + let operationToLeaf = (toLeaf, renderParams, scaleOp, t, scaleBy) => { // scaleBy has to be a single float, otherwise we'll return an error. 
let fn = Operation.Scale.toFn(scaleOp); let knownIntegralSumFn = Operation.Scale.toKnownIntegralSumFn(scaleOp); - let renderedShape = operationToLeaf(`Render(t)); + let renderedShape = toLeaf(renderParams, `Render(t)); switch (renderedShape, scaleBy) { - | (Ok(`Leaf(`RenderedDist(rs))), `Leaf(`SymbolicDist(`Float(sm)))) => + | (Ok(`RenderedDist(rs)), `SymbolicDist(`Float(sm))) => Ok( - `Leaf( `RenderedDist( Distributions.Shape.T.mapY( ~knownIntegralSumFn=knownIntegralSumFn(sm), fn(sm), rs, ), - ), ), ) | (Error(e1), _) => Error(e1) @@ -95,31 +87,27 @@ module VerticalScaling = { }; module PointwiseCombination = { - let pointwiseAdd = (operationToLeaf, t1, t2) => { - let renderedShape1 = operationToLeaf(`Render(t1)); - let renderedShape2 = operationToLeaf(`Render(t2)); - - switch (renderedShape1, renderedShape2) { - | (Ok(`Leaf(`RenderedDist(rs1))), Ok(`Leaf(`RenderedDist(rs2)))) => + let pointwiseAdd = (toLeaf, renderParams, t1, t2) => { + let renderShape = r => toLeaf(renderParams, `Render(r)); + switch (renderShape(t1), renderShape(t2)) { + | (Ok(`RenderedDist(rs1)), Ok(`RenderedDist(rs2))) => Ok( - `Leaf( - `RenderedDist( - Distributions.Shape.combinePointwise( - ~knownIntegralSumsFn=(a, b) => Some(a +. b), - (+.), - rs1, - rs2, - ), + `RenderedDist( + Distributions.Shape.combinePointwise( + ~knownIntegralSumsFn=(a, b) => Some(a +. b), + (+.), + rs1, + rs2, ), ), ) | (Error(e1), _) => Error(e1) | (_, Error(e2)) => Error(e2) - | _ => Error("Could not perform pointwise addition.") + | _ => Error("Pointwise combination: rendering failed.") }; }; - let pointwiseMultiply = (operationToLeaf, t1, t2) => { + let pointwiseMultiply = (toLeaf, renderParams, t1, t2) => { // TODO: construct a function that we can easily sample from, to construct // a RenderedDist. Use the xMin and xMax of the rendered shapes to tell the sampling function where to look. 
Error( @@ -127,84 +115,72 @@ module PointwiseCombination = { ); }; - let toLeaf = (operationToLeaf, pointwiseOp, t1, t2) => { + let operationToLeaf = (toLeaf, renderParams, pointwiseOp, t1, t2) => { switch (pointwiseOp) { - | `Add => pointwiseAdd(operationToLeaf, t1, t2) - | `Multiply => pointwiseMultiply(operationToLeaf, t1, t2) + | `Add => pointwiseAdd(toLeaf, renderParams, t1, t2) + | `Multiply => pointwiseMultiply(toLeaf, renderParams, t1, t2) }; }; }; module Truncate = { - module Simplify = { - let tryTruncatingNothing: tResult = - fun - | `Operation(`Truncate(None, None, `Leaf(d))) => Ok(`Leaf(d)) - | t => Ok(t); - - let tryTruncatingUniform: tResult = - fun - | `Operation(`Truncate(lc, rc, `Leaf(`SymbolicDist(`Uniform(u))))) => { - // just create a new Uniform distribution - let newLow = max(E.O.default(neg_infinity, lc), u.low); - let newHigh = min(E.O.default(infinity, rc), u.high); - Ok(`Leaf(`SymbolicDist(`Uniform({low: newLow, high: newHigh})))); - } - | t => Ok(t); - - let attempt = (leftCutoff, rightCutoff, t): result(node, string) => { - let originalTreeNode = - `Operation(`Truncate((leftCutoff, rightCutoff, t))); - - originalTreeNode - |> tryTruncatingNothing - |> E.R.bind(_, tryTruncatingUniform); + let trySimplification = (leftCutoff, rightCutoff, t) => { + switch (leftCutoff, rightCutoff, t) { + | (None, None, t) => Ok(t) + | (lc, rc, `SymbolicDist(`Uniform(u))) => { + // just create a new Uniform distribution + let nu: SymbolicTypes.uniform = u; + let newLow = max(E.O.default(neg_infinity, lc), nu.low); + let newHigh = min(E.O.default(infinity, rc), nu.high); + Ok(`SymbolicDist(`Uniform({low: newLow, high: newHigh}))); + } + | (_, _, t) => Ok(t) }; }; - let evaluateNumerically = (leftCutoff, rightCutoff, operationToLeaf, t) => { + let truncateAsShape = (toLeaf, renderParams, leftCutoff, rightCutoff, t) => { // TODO: use named args in renderToShape; if we're lucky we can at least get the tail // of a distribution we otherwise wouldn't get at all 
- let renderedShape = operationToLeaf(`Render(t)); + let renderedShape = toLeaf(renderParams, `Render(t)); switch (renderedShape) { - | Ok(`Leaf(`RenderedDist(rs))) => + | Ok(`RenderedDist(rs)) => { let truncatedShape = rs |> Distributions.Shape.T.truncate(leftCutoff, rightCutoff); - Ok(`Leaf(`RenderedDist(rs))); + Ok(`RenderedDist(rs)); + } | Error(e1) => Error(e1) | _ => Error("Could not truncate distribution.") }; }; - let toLeaf = - ( - operationToLeaf, - leftCutoff: option(float), - rightCutoff: option(float), - t: node, - ) - : result(node, string) => { + let operationToLeaf = + ( + toLeaf, + renderParams, + leftCutoff: option(float), + rightCutoff: option(float), + t: node, + ) + : result(node, string) => { t - |> Simplify.attempt(leftCutoff, rightCutoff) + |> trySimplification(leftCutoff, rightCutoff) |> E.R.bind( _, fun - | `Leaf(d) => Ok(`Leaf(d)) // the analytical simplifaction worked, nice! - | `Operation(_) => - evaluateNumerically(leftCutoff, rightCutoff, operationToLeaf, t), - ); // if not, run the convolution + | `SymbolicDist(d) as t => Ok(t) + | _ => truncateAsShape(toLeaf, renderParams, leftCutoff, rightCutoff, t), + ); }; }; module Normalize = { - let rec toLeaf = (operationToLeaf, t: node): result(node, string) => { + let rec operationToLeaf = (toLeaf, renderParams, t: node): result(node, string) => { switch (t) { - | `Leaf(`RenderedDist(s)) => - Ok(`Leaf(`RenderedDist(Distributions.Shape.T.normalize(s)))) - | `Leaf(`SymbolicDist(_)) => Ok(t) - | `Operation(op) => - operationToLeaf(op) |> E.R.bind(_, toLeaf(operationToLeaf)) + | `RenderedDist(s) => + Ok(`RenderedDist(Distributions.Shape.T.normalize(s))) + | `SymbolicDist(_) => Ok(t) + | _ => t |> toLeaf(renderParams) |> E.R.bind(_, operationToLeaf(toLeaf, renderParams)) }; }; }; @@ -212,83 +188,79 @@ module Normalize = { module FloatFromDist = { let symbolicToLeaf = (distToFloatOp: distToFloatOperation, s) => { SymbolicDist.T.operate(distToFloatOp, s) - |> E.R.bind(_, v => 
Ok(`Leaf(`SymbolicDist(`Float(v))))); + |> E.R.bind(_, v => Ok(`SymbolicDist(`Float(v)))); }; let renderedToLeaf = (distToFloatOp: distToFloatOperation, rs: DistTypes.shape) : result(node, string) => { Distributions.Shape.operate(distToFloatOp, rs) - |> (v => Ok(`Leaf(`SymbolicDist(`Float(v))))); + |> (v => Ok(`SymbolicDist(`Float(v)))); }; - let rec toLeaf = - (operationToLeaf, distToFloatOp: distToFloatOperation, t: node) + let rec operationToLeaf = + (toLeaf, renderParams, distToFloatOp: distToFloatOperation, t: node) : result(node, string) => { switch (t) { - | `Leaf(`SymbolicDist(s)) => symbolicToLeaf(distToFloatOp, s) // we want to evaluate the distToFloatOp on the symbolic dist - | `Leaf(`RenderedDist(rs)) => renderedToLeaf(distToFloatOp, rs) - | `Operation(op) => - E.R.bind(operationToLeaf(op), toLeaf(operationToLeaf, distToFloatOp)) + | `SymbolicDist(s) => symbolicToLeaf(distToFloatOp, s) + | `RenderedDist(rs) => renderedToLeaf(distToFloatOp, rs) + | _ => t |> toLeaf(renderParams) |> E.R.bind(_, operationToLeaf(toLeaf, renderParams, distToFloatOp)) }; }; }; module Render = { - let rec toLeaf = + let rec operationToLeaf = ( - operationToLeaf: operation => result(t, string), - sampleCount: int, + toLeaf, + renderParams, t: node, ) : result(t, string) => { switch (t) { - | `Leaf(`SymbolicDist(d)) => - Ok(`Leaf(`RenderedDist(SymbolicDist.T.toShape(sampleCount, d)))) - | `Leaf(`RenderedDist(_)) as t => Ok(t) // already a rendered shape, we're done here - | `Operation(op) => - E.R.bind(operationToLeaf(op), toLeaf(operationToLeaf, sampleCount)) + | `SymbolicDist(d) => + Ok(`RenderedDist(SymbolicDist.T.toShape(renderParams.sampleCount, d))) + | `RenderedDist(_) as t => Ok(t) // already a rendered shape, we're done here + | _ => t |> toLeaf(renderParams) |> E.R.bind(_, operationToLeaf(toLeaf, renderParams)) }; }; }; -let rec operationToLeaf = - (sampleCount: int, op: operation): result(t, string) => { - // the functions that convert the Operation nodes to Leaf 
nodes need to - // have a way to call this function on their children, if their children are themselves Operation nodes. - switch (op) { - | `AlgebraicCombination(algebraicOp, t1, t2) => - AlgebraicCombination.toLeaf( - operationToLeaf(sampleCount), - algebraicOp, - t1, - t2 // we want to give it the option to render or simply leave it as is - ) - | `PointwiseCombination(pointwiseOp, t1, t2) => - PointwiseCombination.toLeaf( - operationToLeaf(sampleCount), - pointwiseOp, - t1, - t2, - ) - | `VerticalScaling(scaleOp, t, scaleBy) => - VerticalScaling.toLeaf(operationToLeaf(sampleCount), scaleOp, t, scaleBy) - | `Truncate(leftCutoff, rightCutoff, t) => - Truncate.toLeaf(operationToLeaf(sampleCount), leftCutoff, rightCutoff, t) - | `FloatFromDist(distToFloatOp, t) => - FloatFromDist.toLeaf(operationToLeaf(sampleCount), distToFloatOp, t) - | `Normalize(t) => Normalize.toLeaf(operationToLeaf(sampleCount), t) - | `Render(t) => Render.toLeaf(operationToLeaf(sampleCount), sampleCount, t) - }; -}; - /* This function recursively goes through the nodes of the parse tree, replacing each Operation node and its subtree with a Data node. Whenever possible, the replacement produces a new Symbolic Data node, but most often it will produce a RenderedDist. This function is used mainly to turn a parse tree into a single RenderedDist that can then be displayed to the user. 
*/ -let toLeaf = (node: t, sampleCount: int): result(t, string) => { +let rec toLeaf = (renderParams, node: t): result(t, string) => { switch (node) { - | `Leaf(d) => Ok(`Leaf(d)) - | `Operation(op) => operationToLeaf(sampleCount, op) + // Leaf nodes just stay leaf nodes + | `SymbolicDist(_) + | `RenderedDist(_) => Ok(node) + // Operations need to be turned into leaves + | `AlgebraicCombination(algebraicOp, t1, t2) => + AlgebraicCombination.operationToLeaf( + toLeaf, + renderParams, + algebraicOp, + t1, + t2 + ) + | `PointwiseCombination(pointwiseOp, t1, t2) => + PointwiseCombination.operationToLeaf( + toLeaf, + renderParams, + pointwiseOp, + t1, + t2, + ) + | `VerticalScaling(scaleOp, t, scaleBy) => + VerticalScaling.operationToLeaf( + toLeaf, renderParams, scaleOp, t, scaleBy + ) + | `Truncate(leftCutoff, rightCutoff, t) => + Truncate.operationToLeaf(toLeaf, renderParams, leftCutoff, rightCutoff, t) + | `FloatFromDist(distToFloatOp, t) => + FloatFromDist.operationToLeaf(toLeaf, renderParams, distToFloatOp, t) + | `Normalize(t) => Normalize.operationToLeaf(toLeaf, renderParams, t) + | `Render(t) => Render.operationToLeaf(toLeaf, renderParams, t) }; }; diff --git a/src/distPlus/expressionTree/ExpressionTypes.re b/src/distPlus/expressionTree/ExpressionTypes.re index 730a228b..06be9967 100644 --- a/src/distPlus/expressionTree/ExpressionTypes.re +++ b/src/distPlus/expressionTree/ExpressionTypes.re @@ -3,22 +3,18 @@ type pointwiseOperation = [ | `Add | `Multiply]; type scaleOperation = [ | `Multiply | `Exponentiate | `Log]; type distToFloatOperation = [ | `Pdf(float) | `Inv(float) | `Mean | `Sample]; -type abstractOperation('a) = [ - | `AlgebraicCombination(algebraicOperation, 'a, 'a) - | `PointwiseCombination(pointwiseOperation, 'a, 'a) - | `VerticalScaling(scaleOperation, 'a, 'a) - | `Render('a) - | `Truncate(option(float), option(float), 'a) - | `Normalize('a) - | `FloatFromDist(distToFloatOperation, 'a) -]; - module ExpressionTree = { - type leaf = [ + type node = 
[ + // leaf nodes: | `SymbolicDist(SymbolicTypes.symbolicDist) | `RenderedDist(DistTypes.shape) + // operations: + | `AlgebraicCombination(algebraicOperation, node, node) + | `PointwiseCombination(pointwiseOperation, node, node) + | `VerticalScaling(scaleOperation, node, node) + | `Render(node) + | `Truncate(option(float), option(float), node) + | `Normalize(node) + | `FloatFromDist(distToFloatOperation, node) ]; - - type node = [ | `Leaf(leaf) | `Operation(operation)] - and operation = abstractOperation(node); }; diff --git a/src/distPlus/expressionTree/MathJsParser.re b/src/distPlus/expressionTree/MathJsParser.re index 92227736..42ebb3ec 100644 --- a/src/distPlus/expressionTree/MathJsParser.re +++ b/src/distPlus/expressionTree/MathJsParser.re @@ -86,29 +86,29 @@ module MathAdtToDistDst = { ); }; - let normal: array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = + let normal: + array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = fun | [|Value(mean), Value(stdev)|] => - Ok(`Leaf(`SymbolicDist(`Normal({mean, stdev})))) + Ok(`SymbolicDist(`Normal({mean, stdev}))) | _ => Error("Wrong number of variables in normal distribution"); - let lognormal: array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = + let lognormal: + array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = fun | [|Value(mu), Value(sigma)|] => - Ok(`Leaf(`SymbolicDist(`Lognormal({mu, sigma})))) + Ok(`SymbolicDist(`Lognormal({mu, sigma}))) | [|Object(o)|] => { let g = Js.Dict.get(o); switch (g("mean"), g("stdev"), g("mu"), g("sigma")) { | (Some(Value(mean)), Some(Value(stdev)), _, _) => Ok( - `Leaf( - `SymbolicDist( - SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev), - ), + `SymbolicDist( + SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev), ), ) | (_, _, Some(Value(mu)), Some(Value(sigma))) => - Ok(`Leaf(`SymbolicDist(`Lognormal({mu, sigma})))) + Ok(`SymbolicDist(`Lognormal({mu, sigma}))) | _ => Error("Lognormal distribution would need mean 
and stdev") }; } @@ -117,51 +117,48 @@ module MathAdtToDistDst = { let to_: array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = fun | [|Value(low), Value(high)|] when low <= 0.0 && low < high => { - Ok( - `Leaf( - `SymbolicDist(SymbolicDist.Normal.from90PercentCI(low, high)), - ), - ); + Ok(`SymbolicDist(SymbolicDist.Normal.from90PercentCI(low, high))); } | [|Value(low), Value(high)|] when low < high => { Ok( - `Leaf( - `SymbolicDist(SymbolicDist.Lognormal.from90PercentCI(low, high)), - ), + `SymbolicDist(SymbolicDist.Lognormal.from90PercentCI(low, high)), ); } | [|Value(_), Value(_)|] => Error("Low value must be less than high value.") | _ => Error("Wrong number of variables in lognormal distribution"); - let uniform: array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = + let uniform: + array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = fun | [|Value(low), Value(high)|] => - Ok(`Leaf(`SymbolicDist(`Uniform({low, high})))) + Ok(`SymbolicDist(`Uniform({low, high}))) | _ => Error("Wrong number of variables in lognormal distribution"); let beta: array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = fun | [|Value(alpha), Value(beta)|] => - Ok(`Leaf(`SymbolicDist(`Beta({alpha, beta})))) + Ok(`SymbolicDist(`Beta({alpha, beta}))) | _ => Error("Wrong number of variables in lognormal distribution"); - let exponential: array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = + let exponential: + array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = fun - | [|Value(rate)|] => - Ok(`Leaf(`SymbolicDist(`Exponential({rate: rate})))) + | [|Value(rate)|] => Ok(`SymbolicDist(`Exponential({rate: rate}))) | _ => Error("Wrong number of variables in Exponential distribution"); - let cauchy: array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = + let cauchy: + array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = fun | [|Value(local), Value(scale)|] => - 
Ok(`Leaf(`SymbolicDist(`Cauchy({local, scale})))) + Ok(`SymbolicDist(`Cauchy({local, scale}))) | _ => Error("Wrong number of variables in cauchy distribution"); - let triangular: array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = + let triangular: + array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = fun | [|Value(low), Value(medium), Value(high)|] => - Ok(`Leaf(`SymbolicDist(`Triangular({low, medium, high})))) + Ok(`SymbolicDist(`Triangular({low, medium, high}))) | _ => Error("Wrong number of variables in triangle distribution"); let multiModal = @@ -192,30 +189,24 @@ module MathAdtToDistDst = { |> E.A.fmapi((index, t) => { let w = weights |> E.A.get(_, index) |> E.O.default(1.0); - `Operation( - `VerticalScaling(( - `Multiply, - t, - `Leaf(`SymbolicDist(`Float(w))), - )), - ); + `VerticalScaling((`Multiply, t, `SymbolicDist(`Float(w)))); }); let pointwiseSum = components |> Js.Array.sliceFrom(1) |> E.A.fold_left( - (acc, x) => { - `Operation(`PointwiseCombination((`Add, acc, x))) - }, + (acc, x) => {`PointwiseCombination((`Add, acc, x))}, E.A.unsafe_get(components, 0), ); - Ok(`Operation(`Normalize(pointwiseSum))); + Ok(`Normalize(pointwiseSum)); }; }; - let arrayParser = (args: array(arg)): result(ExpressionTypes.ExpressionTree.node, string) => { + let arrayParser = + (args: array(arg)) + : result(ExpressionTypes.ExpressionTree.node, string) => { let samples = args |> E.A.fmap( @@ -235,15 +226,18 @@ module MathAdtToDistDst = { SymbolicDist.ContinuousShape.make(_pdf, cdf); }); switch (shape) { - | Some(s) => Ok(`Leaf(`SymbolicDist(`ContinuousShape(s)))) + | Some(s) => Ok(`SymbolicDist(`ContinuousShape(s))) | None => Error("Rendering did not work") }; }; let operationParser = - (name: string, args: array(result(ExpressionTypes.ExpressionTree.node, string))) => { - let toOkAlgebraic = r => Ok(`Operation(`AlgebraicCombination(r))); - let toOkTrunctate = r => Ok(`Operation(`Truncate(r))); + ( + name: string, + args: 
array(result(ExpressionTypes.ExpressionTree.node, string)), + ) => { + let toOkAlgebraic = r => Ok(`AlgebraicCombination(r)); + let toOkTrunctate = r => Ok(`Truncate(r)); switch (name, args) { | ("add", [|Ok(l), Ok(r)|]) => toOkAlgebraic((`Add, l, r)) | ("add", _) => Error("Addition needs two operands") @@ -254,11 +248,11 @@ module MathAdtToDistDst = { | ("divide", [|Ok(l), Ok(r)|]) => toOkAlgebraic((`Divide, l, r)) | ("divide", _) => Error("Division needs two operands") | ("pow", _) => Error("Exponentiation is not yet supported.") - | ("leftTruncate", [|Ok(d), Ok(`Leaf(`SymbolicDist(`Float(lc))))|]) => + | ("leftTruncate", [|Ok(d), Ok(`SymbolicDist(`Float(lc)))|]) => toOkTrunctate((Some(lc), None, d)) | ("leftTruncate", _) => Error("leftTruncate needs two arguments: the expression and the cutoff") - | ("rightTruncate", [|Ok(d), Ok(`Leaf(`SymbolicDist(`Float(rc))))|]) => + | ("rightTruncate", [|Ok(d), Ok(`SymbolicDist(`Float(rc)))|]) => toOkTrunctate((None, Some(rc), d)) | ("rightTruncate", _) => Error( @@ -268,8 +262,8 @@ module MathAdtToDistDst = { "truncate", [| Ok(d), - Ok(`Leaf(`SymbolicDist(`Float(lc)))), - Ok(`Leaf(`SymbolicDist(`Float(rc)))), + Ok(`SymbolicDist(`Float(lc))), + Ok(`SymbolicDist(`Float(rc))), |], ) => toOkTrunctate((Some(lc), Some(rc), d)) @@ -333,7 +327,7 @@ module MathAdtToDistDst = { let rec nodeParser = fun - | Value(f) => Ok(`Leaf(`SymbolicDist(`Float(f)))) + | Value(f) => Ok(`SymbolicDist(`Float(f))) | Fn({name, args}) => functionParser(nodeParser, name, args) | _ => { Error("This type not currently supported"); diff --git a/src/distPlus/expressionTree/Operation.re b/src/distPlus/expressionTree/Operation.re index 29cee28b..33c05461 100644 --- a/src/distPlus/expressionTree/Operation.re +++ b/src/distPlus/expressionTree/Operation.re @@ -89,5 +89,6 @@ module T = { | `FloatFromDist(floatFromDistOp, t) => DistToFloat.format(floatFromDistOp, nodeToString(t)) | `Truncate(lc, rc, t) => truncateToString(lc, rc, nodeToString(t)) - | `Render(t) => 
nodeToString(t); + | `Render(t) => nodeToString(t) + | _ => ""; // SymbolicDist and RenderedDist are handled in ExpressionTree.toString. }; diff --git a/src/distPlus/symbolic/SymbolicDist.re b/src/distPlus/symbolic/SymbolicDist.re index 94e513d6..96ecf0c1 100644 --- a/src/distPlus/symbolic/SymbolicDist.re +++ b/src/distPlus/symbolic/SymbolicDist.re @@ -269,23 +269,23 @@ module T = { }; }; - /* This returns an optional that wraps a result. If the optional is None, - there is no valid analytic solution. If it Some, it + /* Calling e.g. "Normal.operate" returns an optional that wraps a result. + If the optional is None, there is no valid analytic solution. If it is Some, it can still return an error if there is a serious problem, - like in the casea of a divide by 0. + like in the case of a divide by 0. */ - type analyticalSolutionAttempt = [ + type analyticalSimplificationResult = [ | `AnalyticalSolution(SymbolicTypes.symbolicDist) | `Error(string) | `NoSolution ]; - let attemptAnalyticalOperation = + let tryAnalyticalSimplification = ( d1: symbolicDist, d2: symbolicDist, op: ExpressionTypes.algebraicOperation, ) - : analyticalSolutionAttempt => + : analyticalSimplificationResult => switch (d1, d2) { | (`Float(v1), `Float(v2)) => switch (Operation.Algebraic.applyFn(op, v1, v2)) { From 730dbddaf96b449cbddca4311bc1fe05189e1acf Mon Sep 17 00:00:00 2001 From: Sebastian Kosch Date: Fri, 3 Jul 2020 17:13:26 -0700 Subject: [PATCH 30/31] Fix multiplication of variances in ShapeConvolution --- src/distPlus/distribution/ShapeConvolution.re | 35 ++++++++++++------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/src/distPlus/distribution/ShapeConvolution.re b/src/distPlus/distribution/ShapeConvolution.re index 538a5117..5fadda1c 100644 --- a/src/distPlus/distribution/ShapeConvolution.re +++ b/src/distPlus/distribution/ShapeConvolution.re @@ -80,15 +80,20 @@ let toDiscretePointMassesFromTriangulars = {n: n - 2, masses, means, variances}; } else { for (i in 1 to n - 
2) { + + // area of triangle = width * height / 2 let _ = Belt.Array.set( masses, i - 1, (xs[i + 1] -. xs[i - 1]) *. ys[i] /. 2., ); + + // means of triangle = (a + b + c) / 3 let _ = Belt.Array.set(means, i - 1, (xs[i - 1] +. xs[i] +. xs[i + 1]) /. 3.); + // variance of triangle = (a^2 + b^2 + c^2 - ab - ac - bc) / 18 let _ = Belt.Array.set( variances, @@ -118,7 +123,10 @@ let combineShapesContinuousContinuous = // if we add the two distributions, we should probably use normal filters. // if we multiply the two distributions, we should probably use lognormal filters. let t1m = toDiscretePointMassesFromTriangulars(s1); - let t2m = toDiscretePointMassesFromTriangulars(s2); + let t2m = switch (op) { + | `Divide => toDiscretePointMassesFromTriangulars(~inverse=true, s2) + | _ => toDiscretePointMassesFromTriangulars(~inverse=false, s2) + }; let combineMeansFn = switch (op) { @@ -134,7 +142,7 @@ let combineShapesContinuousContinuous = | `Add => ((v1, v2, m1, m2) => v1 +. v2) | `Subtract => ((v1, v2, m1, m2) => v1 +. v2) | `Multiply => ( - (v1, v2, m1, m2) => v1 *. v2 +. v1 *. m1 ** 2. +. v2 *. m1 ** 2. + (v1, v2, m1, m2) => v1 *. v2 +. v1 *. m2 ** 2. +. v2 *. m1 ** 2. ) | `Divide => ( (v1, vInv2, m1, mInv2) => @@ -142,6 +150,7 @@ let combineShapesContinuousContinuous = ) }; + // TODO: If operating on two positive-domain distributions, we should take that into account let outputMinX: ref(float) = ref(infinity); let outputMaxX: ref(float) = ref(neg_infinity); let masses: array(float) = @@ -180,20 +189,22 @@ let combineShapesContinuousContinuous = // we now want to create a set of target points. 
For now, let's just evenly distribute 200 points between // between the outputMinX and outputMaxX - let outputXs: array(float) = - E.A.Floats.range(outputMinX^, outputMaxX^, 200); - let outputYs: array(float) = Belt.Array.make(200, 0.0); + let nOut = 300; + let outputXs: array(float) = E.A.Floats.range(outputMinX^, outputMaxX^, nOut); + let outputYs: array(float) = Belt.Array.make(nOut, 0.0); // now, for each of the outputYs, accumulate from a Gaussian kernel over each input point. - for (i in 0 to E.A.length(outputXs) - 1) { - for (j in 0 to E.A.length(masses) - 1) { - let dx = outputXs[i] -. means[j]; - let contribution = - masses[j] *. exp(-. (dx ** 2.) /. (2. *. variances[j])); - let _ = Belt.Array.set(outputYs, i, outputYs[i] +. contribution); + for (j in 0 to E.A.length(masses) - 1) { + let _ = if (variances[j] > 0.) { + for (i in 0 to E.A.length(outputXs) - 1) { + let dx = outputXs[i] -. means[j]; + let contribution = masses[j] *. exp(-. (dx ** 2.) /. (2. *. variances[j])); + let _ = Belt.Array.set(outputYs, i, outputYs[i] +. 
contribution); + (); + }; (); }; (); }; {xs: outputXs, ys: outputYs}; -}; \ No newline at end of file +}; From 0d5a0f5aebc4d46e465c1496369d93f818ecf0e0 Mon Sep 17 00:00:00 2001 From: Sebastian Kosch Date: Fri, 3 Jul 2020 17:15:37 -0700 Subject: [PATCH 31/31] ShapeConvolution -> AlgebraicShapeCombination --- .../{ShapeConvolution.re => AlgebraicShapeCombination.re} | 0 src/distPlus/distribution/Distributions.re | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename src/distPlus/distribution/{ShapeConvolution.re => AlgebraicShapeCombination.re} (100%) diff --git a/src/distPlus/distribution/ShapeConvolution.re b/src/distPlus/distribution/AlgebraicShapeCombination.re similarity index 100% rename from src/distPlus/distribution/ShapeConvolution.re rename to src/distPlus/distribution/AlgebraicShapeCombination.re diff --git a/src/distPlus/distribution/Distributions.re b/src/distPlus/distribution/Distributions.re index 9d7fe31c..c3fdf6f5 100644 --- a/src/distPlus/distribution/Distributions.re +++ b/src/distPlus/distribution/Distributions.re @@ -342,7 +342,7 @@ module Continuous = { empty; } else { let combinedShape = - ShapeConvolution.combineShapesContinuousContinuous(op, s1, s2); + AlgebraicShapeCombination.combineShapesContinuousContinuous(op, s1, s2); let combinedIntegralSum = Common.combineIntegralSums( (a, b) => Some(a *. b),