From f6c1918b122a9c4c28bed10d1c9a6b790f8b1271 Mon Sep 17 00:00:00 2001 From: Sebastian Kosch Date: Fri, 12 Jun 2020 23:30:51 -0700 Subject: [PATCH] Big refactor towards proper distTree, still slow and untested --- src/components/Drawer.re | 2 +- src/distPlus/distribution/DistTypes.re | 2 +- src/distPlus/distribution/Distributions.re | 18 +- src/distPlus/distribution/XYShape.re | 29 +- src/distPlus/renderers/RenderTypes.re | 4 +- src/distPlus/symbolic/MathJsParser.re | 121 +++-- src/distPlus/symbolic/SymbolicDist.re | 546 +++++++++++++-------- 7 files changed, 475 insertions(+), 247 deletions(-) diff --git a/src/components/Drawer.re b/src/components/Drawer.re index fa0babbe..090447b5 100644 --- a/src/components/Drawer.re +++ b/src/components/Drawer.re @@ -986,4 +986,4 @@ let make = () => { ; -}; \ No newline at end of file +}; diff --git a/src/distPlus/distribution/DistTypes.re b/src/distPlus/distribution/DistTypes.re index 7a598c01..948eb3ae 100644 --- a/src/distPlus/distribution/DistTypes.re +++ b/src/distPlus/distribution/DistTypes.re @@ -153,4 +153,4 @@ module MixedPoint = { }; let add = combine2((a, b) => a +. b); -}; \ No newline at end of file +}; diff --git a/src/distPlus/distribution/Distributions.re b/src/distPlus/distribution/Distributions.re index 2472f2ab..16be6872 100644 --- a/src/distPlus/distribution/Distributions.re +++ b/src/distPlus/distribution/Distributions.re @@ -74,6 +74,21 @@ module Continuous = { (fn, {xyShape, interpolation}: t): option(DistTypes.continuousShape) => fn(xyShape) |> E.O.fmap(make(interpolation)); + let empty: DistTypes.continuousShape = {xyShape: XYShape.T.empty, interpolation: `Linear}; + let combine = + (fn, t1: DistTypes.continuousShape, t2: DistTypes.continuousShape) + : DistTypes.continuousShape => { + make(`Linear, XYShape.Combine.combine( + ~xsSelection=ALL_XS, + ~xToYSelection=XYShape.XtoY.linear, + ~fn, + t1.xyShape, + t2.xyShape, + )); + }; + let reduce = (fn, items) => + items |> E.A.fold_left(combine(fn), empty); + let toLinear = (t: t): option(t) => { switch (t) { | {interpolation: `Stepwise, xyShape} => @@ -166,6 +181,7 @@ module Discrete = { let sortedByX = (t: DistTypes.discreteShape) => t |> XYShape.T.zip |> XYShape.Zipped.sortByX; let empty = XYShape.T.empty; + let make = (s: DistTypes.discreteShape) => s; let combine = (fn, t1: DistTypes.discreteShape, t2: DistTypes.discreteShape) : DistTypes.discreteShape => { @@ -708,4 +724,4 @@ module DistPlusTime = { |> E.O.fmap(x => DistPlus.T.Integral.xToY(~cache=None, x, t)); }; }; -}; \ No newline at end of file +}; diff --git a/src/distPlus/distribution/XYShape.re b/src/distPlus/distribution/XYShape.re index aeed1bae..cf3600a9 100644 --- a/src/distPlus/distribution/XYShape.re +++ b/src/distPlus/distribution/XYShape.re @@ -179,16 +179,25 @@ module Combine = { t1: T.t, t2: T.t, ) => { - let allXs = - switch (xsSelection) { - | ALL_XS => Ts.allXs([|t1, t2|]) - | XS_EVENLY_DIVIDED(sampleCount) => - Ts.equallyDividedXs([|t1, t2|], sampleCount) - }; - let allYs = - allXs |> E.A.fmap(x => fn(xToYSelection(x, t1), xToYSelection(x, t2))); - T.fromArrays(allXs, allYs); + switch ((E.A.length(t1.xs), E.A.length(t2.xs))) { + | (0, 0) => T.empty + | (0, _) => t2 + | (_, 0) => t1 + | (_, _) => { + let allXs = + switch (xsSelection) { + | ALL_XS => Ts.allXs([|t1, t2|]) + | XS_EVENLY_DIVIDED(sampleCount) => + Ts.equallyDividedXs([|t1, t2|], sampleCount) + }; + + let allYs = + allXs |> E.A.fmap(x => fn(xToYSelection(x, t1), xToYSelection(x, t2))); + + T.fromArrays(allXs, allYs); + } + } }; let combineLinear = combine(~xToYSelection=XtoY.linear); @@ -354,4 +363,4 @@ module Analysis = { }; let squareXYShape = T.mapX(x => x ** 2.0) -}; \ No newline at end of file +}; diff --git a/src/distPlus/renderers/RenderTypes.re b/src/distPlus/renderers/RenderTypes.re index 95a36204..99a53aae 100644 --- a/src/distPlus/renderers/RenderTypes.re +++ b/src/distPlus/renderers/RenderTypes.re @@ -43,7 +43,7 @@ module ShapeRenderer = { module Symbolic = { type inputs = {length: int}; type outputs = { - graph: SymbolicDist.bigDist, + graph: SymbolicDist.distTree, shape: DistTypes.shape, }; let make = (graph, shape) => {graph, shape}; @@ -124,4 +124,4 @@ module DistPlusRenderer = { let shapeRenderOutputs = (t:outputs) => t.shapeRenderOutputs let make = (shapeRenderOutputs, distPlus) => {shapeRenderOutputs, distPlus}; } -}; \ No newline at end of file +}; diff --git a/src/distPlus/symbolic/MathJsParser.re b/src/distPlus/symbolic/MathJsParser.re index 41667ad8..b962df90 100644 --- a/src/distPlus/symbolic/MathJsParser.re +++ b/src/distPlus/symbolic/MathJsParser.re @@ -88,73 +88,69 @@ module MathAdtToDistDst = { ); }; - let normal: array(arg) => result(SymbolicDist.bigDist, string) = + let normal: array(arg) => result(SymbolicDist.distTree, string) = fun | [|Value(mean), Value(stdev)|] => - Ok(`Simple(`Normal({mean, stdev}))) + Ok(`Distribution(`Normal({mean, stdev}))) | _ => Error("Wrong number of variables in normal distribution"); - let lognormal: array(arg) => result(SymbolicDist.bigDist, string) = + let lognormal: array(arg) => result(SymbolicDist.distTree, string) = fun - | [|Value(mu), Value(sigma)|] => Ok(`Simple(`Lognormal({mu, sigma}))) + | [|Value(mu), Value(sigma)|] => Ok(`Distribution(`Lognormal({mu, sigma}))) | [|Object(o)|] => { let g = Js.Dict.get(o); switch (g("mean"), g("stdev"), g("mu"), g("sigma")) { | (Some(Value(mean)), Some(Value(stdev)), _, _) => - Ok(`Simple(SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev))) + Ok(`Distribution(SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev))) | (_, _, Some(Value(mu)), Some(Value(sigma))) => - Ok(`Simple(`Lognormal({mu, sigma}))) + Ok(`Distribution(`Lognormal({mu, sigma}))) | _ => Error("Lognormal distribution would need mean and stdev") }; } | _ => Error("Wrong number of variables in lognormal distribution"); - let to_: array(arg) => result(SymbolicDist.bigDist, string) = + let to_: array(arg) => result(SymbolicDist.distTree, string) = fun | [|Value(low), Value(high)|] when low <= 0.0 && low < high=> { - Ok(`Simple(SymbolicDist.Normal.from90PercentCI(low, high))); + Ok(`Distribution(SymbolicDist.Normal.from90PercentCI(low, high))); } | [|Value(low), Value(high)|] when low < high => { - Ok(`Simple(SymbolicDist.Lognormal.from90PercentCI(low, high))); + Ok(`Distribution(SymbolicDist.Lognormal.from90PercentCI(low, high))); } | [|Value(_), Value(_)|] => Error("Low value must be less than high value.") | _ => Error("Wrong number of variables in lognormal distribution"); - let uniform: array(arg) => result(SymbolicDist.bigDist, string) = + let uniform: array(arg) => result(SymbolicDist.distTree, string) = fun - | [|Value(low), Value(high)|] => Ok(`Simple(`Uniform({low, high}))) + | [|Value(low), Value(high)|] => Ok(`Distribution(`Uniform({low, high}))) | _ => Error("Wrong number of variables in lognormal distribution"); - let beta: array(arg) => result(SymbolicDist.bigDist, string) = + let beta: array(arg) => result(SymbolicDist.distTree, string) = fun - | [|Value(alpha), Value(beta)|] => Ok(`Simple(`Beta({alpha, beta}))) + | [|Value(alpha), Value(beta)|] => Ok(`Distribution(`Beta({alpha, beta}))) | _ => Error("Wrong number of variables in lognormal distribution"); - let exponential: array(arg) => result(SymbolicDist.bigDist, string) = + let exponential: array(arg) => result(SymbolicDist.distTree, string) = fun - | [|Value(rate)|] => Ok(`Simple(`Exponential({rate: rate}))) + | [|Value(rate)|] => Ok(`Distribution(`Exponential({rate: rate}))) | _ => Error("Wrong number of variables in Exponential distribution"); - let cauchy: array(arg) => result(SymbolicDist.bigDist, string) = + let cauchy: array(arg) => result(SymbolicDist.distTree, string) = fun | [|Value(local), Value(scale)|] => - Ok(`Simple(`Cauchy({local, scale}))) + Ok(`Distribution(`Cauchy({local, scale}))) | _ => Error("Wrong number of variables in cauchy distribution"); - let triangular: array(arg) => result(SymbolicDist.bigDist, string) = + let triangular: array(arg) => result(SymbolicDist.distTree, string) = fun | [|Value(low), Value(medium), Value(high)|] => - Ok(`Simple(`Triangular({low, medium, high}))) + Ok(`Distribution(`Triangular({low, medium, high}))) | _ => Error("Wrong number of variables in triangle distribution"); - /*let add: array(arg) => result(SymbolicDist.bigDist, string) = - fun - | */ - let multiModal = ( - args: array(result(SymbolicDist.bigDist, string)), + args: array(result(SymbolicDist.distTree, string)), weights: option(array(float)), ) => { let weights = weights |> E.O.default([||]); @@ -162,8 +158,14 @@ module MathAdtToDistDst = { args |> E.A.fmap( fun - | Ok(`Simple(d)) => Ok(`Simple(d)) - | Ok(`PointwiseCombination(dists)) => Ok(`PointwiseCombination(dists)) + | Ok(`Distribution(d)) => Ok(`Distribution(d)) + | Ok(`Combination(t1, t2, op)) => Ok(`Combination(t1, t2, op)) + | Ok(`PointwiseSum(t1, t2)) => Ok(`PointwiseSum(t1, t2)) + | Ok(`PointwiseProduct(t1, t2)) => Ok(`PointwiseProduct(t1, t2)) + | Ok(`Normalize(t)) => Ok(`Normalize(t)) + | Ok(`LeftTruncate(t, x)) => Ok(`LeftTruncate(t, x)) + | Ok(`RightTruncate(t, x)) => Ok(`RightTruncate(t, x)) + | Ok(`Render(t)) => Ok(`Render(t)) | Error(e) => Error(e) | _ => Error("Unexpected dist") ); @@ -175,16 +177,26 @@ module MathAdtToDistDst = { | Some(Error(e)) => Error(e) | None when withoutErrors |> E.A.length == 0 => Error("Multimodals need at least one input") - | _ => - withoutErrors - |> E.A.fmapi((index, item) => - (item, weights |> E.A.get(_, index) |> E.O.default(1.0)) - ) - |> (r => Ok(`PointwiseCombination(r))) + | _ => { + let components = withoutErrors + |> E.A.fmapi((index, t) => { + let w = weights |> E.A.get(_, index) |> E.O.default(1.0); + + `VerticalScaling(t, `Distribution(`Float(w))) + }); + + let pointwiseSum = components + |> Js.Array.sliceFrom(1) + |> E.A.fold_left((acc, x) => { + `PointwiseSum(acc, x) + }, E.A.unsafe_get(components, 0)) + + Ok(`Normalize(pointwiseSum)) + } }; }; - let arrayParser = (args:array(arg)):result(SymbolicDist.bigDist, string) => { + let arrayParser = (args:array(arg)):result(SymbolicDist.distTree, string) => { let samples = args |> E.A.fmap( fun @@ -200,13 +212,13 @@ module MathAdtToDistDst = { SymbolicDist.ContinuousShape.make(_pdf, cdf) }); switch(shape){ - | Some(s) => Ok(`Simple(`ContinuousShape(s))) + | Some(s) => Ok(`Distribution(`ContinuousShape(s))) | None => Error("Rendering did not work") } } - let rec functionParser = (r): result(SymbolicDist.bigDist, string) => + let rec functionParser = (r): result(SymbolicDist.distTree, string) => r |> ( fun @@ -218,7 +230,7 @@ module MathAdtToDistDst = { | Fn({name: "exponential", args}) => exponential(args) | Fn({name: "cauchy", args}) => cauchy(args) | Fn({name: "triangular", args}) => triangular(args) - | Value(f) => Ok(`Simple(`Float(f))) + | Value(f) => Ok(`Distribution(`Float(f))) | Fn({name: "mm", args}) => { let weights = args @@ -245,25 +257,54 @@ module MathAdtToDistDst = { let dists = possibleDists |> E.A.fmap(functionParser); multiModal(dists, weights); } - //| Fn({name: "add", args}) => add(args) + + | Fn({name: "add", args}) => { + args + |> E.A.fmap(functionParser) + |> (fun + | [|Ok(l), Ok(r)|] => Ok(`Combination(l, r, `AddOperation)) + | _ => Error("Addition needs two operands")) + } + | Fn({name: "subtract", args}) => { + args + |> E.A.fmap(functionParser) + |> (fun + | [|Ok(l), Ok(r)|] => Ok(`Combination(l, r, `SubtractOperation)) + | _ => Error("Subtraction needs two operands")) + } + | Fn({name: "multiply", args}) => { + args + |> E.A.fmap(functionParser) + |> (fun + | [|Ok(l), Ok(r)|] => Ok(`Combination(l, r, `MultiplyOperation)) + | _ => Error("Multiplication needs two operands")) + } + | Fn({name: "divide", args}) => { + args + |> E.A.fmap(functionParser) + |> (fun + | [|Ok(l), Ok(`Distribution(`Float(0.0)))|] => Error("Division by zero") + | [|Ok(l), Ok(r)|] => Ok(`Combination(l, r, `DivideOperation)) + | _ => Error("Division needs two operands")) + } | Fn({name}) => Error(name ++ ": function not supported") | _ => { Error("This type not currently supported"); } ); - let topLevel = (r): result(SymbolicDist.bigDist, string) => + let topLevel = (r): result(SymbolicDist.distTree, string) => r |> ( fun | Fn(_) => functionParser(r) - | Value(r) => Ok(`Simple(`Float(r))) + | Value(r) => Ok(`Distribution(`Float(r))) | Array(r) => arrayParser(r) | Symbol(_) => Error("Symbol not valid as top level") | Object(_) => Error("Object not valid as top level") ); - let run = (r): result(SymbolicDist.bigDist, string) => + let run = (r): result(SymbolicDist.distTree, string) => r |> MathAdtCleaner.run |> topLevel; }; diff --git a/src/distPlus/symbolic/SymbolicDist.re b/src/distPlus/symbolic/SymbolicDist.re index 05eec52c..f141bf92 100644 --- a/src/distPlus/symbolic/SymbolicDist.re +++ b/src/distPlus/symbolic/SymbolicDist.re @@ -50,41 +50,28 @@ type dist = [ | `Float(float) // Dirac delta at x. Practically useful only in the context of multimodals. ]; -/* Build a tree. +type integral = float; +type cutoffX = float; +type operation = [ + | `AddOperation + | `SubtractOperation + | `MultiplyOperation + | `DivideOperation + | `ExponentiateOperation +]; - Multiple operations possible: - - - PointwiseSum(Scalar, Scalar) - - PointwiseSum(WeightedDist, WeightedDist) - - PointwiseProduct(Scalar, Scalar) - - PointwiseProduct(Scalar, WeightedDist) - - PointwiseProduct(WeightedDist, WeightedDist) - - - IndependentVariableSum(WeightedDist, WeightedDist) [i.e., convolution] - - IndependentVariableProduct(WeightedDist, WeightedDist) [i.e. distribution product] - */ - -/*type weightedDist = (float, dist); - -type bigDistTree = - /* | DistLeaf(dist) */ - /* | ScalarLeaf(float) */ - /* | PointwiseScalarDistProduct(DistLeaf(d), ScalarLeaf(s)) */ - | WeightedDistLeaf(weightedDist) - | PointwiseNormalizedDistSum(array(bigDistTree)); - -let rec treeIntegral = item => { - switch (item) { - | WeightedDistLeaf((w, d)) => w - | PointwiseNormalizedDistSum(childTrees) => - childTrees |> E.A.fmap(treeIntegral) |> E.A.Floats.sum - }; -};*/ - -/* bigDist can either be a single distribution, or a - PointwiseCombination, i.e. an array of (dist, weight) tuples */ -type bigDist = [ | `Simple(dist) | `PointwiseCombination(pointwiseAdd)] -and pointwiseAdd = array((bigDist, float)); +type distTree = [ + | `Distribution(dist) + | `Combination(distTree, distTree, operation) + | `PointwiseSum(distTree, distTree) + | `PointwiseProduct(distTree, distTree) + | `VerticalScaling(distTree, distTree) + | `Normalize(distTree) + | `LeftTruncate(distTree, cutoffX) + | `RightTruncate(distTree, cutoffX) + | `Render(distTree) +] +and weightedDists = array((distTree, float)); module ContinuousShape = { type t = continuousShape; @@ -326,138 +313,331 @@ module GenericSimple = { }; }; -module PointwiseAddDistributionsWeighted = { - type t = pointwiseAdd; +module DistTree = { + type nodeResult = [ + | `Distribution(dist) + // RenderedShape: continuous xyShape, discrete xyShape, total value. + | `RenderedShape(DistTypes.continuousShape, DistTypes.discreteShape, integral) + ]; - let normalizeWeights = (weightedDists: t) => { - let total = weightedDists |> E.A.fmap(snd) |> E.A.Floats.sum; - weightedDists |> E.A.fmap(((d, w)) => (d, w /. total)); + let evaluateDistribution = (d: dist): nodeResult => { + // certain distributions we may want to evaluate to RenderedShapes right away, e.g. discrete + `Distribution(d) }; - let rec pdf = (x: float, weightedNormalizedDists: t) => - weightedNormalizedDists - |> E.A.fmap(((d, w)) => { - switch (d) { - | `PointwiseCombination(ts) => pdf(x, ts) *. w - | `Simple(d) => GenericSimple.pdf(x, d) *. w - } - }) - |> E.A.Floats.sum; + // This is a performance bottleneck! + // Using raw JS here so we can use native for loops and access array elements + // directly, without option checks. + let jsCombinationConvolve: (array(float), array(float), array(float), array(float), float => float => float) => (array(float), array(float)) = [%bs.raw + {| + function (s1xs, s1ys, s2xs, s2ys, func) { + const r = new Map(); - // TODO: perhaps rename into minCdfX? - // TODO: how should nonexistent min values be handled? They should never happen - let rec min = (dists: t) => - dists - |> E.A.fmap(((d, w)) => { - switch (d) { - | `PointwiseCombination(ts) => E.O.toExn("Dist has no min", min(ts)) - | `Simple(d) => GenericSimple.min(d) - } - }) - |> E.A.min; + // To convolve, add the xs and multiply the ys: + for (let i = 0; i < s1xs.length; i++) { + for (let j = 0; j < s2xs.length; j++) { + const x = func(s1xs[i], s2xs[j]); + const cv = r.get(x) | 0; + r.set(x, cv + s1ys[i] * s2ys[j]); // add up the ys, if same x + } + } - // TODO: perhaps rename into minCdfX? - let rec max = (dists: t) => - dists - |> E.A.fmap(((d, w)) => { - switch (d) { - | `PointwiseCombination(ts) => E.O.toExn("Dist has no max", max(ts)) - | `Simple(d) => GenericSimple.max(d) - } - }) - |> E.A.max; + const rxys = [...r.entries()]; + rxys.sort(([x1, y1], [x2, y2]) => x1 - x2); + const rxs = new Array(rxys.length); + const rys = new Array(rxys.length); - /*let rec discreteShape = (dists: t, sampleCount: int) => { - let discrete = - dists - |> E.A.fmap(((x, w)) => { - switch (d) { - | `Float(d) => Some((d, w)) // if the distribution is just a number, then the weight is considered the y - | _ => None - } - }) - |> E.A.O.concatSomes - |> E.A.fmap(((x, y)) => - ({xs: [|x|], ys: [|y|]}: DistTypes.xyShape) - ) - // take an array of xyShapes and combine them together - //* r - |> ( - fun - | `Float(r) => Some((r, e)) - | _ => None - ) - )*/ - |> Distributions.Discrete.reduce((+.)); - discrete; - };*/ + for (let i = 0; i < rxys.length; i++) { + rxs[i] = rxys[i][0]; + rys[i] = rxys[i][1]; + } + return [rxs, rys]; + } + |}]; - let rec findContinuousXs = (dists: t, sampleCount: int) => { - // we need to go through the tree of distributions and, for the continuous ones, find the xs at which - // later, all distributions will get evaluated. + let funcFromOp = (op: operation) => { + switch (op) { + | `AddOperation => (+.) + | `SubtractOperation => (-.) + | `MultiplyOperation => (*.) + | `DivideOperation => (/.) + | `ExponentiateOperation => (**) + } + } - // we want to accumulate a set of xs. - let xs: array(float) = - dists - |> E.A.fold_left((accXs, (d, w)) => { - switch (d) { - | `Simple(t) when (GenericSimple.contType(t) == `Discrete) => accXs - | `Simple(d) => { - let xs = GenericSimple.interpolateXs(~xSelection=`ByWeight, d, sampleCount) - - E.A.append(accXs, xs) - } - | `PointwiseCombination(ts) => { - let xs = findContinuousXs(ts, sampleCount); - E.A.append(accXs, xs) - } - } - }, [||]); - xs + let renderDistributionToXYShape = (d: dist, sampleCount: int): (DistTypes.continuousShape, DistTypes.discreteShape) => { + // render the distribution into an XY shape + switch (d) { + | `Float(v) => (Distributions.Continuous.empty, {xs: [|v|], ys: [|1.0|]}) + | _ => { + let xs = GenericSimple.interpolateXs(~xSelection=`ByWeight, d, sampleCount); + let ys = xs |> E.A.fmap(x => GenericSimple.pdf(x, d)); + (Distributions.Continuous.make(`Linear, {xs: xs, ys: ys}), XYShape.T.empty) + } + } }; - /* Accumulate (accContShapes, accDistShapes), each of which is an array of {xs, ys} shapes. */ - let rec accumulateContAndDiscShapes = (dists: t, continuousXs: array(float), currentWeight) => { - let normalized = normalizeWeights(dists); + let combinationDistributionOfXYShapes = (sc1: DistTypes.continuousShape, // continuous shape + sd1: DistTypes.discreteShape, // discrete shape + sc2: DistTypes.continuousShape, + sd2: DistTypes.discreteShape, func): (DistTypes.continuousShape, DistTypes.discreteShape) => { - normalized - |> E.A.fold_left(((accContShapes: array(DistTypes.xyShape), accDiscShapes: array(DistTypes.xyShape)), (d, w)) => { - switch (d) { + let (ccxs, ccys) = jsCombinationConvolve(sc1.xyShape.xs, sc1.xyShape.ys, sc2.xyShape.xs, sc2.xyShape.ys, func); + let (dcxs, dcys) = jsCombinationConvolve(sd1.xs, sd1.ys, sc2.xyShape.xs, sc2.xyShape.ys, func); + let (cdxs, cdys) = jsCombinationConvolve(sc1.xyShape.xs, sc1.xyShape.ys, sd2.xs, sd2.ys, func); + let (ddxs, ddys) = jsCombinationConvolve(sd1.xs, sd1.ys, sd2.xs, sd2.ys, func); - | `Simple(`Float(x)) => { - let ds: DistTypes.xyShape = {xs: [|x|], ys: [|w *. currentWeight|]}; - (accContShapes, E.A.append(accDiscShapes, [|ds|])) - } + let ccxy = Distributions.Continuous.make(`Linear, {xs: ccxs, ys: ccys}); + let dcxy = Distributions.Continuous.make(`Linear, {xs: dcxs, ys: dcys}); + let cdxy = Distributions.Continuous.make(`Linear, {xs: cdxs, ys: cdys}); + // the continuous parts are added up; only the discrete-discrete sum is discrete + let continuousShapeSum = Distributions.Continuous.reduce((+.), [|ccxy, dcxy, cdxy|]); - | `Simple(d) when (GenericSimple.contType(d) == `Continuous) => { - let ys = continuousXs |> E.A.fmap(x => GenericSimple.pdf(x, d) *. w *. currentWeight); - let cs = XYShape.T.fromArrays(continuousXs, ys); + let ddxy: DistTypes.discreteShape = {xs: cdxs, ys: cdys}; - (E.A.append(accContShapes, [|cs|]), accDiscShapes) - } + (continuousShapeSum, ddxy) + }; - | `Simple(d) => (accContShapes, accDiscShapes) // default -- should never happen + let evaluateCombinationDistribution = (et1: nodeResult, et2: nodeResult, op: operation, sampleCount: int) => { + /* return either a Distribution or a RenderedShape. Must integrate to 1. */ - | `PointwiseCombination(ts) => { - let (cs, ds) = accumulateContAndDiscShapes(ts, continuousXs, w *. currentWeight); - (E.A.append(accContShapes, cs), E.A.append(accDiscShapes, ds)) - } + let func = funcFromOp(op); + switch ((et1, et2, op)) { + /* Known cases: replace symbolic with symbolic distribution */ + | (`Distribution(`Float(v1)), `Distribution(`Float(v2)), _) => { + `Distribution(`Float(func(v1, v2))) + } + + | (`Distribution(`Float(v1)), `Distribution(`Normal(n2)), `AddOperation) => { + let n: normal = {mean: v1 +. n2.mean, stdev: n2.stdev}; + `Distribution(`Normal(n)) + } + + | (`Distribution(`Normal(n1)), `Distribution(`Normal(n2)), `AddOperation) => { + let n: normal = {mean: n1.mean +. n2.mean, stdev: sqrt(n1.stdev ** 2. +. n2.stdev ** 2.)}; + `Distribution(`Normal(n)); + } + + /* General cases: convolve the XYShapes */ + | (`Distribution(d1), `Distribution(d2), _) => { + let (sc1, sd1) = renderDistributionToXYShape(d1, sampleCount); + let (sc2, sd2) = renderDistributionToXYShape(d2, sampleCount); + let (sc, sd) = combinationDistributionOfXYShapes(sc1, sd1, sc2, sd2, func); + `RenderedShape(sc, sd, 1.0) + } + | (`Distribution(d1), `RenderedShape(sc2, sd2, i2), _) => { + let (sc1, sd1) = renderDistributionToXYShape(d1, sampleCount); + let (sc, sd) = combinationDistributionOfXYShapes(sc1, sd1, sc2, sd2, func); + `RenderedShape(sc, sd, i2) + } + | (`RenderedShape(sc1, sd1, i1), `Distribution(d2), _) => { + let (sc2, sd2) = renderDistributionToXYShape(d2, sampleCount); + let (sc, sd) = combinationDistributionOfXYShapes(sc1, sd1, sc2, sd2, func); + `RenderedShape(sc, sd, i1); + } + | (`RenderedShape(sc1, sd1, i1), `RenderedShape(sc2, sd2, i2), _) => { + // sum of two multimodals that have a continuous and discrete each. + let (sc, sd) = combinationDistributionOfXYShapes(sc1, sd1, sc2, sd2, func); + + `RenderedShape(sc, sd, i1); + } + } + }; + + let evaluatePointwiseSum = (et1: nodeResult, et2: nodeResult, sampleCount: int) => { + switch ((et1, et2)) { + /* Known cases: */ + | (`Distribution(`Float(v1)), `Distribution(`Float(v2))) => { + v1 == v2 + ? `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.make({xs: [|v1|], ys: [|2.|]}), 2.) + : `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) // TODO: add warning: shouldn't pointwise add scalars. + } + | (`Distribution(`Float(v1)), `Distribution(d2)) => { + let sd1: DistTypes.xyShape = {xs: [|v1|], ys: [|1.|]}; + let (sc2, sd2) = renderDistributionToXYShape(d2, sampleCount); + `RenderedShape(sc2, Distributions.Discrete.reduce((+.), [|sd1, sd2|]), 2.) + } + | (`Distribution(d1), `Distribution(`Float(v2))) => { + let (sc1, sd1) = renderDistributionToXYShape(d1, sampleCount); + let sd2: DistTypes.xyShape = {xs: [|v2|], ys: [|1.|]}; + `RenderedShape(sc1, Distributions.Discrete.reduce((+.), [|sd1, sd2|]), 2.) + } + | (`Distribution(d1), `Distribution(d2)) => { + let (sc1, sd1) = renderDistributionToXYShape(d1, sampleCount); + let (sc2, sd2) = renderDistributionToXYShape(d2, sampleCount); + `RenderedShape(Distributions.Continuous.reduce((+.), [|sc1, sc2|]), Distributions.Discrete.reduce((+.), [|sd1, sd2|]), 2.) + } + | (`Distribution(d1), `RenderedShape(sc2, sd2, i2)) + | (`RenderedShape(sc2, sd2, i2), `Distribution(d1)) => { + let (sc1, sd1) = renderDistributionToXYShape(d1, sampleCount); + `RenderedShape(Distributions.Continuous.reduce((+.), [|sc1, sc2|]), Distributions.Discrete.reduce((+.), [|sd1, sd2|]), 1. +. i2) + } + | (`RenderedShape(sc1, sd1, i1), `RenderedShape(sc2, sd2, i2)) => { + Js.log3("Reducing continuous rr", sc1, sc2); + Js.log2("Continuous reduction:", Distributions.Continuous.reduce((+.), [|sc1, sc2|])); + Js.log2("Discrete reduction:", Distributions.Discrete.reduce((+.), [|sd1, sd2|])); + `RenderedShape(Distributions.Continuous.reduce((+.), [|sc1, sc2|]), Distributions.Discrete.reduce((+.), [|sd1, sd2|]), i1 +. i2) + } + } + }; + + let evaluatePointwiseProduct = (et1: nodeResult, et2: nodeResult, sampleCount: int) => { + switch ((et1, et2)) { + /* Known cases: */ + | (`Distribution(`Float(v1)), `Distribution(`Float(v2))) => { + v1 == v2 + ? `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.make({xs: [|v1|], ys: [|1.|]}), 1.) + : `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) // TODO: add warning: shouldn't pointwise multiply scalars. + } + | (`Distribution(`Float(v1)), `Distribution(d2)) => { + // evaluate d2 at v1 + let y = GenericSimple.pdf(v1, d2); + `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.make({xs: [|v1|], ys: [|y|]}), y) + } + | (`Distribution(d1), `Distribution(`Float(v2))) => { + // evaluate d1 at v2 + let y = GenericSimple.pdf(v2, d1); + `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.make({xs: [|v2|], ys: [|y|]}), y) + } + | (`Distribution(`Normal(n1)), `Distribution(`Normal(n2))) => { + let mean = (n1.mean *. n2.stdev**2. +. n2.mean *. n1.stdev**2.) /. (n1.stdev**2. +. n2.stdev**2.); + let stdev = 1. /. ((1. /. n1.stdev**2.) +. (1. /. n2.stdev**2.)); + let integral = 0; // TODO + `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) + } + /* General cases */ + | (`Distribution(d1), `Distribution(d2)) => { + // NOT IMPLEMENTED YET + // TODO: evaluate integral properly + let (sc1, sd1) = renderDistributionToXYShape(d1, sampleCount); + let (sc2, sd2) = renderDistributionToXYShape(d2, sampleCount); + `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) + } + | (`Distribution(d1), `RenderedShape(sc2, sd2, i2)) => { + // NOT IMPLEMENTED YET + // TODO: evaluate integral properly + let (sc1, sd1) = renderDistributionToXYShape(d1, sampleCount); + `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) + } + | (`RenderedShape(sc1, sd1, i1), `Distribution(d1)) => { + // NOT IMPLEMENTED YET + // TODO: evaluate integral properly + let (sc2, sd2) = renderDistributionToXYShape(d1, sampleCount); + `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) + } + | (`RenderedShape(sc1, sd1, i1), `RenderedShape(sc2, sd2, i2)) => { + `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) + } + } + }; + + + let evaluateNormalize = (et: nodeResult, sampleCount: int) => { + // just divide everything by the integral. + switch (et) { + | `RenderedShape(sc, sd, i) => { + // loop through all ys and divide them by i + let normalize = (s: DistTypes.xyShape): DistTypes.xyShape => {xs: s.xs, ys: s.ys |> E.A.fmap(y => y /. i)}; + + let scn = sc |> Distributions.Continuous.shapeMap(normalize); + let sdn = sd |> normalize; + + `RenderedShape(scn, sdn, 1.) + } + | `Distribution(d) => `Distribution(d) // any kind of atomic dist should already be normalized -- TODO: THIS IS ACTUALLY FALSE! E.g. pointwise product of normal * normal + } + }; + + let evaluateTruncate = (et: nodeResult, xc: cutoffX, compareFunc: (float, float) => bool, sampleCount: int) => { + let cut = (s: DistTypes.xyShape): DistTypes.xyShape => { + let (xs, ys) = s.ys + |> Belt.Array.zip(s.xs) + |> E.A.filter(((x, y)) => compareFunc(x, xc)) + |> Belt.Array.unzip + + let cutShape: DistTypes.xyShape = {xs, ys}; + cutShape; + }; + + switch (et) { + | `Distribution(d) => { + let (sc, sd) = renderDistributionToXYShape(d, sampleCount); + + let scc = sc |> Distributions.Continuous.shapeMap(cut); + let sdc = sd |> cut; + + let newIntegral = 1.; // TODO + + `RenderedShape(scc, sdc, newIntegral); + } + | `RenderedShape(sc, sd, i) => { + let scc = sc |> Distributions.Continuous.shapeMap(cut); + let sdc = sd |> cut; + + let newIntegral = 1.; // TODO + + `RenderedShape(scc, sdc, newIntegral); + } + } + }; + + let evaluateVerticalScaling = (et1: nodeResult, et2: nodeResult, sampleCount: int) => { + let scale = (i: float, s: DistTypes.xyShape): DistTypes.xyShape => {xs: s.xs, ys: s.ys |> E.A.fmap(y => y *. i)}; + + switch ((et1, et2)) { + | (`Distribution(`Float(v)), `Distribution(d)) + | (`Distribution(d), `Distribution(`Float(v))) => { + let (sc, sd) = renderDistributionToXYShape(d, sampleCount); + + let scc = sc |> Distributions.Continuous.shapeMap(scale(v)); + let sdc = sd |> scale(v); + + let newIntegral = v; // TODO + + `RenderedShape(scc, sdc, newIntegral); + } + | (`Distribution(`Float(v)), `RenderedShape(sc, sd, i)) + | (`RenderedShape(sc, sd, i), `Distribution(`Float(v))) => { + let scc = sc |> Distributions.Continuous.shapeMap(scale(v)); + let sdc = sd |> scale(v); + + let newIntegral = v; // TODO + + `RenderedShape(scc, sdc, newIntegral); } + | _ => `RenderedShape(Distributions.Continuous.empty, Distributions.Discrete.empty, 0.) // TODO: give warning + } + } - }, ([||]: array(DistTypes.xyShape), [||]: array(DistTypes.xyShape))) + let renderNode = (et: nodeResult, sampleCount: int) => { + switch (et) { + | `Distribution(d) => { + let (sc, sd) = renderDistributionToXYShape(d, sampleCount); + `RenderedShape(sc, sd, 1.0); + } + | s => s + } + } + + let rec evaluateNode = (treeNode: distTree, sampleCount: int): nodeResult => { + // returns either a new symbolic distribution + switch (treeNode) { + | `Distribution(d) => evaluateDistribution(d) + | `Combination(t1, t2, op) => evaluateCombinationDistribution(evaluateNode(t1, sampleCount), evaluateNode(t2, sampleCount), op, sampleCount) + | `PointwiseSum(t1, t2) => evaluatePointwiseSum(evaluateNode(t1, sampleCount), evaluateNode(t2, sampleCount), sampleCount) + | `PointwiseProduct(t1, t2) => evaluatePointwiseProduct(evaluateNode(t1, sampleCount), evaluateNode(t2, sampleCount), sampleCount) + | `VerticalScaling(t1, t2) => evaluateVerticalScaling(evaluateNode(t1, sampleCount), evaluateNode(t2, sampleCount), sampleCount) + | `Normalize(t) => evaluateNormalize(evaluateNode(t, sampleCount), sampleCount) + | `LeftTruncate(t, x) => evaluateTruncate(evaluateNode(t, sampleCount), x, (<=), sampleCount) + | `RightTruncate(t, x) => evaluateTruncate(evaluateNode(t, sampleCount), x, (>=), sampleCount) + | `Render(t) => renderNode(evaluateNode(t, sampleCount), sampleCount) + } }; - /* - We will assume that each dist (of t) in the multimodal has a total of one. - We can therefore normalize the weights of the parts. - - However, a multimodal can consist of both discrete and continuous shapes. - These need to be added and collected individually. - */ - let toShape = (dists: t, sampleCount: int) => { - let continuousXs = findContinuousXs(dists, sampleCount); + let toShape = (treeNode: distTree, sampleCount: int) => { + /*let continuousXs = findContinuousXs(dists, sampleCount); continuousXs |> Array.fast_sort(compare); let (contShapes, distShapes) = accumulateContAndDiscShapes(dists, continuousXs, 1.0); @@ -469,60 +649,42 @@ module PointwiseAddDistributionsWeighted = { }, {xs: continuousXs, ys: Array.make(Array.length(continuousXs), 0.0)}) |> Distributions.Continuous.make(`Linear); - let combinedDiscrete = Distributions.Discrete.reduce((+.), distShapes) + let combinedDiscrete = Distributions.Discrete.reduce((+.), distShapes)*/ - let shape = MixedShapeBuilder.buildSimple(~continuous=Some(combinedContinuous), ~discrete=combinedDiscrete); + let treeShape = evaluateNode(`Render(`Normalize(treeNode)), sampleCount); + switch (treeShape) { + | `Distribution(_) => E.O.toExn("No shape found!", None) + | `RenderedShape(sc, sd, _) => { + let shape = MixedShapeBuilder.buildSimple(~continuous=Some(sc), ~discrete=sd); - shape |> E.O.toExt(""); + shape |> E.O.toExt(""); + } + } }; - let rec toString = (dists: t): string => { - let distString = - dists - |> E.A.fmap(((d, _)) => - switch (d) { - | `Simple(d) => GenericSimple.toString(d) - | `PointwiseCombination(ts: t) => ts |> toString - } - ) - |> Js.Array.joinWith(","); + let rec toString = (treeNode: distTree): string => { + let stringFromOp = op => switch (op) { + | `AddOperation => " + " + | `SubtractOperation => " - " + | `MultiplyOperation => " * " + | `DivideOperation => " / " + | `ExponentiateOperation => "^" + }; - // mm(normal(0,1), normal(1,2)) => "multimodal(normal(0,1), normal(1,2), ) - - let weights = - dists - |> E.A.fmap(((_, w)) => - Js.Float.toPrecisionWithPrecision(w, ~digits=2) - ) - |> Js.Array.joinWith(","); - - {j|multimodal($distString, [$weights])|j}; + switch (treeNode) { + | `Distribution(d) => GenericSimple.toString(d) + | `Combination(t1, t2, op) => toString(t1) ++ stringFromOp(op) ++ toString(t2) + | `PointwiseSum(t1, t2) => toString(t1) ++ " .+ " ++ toString(t2) + | `PointwiseProduct(t1, t2) => toString(t1) ++ " .* " ++ toString(t2) + | `VerticalScaling(t1, t2) => toString(t1) ++ " @ " ++ toString(t2) + | `Normalize(t) => "normalize(" ++ toString(t) ++ ")" + | `LeftTruncate(t, x) => "leftTruncate(" ++ toString(t) ++ ", " ++ string_of_float(x) ++ ")" + | `RightTruncate(t, x) => "rightTruncate(" ++ toString(t) ++ ", " ++ string_of_float(x) ++ ")" + } }; }; -// assume that recursive pointwiseNormalizedDistSums are the only type of operation there is. -// in the original, it was a list of (dist, weight) tuples. Now, it's a tree of (dist, weight) tuples, just that every -// dist can be either a GenericSimple or another PointwiseAdd. +let toString = (treeNode: distTree) => DistTree.toString(treeNode) -/*let toString = (r: bigDistTree) => { - switch (r) { - | WeightedDistLeaf((w, d)) => GenericWeighted.toString(w) // "normal " - | PointwiseNormalizedDistSum(childTrees) => childTrees |> E.A.fmap(toString) |> Js.Array.joinWith("") - } - }*/ - -let toString = (r: bigDist) => - // we need to recursively create the string representation of the tree. - r - |> ( - fun - | `Simple(d) => GenericSimple.toString(d) - | `PointwiseCombination(d) => - PointwiseAddDistributionsWeighted.toString(d) - ); - -let toShape = n => - fun - | `Simple(d) => GenericSimple.toShape(~xSelection=`ByWeight, d, n) - | `PointwiseCombination(d) => - PointwiseAddDistributionsWeighted.toShape(d, n); +let toShape = (sampleCount: int, treeNode: distTree) => + DistTree.toShape(treeNode, sampleCount) //~xSelection=`ByWeight,