From d2e7e5f9288c1e3fef86c3001c1ab63c980a909b Mon Sep 17 00:00:00 2001 From: Sebastian Kosch Date: Fri, 26 Jun 2020 22:37:24 -0700 Subject: [PATCH] Fixed some indexing errors in convolution code --- src/components/DistBuilder.re | 9 +- src/distPlus/distribution/Distributions.re | 176 ++++++++++++--------- src/distPlus/distribution/XYShape.re | 2 +- src/distPlus/symbolic/SymbolicDist.re | 4 +- src/distPlus/symbolic/TreeNode.re | 94 +++++------ 5 files changed, 156 insertions(+), 129 deletions(-) diff --git a/src/components/DistBuilder.re b/src/components/DistBuilder.re index 93856fc9..c28ef275 100644 --- a/src/components/DistBuilder.re +++ b/src/components/DistBuilder.re @@ -171,7 +171,8 @@ let make = () => { ~schema, ~onSubmit=({state}) => {None}, ~initialState={ - guesstimatorString: "mm(normal(-10, 2), uniform(18, 25), lognormal({mean: 10, stdev: 8}), triangular(31,40,50))", + //guesstimatorString: "mm(normal(-10, 2), uniform(18, 25), lognormal({mean: 10, stdev: 8}), triangular(31,40,50))", + guesstimatorString: "uniform(0, 1) + normal(1, 2)", domainType: "Complete", xPoint: "50.0", xPoint2: "60.0", @@ -180,9 +181,9 @@ let make = () => { unitType: "UnspecifiedDistribution", zero: MomentRe.momentNow(), unit: "days", - sampleCount: "30000", - outputXYPoints: "10000", - downsampleTo: "1000", + sampleCount: "3000", + outputXYPoints: "100", + downsampleTo: "100", kernelWidth: "5", }, (), diff --git a/src/distPlus/distribution/Distributions.re b/src/distPlus/distribution/Distributions.re index 9497957d..7f0d642f 100644 --- a/src/distPlus/distribution/Distributions.re +++ b/src/distPlus/distribution/Distributions.re @@ -154,6 +154,7 @@ module Continuous = { // This is essentially like integrateWithTriangles, without the accumulation. let toDiscretePointMasses = (t: t): DistTypes.discreteShape => { let tl = t |> getShape |> XYShape.T.length; + let pointMassesX: array(float) = Belt.Array.make(tl - 1, 0.0); let pointMassesY: array(float) = Belt.Array.make(tl - 1, 0.0); let {xs, ys}: XYShape.T.t = t |> getShape; for (x in 0 to E.A.length(xs) - 2) { @@ -163,75 +164,24 @@ module Continuous = { x, (xs[x + 1] -. xs[x]) *. ((ys[x] +. ys[x + 1]) /. 2.), ); // = dx * (1/2) * (avgY) + let _ = + Belt.Array.set( + pointMassesX, + x, + (xs[x] +. xs[x + 1]) /. 2., + ); // midpoints (); }; { xyShape: { - xs, + xs: pointMassesX, ys: pointMassesY, }, knownIntegralSum: t.knownIntegralSum, }; }; - /* Performs a discrete convolution between two continuous distributions A and B. - * It is an extremely good idea to downsample the distributions beforehand, - * because the number of samples in the convolution can be up to length(A) * length(B). - * - * Conventional convolution uses fn = (+.), but we also allow other operations to combine the xs. - * - * In practice, the convolution works by multiplying the ys for each possible combo of points of - * the two shapes. This creates a new shape for each point of A. These new shapes are then combined - * linearly. This may not always be the most efficient way, but it is probably the most robust for now. - * - * In the future, it may be possible to use a non-uniform fast Fourier transform instead (although only for addition). - */ - let convolveWithDiscrete = (fn, t1: t, t2: DistTypes.discreteShape) => { - let t1s = t1 |> getShape; - let t2s = t2.xyShape; // would like to use Discrete.getShape here, but current file structure doesn't allow for that - let t1n = t1s |> XYShape.T.length; - let t2n = t2s |> XYShape.T.length; - - let outXYShapes: array(array((float, float))) = - Belt.Array.makeUninitializedUnsafe(t1n); - - for (i in 0 to t1n - 1) { - // create a new distribution - let dxyShape: array((float, float)) = - Belt.Array.makeUninitializedUnsafe(t2n); - for (j in 0 to t2n - 1) { - let _ = - Belt.Array.set( - dxyShape, - j, - (fn(t1s.xs[i], t2s.xs[j]), t1s.ys[i] *. t2s.ys[j]), - ); - (); - }; - let _ = Belt.Array.set(outXYShapes, i, dxyShape); - (); - }; - - let combinedIntegralSum = - switch (t1.knownIntegralSum, t2.knownIntegralSum) { - | (None, _) - | (_, None) => None - | (Some(s1), Some(s2)) => Some(s1 *. s2) - }; - - outXYShapes - |> E.A.fmap(s => { - let xyShape = XYShape.T.fromZippedArray(s); - make(`Linear, xyShape, None); - }) - |> reduce((+.)) - |> updateKnownIntegralSum(combinedIntegralSum); - }; - - let convolve = (fn, t1: t, t2: t) => - convolveWithDiscrete(fn, t1, toDiscretePointMasses(t2)); - let mapY = (~knownIntegralSumFn=previousKnownIntegralSum => None, fn, t: t) => { let u = E.O.bind(_, knownIntegralSumFn); let yMapFn = shapeMap(XYShape.T.mapY(fn)); @@ -350,6 +300,69 @@ module Continuous = { XYShape.Analysis.getMeanOfSquaresContinuousShape, ); }); + + + /* Performs a discrete convolution between two continuous distributions A and B. + * It is an extremely good idea to downsample the distributions beforehand, + * because the number of samples in the convolution can be up to length(A) * length(B). + * + * Conventional convolution uses fn = (+.), but we also allow other operations to combine the xs. + * + * In practice, the convolution works by multiplying the ys for each possible combo of points of + * the two shapes. This creates a new shape for each point of A. These new shapes are then combined + * linearly. This may not always be the most efficient way, but it is probably the most robust for now. + * + * In the future, it may be possible to use a non-uniform fast Fourier transform instead (although only for addition). + */ + let convolveWithDiscrete = (~downsample=false, fn, t1: t, t2: DistTypes.discreteShape) => { + let t1s = t1 |> getShape; + let t2s = t2.xyShape; // would like to use Discrete.getShape here, but current file structure doesn't allow for that + let t1n = t1s |> XYShape.T.length; + let t2n = t2s |> XYShape.T.length; + + let outXYShapes: array(array((float, float))) = + Belt.Array.makeUninitializedUnsafe(t1n); + + for (i in 0 to t1n - 1) { + // create a new distribution + let dxyShape: array((float, float)) = + Belt.Array.makeUninitializedUnsafe(t2n); + for (j in 0 to t2n - 1) { + let _ = + Belt.Array.set( + dxyShape, + j, + (fn(t1s.xs[i], t2s.xs[j]), t1s.ys[i] *. t2s.ys[j]), + ); + (); + }; + + let _ = Belt.Array.set(outXYShapes, i, dxyShape); + (); + }; + + let combinedIntegralSum = Common.combineIntegralSums((a, b) => Some(a *. b), t1.knownIntegralSum, t2.knownIntegralSum); + + outXYShapes + |> E.A.fmap(s => { + let xyShape = XYShape.T.fromZippedArray(s); + make(`Linear, xyShape, None); + }) + |> reduce((+.)) + |> updateKnownIntegralSum(combinedIntegralSum); + }; + + let convolve = (~downsample=false, fn, t1: t, t2: t) => { + let downsampleIfTooLarge = (t: t) => { + let sqtl = sqrt(float_of_int(t |> getShape |> XYShape.T.length)); + sqtl > 10. && downsample ? T.downsample(int_of_float(sqtl), t) : t; + }; + + let t1d = downsampleIfTooLarge(t1); + let t2d = downsampleIfTooLarge(t2); + + convolveWithDiscrete(~downsample=false, fn, t1, toDiscretePointMasses(t2)); + }; }; module Discrete = { @@ -489,17 +502,23 @@ module Discrete = { let downsample = (~cache=None, i, t: t): t => { // It's not clear how to downsample a set of discrete points in a meaningful way. // The best we can do is to clip off the smallest values. - let clippedShape = - t - |> getShape - |> XYShape.T.zip - |> XYShape.Zipped.sortByY - |> Belt.Array.reverse - |> Belt.Array.slice(_, ~offset=0, ~len=i) - |> XYShape.Zipped.sortByX - |> XYShape.T.fromZippedArray; + let currentLength = t |> getShape |> XYShape.T.length; - make(clippedShape, None); // if someone needs the sum, they'll have to recompute it + if (i < currentLength) { + let clippedShape = + t + |> getShape + |> XYShape.T.zip + |> XYShape.Zipped.sortByY + |> Belt.Array.reverse + |> Belt.Array.slice(_, ~offset=0, ~len=i) + |> XYShape.Zipped.sortByX + |> XYShape.T.fromZippedArray; + + make(clippedShape, None); // if someone needs the sum, they'll have to recompute it + } else { + t; + } }; let truncate = @@ -663,6 +682,8 @@ module Mixed = { Continuous.T.Integral.sum(~cache=None, continuous); let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum; + // TODO: figure out what to do when the totalIntegralSum is zero. + let downsampledDiscrete = Discrete.T.downsample( int_of_float( @@ -811,17 +832,16 @@ module Mixed = { }; }); - let convolve = (fn: (float, float) => float, t1: t, t2: t): t => { + let convolve = (~downsample=false, fn: (float, float) => float, t1: t, t2: t): t => { // Discrete convolution can cause a huge increase in the number of samples, // so we'll first downsample. // An alternative (to be explored in the future) may be to first perform the full convolution and then to downsample the result; // to use non-uniform fast Fourier transforms (for addition only), add web workers or gpu.js, etc. ... - // TODO: make this optional or customizable let downsampleIfTooLarge = (t: t) => { let sqtl = sqrt(float_of_int(totalLength(t))); - sqtl > 10. ? T.downsample(int_of_float(sqtl), t) : t; + sqtl > 10. && downsample ? T.downsample(int_of_float(sqtl), t) : t; }; let t1d = downsampleIfTooLarge(t1); @@ -830,11 +850,11 @@ module Mixed = { // continuous (*) continuous => continuous, but also // discrete (*) continuous => continuous (and vice versa). We have to take care of all combos and then combine them: let ccConvResult = - Continuous.convolve(fn, t1d.continuous, t2d.continuous); + Continuous.convolve(~downsample=false, fn, t1d.continuous, t2d.continuous); let dcConvResult = - Continuous.convolveWithDiscrete(fn, t2d.continuous, t1d.discrete); + Continuous.convolveWithDiscrete(~downsample=false, fn, t2d.continuous, t1d.discrete); let cdConvResult = - Continuous.convolveWithDiscrete(fn, t1d.continuous, t2d.discrete); + Continuous.convolveWithDiscrete(~downsample=false, fn, t1d.continuous, t2d.discrete); let continuousConvResult = Continuous.reduce((+.), [|ccConvResult, dcConvResult, cdConvResult|]); @@ -870,7 +890,13 @@ module Shape = { )); let convolve = (fn, t1: t, t2: t): t => { - Mixed(Mixed.convolve(fn, toMixed(t1), toMixed(t2))); + switch ((t1, t2)) { + | (Continuous(m1), Continuous(m2)) => DistTypes.Continuous(Continuous.convolve(~downsample=true, fn, m1, m2)) + | (Discrete(m1), Discrete(m2)) => DistTypes.Discrete(Discrete.convolve(fn, m1, m2)) + | (m1, m2) => { + DistTypes.Mixed(Mixed.convolve(~downsample=true, fn, toMixed(m1), toMixed(m2))) + } + }; }; let combine = (~knownIntegralSumsFn=(_, _) => None, fn, t1: t, t2: t) => diff --git a/src/distPlus/distribution/XYShape.re b/src/distPlus/distribution/XYShape.re index 9451fb23..8e684b93 100644 --- a/src/distPlus/distribution/XYShape.re +++ b/src/distPlus/distribution/XYShape.re @@ -9,7 +9,7 @@ let interpolate = }; // TODO: Make sure that shapes cannot be empty. -let extImp = E.O.toExt("Should not be possible"); +let extImp = E.O.toExt("Tried to perform an operation on an empty XYShape."); module T = { type t = xyShape; diff --git a/src/distPlus/symbolic/SymbolicDist.re b/src/distPlus/symbolic/SymbolicDist.re index 8cab8227..e5df481c 100644 --- a/src/distPlus/symbolic/SymbolicDist.re +++ b/src/distPlus/symbolic/SymbolicDist.re @@ -283,11 +283,11 @@ module GenericDistFunctions = { (~xSelection: [ | `Linear | `ByWeight]=`Linear, dist: dist, n) => { switch (xSelection, dist) { | (`Linear, _) => E.A.Floats.range(min(dist), max(dist), n) - | (`ByWeight, `Uniform(n)) => +/* | (`ByWeight, `Uniform(n)) => // In `ByWeight mode, uniform distributions get special treatment because we need two x's // on either side for proper rendering (just left and right of the discontinuities). let dx = 0.00001 *. (n.high -. n.low); - [|n.low -. dx, n.low +. dx, n.high -. dx, n.high +. dx|]; + [|n.low -. dx, n.low +. dx, n.high -. dx, n.high +. dx|]; */ | (`ByWeight, _) => let ys = E.A.Floats.range(minCdfValue, maxCdfValue, n); ys |> E.A.fmap(y => inv(y, dist)); diff --git a/src/distPlus/symbolic/TreeNode.re b/src/distPlus/symbolic/TreeNode.re index 4aa645fb..8238d65b 100644 --- a/src/distPlus/symbolic/TreeNode.re +++ b/src/distPlus/symbolic/TreeNode.re @@ -55,6 +55,52 @@ module TreeNode = { type t = treeNode; type simplifier = treeNode => result(treeNode, string); + let rec toString = (t: t): string => { + let stringFromStandardOperation = + fun + | `Add => " + " + | `Subtract => " - " + | `Multiply => " * " + | `Divide => " / " + | `Exponentiate => "^"; + + let stringFromPointwiseOperation = + fun + | `Add => " .+ " + | `Multiply => " .* "; + + let stringFromFloatFromDistOperation = + fun + | `Pdf(f) => "pdf(x=$f, " + | `Inv(f) => "inv(c=$f, " + | `Sample => "sample(" + | `Mean => "mean("; + + + switch (t) { + | `DistData(`Symbolic(d)) => + SymbolicDist.GenericDistFunctions.toString(d) + | `DistData(`RenderedShape(s)) => "[shape]" + | `Operation(`StandardOperation(op, t1, t2)) => + toString(t1) ++ stringFromStandardOperation(op) ++ toString(t2) + | `Operation(`PointwiseOperation(op, t1, t2)) => + toString(t1) ++ stringFromPointwiseOperation(op) ++ toString(t2) + | `Operation(`ScaleOperation(_scaleOp, t, scaleBy)) => + toString(t) ++ " @ " ++ toString(scaleBy) + | `Operation(`Normalize(t)) => "normalize(" ++ toString(t) ++ ")" + | `Operation(`FloatFromDist(floatFromDistOp, t)) => stringFromFloatFromDistOperation(floatFromDistOp) ++ toString(t) ++ ")" + | `Operation(`Truncate(lc, rc, t)) => + "truncate(" + ++ toString(t) + ++ ", " + ++ E.O.dimap(Js.Float.toString, () => "-inf", lc) + ++ ", " + ++ E.O.dimap(Js.Float.toString, () => "inf", rc) + ++ ")" + | `Operation(`Render(t)) => toString(t) + }; + }; + /* The following modules encapsulate everything we can do with * different kinds of operations. */ @@ -482,52 +528,6 @@ module TreeNode = { | `Operation(op) => operationToDistData(sampleCount, op) }; }; - - let rec toString = (t: t): string => { - let stringFromStandardOperation = - fun - | `Add => " + " - | `Subtract => " - " - | `Multiply => " * " - | `Divide => " / " - | `Exponentiate => "^"; - - let stringFromPointwiseOperation = - fun - | `Add => " .+ " - | `Multiply => " .* "; - - let stringFromFloatFromDistOperation = - fun - | `Pdf(f) => "pdf(x=$f, " - | `Inv(f) => "inv(c=$f, " - | `Sample => "sample(" - | `Mean => "mean("; - - - switch (t) { - | `DistData(`Symbolic(d)) => - SymbolicDist.GenericDistFunctions.toString(d) - | `DistData(`RenderedShape(s)) => "[shape]" - | `Operation(`StandardOperation(op, t1, t2)) => - toString(t1) ++ stringFromStandardOperation(op) ++ toString(t2) - | `Operation(`PointwiseOperation(op, t1, t2)) => - toString(t1) ++ stringFromPointwiseOperation(op) ++ toString(t2) - | `Operation(`ScaleOperation(_scaleOp, t, scaleBy)) => - toString(t) ++ " @ " ++ toString(scaleBy) - | `Operation(`Normalize(t)) => "normalize(" ++ toString(t) ++ ")" - | `Operation(`FloatFromDist(floatFromDistOp, t)) => stringFromFloatFromDistOperation(floatFromDistOp) ++ toString(t) ++ ")" - | `Operation(`Truncate(lc, rc, t)) => - "truncate(" - ++ toString(t) - ++ ", " - ++ E.O.dimap(Js.Float.toString, () => "-inf", lc) - ++ ", " - ++ E.O.dimap(Js.Float.toString, () => "inf", rc) - ++ ")" - | `Operation(`Render(t)) => toString(t) - }; - }; }; let toShape = (sampleCount: int, treeNode: treeNode) => { @@ -539,7 +539,7 @@ let toShape = (sampleCount: int, treeNode: treeNode) => { let continuous = Distributions.Shape.T.toContinuous(rs); let discrete = Distributions.Shape.T.toDiscrete(rs); let shape = MixedShapeBuilder.buildSimple(~continuous, ~discrete); - shape |> E.O.toExt(""); + shape |> E.O.toExt("Could not build final shape."); | Ok(_) => E.O.toExn("Rendering failed.", None) | Error(message) => E.O.toExn("No shape found, error: " ++ message, None) };