diff --git a/__tests__/Distributions__Test.re b/__tests__/Distributions__Test.re index d83c1ac2..0b2e30e6 100644 --- a/__tests__/Distributions__Test.re +++ b/__tests__/Distributions__Test.re @@ -24,7 +24,7 @@ let makeTestCloseEquality = (~only=false, str, item1, item2, ~digits) => describe("Shape", () => { describe("Continuous", () => { open Distributions.Continuous; - let continuous = make(`Linear, shape); + let continuous = make(`Linear, shape, None); makeTest("minX", T.minX(continuous), 1.0); makeTest("maxX", T.maxX(continuous), 8.0); makeTest( @@ -57,7 +57,7 @@ describe("Shape", () => { ); }); describe("when Stepwise", () => { - let continuous = make(`Stepwise, shape); + let continuous = make(`Stepwise, shape, None); makeTest( "at 4.0", T.xToY(4., continuous), @@ -89,7 +89,7 @@ describe("Shape", () => { "toLinear", { let continuous = - make(`Stepwise, {xs: [|1., 4., 8.|], ys: [|0.1, 5., 1.0|]}); + make(`Stepwise, {xs: [|1., 4., 8.|], ys: [|0.1, 5., 1.0|]}, None); continuous |> toLinear |> E.O.fmap(getShape); }, Some({ @@ -100,7 +100,7 @@ describe("Shape", () => { makeTest( "toLinear", { - let continuous = make(`Stepwise, {xs: [|0.0|], ys: [|0.3|]}); + let continuous = make(`Stepwise, {xs: [|0.0|], ys: [|0.3|]}, None); continuous |> toLinear |> E.O.fmap(getShape); }, Some({xs: [|0.0|], ys: [|0.3|]}), @@ -123,7 +123,7 @@ describe("Shape", () => { makeTest( "integralEndY", continuous - |> T.scaleToIntegralSum(~intendedSum=1.0) + |> T.normalize //scaleToIntegralSum(~intendedSum=1.0) |> T.Integral.sum(~cache=None), 1.0, ); @@ -135,12 +135,12 @@ describe("Shape", () => { xs: [|1., 4., 8.|], ys: [|0.3, 0.5, 0.2|], }; - let discrete = shape; + let discrete = make(shape, None); makeTest("minX", T.minX(discrete), 1.0); makeTest("maxX", T.maxX(discrete), 8.0); makeTest( "mapY", - T.mapY(r => r *. 2.0, discrete) |> (r => r.ys), + T.mapY(r => r *. 2.0, discrete) |> (r => getShape(r).ys), [|0.6, 1.0, 0.4|], ); makeTest( @@ -160,19 +160,22 @@ describe("Shape", () => { ); makeTest( "scaleBy", - T.scaleBy(~scale=4.0, discrete), - {xs: [|1., 4., 8.|], ys: [|1.2, 2.0, 0.8|]}, + scaleBy(~scale=4.0, discrete), + make({xs: [|1., 4., 8.|], ys: [|1.2, 2.0, 0.8|]}, None), ); makeTest( - "scaleToIntegralSum", - T.scaleToIntegralSum(~intendedSum=4.0, discrete), - {xs: [|1., 4., 8.|], ys: [|1.2, 2.0, 0.8|]}, + "normalize, then scale by 4.0", + discrete + |> T.normalize + |> scaleBy(~scale=4.0), + make({xs: [|1., 4., 8.|], ys: [|1.2, 2.0, 0.8|]}, None), ); makeTest( "scaleToIntegralSum: back and forth", discrete - |> T.scaleToIntegralSum(~intendedSum=4.0) - |> T.scaleToIntegralSum(~intendedSum=1.0), + |> T.normalize + |> scaleBy(~scale=4.0) + |> T.normalize, discrete, ); makeTest( @@ -181,12 +184,13 @@ describe("Shape", () => { Distributions.Continuous.make( `Stepwise, {xs: [|1., 4., 8.|], ys: [|0.3, 0.8, 1.0|]}, + None ), ); makeTest( "integral with 1 element", - T.Integral.get(~cache=None, {xs: [|0.0|], ys: [|1.0|]}), - Distributions.Continuous.make(`Stepwise, {xs: [|0.0|], ys: [|1.0|]}), + T.Integral.get(~cache=None, Distributions.Discrete.make({xs: [|0.0|], ys: [|1.0|]}, None)), + Distributions.Continuous.make(`Stepwise, {xs: [|0.0|], ys: [|1.0|]}, None), ); makeTest( "integralXToY", @@ -205,27 +209,22 @@ describe("Shape", () => { describe("Mixed", () => { open Distributions.Mixed; - let discrete: DistTypes.xyShape = { + let discreteShape: DistTypes.xyShape = { xs: [|1., 4., 8.|], ys: [|0.3, 0.5, 0.2|], }; + let discrete = Distributions.Discrete.make(discreteShape, None); let continuous = Distributions.Continuous.make( `Linear, {xs: [|3., 7., 14.|], ys: [|0.058, 0.082, 0.124|]}, + None ) - |> Distributions.Continuous.T.scaleToIntegralSum(~intendedSum=1.0); - let mixed = - MixedShapeBuilder.build( + |> Distributions.Continuous.T.normalize; //scaleToIntegralSum(~intendedSum=1.0); + let mixed = Distributions.Mixed.make( ~continuous, ~discrete, - ~assumptions={ - continuous: ADDS_TO_CORRECT_PROBABILITY, - discrete: ADDS_TO_CORRECT_PROBABILITY, - discreteProbabilityMass: Some(0.5), - }, - ) - |> E.O.toExn(""); + ); makeTest("minX", T.minX(mixed), 1.0); makeTest("maxX", T.maxX(mixed), 14.0); makeTest( @@ -243,9 +242,9 @@ describe("Shape", () => { 0.24775224775224775, |], }, + None ), - ~discrete={xs: [|1., 4., 8.|], ys: [|0.6, 1.0, 0.4|]}, - ~discreteProbabilityMassFraction=0.5, + ~discrete=Distributions.Discrete.make({xs: [|1., 4., 8.|], ys: [|0.6, 1.0, 0.4|]}, None) ), ); makeTest( @@ -266,7 +265,7 @@ describe("Shape", () => { makeTest("integralEndY", T.Integral.sum(~cache=None, mixed), 1.0); makeTest( "scaleBy", - T.scaleBy(~scale=2.0, mixed), + Distributions.Mixed.scaleBy(~scale=2.0, mixed), Distributions.Mixed.make( ~continuous= Distributions.Continuous.make( @@ -279,9 +278,9 @@ describe("Shape", () => { 0.24775224775224775, |], }, + None ), - ~discrete={xs: [|1., 4., 8.|], ys: [|0.6, 1.0, 0.4|]}, - ~discreteProbabilityMassFraction=0.5, + ~discrete=Distributions.Discrete.make({xs: [|1., 4., 8.|], ys: [|0.6, 1.0, 0.4|]}, None), ), ); makeTest( @@ -302,34 +301,31 @@ describe("Shape", () => { 0.6913122927072927, 1.0, |], - }, + }, + None, ), ); }); describe("Distplus", () => { open Distributions.DistPlus; - let discrete: DistTypes.xyShape = { + let discreteShape: DistTypes.xyShape = { xs: [|1., 4., 8.|], ys: [|0.3, 0.5, 0.2|], }; + let discrete = Distributions.Discrete.make(discreteShape, None); let continuous = Distributions.Continuous.make( `Linear, {xs: [|3., 7., 14.|], ys: [|0.058, 0.082, 0.124|]}, + None ) - |> Distributions.Continuous.T.scaleToIntegralSum(~intendedSum=1.0); + |> Distributions.Continuous.T.normalize; //scaleToIntegralSum(~intendedSum=1.0); let mixed = - MixedShapeBuilder.build( + Distributions.Mixed.make( ~continuous, ~discrete, - ~assumptions={ - continuous: ADDS_TO_CORRECT_PROBABILITY, - discrete: ADDS_TO_CORRECT_PROBABILITY, - discreteProbabilityMass: Some(0.5), - }, - ) - |> E.O.toExn(""); + ); let distPlus = Distributions.DistPlus.make( ~shape=Mixed(mixed), @@ -374,6 +370,7 @@ describe("Shape", () => { 1.0, |], }, + None, ), ), ); @@ -385,11 +382,10 @@ describe("Shape", () => { let variance = stdev ** 2.0; let numSamples = 10000; open Distributions.Shape; - let normal: SymbolicDist.dist = `Normal({mean, stdev}); - let normalShape = SymbolicDist.GenericSimple.toShape(normal, numSamples); + let normal: SymbolicTypes.symbolicDist = `Normal({mean, stdev}); + let normalShape = ExpressionTree.toShape(numSamples, `SymbolicDist(normal)); let lognormal = SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev); - let lognormalShape = - SymbolicDist.GenericSimple.toShape(lognormal, numSamples); + let lognormalShape = ExpressionTree.toShape(numSamples, `SymbolicDist(lognormal)); makeTestCloseEquality( "Mean of a normal", @@ -416,4 +412,4 @@ describe("Shape", () => { ~digits=0, ); }); -}); \ No newline at end of file +}); diff --git a/showcase/Entries.re b/showcase/Entries.re index ae4cef64..f3e96e75 100644 --- a/showcase/Entries.re +++ b/showcase/Entries.re @@ -1 +1 @@ -let entries = EntryTypes.[Continuous.entry]; \ No newline at end of file +let entries = EntryTypes.[Continuous.entry,ExpressionTreeExamples.entry]; \ No newline at end of file diff --git a/showcase/entries/Continuous.re b/showcase/entries/Continuous.re index 237b1081..86823fc0 100644 --- a/showcase/entries/Continuous.re +++ b/showcase/entries/Continuous.re @@ -84,4 +84,4 @@ let distributions = () => ; -let entry = EntryTypes.(entry(~title="Pdf", ~render=distributions)); \ No newline at end of file +let entry = EntryTypes.(entry(~title="Mixed Distributions", ~render=distributions)); \ No newline at end of file diff --git a/showcase/entries/ExpressionTreeExamples.re b/showcase/entries/ExpressionTreeExamples.re new file mode 100644 index 00000000..ef29cdaf --- /dev/null +++ b/showcase/entries/ExpressionTreeExamples.re @@ -0,0 +1,71 @@ +let setup = dist => + RenderTypes.DistPlusRenderer.make(~distPlusIngredients=dist, ()) + |> DistPlusRenderer.run + |> RenderTypes.DistPlusRenderer.Outputs.distplus + |> R.O.fmapOrNull(distPlus => ); + +let simpleExample = (guesstimatorString, ~problem="", ()) => + <> +

{guesstimatorString |> ReasonReact.string}

+

{problem |> (e => "problem: " ++ e) |> ReasonReact.string}

+ {setup( + RenderTypes.DistPlusRenderer.Ingredients.make(~guesstimatorString, ()), + )} + ; + +let distributions = () => +
+
+

+ {"Initial Section" |> ReasonReact.string} +

+ {simpleExample( + "normal(-1, 1) + normal(5, 2)", + ~problem="Tails look too flat", + (), + )} + {simpleExample( + "mm(normal(4,2), normal(10,1))", + ~problem="Tails look too flat", + (), + )} + {simpleExample( + "normal(-1, 1) * normal(5, 2)", + ~problem="This looks really weird", + (), + )} + {simpleExample( + "normal(1,2) * normal(2,2) * normal(3,1)", + ~problem="Seems like important parts are cut off", + (), + )} + {simpleExample( + "mm(uniform(0, 1) , normal(3,2))", + ~problem="Uniform distribution seems to break multimodal", + (), + )} + {simpleExample( + "truncate(mm(1 to 10, 10 to 30), 10, 20)", + ~problem="Truncate seems to have no effect", + (), + )} + {simpleExample( + "normal(5,2)*(10^3)", + ~problem="Multiplied items should be evaluated.", + (), + )} + {simpleExample( + "normal(5,10*3)", + ~problem="At least simple operations in the distributions should be evaluated.", + (), + )} + {simpleExample( + "normal(5,10)^3", + ~problem="Exponentiation not yet supported", + (), + )} +
+
; + +let entry = + EntryTypes.(entry(~title="ExpressionTree", ~render=distributions)); diff --git a/src/components/DistBuilder.re b/src/components/DistBuilder.re index 7f57f9c1..77c64aab 100644 --- a/src/components/DistBuilder.re +++ b/src/components/DistBuilder.re @@ -17,7 +17,7 @@ module FormConfig = [%lenses // sampleCount: string, outputXYPoints: string, - truncateTo: string, + downsampleTo: string, kernelWidth: string, } ]; @@ -25,7 +25,7 @@ module FormConfig = [%lenses type options = { sampleCount: int, outputXYPoints: int, - truncateTo: option(int), + downsampleTo: option(int), kernelWidth: option(float), }; @@ -115,7 +115,7 @@ type inputs = { samplingInputs: RenderTypes.ShapeRenderer.Sampling.inputs, guesstimatorString: string, length: int, - shouldTruncateSampledDistribution: int, + shouldDownsampleSampledDistribution: int, }; module DemoDist = { @@ -141,8 +141,8 @@ module DemoDist = { kernelWidth: options.kernelWidth, }, ~distPlusIngredients, - ~shouldTruncate=options.truncateTo |> E.O.isSome, - ~recommendedLength=options.truncateTo |> E.O.default(10000), + ~shouldDownsample=options.downsampleTo |> E.O.isSome, + ~recommendedLength=options.downsampleTo |> E.O.default(10000), (), ); let response = DistPlusRenderer.run(inputs); @@ -171,7 +171,8 @@ let make = () => { ~schema, ~onSubmit=({state}) => {None}, ~initialState={ - guesstimatorString: "mm(normal(-10, 2), uniform(18, 25), lognormal({mean: 10, stdev: 8}), triangular(31,40,50))", + //guesstimatorString: "mm(normal(-10, 2), uniform(18, 25), lognormal({mean: 10, stdev: 8}), triangular(31,40,50))", + guesstimatorString: "uniform(0, 1) * normal(1, 2) - 1", domainType: "Complete", xPoint: "50.0", xPoint2: "60.0", @@ -180,9 +181,9 @@ let make = () => { unitType: "UnspecifiedDistribution", zero: MomentRe.momentNow(), unit: "days", - sampleCount: "30000", - outputXYPoints: "10000", - truncateTo: "1000", + sampleCount: "3000", + outputXYPoints: "100", + downsampleTo: "100", kernelWidth: "5", }, (), @@ -210,7 +211,7 @@ let make = () => { let sampleCount = reform.state.values.sampleCount |> Js.Float.fromString; let outputXYPoints = reform.state.values.outputXYPoints |> Js.Float.fromString; - let truncateTo = reform.state.values.truncateTo |> Js.Float.fromString; + let downsampleTo = reform.state.values.downsampleTo |> Js.Float.fromString; let kernelWidth = reform.state.values.kernelWidth |> Js.Float.fromString; let domain = @@ -252,20 +253,20 @@ let make = () => { }; let options = - switch (sampleCount, outputXYPoints, truncateTo) { + switch (sampleCount, outputXYPoints, downsampleTo) { | (_, _, _) when !Js.Float.isNaN(sampleCount) && !Js.Float.isNaN(outputXYPoints) - && !Js.Float.isNaN(truncateTo) + && !Js.Float.isNaN(downsampleTo) && sampleCount > 10. && outputXYPoints > 10. => Some({ sampleCount: sampleCount |> int_of_float, outputXYPoints: outputXYPoints |> int_of_float, - truncateTo: - int_of_float(truncateTo) > 0 - ? Some(int_of_float(truncateTo)) : None, + downsampleTo: + int_of_float(downsampleTo) > 0 + ? Some(int_of_float(downsampleTo)) : None, kernelWidth: kernelWidth == 0.0 ? None : Some(kernelWidth), }) | _ => None @@ -287,7 +288,7 @@ let make = () => { reform.state.values.unit, reform.state.values.sampleCount, reform.state.values.outputXYPoints, - reform.state.values.truncateTo, + reform.state.values.downsampleTo, reform.state.values.kernelWidth, reloader |> string_of_int, |], @@ -481,7 +482,7 @@ let make = () => { /> - + @@ -496,4 +497,4 @@ let make = () => {
; -}; \ No newline at end of file +}; diff --git a/src/components/DistBuilder2.re b/src/components/DistBuilder2.re index 9c7ad6bb..b912e223 100644 --- a/src/components/DistBuilder2.re +++ b/src/components/DistBuilder2.re @@ -44,14 +44,14 @@ module DemoDist = { Distributions.DistPlus.make( ~shape= Continuous( - Distributions.Continuous.make(`Linear, {xs, ys}), + Distributions.Continuous.make(`Linear, {xs, ys}, None), ), ~domain=Complete, ~unit=UnspecifiedDistribution, ~guesstimatorString=None, (), ) - |> Distributions.DistPlus.T.scaleToIntegralSum(~intendedSum=1.0); + |> Distributions.DistPlus.T.normalize; ; }; R.ste}> @@ -102,4 +102,4 @@ let make = () => {
; -}; \ No newline at end of file +}; diff --git a/src/components/DistBuilder3.re b/src/components/DistBuilder3.re index 662a3241..c0a5aac3 100644 --- a/src/components/DistBuilder3.re +++ b/src/components/DistBuilder3.re @@ -37,13 +37,13 @@ module DemoDist = { let parsed1 = MathJsParser.fromString(guesstimatorString); let shape = switch (parsed1) { - | Ok(r) => Some(SymbolicDist.toShape(10000, r)) + | Ok(r) => Some(ExpressionTree.toShape(10000, r)) | _ => None }; let str = switch (parsed1) { - | Ok(r) => SymbolicDist.toString(r) + | Ok(r) => ExpressionTree.toString(r) | Error(e) => e }; @@ -58,7 +58,7 @@ module DemoDist = { ~guesstimatorString=None, (), ) - |> Distributions.DistPlus.T.scaleToIntegralSum(~intendedSum=1.0); + |> Distributions.DistPlus.T.normalize; ; }) |> E.O.default(ReasonReact.null); @@ -111,4 +111,4 @@ let make = () => {
; -}; \ No newline at end of file +}; diff --git a/src/components/Drawer.re b/src/components/Drawer.re index fa0babbe..f9ae5ddb 100644 --- a/src/components/Drawer.re +++ b/src/components/Drawer.re @@ -177,6 +177,7 @@ module Convert = { let continuousShape: Types.continuousShape = { xyShape, interpolation: `Linear, + knownIntegralSum: None, }; let integral = XYShape.Analysis.integrateContinuousShape(continuousShape); @@ -188,6 +189,7 @@ module Convert = { ys, }, interpolation: `Linear, + knownIntegralSum: Some(1.0), }; continuousShape; }; @@ -386,8 +388,8 @@ module Draw = { let stdev = 15.0; let numSamples = 3000; - let normal: SymbolicDist.dist = `Normal({mean, stdev}); - let normalShape = SymbolicDist.GenericSimple.toShape(normal, numSamples); + let normal: SymbolicTypes.symbolicDist = `Normal({mean, stdev}); + let normalShape = ExpressionTree.toShape(numSamples, `SymbolicDist(normal)); let xyShape: Types.xyShape = switch (normalShape) { | Mixed(_) => {xs: [||], ys: [||]} @@ -396,9 +398,9 @@ module Draw = { }; /* // To use a lognormal instead: - let lognormal = SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev); + let lognormal = SymbolicTypes.Lognormal.fromMeanAndStdev(mean, stdev); let lognormalShape = - SymbolicDist.GenericSimple.toShape(lognormal, numSamples); + SymbolicTypes.GenericSimple.toShape(lognormal, numSamples); let lognormalXYShape: Types.xyShape = switch (lognormalShape) { | Mixed(_) => {xs: [||], ys: [||]} @@ -667,9 +669,7 @@ module State = { /* create a cdf from a pdf */ let _pdf = - Distributions.Continuous.T.scaleToIntegralSum( - ~cache=None, - ~intendedSum=1.0, + Distributions.Continuous.T.normalize( pdf, ); @@ -986,4 +986,4 @@ let make = () => { ; -}; \ No newline at end of file +}; diff --git a/src/components/charts/DistPlusPlot.re b/src/components/charts/DistPlusPlot.re index a6b35f22..93feb7d2 100644 --- a/src/components/charts/DistPlusPlot.re +++ b/src/components/charts/DistPlusPlot.re @@ -95,7 +95,7 @@ let table = (distPlus, x) => { {distPlus - |> Distributions.DistPlus.T.toScaledContinuous + |> Distributions.DistPlus.T.normalizedToContinuous |> E.O.fmap( Distributions.Continuous.T.Integral.sum(~cache=None), ) @@ -113,7 +113,7 @@ let table = (distPlus, x) => { {distPlus - |> Distributions.DistPlus.T.toScaledDiscrete + |> Distributions.DistPlus.T.normalizedToDiscrete |> E.O.fmap(Distributions.Discrete.T.Integral.sum(~cache=None)) |> E.O.fmap(E.Float.with2DigitsPrecision) |> E.O.default("") @@ -211,15 +211,13 @@ let percentiles = distPlus => { ; }; -let adjustBoth = discreteProbabilityMass => { - let yMaxDiscreteDomainFactor = discreteProbabilityMass; - let yMaxContinuousDomainFactor = 1.0 -. discreteProbabilityMass; - let yMax = - yMaxDiscreteDomainFactor > yMaxContinuousDomainFactor - ? yMaxDiscreteDomainFactor : yMaxContinuousDomainFactor; +let adjustBoth = discreteProbabilityMassFraction => { + let yMaxDiscreteDomainFactor = discreteProbabilityMassFraction; + let yMaxContinuousDomainFactor = 1.0 -. discreteProbabilityMassFraction; + let yMax = (yMaxDiscreteDomainFactor > 0.5 ? yMaxDiscreteDomainFactor : yMaxContinuousDomainFactor); ( - 1.0 /. (yMaxDiscreteDomainFactor /. yMax), - 1.0 /. (yMaxContinuousDomainFactor /. yMax), + yMax /. yMaxDiscreteDomainFactor, + yMax /. yMaxContinuousDomainFactor, ); }; @@ -227,10 +225,10 @@ module DistPlusChart = { [@react.component] let make = (~distPlus: DistTypes.distPlus, ~config: chartConfig, ~onHover) => { open Distributions.DistPlus; - let discrete = distPlus |> T.toScaledDiscrete; + let discrete = distPlus |> T.normalizedToDiscrete |> E.O.fmap(Distributions.Discrete.getShape); let continuous = distPlus - |> T.toScaledContinuous + |> T.normalizedToContinuous |> E.O.fmap(Distributions.Continuous.getShape); let range = T.xTotalRange(distPlus); @@ -254,10 +252,10 @@ module DistPlusChart = { }; let timeScale = distPlus.unit |> DistTypes.DistributionUnit.toJson; - let toDiscreteProbabilityMass = - distPlus |> Distributions.DistPlus.T.toDiscreteProbabilityMass; + let discreteProbabilityMassFraction = + distPlus |> Distributions.DistPlus.T.toDiscreteProbabilityMassFraction; let (yMaxDiscreteDomainFactor, yMaxContinuousDomainFactor) = - adjustBoth(toDiscreteProbabilityMass); + adjustBoth(discreteProbabilityMassFraction); {
{state.distributions |> E.L.fmapi((index, config) => -
+
{setX(_ => r)}} />
@@ -406,4 +404,4 @@ let make = (~distPlus: DistTypes.distPlus) => { {state.showStats ? table(distPlus, x) : ReasonReact.null} {state.showPercentiles ? percentiles(distPlus) : ReasonReact.null}
; -}; \ No newline at end of file +}; diff --git a/src/components/charts/DistributionPlot/distPlotD3.js b/src/components/charts/DistributionPlot/distPlotD3.js index 3bb13fb2..03cc671c 100644 --- a/src/components/charts/DistributionPlot/distPlotD3.js +++ b/src/components/charts/DistributionPlot/distPlotD3.js @@ -427,7 +427,7 @@ export class DistPlotD3 { addLollipopsChart(common) { const data = this.getDataPoints('discrete'); - const yMin = d3.min(this.attrs.data.discrete.ys); + const yMin = 0.; //d3.min(this.attrs.data.discrete.ys); const yMax = d3.max(this.attrs.data.discrete.ys); // X axis. diff --git a/src/distPlus/distribution/AlgebraicShapeCombination.re b/src/distPlus/distribution/AlgebraicShapeCombination.re new file mode 100644 index 00000000..5fadda1c --- /dev/null +++ b/src/distPlus/distribution/AlgebraicShapeCombination.re @@ -0,0 +1,210 @@ +type pointMassesWithMoments = { + n: int, + masses: array(float), + means: array(float), + variances: array(float), +}; + +/* This function takes a continuous distribution and efficiently approximates it as + point masses that have variances associated with them. + We estimate the means and variances from overlapping triangular distributions which we imagine are making up the + XYShape. + We can then use the algebra of random variables to "convolve" the point masses and their variances, + and finally reconstruct a new distribution from them, e.g. using a Fast Gauss Transform or Raykar et al. (2007). */ +let toDiscretePointMassesFromTriangulars = + (~inverse=false, s: XYShape.T.t): pointMassesWithMoments => { + // TODO: what if there is only one point in the distribution? + let n = s |> XYShape.T.length; + // first, double up the leftmost and rightmost points: + let {xs, ys}: XYShape.T.t = s; + let _ = Js.Array.unshift(xs[0], xs); + let _ = Js.Array.unshift(ys[0], ys); + let _ = Js.Array.push(xs[n - 1], xs); + let _ = Js.Array.push(ys[n - 1], ys); + let n = E.A.length(xs); + // squares and neighbourly products of the xs + let xsSq: array(float) = Belt.Array.makeUninitializedUnsafe(n); + let xsProdN1: array(float) = Belt.Array.makeUninitializedUnsafe(n - 1); + let xsProdN2: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2); + for (i in 0 to n - 1) { + let _ = Belt.Array.set(xsSq, i, xs[i] *. xs[i]); + (); + }; + for (i in 0 to n - 2) { + let _ = Belt.Array.set(xsProdN1, i, xs[i] *. xs[i + 1]); + (); + }; + for (i in 0 to n - 3) { + let _ = Belt.Array.set(xsProdN2, i, xs[i] *. xs[i + 2]); + (); + }; + // means and variances + let masses: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2); // doesn't include the fake first and last points + let means: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2); + let variances: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2); + + if (inverse) { + for (i in 1 to n - 2) { + let _ = + Belt.Array.set( + masses, + i - 1, + (xs[i + 1] -. xs[i - 1]) *. ys[i] /. 2., + ); + + // this only works when the whole triange is either on the left or on the right of zero + let a = xs[i - 1]; + let c = xs[i]; + let b = xs[i + 1]; + + // These are the moments of the reciprocal of a triangular distribution, as symbolically integrated by Mathematica. + // They're probably pretty close to invMean ~ 1/mean = 3/(a+b+c) and invVar. But I haven't worked out + // the worst case error, so for now let's use these monster equations + let inverseMean = + 2. + *. (a *. log(a /. c) /. (a -. c) +. b *. log(c /. b) /. (b -. c)) + /. (a -. b); + let inverseVar = + 2. + *. (log(c /. a) /. (a -. c) +. b *. log(b /. c) /. (b -. c)) + /. (a -. b) + -. inverseMean + ** 2.; + + let _ = Belt.Array.set(means, i - 1, inverseMean); + + let _ = Belt.Array.set(variances, i - 1, inverseVar); + (); + }; + + {n: n - 2, masses, means, variances}; + } else { + for (i in 1 to n - 2) { + + // area of triangle = width * height / 2 + let _ = + Belt.Array.set( + masses, + i - 1, + (xs[i + 1] -. xs[i - 1]) *. ys[i] /. 2., + ); + + // means of triangle = (a + b + c) / 3 + let _ = + Belt.Array.set(means, i - 1, (xs[i - 1] +. xs[i] +. xs[i + 1]) /. 3.); + + // variance of triangle = (a^2 + b^2 + c^2 - ab - ac - bc) / 18 + let _ = + Belt.Array.set( + variances, + i - 1, + ( + xsSq[i - 1] + +. xsSq[i] + +. xsSq[i + 1] + -. xsProdN1[i - 1] + -. xsProdN1[i] + -. xsProdN2[i - 1] + ) + /. 18., + ); + (); + }; + {n: n - 2, masses, means, variances}; + }; +}; + +let combineShapesContinuousContinuous = + (op: ExpressionTypes.algebraicOperation, s1: DistTypes.xyShape, s2: DistTypes.xyShape) + : DistTypes.xyShape => { + let t1n = s1 |> XYShape.T.length; + let t2n = s2 |> XYShape.T.length; + + // if we add the two distributions, we should probably use normal filters. + // if we multiply the two distributions, we should probably use lognormal filters. + let t1m = toDiscretePointMassesFromTriangulars(s1); + let t2m = switch (op) { + | `Divide => toDiscretePointMassesFromTriangulars(~inverse=true, s2) + | _ => toDiscretePointMassesFromTriangulars(~inverse=false, s2) + }; + + let combineMeansFn = + switch (op) { + | `Add => ((m1, m2) => m1 +. m2) + | `Subtract => ((m1, m2) => m1 -. m2) + | `Multiply => ((m1, m2) => m1 *. m2) + | `Divide => ((m1, mInv2) => m1 *. mInv2) + }; // note: here, mInv2 = mean(1 / t2) ~= 1 / mean(t2) + + // converts the variances and means of the two inputs into the variance of the output + let combineVariancesFn = + switch (op) { + | `Add => ((v1, v2, m1, m2) => v1 +. v2) + | `Subtract => ((v1, v2, m1, m2) => v1 +. v2) + | `Multiply => ( + (v1, v2, m1, m2) => v1 *. v2 +. v1 *. m2 ** 2. +. v2 *. m1 ** 2. + ) + | `Divide => ( + (v1, vInv2, m1, mInv2) => + v1 *. vInv2 +. v1 *. mInv2 ** 2. +. vInv2 *. m1 ** 2. + ) + }; + + // TODO: If operating on two positive-domain distributions, we should take that into account + let outputMinX: ref(float) = ref(infinity); + let outputMaxX: ref(float) = ref(neg_infinity); + let masses: array(float) = + Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n); + let means: array(float) = + Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n); + let variances: array(float) = + Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n); + // then convolve the two sets of pointMassesWithMoments + for (i in 0 to t1m.n - 1) { + for (j in 0 to t2m.n - 1) { + let k = i * t2m.n + j; + let _ = Belt.Array.set(masses, k, t1m.masses[i] *. t2m.masses[j]); + + let mean = combineMeansFn(t1m.means[i], t2m.means[j]); + let variance = + combineVariancesFn( + t1m.variances[i], + t2m.variances[j], + t1m.means[i], + t2m.means[j], + ); + let _ = Belt.Array.set(means, k, mean); + let _ = Belt.Array.set(variances, k, variance); + // update bounds + let minX = mean -. variance *. 1.644854; + let maxX = mean +. variance *. 1.644854; + if (minX < outputMinX^) { + outputMinX := minX; + }; + if (maxX > outputMaxX^) { + outputMaxX := maxX; + }; + }; + }; + + // we now want to create a set of target points. For now, let's just evenly distribute 200 points between + // between the outputMinX and outputMaxX + let nOut = 300; + let outputXs: array(float) = E.A.Floats.range(outputMinX^, outputMaxX^, nOut); + let outputYs: array(float) = Belt.Array.make(nOut, 0.0); + // now, for each of the outputYs, accumulate from a Gaussian kernel over each input point. + for (j in 0 to E.A.length(masses) - 1) { + let _ = if (variances[j] > 0.) { + for (i in 0 to E.A.length(outputXs) - 1) { + let dx = outputXs[i] -. means[j]; + let contribution = masses[j] *. exp(-. (dx ** 2.) /. (2. *. variances[j])); + let _ = Belt.Array.set(outputYs, i, outputYs[i] +. contribution); + (); + }; + (); + }; + (); + }; + + {xs: outputXs, ys: outputYs}; +}; diff --git a/src/distPlus/distribution/DistTypes.re b/src/distPlus/distribution/DistTypes.re index 7a598c01..6c590733 100644 --- a/src/distPlus/distribution/DistTypes.re +++ b/src/distPlus/distribution/DistTypes.re @@ -17,14 +17,18 @@ type xyShape = { type continuousShape = { xyShape, interpolation: [ | `Stepwise | `Linear], + knownIntegralSum: option(float), }; -type discreteShape = xyShape; +type discreteShape = { + xyShape, + knownIntegralSum: option(float), +}; type mixedShape = { continuous: continuousShape, discrete: discreteShape, - discreteProbabilityMassFraction: float, +// discreteProbabilityMassFraction: float, }; type shapeMonad('a, 'b, 'c) = @@ -153,4 +157,4 @@ module MixedPoint = { }; let add = combine2((a, b) => a +. b); -}; \ No newline at end of file +}; diff --git a/src/distPlus/distribution/Distributions.re b/src/distPlus/distribution/Distributions.re index 2472f2ab..c3fdf6f5 100644 --- a/src/distPlus/distribution/Distributions.re +++ b/src/distPlus/distribution/Distributions.re @@ -3,15 +3,18 @@ module type dist = { type integral; let minX: t => float; let maxX: t => float; - let mapY: (float => float, t) => t; + let mapY: + (~knownIntegralSumFn: float => option(float)=?, float => float, t) => t; let xToY: (float, t) => DistTypes.mixedPoint; let toShape: t => DistTypes.shape; let toContinuous: t => option(DistTypes.continuousShape); let toDiscrete: t => option(DistTypes.discreteShape); - let toScaledContinuous: t => option(DistTypes.continuousShape); - let toScaledDiscrete: t => option(DistTypes.discreteShape); - let toDiscreteProbabilityMass: t => float; - let truncate: (~cache: option(integral)=?, int, t) => t; + let normalize: t => t; + let normalizedToContinuous: t => option(DistTypes.continuousShape); + let normalizedToDiscrete: t => option(DistTypes.discreteShape); + let toDiscreteProbabilityMassFraction: t => float; + let downsample: (~cache: option(integral)=?, int, t) => t; + let truncate: (option(float), option(float), t) => t; let integral: (~cache: option(integral), t) => integral; let integralEndY: (~cache: option(integral), t) => float; @@ -31,19 +34,18 @@ module Dist = (T: dist) => { let xTotalRange = (t: t) => maxX(t) -. minX(t); let mapY = T.mapY; let xToY = T.xToY; - let truncate = T.truncate; + let downsample = T.downsample; let toShape = T.toShape; - let toDiscreteProbabilityMass = T.toDiscreteProbabilityMass; + let toDiscreteProbabilityMassFraction = T.toDiscreteProbabilityMassFraction; let toContinuous = T.toContinuous; let toDiscrete = T.toDiscrete; - let toScaledContinuous = T.toScaledContinuous; - let toScaledDiscrete = T.toScaledDiscrete; + let normalize = T.normalize; + let truncate = T.truncate; + let normalizedToContinuous = T.normalizedToContinuous; + let normalizedToDiscrete = T.normalizedToDiscrete; let mean = T.mean; let variance = T.variance; - // TODO: Move this to each class, have use integral to produce integral in DistPlus class. - let scaleBy = (~scale=1.0, t: t) => t |> mapY((r: float) => r *. scale); - module Integral = { type t = T.integral; let get = T.integral; @@ -51,12 +53,20 @@ module Dist = (T: dist) => { let yToX = T.integralYtoX; let sum = T.integralEndY; }; +}; - // This is suboptimal because it could get the cache but doesn't here. - let scaleToIntegralSum = - (~cache: option(integral)=None, ~intendedSum=1.0, t: t) => { - let scale = intendedSum /. Integral.sum(~cache, t); - scaleBy(~scale, t); +module Common = { + let combineIntegralSums = + ( + combineFn: (float, float) => option(float), + t1KnownIntegralSum: option(float), + t2KnownIntegralSum: option(float), + ) => { + switch (t1KnownIntegralSum, t2KnownIntegralSum) { + | (None, _) + | (_, None) => None + | (Some(s1), Some(s2)) => combineFn(s1, s2) + }; }; }; @@ -64,24 +74,95 @@ module Continuous = { type t = DistTypes.continuousShape; let getShape = (t: t) => t.xyShape; let interpolation = (t: t) => t.interpolation; - let make = (interpolation, xyShape): t => {xyShape, interpolation}; - let shapeMap = (fn, {xyShape, interpolation}: t): t => { + let make = (interpolation, xyShape, knownIntegralSum): t => { + xyShape, + interpolation, + knownIntegralSum, + }; + let shapeMap = (fn, {xyShape, interpolation, knownIntegralSum}: t): t => { xyShape: fn(xyShape), interpolation, + knownIntegralSum, }; let lastY = (t: t) => t |> getShape |> XYShape.T.lastY; let oShapeMap = - (fn, {xyShape, interpolation}: t): option(DistTypes.continuousShape) => - fn(xyShape) |> E.O.fmap(make(interpolation)); + (fn, {xyShape, interpolation, knownIntegralSum}: t) + : option(DistTypes.continuousShape) => + fn(xyShape) |> E.O.fmap(make(interpolation, _, knownIntegralSum)); + + let empty: DistTypes.continuousShape = { + xyShape: XYShape.T.empty, + interpolation: `Linear, + knownIntegralSum: Some(0.0), + }; + let combinePointwise = + ( + ~knownIntegralSumsFn, + fn, + t1: DistTypes.continuousShape, + t2: DistTypes.continuousShape, + ) + : DistTypes.continuousShape => { + // If we're adding the distributions, and we know the total of each, then we + // can just sum them up. Otherwise, all bets are off. + let combinedIntegralSum = + Common.combineIntegralSums( + knownIntegralSumsFn, + t1.knownIntegralSum, + t2.knownIntegralSum, + ); + + make( + `Linear, + XYShape.PointwiseCombination.combine( + ~xsSelection=ALL_XS, + ~xToYSelection=XYShape.XtoY.linear, + ~fn, + t1.xyShape, + t2.xyShape, + ), + combinedIntegralSum, + ); + }; let toLinear = (t: t): option(t) => { switch (t) { - | {interpolation: `Stepwise, xyShape} => - xyShape |> XYShape.Range.stepsToContinuous |> E.O.fmap(make(`Linear)) - | {interpolation: `Linear, _} => Some(t) + | {interpolation: `Stepwise, xyShape, knownIntegralSum} => + xyShape + |> XYShape.Range.stepsToContinuous + |> E.O.fmap(make(`Linear, _, knownIntegralSum)) + | {interpolation: `Linear} => Some(t) }; }; let shapeFn = (fn, t: t) => t |> getShape |> fn; + let updateKnownIntegralSum = (knownIntegralSum, t: t): t => { + ...t, + knownIntegralSum, + }; + + let reduce = + ( + ~knownIntegralSumsFn: (float, float) => option(float)=(_, _) => None, + fn, + continuousShapes, + ) => + continuousShapes + |> E.A.fold_left(combinePointwise(~knownIntegralSumsFn, fn), empty); + + let mapY = (~knownIntegralSumFn=_ => None, fn, t: t) => { + let u = E.O.bind(_, knownIntegralSumFn); + let yMapFn = shapeMap(XYShape.T.mapY(fn)); + + t |> yMapFn |> updateKnownIntegralSum(u(t.knownIntegralSum)); + }; + + let scaleBy = (~scale=1.0, t: t): t => { + t + |> mapY((r: float) => r *. scale) + |> updateKnownIntegralSum( + E.O.bind(t.knownIntegralSum, v => Some(scale *. v)), + ); + }; module T = Dist({ @@ -89,8 +170,8 @@ module Continuous = { type integral = DistTypes.continuousShape; let minX = shapeFn(XYShape.T.minX); let maxX = shapeFn(XYShape.T.maxX); - let toDiscreteProbabilityMass = _ => 0.0; - let mapY = fn => shapeMap(XYShape.T.mapY(fn)); + let mapY = mapY; + let toDiscreteProbabilityMassFraction = _ => 0.0; let toShape = (t: t): DistTypes.shape => Continuous(t); let xToY = (f, {interpolation, xyShape}: t) => { ( @@ -105,25 +186,53 @@ module Continuous = { |> DistTypes.MixedPoint.makeContinuous; }; - // let combineWithFn = (t1: t, t2: t, fn: (float, float) => float) => { - // switch(t1, t2){ - // | ({interpolation: `Stepwise}, {interpolation: `Stepwise}) => 3.0 - // | ({interpolation: `Linear}, {interpolation: `Linear}) => 3.0 - // } - // }; + let truncate = + (leftCutoff: option(float), rightCutoff: option(float), t: t) => { + let truncatedZippedPairs = + t + |> getShape + |> XYShape.T.zip + |> XYShape.Zipped.filterByX(x => + x >= E.O.default(neg_infinity, leftCutoff) + || x <= E.O.default(infinity, rightCutoff) + ); + + let eps = (t |> getShape |> XYShape.T.xTotalRange) *. 0.0001; + + let leftNewPoint = + leftCutoff |> E.O.dimap(lc => [|(lc -. eps, 0.)|], _ => [||]); + let rightNewPoint = + rightCutoff |> E.O.dimap(rc => [|(rc +. eps, 0.)|], _ => [||]); + + let truncatedZippedPairsWithNewPoints = + E.A.concatMany([| + leftNewPoint, + truncatedZippedPairs, + rightNewPoint, + |]); + let truncatedShape = + XYShape.T.fromZippedArray(truncatedZippedPairsWithNewPoints); + + make(`Linear, truncatedShape, None); + }; // TODO: This should work with stepwise plots. let integral = (~cache, t) => - switch (cache) { - | Some(cache) => cache - | None => - t - |> getShape - |> XYShape.Range.integrateWithTriangles - |> E.O.toExt("This should not have happened") - |> make(`Linear) + if (t |> getShape |> XYShape.T.length > 0) { + switch (cache) { + | Some(cache) => cache + | None => + t + |> getShape + |> XYShape.Range.integrateWithTriangles + |> E.O.toExt("This should not have happened") + |> make(`Linear, _, None) + }; + } else { + make(`Linear, {xs: [|neg_infinity|], ys: [|0.0|]}, None); }; - let truncate = (~cache=None, length, t) => + + let downsample = (~cache=None, length, t): t => t |> shapeMap( XYShape.XsConversion.proportionByProbabilityMass( @@ -131,20 +240,29 @@ module Continuous = { integral(~cache, t).xyShape, ), ); - let integralEndY = (~cache, t) => t |> integral(~cache) |> lastY; - let integralXtoY = (~cache, f, t) => + let integralEndY = (~cache, t: t) => + t.knownIntegralSum |> E.O.default(t |> integral(~cache) |> lastY); + let integralXtoY = (~cache, f, t: t) => t |> integral(~cache) |> shapeFn(XYShape.XtoY.linear(f)); - let integralYtoX = (~cache, f, t) => + let integralYtoX = (~cache, f, t: t) => t |> integral(~cache) |> shapeFn(XYShape.YtoX.linear(f)); let toContinuous = t => Some(t); let toDiscrete = _ => None; - let toScaledContinuous = t => Some(t); - let toScaledDiscrete = _ => None; + + let normalize = (t: t): t => { + t + |> scaleBy(~scale=1. /. integralEndY(~cache=None, t)) + |> updateKnownIntegralSum(Some(1.0)); + }; + + let normalizedToContinuous = t => Some(t); // TODO: this should be normalized + let normalizedToDiscrete = _ => None; let mean = (t: t) => { let indefiniteIntegralStepwise = (p, h1) => h1 *. p ** 2.0 /. 2.0; let indefiniteIntegralLinear = (p, a, b) => a *. p ** 2.0 /. 2.0 +. b *. p ** 3.0 /. 3.0; + XYShape.Analysis.integrateContinuousShape( ~indefiniteIntegralStepwise, ~indefiniteIntegralLinear, @@ -158,64 +276,277 @@ module Continuous = { XYShape.Analysis.getMeanOfSquaresContinuousShape, ); }); + + /* This simply creates multiple copies of the continuous distribution, scaled and shifted according to + each discrete data point, and then adds them all together. */ + let combineAlgebraicallyWithDiscrete = + ( + ~downsample=false, + op: ExpressionTypes.algebraicOperation, + t1: t, + t2: DistTypes.discreteShape, + ) => { + let t1s = t1 |> getShape; + let t2s = t2.xyShape; // would like to use Discrete.getShape here, but current file structure doesn't allow for that + let t1n = t1s |> XYShape.T.length; + let t2n = t2s |> XYShape.T.length; + + let fn = Operation.Algebraic.toFn(op); + + let outXYShapes: array(array((float, float))) = + Belt.Array.makeUninitializedUnsafe(t2n); + + for (j in 0 to t2n - 1) { + // for each one of the discrete points + // create a new distribution, as long as the original continuous one + + let dxyShape: array((float, float)) = + Belt.Array.makeUninitializedUnsafe(t1n); + for (i in 0 to t1n - 1) { + let _ = + Belt.Array.set( + dxyShape, + i, + (fn(t1s.xs[i], t2s.xs[j]), t1s.ys[i] *. t2s.ys[j]), + ); + (); + }; + + let _ = Belt.Array.set(outXYShapes, j, dxyShape); + (); + }; + + let combinedIntegralSum = + Common.combineIntegralSums( + (a, b) => Some(a *. b), + t1.knownIntegralSum, + t2.knownIntegralSum, + ); + + outXYShapes + |> E.A.fmap(s => { + let xyShape = XYShape.T.fromZippedArray(s); + make(`Linear, xyShape, None); + }) + |> reduce((+.)) + |> updateKnownIntegralSum(combinedIntegralSum); + }; + + let combineAlgebraically = + (~downsample=false, op: ExpressionTypes.algebraicOperation, t1: t, t2: t) => { + let s1 = t1 |> getShape; + let s2 = t2 |> getShape; + let t1n = s1 |> XYShape.T.length; + let t2n = s2 |> XYShape.T.length; + if (t1n == 0 || t2n == 0) { + empty; + } else { + let combinedShape = + AlgebraicShapeCombination.combineShapesContinuousContinuous(op, s1, s2); + let combinedIntegralSum = + Common.combineIntegralSums( + (a, b) => Some(a *. b), + t1.knownIntegralSum, + t2.knownIntegralSum, + ); + // return a new Continuous distribution + make(`Linear, combinedShape, combinedIntegralSum); + }; + }; }; module Discrete = { - let sortedByY = (t: DistTypes.discreteShape) => - t |> XYShape.T.zip |> XYShape.Zipped.sortByY; - let sortedByX = (t: DistTypes.discreteShape) => - t |> XYShape.T.zip |> XYShape.Zipped.sortByX; - let empty = XYShape.T.empty; - let combine = - (fn, t1: DistTypes.discreteShape, t2: DistTypes.discreteShape) + type t = DistTypes.discreteShape; + + let make = (xyShape, knownIntegralSum): t => {xyShape, knownIntegralSum}; + let shapeMap = (fn, {xyShape, knownIntegralSum}: t): t => { + xyShape: fn(xyShape), + knownIntegralSum, + }; + let getShape = (t: t) => t.xyShape; + let oShapeMap = (fn, {xyShape, knownIntegralSum}: t): option(t) => + fn(xyShape) |> E.O.fmap(make(_, knownIntegralSum)); + + let empty: t = {xyShape: XYShape.T.empty, knownIntegralSum: Some(0.0)}; + let shapeFn = (fn, t: t) => t |> getShape |> fn; + + let lastY = (t: t) => t |> getShape |> XYShape.T.lastY; + + let combinePointwise = + ( + ~knownIntegralSumsFn, + fn, + t1: DistTypes.discreteShape, + t2: DistTypes.discreteShape, + ) : DistTypes.discreteShape => { - XYShape.Combine.combine( - ~xsSelection=ALL_XS, - ~xToYSelection=XYShape.XtoY.stepwiseIfAtX, - ~fn, - t1, - t2, + let combinedIntegralSum = + Common.combineIntegralSums( + knownIntegralSumsFn, + t1.knownIntegralSum, + t2.knownIntegralSum, + ); + + make( + XYShape.PointwiseCombination.combine( + ~xsSelection=ALL_XS, + ~xToYSelection=XYShape.XtoY.stepwiseIfAtX, + ~fn=(a, b) => fn(E.O.default(0.0, a), E.O.default(0.0, b)), // stepwiseIfAtX returns option(float), so this fn needs to handle None + t1.xyShape, + t2.xyShape, + ), + combinedIntegralSum, ); }; - let _default0 = (fn, a, b) => - fn(E.O.default(0.0, a), E.O.default(0.0, b)); - let reduce = (fn, items) => - items |> E.A.fold_left(combine(_default0(fn)), empty); + + let reduce = + (~knownIntegralSumsFn=(_, _) => None, fn, discreteShapes) + : DistTypes.discreteShape => + discreteShapes + |> E.A.fold_left(combinePointwise(~knownIntegralSumsFn, fn), empty); + + let updateKnownIntegralSum = (knownIntegralSum, t: t): t => { + ...t, + knownIntegralSum, + }; + + /* This multiples all of the data points together and creates a new discrete distribution from the results. + Data points at the same xs get added together. It may be a good idea to downsample t1 and t2 before and/or the result after. */ + let combineAlgebraically = + (op: ExpressionTypes.algebraicOperation, t1: t, t2: t) => { + let t1s = t1 |> getShape; + let t2s = t2 |> getShape; + let t1n = t1s |> XYShape.T.length; + let t2n = t2s |> XYShape.T.length; + + let combinedIntegralSum = + Common.combineIntegralSums( + (s1, s2) => Some(s1 *. s2), + t1.knownIntegralSum, + t2.knownIntegralSum, + ); + + let fn = Operation.Algebraic.toFn(op); + let xToYMap = E.FloatFloatMap.empty(); + + for (i in 0 to t1n - 1) { + for (j in 0 to t2n - 1) { + let x = fn(t1s.xs[i], t2s.xs[j]); + let cv = xToYMap |> E.FloatFloatMap.get(x) |> E.O.default(0.); + let my = t1s.ys[i] *. t2s.ys[j]; + let _ = Belt.MutableMap.set(xToYMap, x, cv +. my); + (); + }; + }; + + let rxys = xToYMap |> E.FloatFloatMap.toArray |> XYShape.Zipped.sortByX; + + let combinedShape = XYShape.T.fromZippedArray(rxys); + + make(combinedShape, combinedIntegralSum); + }; + + let mapY = (~knownIntegralSumFn=previousKnownIntegralSum => None, fn, t: t) => { + let u = E.O.bind(_, knownIntegralSumFn); + let yMapFn = shapeMap(XYShape.T.mapY(fn)); + + t |> yMapFn |> updateKnownIntegralSum(u(t.knownIntegralSum)); + }; + + let scaleBy = (~scale=1.0, t: t): t => { + t + |> mapY((r: float) => r *. scale) + |> updateKnownIntegralSum( + E.O.bind(t.knownIntegralSum, v => Some(scale *. v)), + ); + }; module T = Dist({ type t = DistTypes.discreteShape; type integral = DistTypes.continuousShape; let integral = (~cache, t) => - switch (cache) { - | Some(c) => c - | None => Continuous.make(`Stepwise, XYShape.T.accumulateYs((+.), t)) + if (t |> getShape |> XYShape.T.length > 0) { + switch (cache) { + | Some(c) => c + | None => + Continuous.make( + `Stepwise, + XYShape.T.accumulateYs((+.), getShape(t)), + None, + ) + }; + } else { + Continuous.make( + `Stepwise, + {xs: [|neg_infinity|], ys: [|0.0|]}, + None, + ); }; - let integralEndY = (~cache, t) => - t |> integral(~cache) |> Continuous.lastY; - let minX = XYShape.T.minX; - let maxX = XYShape.T.maxX; - let toDiscreteProbabilityMass = _ => 1.0; - let mapY = XYShape.T.mapY; + + let integralEndY = (~cache, t: t) => + t.knownIntegralSum + |> E.O.default(t |> integral(~cache) |> Continuous.lastY); + let minX = shapeFn(XYShape.T.minX); + let maxX = shapeFn(XYShape.T.maxX); + let toDiscreteProbabilityMassFraction = _ => 1.0; + let mapY = mapY; let toShape = (t: t): DistTypes.shape => Discrete(t); let toContinuous = _ => None; let toDiscrete = t => Some(t); - let toScaledContinuous = _ => None; - let toScaledDiscrete = t => Some(t); - let truncate = (~cache=None, i, t: t): DistTypes.discreteShape => - t - |> XYShape.T.zip - |> XYShape.Zipped.sortByY - |> Belt.Array.reverse - |> Belt.Array.slice(_, ~offset=0, ~len=i) - |> XYShape.Zipped.sortByX - |> XYShape.T.fromZippedArray; - let xToY = (f, t) => { - XYShape.XtoY.stepwiseIfAtX(f, t) + let normalize = (t: t): t => { + t + |> scaleBy(~scale=1. /. integralEndY(~cache=None, t)) + |> updateKnownIntegralSum(Some(1.0)); + }; + + let normalizedToContinuous = _ => None; + let normalizedToDiscrete = t => Some(t); // TODO: this should be normalized! + + let downsample = (~cache=None, i, t: t): t => { + // It's not clear how to downsample a set of discrete points in a meaningful way. + // The best we can do is to clip off the smallest values. + let currentLength = t |> getShape |> XYShape.T.length; + + if (i < currentLength && i >= 1 && currentLength > 1) { + let clippedShape = + t + |> getShape + |> XYShape.T.zip + |> XYShape.Zipped.sortByY + |> Belt.Array.reverse + |> Belt.Array.slice(_, ~offset=0, ~len=i) + |> XYShape.Zipped.sortByX + |> XYShape.T.fromZippedArray; + + make(clippedShape, None); // if someone needs the sum, they'll have to recompute it + } else { + t; + }; + }; + + let truncate = + (leftCutoff: option(float), rightCutoff: option(float), t: t): t => { + let truncatedShape = + t + |> getShape + |> XYShape.T.zip + |> XYShape.Zipped.filterByX(x => + x >= E.O.default(neg_infinity, leftCutoff) + || x <= E.O.default(infinity, rightCutoff) + ) + |> XYShape.T.fromZippedArray; + + make(truncatedShape, None); + }; + + let xToY = (f, t) => + t + |> getShape + |> XYShape.XtoY.stepwiseIfAtX(f) |> E.O.default(0.0) |> DistTypes.MixedPoint.makeDiscrete; - }; let integralXtoY = (~cache, f, t) => t @@ -229,49 +560,54 @@ module Discrete = { |> Continuous.getShape |> XYShape.YtoX.linear(f); - let mean = (t: t): float => - E.A.reducei(t.xs, 0.0, (acc, x, i) => acc +. x *. t.ys[i]); + let mean = (t: t): float => { + let s = getShape(t); + E.A.reducei(s.xs, 0.0, (acc, x, i) => acc +. x *. s.ys[i]); + }; let variance = (t: t): float => { let getMeanOfSquares = t => - mean(XYShape.Analysis.squareXYShape(t)); + t |> shapeMap(XYShape.Analysis.squareXYShape) |> mean; XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares); }; }); }; -// TODO: I think this shouldn't assume continuous/discrete are normalized to 1.0, and thus should not need the discreteProbabilityMassFraction being separate. module Mixed = { type t = DistTypes.mixedShape; - let make = - (~continuous, ~discrete, ~discreteProbabilityMassFraction) - : DistTypes.mixedShape => { - continuous, - discrete, - discreteProbabilityMassFraction, + let make = (~continuous, ~discrete): t => {continuous, discrete}; + + let totalLength = (t: t): int => { + let continuousLength = + t.continuous |> Continuous.getShape |> XYShape.T.length; + let discreteLength = t.discrete |> Discrete.getShape |> XYShape.T.length; + + continuousLength + discreteLength; }; - // todo: Put into scaling module - let scaleDiscreteFn = - ({discreteProbabilityMassFraction}: DistTypes.mixedShape, f) => - f *. discreteProbabilityMassFraction; + let scaleBy = (~scale=1.0, {discrete, continuous}: t): t => { + let scaledDiscrete = Discrete.scaleBy(~scale, discrete); + let scaledContinuous = Continuous.scaleBy(~scale, continuous); + make(~discrete=scaledDiscrete, ~continuous=scaledContinuous); + }; - //TODO: Warning: This currently computes the integral, which is expensive. - let scaleContinuousFn = - ({discreteProbabilityMassFraction}: DistTypes.mixedShape, f) => - f *. (1.0 -. discreteProbabilityMassFraction); + let toContinuous = ({continuous}: t) => Some(continuous); + let toDiscrete = ({discrete}: t) => Some(discrete); - //TODO: Warning: This currently computes the integral, which is expensive. - let scaleContinuous = ({discreteProbabilityMassFraction}: t, continuous) => - continuous - |> Continuous.T.scaleToIntegralSum( - ~intendedSum=1.0 -. discreteProbabilityMassFraction, - ); + let combinePointwise = (~knownIntegralSumsFn, fn, t1: t, t2: t) => { + let reducedDiscrete = + [|t1, t2|] + |> E.A.fmap(toDiscrete) + |> E.A.O.concatSomes + |> Discrete.reduce(~knownIntegralSumsFn, fn); - let scaleDiscrete = ({discreteProbabilityMassFraction}: t, disrete) => - disrete - |> Discrete.T.scaleToIntegralSum( - ~intendedSum=discreteProbabilityMassFraction, - ); + let reducedContinuous = + [|t1, t2|] + |> E.A.fmap(toContinuous) + |> E.A.O.concatSomes + |> Continuous.reduce(~knownIntegralSumsFn, fn); + + make(~discrete=reducedDiscrete, ~continuous=reducedContinuous); + }; module T = Dist({ @@ -283,100 +619,122 @@ module Mixed = { let maxX = ({continuous, discrete}: t) => max(Continuous.T.maxX(continuous), Discrete.T.maxX(discrete)); let toShape = (t: t): DistTypes.shape => Mixed(t); - let toContinuous = ({continuous}: t) => Some(continuous); - let toDiscrete = ({discrete}: t) => Some(discrete); - let toDiscreteProbabilityMass = ({discreteProbabilityMassFraction}: t) => discreteProbabilityMassFraction; - let xToY = (f, {discrete, continuous} as t: t) => { - let c = - continuous - |> Continuous.T.xToY(f) - |> DistTypes.MixedPoint.fmap(scaleContinuousFn(t)); - let d = - discrete - |> Discrete.T.xToY(f) - |> DistTypes.MixedPoint.fmap(scaleDiscreteFn(t)); - DistTypes.MixedPoint.add(c, d); - }; + let toContinuous = toContinuous; + let toDiscrete = toDiscrete; - // Warning: It's not clear how to update the discreteProbabilityMassFraction, so this may create small errors. let truncate = ( - ~cache=None, - count, - {discrete, continuous, discreteProbabilityMassFraction}: t, - ) - : t => { - { - discrete: - Discrete.T.truncate( - int_of_float( - float_of_int(count) *. discreteProbabilityMassFraction, - ), - discrete, - ), - continuous: - Continuous.T.truncate( - int_of_float( - float_of_int(count) - *. (1.0 -. discreteProbabilityMassFraction), - ), - continuous, - ), - discreteProbabilityMassFraction, - }; + leftCutoff: option(float), + rightCutoff: option(float), + {discrete, continuous}: t, + ) => { + let truncatedContinuous = + Continuous.T.truncate(leftCutoff, rightCutoff, continuous); + let truncatedDiscrete = + Discrete.T.truncate(leftCutoff, rightCutoff, discrete); + + make(~discrete=truncatedDiscrete, ~continuous=truncatedContinuous); }; - let toScaledContinuous = ({continuous} as t: t) => - Some(scaleContinuous(t, continuous)); + let normalize = (t: t): t => { + let continuousIntegralSum = + Continuous.T.Integral.sum(~cache=None, t.continuous); + let discreteIntegralSum = + Discrete.T.Integral.sum(~cache=None, t.discrete); + let totalIntegralSum = continuousIntegralSum +. discreteIntegralSum; - let toScaledDiscrete = ({discrete} as t: t) => - Some(scaleDiscrete(t, discrete)); + let newContinuousSum = continuousIntegralSum /. totalIntegralSum; + let newDiscreteSum = discreteIntegralSum /. totalIntegralSum; - let integral = - ( - ~cache, - {continuous, discrete, discreteProbabilityMassFraction}: t, - ) => { + let normalizedContinuous = + t.continuous + |> Continuous.scaleBy(~scale=1. /. newContinuousSum) + |> Continuous.updateKnownIntegralSum(Some(newContinuousSum)); + let normalizedDiscrete = + t.discrete + |> Discrete.scaleBy(~scale=1. /. newDiscreteSum) + |> Discrete.updateKnownIntegralSum(Some(newDiscreteSum)); + + make(~continuous=normalizedContinuous, ~discrete=normalizedDiscrete); + }; + + let xToY = (x, t: t) => { + // This evaluates the mixedShape at x, interpolating if necessary. + // Note that we normalize entire mixedShape first. + let {continuous, discrete}: t = normalize(t); + let c = Continuous.T.xToY(x, continuous); + let d = Discrete.T.xToY(x, discrete); + DistTypes.MixedPoint.add(c, d); // "add" here just combines the two values into a single MixedPoint. + }; + + let toDiscreteProbabilityMassFraction = ({discrete, continuous}: t) => { + let discreteIntegralSum = + Discrete.T.Integral.sum(~cache=None, discrete); + let continuousIntegralSum = + Continuous.T.Integral.sum(~cache=None, continuous); + let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum; + + discreteIntegralSum /. totalIntegralSum; + }; + + let downsample = (~cache=None, count, {discrete, continuous}: t): t => { + // We will need to distribute the new xs fairly between the discrete and continuous shapes. + // The easiest way to do this is to simply go by the previous probability masses. + + // The cache really isn't helpful here, because we would need two separate caches + let discreteIntegralSum = + Discrete.T.Integral.sum(~cache=None, discrete); + let continuousIntegralSum = + Continuous.T.Integral.sum(~cache=None, continuous); + let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum; + + // TODO: figure out what to do when the totalIntegralSum is zero. + + let downsampledDiscrete = + Discrete.T.downsample( + int_of_float( + float_of_int(count) *. (discreteIntegralSum /. totalIntegralSum), + ), + discrete, + ); + + let downsampledContinuous = + Continuous.T.downsample( + int_of_float( + float_of_int(count) + *. (continuousIntegralSum /. totalIntegralSum), + ), + continuous, + ); + + {discrete: downsampledDiscrete, continuous: downsampledContinuous}; + }; + + let normalizedToContinuous = (t: t) => Some(normalize(t).continuous); + + let normalizedToDiscrete = ({discrete} as t: t) => + Some(normalize(t).discrete); + + let integral = (~cache, {continuous, discrete}: t) => { switch (cache) { | Some(cache) => cache | None => - let scaleContinuousBy = - (1.0 -. discreteProbabilityMassFraction) - /. (continuous |> Continuous.T.Integral.sum(~cache=None)); + // note: if the underlying shapes aren't normalized, then these integrals won't be either! + let continuousIntegral = + Continuous.T.Integral.get(~cache=None, continuous); + let discreteIntegral = + Discrete.T.Integral.get(~cache=None, discrete); - let scaleDiscreteBy = - discreteProbabilityMassFraction - /. ( - discrete - |> Discrete.T.Integral.get(~cache=None) - |> Continuous.toLinear - |> E.O.fmap(Continuous.lastY) - |> E.O.toExn("") - ); - - let cont = - continuous - |> Continuous.T.Integral.get(~cache=None) - |> Continuous.T.scaleBy(~scale=scaleContinuousBy); - - let dist = - discrete - |> Discrete.T.Integral.get(~cache=None) - |> Continuous.toLinear - |> E.O.toExn("") - |> Continuous.T.scaleBy(~scale=scaleDiscreteBy); - - let result = - Continuous.make( - `Linear, - XYShape.Combine.combineLinear( - ~fn=(+.), - Continuous.getShape(cont), - Continuous.getShape(dist), - ), - ); - result; + Continuous.make( + `Linear, + XYShape.PointwiseCombination.combineLinear( + ~fn=(+.), + Continuous.getShape(continuousIntegral), + Continuous.getShape(discreteIntegral), + ), + None, + ); }; }; @@ -398,80 +756,263 @@ module Mixed = { |> XYShape.YtoX.linear(f); }; - // TODO: This part really needs to be rethought, I'm quite sure this is just broken. Mapping Ys would change the desired discreteProbabilityMassFraction. + // This pipes all ys (continuous and discrete) through fn. + // If mapY is a linear operation, we might be able to update the knownIntegralSums as well; + // if not, they'll be set to None. let mapY = - (fn, {discrete, continuous, discreteProbabilityMassFraction}: t): t => { - { - discrete: Discrete.T.mapY(fn, discrete), - continuous: Continuous.T.mapY(fn, continuous), - discreteProbabilityMassFraction, - }; - }; - - let mean = (t: t): float => { - let discreteProbabilityMassFraction = - t.discreteProbabilityMassFraction; - switch (discreteProbabilityMassFraction) { - | 1.0 => Discrete.T.mean(t.discrete) - | 0.0 => Continuous.T.mean(t.continuous) - | _ => - Discrete.T.mean(t.discrete) - *. discreteProbabilityMassFraction - +. Continuous.T.mean(t.continuous) - *. (1.0 -. discreteProbabilityMassFraction) - }; - }; - - let variance = (t: t): float => { - let discreteProbabilityMassFraction = - t.discreteProbabilityMassFraction; - let getMeanOfSquares = (t: t) => { - Discrete.T.mean(XYShape.Analysis.squareXYShape(t.discrete)) - *. t.discreteProbabilityMassFraction - +. XYShape.Analysis.getMeanOfSquaresContinuousShape(t.continuous) - *. (1.0 -. t.discreteProbabilityMassFraction); - }; - switch (discreteProbabilityMassFraction) { - | 1.0 => Discrete.T.variance(t.discrete) - | 0.0 => Continuous.T.variance(t.continuous) - | _ => - XYShape.Analysis.getVarianceDangerously( - t, - mean, - getMeanOfSquares, + ( + ~knownIntegralSumFn=previousIntegralSum => None, + fn, + {discrete, continuous}: t, ) + : t => { + let u = E.O.bind(_, knownIntegralSumFn); + + let yMappedDiscrete = + discrete + |> Discrete.T.mapY(fn) + |> Discrete.updateKnownIntegralSum(u(discrete.knownIntegralSum)); + + let yMappedContinuous = + continuous + |> Continuous.T.mapY(fn) + |> Continuous.updateKnownIntegralSum( + u(continuous.knownIntegralSum), + ); + + { + discrete: yMappedDiscrete, + continuous: Continuous.T.mapY(fn, continuous), + }; + }; + + let mean = ({discrete, continuous}: t): float => { + let discreteMean = Discrete.T.mean(discrete); + let continuousMean = Continuous.T.mean(continuous); + + // the combined mean is the weighted sum of the two: + let discreteIntegralSum = + Discrete.T.Integral.sum(~cache=None, discrete); + let continuousIntegralSum = + Continuous.T.Integral.sum(~cache=None, continuous); + let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum; + + ( + discreteMean + *. discreteIntegralSum + +. continuousMean + *. continuousIntegralSum + ) + /. totalIntegralSum; + }; + + let variance = ({discrete, continuous} as t: t): float => { + // the combined mean is the weighted sum of the two: + let discreteIntegralSum = + Discrete.T.Integral.sum(~cache=None, discrete); + let continuousIntegralSum = + Continuous.T.Integral.sum(~cache=None, continuous); + let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum; + + let getMeanOfSquares = ({discrete, continuous} as t: t) => { + let discreteMean = + discrete + |> Discrete.shapeMap(XYShape.Analysis.squareXYShape) + |> Discrete.T.mean; + let continuousMean = + continuous |> XYShape.Analysis.getMeanOfSquaresContinuousShape; + ( + discreteMean + *. discreteIntegralSum + +. continuousMean + *. continuousIntegralSum + ) + /. totalIntegralSum; + }; + + switch (discreteIntegralSum /. totalIntegralSum) { + | 1.0 => Discrete.T.variance(discrete) + | 0.0 => Continuous.T.variance(continuous) + | _ => + XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares) }; }; }); + + let combineAlgebraically = + (~downsample=false, op: ExpressionTypes.algebraicOperation, t1: t, t2: t) + : t => { + // Discrete convolution can cause a huge increase in the number of samples, + // so we'll first downsample. + + // An alternative (to be explored in the future) may be to first perform the full convolution and then to downsample the result; + // to use non-uniform fast Fourier transforms (for addition only), add web workers or gpu.js, etc. ... + + let downsampleIfTooLarge = (t: t) => { + let sqtl = sqrt(float_of_int(totalLength(t))); + sqtl > 10. && downsample ? T.downsample(int_of_float(sqtl), t) : t; + }; + + let t1d = downsampleIfTooLarge(t1); + let t2d = downsampleIfTooLarge(t2); + + // continuous (*) continuous => continuous, but also + // discrete (*) continuous => continuous (and vice versa). We have to take care of all combos and then combine them: + let ccConvResult = + Continuous.combineAlgebraically( + ~downsample=false, + op, + t1d.continuous, + t2d.continuous, + ); + let dcConvResult = + Continuous.combineAlgebraicallyWithDiscrete( + ~downsample=false, + op, + t2d.continuous, + t1d.discrete, + ); + let cdConvResult = + Continuous.combineAlgebraicallyWithDiscrete( + ~downsample=false, + op, + t1d.continuous, + t2d.discrete, + ); + let continuousConvResult = + Continuous.reduce((+.), [|ccConvResult, dcConvResult, cdConvResult|]); + + // ... finally, discrete (*) discrete => discrete, obviously: + let discreteConvResult = + Discrete.combineAlgebraically(op, t1d.discrete, t2d.discrete); + + {discrete: discreteConvResult, continuous: continuousConvResult}; + }; }; module Shape = { + type t = DistTypes.shape; + let mapToAll = ((fn1, fn2, fn3), t: t) => + switch (t) { + | Mixed(m) => fn1(m) + | Discrete(m) => fn2(m) + | Continuous(m) => fn3(m) + }; + + let fmap = ((fn1, fn2, fn3), t: t): t => + switch (t) { + | Mixed(m) => Mixed(fn1(m)) + | Discrete(m) => Discrete(fn2(m)) + | Continuous(m) => Continuous(fn3(m)) + }; + + let toMixed = + mapToAll(( + m => m, + d => Mixed.make(~discrete=d, ~continuous=Continuous.empty), + c => Mixed.make(~discrete=Discrete.empty, ~continuous=c), + )); + + let combineAlgebraically = + (op: ExpressionTypes.algebraicOperation, t1: t, t2: t): t => { + switch (t1, t2) { + | (Continuous(m1), Continuous(m2)) => + DistTypes.Continuous( + Continuous.combineAlgebraically(~downsample=true, op, m1, m2), + ) + | (Discrete(m1), Discrete(m2)) => + DistTypes.Discrete(Discrete.combineAlgebraically(op, m1, m2)) + | (m1, m2) => + DistTypes.Mixed( + Mixed.combineAlgebraically( + ~downsample=true, + op, + toMixed(m1), + toMixed(m2), + ), + ) + }; + }; + + let combinePointwise = + (~knownIntegralSumsFn=(_, _) => None, fn, t1: t, t2: t) => + switch (t1, t2) { + | (Continuous(m1), Continuous(m2)) => + DistTypes.Continuous( + Continuous.combinePointwise(~knownIntegralSumsFn, fn, m1, m2), + ) + | (Discrete(m1), Discrete(m2)) => + DistTypes.Discrete( + Discrete.combinePointwise(~knownIntegralSumsFn, fn, m1, m2), + ) + | (m1, m2) => + DistTypes.Mixed( + Mixed.combinePointwise( + ~knownIntegralSumsFn, + fn, + toMixed(m1), + toMixed(m2), + ), + ) + }; + + // TODO: implement these functions + let pdf = (f: float, t: t): float => { + 0.0; + }; + + let inv = (f: float, t: t): float => { + 0.0; + }; + + let sample = (t: t): float => { + 0.0; + }; + module T = Dist({ type t = DistTypes.shape; type integral = DistTypes.continuousShape; - let mapToAll = ((fn1, fn2, fn3), t: t) => - switch (t) { - | Mixed(m) => fn1(m) - | Discrete(m) => fn2(m) - | Continuous(m) => fn3(m) - }; - - let fmap = ((fn1, fn2, fn3), t: t): t => - switch (t) { - | Mixed(m) => Mixed(fn1(m)) - | Discrete(m) => Discrete(fn2(m)) - | Continuous(m) => Continuous(fn3(m)) - }; - - let xToY = f => + let xToY = (f: float) => mapToAll(( Mixed.T.xToY(f), Discrete.T.xToY(f), Continuous.T.xToY(f), )); + let toShape = (t: t) => t; + + let toContinuous = t => None; + let toDiscrete = t => None; + + let downsample = (~cache=None, i, t) => + fmap( + ( + Mixed.T.downsample(i), + Discrete.T.downsample(i), + Continuous.T.downsample(i), + ), + t, + ); + + let truncate = (leftCutoff, rightCutoff, t): t => + fmap( + ( + Mixed.T.truncate(leftCutoff, rightCutoff), + Discrete.T.truncate(leftCutoff, rightCutoff), + Continuous.T.truncate(leftCutoff, rightCutoff), + ), + t, + ); + + let toDiscreteProbabilityMassFraction = t => 0.0; + let normalize = + fmap(( + Mixed.T.normalize, + Discrete.T.normalize, + Continuous.T.normalize, + )); let toContinuous = mapToAll(( Mixed.T.toContinuous, @@ -485,31 +1026,24 @@ module Shape = { Continuous.T.toDiscrete, )); - let truncate = (~cache=None, i) => - fmap(( - Mixed.T.truncate(i), - Discrete.T.truncate(i), - Continuous.T.truncate(i), + let toDiscreteProbabilityMassFraction = + mapToAll(( + Mixed.T.toDiscreteProbabilityMassFraction, + Discrete.T.toDiscreteProbabilityMassFraction, + Continuous.T.toDiscreteProbabilityMassFraction, )); - let toDiscreteProbabilityMass = + let normalizedToDiscrete = mapToAll(( - Mixed.T.toDiscreteProbabilityMass, - Discrete.T.toDiscreteProbabilityMass, - Continuous.T.toDiscreteProbabilityMass, + Mixed.T.normalizedToDiscrete, + Discrete.T.normalizedToDiscrete, + Continuous.T.normalizedToDiscrete, )); - - let toScaledDiscrete = + let normalizedToContinuous = mapToAll(( - Mixed.T.toScaledDiscrete, - Discrete.T.toScaledDiscrete, - Continuous.T.toScaledDiscrete, - )); - let toScaledContinuous = - mapToAll(( - Mixed.T.toScaledContinuous, - Discrete.T.toScaledContinuous, - Continuous.T.toScaledContinuous, + Mixed.T.normalizedToContinuous, + Discrete.T.normalizedToContinuous, + Continuous.T.normalizedToContinuous, )); let minX = mapToAll((Mixed.T.minX, Discrete.T.minX, Continuous.T.minX)); let integral = (~cache) => { @@ -540,11 +1074,11 @@ module Shape = { )); }; let maxX = mapToAll((Mixed.T.maxX, Discrete.T.maxX, Continuous.T.maxX)); - let mapY = fn => + let mapY = (~knownIntegralSumFn=previousIntegralSum => None, fn) => fmap(( - Mixed.T.mapY(fn), - Discrete.T.mapY(fn), - Continuous.T.mapY(fn), + Mixed.T.mapY(~knownIntegralSumFn, fn), + Discrete.T.mapY(~knownIntegralSumFn, fn), + Continuous.T.mapY(~knownIntegralSumFn, fn), )); let mean = (t: t): float => @@ -561,6 +1095,14 @@ module Shape = { | Continuous(m) => Continuous.T.variance(m) }; }); + + let operate = (distToFloatOp: ExpressionTypes.distToFloatOperation, s) => + switch (distToFloatOp) { + | `Pdf(f) => pdf(f, s) + | `Inv(f) => inv(f, s) + | `Sample => sample(s) + | `Mean => T.mean(s) + }; }; module DistPlus = { @@ -620,21 +1162,35 @@ module DistPlus = { let toShape = toShape; let toContinuous = shapeFn(Shape.T.toContinuous); let toDiscrete = shapeFn(Shape.T.toDiscrete); - // todo: Adjust for total mass. - let toScaledContinuous = (t: t) => { + let normalize = (t: t): t => { + let normalizedShape = t |> toShape |> Shape.T.normalize; + + t |> updateShape(normalizedShape); + // TODO: also adjust for domainIncludedProbabilityMass here. + }; + + let truncate = (leftCutoff, rightCutoff, t: t): t => { + let truncatedShape = + t |> toShape |> Shape.T.truncate(leftCutoff, rightCutoff); + + t |> updateShape(truncatedShape); + }; + + // TODO: replace this with + let normalizedToContinuous = (t: t) => { t |> toShape - |> Shape.T.toScaledContinuous + |> Shape.T.normalizedToContinuous |> E.O.fmap( Continuous.T.mapY(domainIncludedProbabilityMassAdjustment(t)), ); }; - let toScaledDiscrete = (t: t) => { + let normalizedToDiscrete = (t: t) => { t |> toShape - |> Shape.T.toScaledDiscrete + |> Shape.T.normalizedToDiscrete |> E.O.fmap( Discrete.T.mapY(domainIncludedProbabilityMassAdjustment(t)), ); @@ -648,18 +1204,24 @@ module DistPlus = { let minX = shapeFn(Shape.T.minX); let maxX = shapeFn(Shape.T.maxX); - let toDiscreteProbabilityMass = - shapeFn(Shape.T.toDiscreteProbabilityMass); + let toDiscreteProbabilityMassFraction = + shapeFn(Shape.T.toDiscreteProbabilityMassFraction); // This bit is kind of awkward, could probably use rethinking. let integral = (~cache, t: t) => updateShape(Continuous(t.integralCache), t); - let truncate = (~cache=None, i, t) => - updateShape(t |> toShape |> Shape.T.truncate(i), t); + let downsample = (~cache=None, i, t): t => + updateShape(t |> toShape |> Shape.T.downsample(i), t); // todo: adjust for limit, maybe? - let mapY = (fn, {shape, _} as t: t): t => - Shape.T.mapY(fn, shape) |> updateShape(_, t); + let mapY = + ( + ~knownIntegralSumFn=previousIntegralSum => None, + fn, + {shape, _} as t: t, + ) + : t => + Shape.T.mapY(~knownIntegralSumFn, fn, shape) |> updateShape(_, t); let integralEndY = (~cache as _, t: t) => Shape.T.Integral.sum(~cache=Some(t.integralCache), toShape(t)); @@ -674,7 +1236,9 @@ module DistPlus = { let integralYtoX = (~cache as _, f, t: t) => { Shape.T.Integral.yToX(~cache=Some(t.integralCache), f, toShape(t)); }; - let mean = (t: t) => Shape.T.mean(t.shape); + let mean = (t: t) => { + Shape.T.mean(t.shape); + }; let variance = (t: t) => Shape.T.variance(t.shape); }); }; @@ -708,4 +1272,4 @@ module DistPlusTime = { |> E.O.fmap(x => DistPlus.T.Integral.xToY(~cache=None, x, t)); }; }; -}; \ No newline at end of file +}; diff --git a/src/distPlus/distribution/MixedShapeBuilder.re b/src/distPlus/distribution/MixedShapeBuilder.re index 949a6f20..9689c1c4 100644 --- a/src/distPlus/distribution/MixedShapeBuilder.re +++ b/src/distPlus/distribution/MixedShapeBuilder.re @@ -8,14 +8,15 @@ type assumptions = { discreteProbabilityMass: option(float), }; -let buildSimple = (~continuous: option(DistTypes.continuousShape), ~discrete): option(DistTypes.shape) => { - let continuous = continuous |> E.O.default(Distributions.Continuous.make(`Linear, {xs: [||], ys: [||]})) +let buildSimple = (~continuous: option(DistTypes.continuousShape), ~discrete: option(DistTypes.discreteShape)): option(DistTypes.shape) => { + let continuous = continuous |> E.O.default(Distributions.Continuous.make(`Linear, {xs: [||], ys: [||]}, Some(0.0))); + let discrete = discrete |> E.O.default(Distributions.Discrete.make({xs: [||], ys: [||]}, Some(0.0))); let cLength = continuous |> Distributions.Continuous.getShape |> XYShape.T.xs |> E.A.length; - let dLength = discrete |> XYShape.T.xs |> E.A.length; + let dLength = discrete |> Distributions.Discrete.getShape |> XYShape.T.xs |> E.A.length; switch (cLength, dLength) { | (0 | 1, 0) => None | (0 | 1, _) => Some(Discrete(discrete)) @@ -23,83 +24,13 @@ let buildSimple = (~continuous: option(DistTypes.continuousShape), ~discrete): o | (_, _) => let discreteProbabilityMassFraction = Distributions.Discrete.T.Integral.sum(~cache=None, discrete); - let discrete = - Distributions.Discrete.T.scaleToIntegralSum(~intendedSum=1.0, discrete); - let continuous = - Distributions.Continuous.T.scaleToIntegralSum( - ~intendedSum=1.0, - continuous, - ); + let discrete = Distributions.Discrete.T.normalize(discrete); + let continuous = Distributions.Continuous.T.normalize(continuous); let mixedDist = Distributions.Mixed.make( ~continuous, - ~discrete, - ~discreteProbabilityMassFraction, + ~discrete ); Some(Mixed(mixedDist)); }; -}; - - -// TODO: Delete, only being used in tests -let build = (~continuous, ~discrete, ~assumptions) => - switch (assumptions) { - | { - continuous: ADDS_TO_CORRECT_PROBABILITY, - discrete: ADDS_TO_CORRECT_PROBABILITY, - discreteProbabilityMass: Some(r), - } => - // TODO: Fix this, it's wrong :( - Some( - Distributions.Mixed.make( - ~continuous, - ~discrete, - ~discreteProbabilityMassFraction=r, - ), - ) - - | { - continuous: ADDS_TO_1, - discrete: ADDS_TO_1, - discreteProbabilityMass: Some(r), - } => - Some( - Distributions.Mixed.make( - ~continuous, - ~discrete, - ~discreteProbabilityMassFraction=r, - ), - ) - - | { - continuous: ADDS_TO_1, - discrete: ADDS_TO_1, - discreteProbabilityMass: None, - } => - None - - | { - continuous: ADDS_TO_CORRECT_PROBABILITY, - discrete: ADDS_TO_1, - discreteProbabilityMass: None, - } => - None - - | { - continuous: ADDS_TO_1, - discrete: ADDS_TO_CORRECT_PROBABILITY, - discreteProbabilityMass: None, - } => - let discreteProbabilityMassFraction = - Distributions.Discrete.T.Integral.sum(~cache=None, discrete); - let discrete = - Distributions.Discrete.T.scaleToIntegralSum(~intendedSum=1.0, discrete); - Some( - Distributions.Mixed.make( - ~continuous, - ~discrete, - ~discreteProbabilityMassFraction, - ), - ); - | _ => None - }; \ No newline at end of file +}; \ No newline at end of file diff --git a/src/distPlus/distribution/XYShape.re b/src/distPlus/distribution/XYShape.re index aeed1bae..7bea8b06 100644 --- a/src/distPlus/distribution/XYShape.re +++ b/src/distPlus/distribution/XYShape.re @@ -9,7 +9,7 @@ let interpolate = }; // TODO: Make sure that shapes cannot be empty. -let extImp = E.O.toExt("Should not be possible"); +let extImp = E.O.toExt("Tried to perform an operation on an empty XYShape."); module T = { type t = xyShape; @@ -17,6 +17,7 @@ module T = { type ts = array(xyShape); let xs = (t: t) => t.xs; let ys = (t: t) => t.ys; + let length = (t: t) => E.A.length(t.xs); let empty = {xs: [||], ys: [||]}; let minX = (t: t) => t |> xs |> E.A.Sorted.min |> extImp; let maxX = (t: t) => t |> xs |> E.A.Sorted.max |> extImp; @@ -154,7 +155,9 @@ module XsConversion = { let proportionByProbabilityMass = (newLength: int, integral: T.t, t: T.t): T.t => { - equallyDivideXByMass(newLength, integral) |> _replaceWithXs(_, t); + integral + |> equallyDivideXByMass(newLength) // creates a new set of xs at evenly spaced percentiles + |> _replaceWithXs(_, t); // linearly interpolates new ys for the new xs }; }; @@ -164,9 +167,10 @@ module Zipped = { let compareXs = ((x1, _), (x2, _)) => x1 > x2 ? 1 : 0; let sortByY = (t: zipped) => t |> E.A.stableSortBy(_, compareYs); let sortByX = (t: zipped) => t |> E.A.stableSortBy(_, compareXs); + let filterByX = (testFn: (float => bool), t: zipped) => t |> E.A.filter(((x, _)) => testFn(x)); }; -module Combine = { +module PointwiseCombination = { type xsSelection = | ALL_XS | XS_EVENLY_DIVIDED(int); @@ -179,16 +183,25 @@ module Combine = { t1: T.t, t2: T.t, ) => { - let allXs = - switch (xsSelection) { - | ALL_XS => Ts.allXs([|t1, t2|]) - | XS_EVENLY_DIVIDED(sampleCount) => - Ts.equallyDividedXs([|t1, t2|], sampleCount) - }; - let allYs = - allXs |> E.A.fmap(x => fn(xToYSelection(x, t1), xToYSelection(x, t2))); - T.fromArrays(allXs, allYs); + switch ((E.A.length(t1.xs), E.A.length(t2.xs))) { + | (0, 0) => T.empty + | (0, _) => t2 + | (_, 0) => t1 + | (_, _) => { + let allXs = + switch (xsSelection) { + | ALL_XS => Ts.allXs([|t1, t2|]) + | XS_EVENLY_DIVIDED(sampleCount) => + Ts.equallyDividedXs([|t1, t2|], sampleCount) + }; + + let allYs = + allXs |> E.A.fmap(x => fn(xToYSelection(x, t1), xToYSelection(x, t2))); + + T.fromArrays(allXs, allYs); + } + } }; let combineLinear = combine(~xToYSelection=XtoY.linear); @@ -244,8 +257,8 @@ module Range = { Belt.Array.set( cumulativeY, x + 1, - (xs[x + 1] -. xs[x]) - *. ((ys[x] +. ys[x + 1]) /. 2.) + (xs[x + 1] -. xs[x]) // dx + *. ((ys[x] +. ys[x + 1]) /. 2.) // (1/2) * (avgY) +. cumulativeY[x], ); (); @@ -265,7 +278,7 @@ module Range = { items |> Belt.Array.map(_, rangePointAssumingSteps) |> T.fromZippedArray - |> Combine.intersperse(t |> T.mapX(e => e +. diff)), + |> PointwiseCombination.intersperse(t |> T.mapX(e => e +. diff)), ) | _ => Some(t) }; @@ -287,7 +300,7 @@ let pointLogScore = (prediction, answer) => }; let logScorePoint = (sampleCount, t1, t2) => - Combine.combine( + PointwiseCombination.combine( ~xsSelection=XS_EVENLY_DIVIDED(sampleCount), ~xToYSelection=XtoY.linear, ~fn=pointLogScore, @@ -315,6 +328,7 @@ module Analysis = { 0.0, (acc, _x, i) => { let areaUnderIntegral = + // TODO Take this switch statement out of the loop body switch (t.interpolation, i) { | (_, 0) => 0.0 | (`Stepwise, _) => @@ -323,12 +337,16 @@ module Analysis = { | (`Linear, _) => let x1 = xs[i - 1]; let x2 = xs[i]; - let h1 = ys[i - 1]; - let h2 = ys[i]; - let b = (h1 -. h2) /. (x1 -. x2); - let a = h1 -. b *. x1; - indefiniteIntegralLinear(x2, a, b) - -. indefiniteIntegralLinear(x1, a, b); + if (x1 == x2) { + 0.0 + } else { + let h1 = ys[i - 1]; + let h2 = ys[i]; + let b = (h1 -. h2) /. (x1 -. x2); + let a = h1 -. b *. x1; + indefiniteIntegralLinear(x2, a, b) + -. indefiniteIntegralLinear(x1, a, b); + }; }; acc +. areaUnderIntegral; }, @@ -354,4 +372,4 @@ module Analysis = { }; let squareXYShape = T.mapX(x => x ** 2.0) -}; \ No newline at end of file +}; diff --git a/src/distPlus/expressionTree/ExpressionTree.re b/src/distPlus/expressionTree/ExpressionTree.re new file mode 100644 index 00000000..bd162bbf --- /dev/null +++ b/src/distPlus/expressionTree/ExpressionTree.re @@ -0,0 +1,23 @@ +open ExpressionTypes.ExpressionTree; + +let toShape = (sampleCount: int, node: node) => { + let renderResult = + `Render(`Normalize(node)) + |> ExpressionTreeEvaluator.toLeaf({sampleCount: sampleCount}); + + switch (renderResult) { + | Ok(`RenderedDist(rs)) => + let continuous = Distributions.Shape.T.toContinuous(rs); + let discrete = Distributions.Shape.T.toDiscrete(rs); + let shape = MixedShapeBuilder.buildSimple(~continuous, ~discrete); + shape |> E.O.toExt("Could not build final shape."); + | Ok(_) => E.O.toExn("Rendering failed.", None) + | Error(message) => E.O.toExn("No shape found, error: " ++ message, None) + }; +}; + +let rec toString = + fun + | `SymbolicDist(d) => SymbolicDist.T.toString(d) + | `RenderedDist(_) => "[shape]" + | op => Operation.T.toString(toString, op); diff --git a/src/distPlus/expressionTree/ExpressionTreeEvaluator.re b/src/distPlus/expressionTree/ExpressionTreeEvaluator.re new file mode 100644 index 00000000..6c5210f8 --- /dev/null +++ b/src/distPlus/expressionTree/ExpressionTreeEvaluator.re @@ -0,0 +1,266 @@ +open ExpressionTypes; +open ExpressionTypes.ExpressionTree; + +type t = node; +type tResult = node => result(node, string); + +type renderParams = { + sampleCount: int, +}; + +/* Given two random variables A and B, this returns the distribution + of a new variable that is the result of the operation on A and B. + For instance, normal(0, 1) + normal(1, 1) -> normal(1, 2). + In general, this is implemented via convolution. */ +module AlgebraicCombination = { + let tryAnalyticalSimplification = (operation, t1: t, t2: t) => + switch (operation, t1, t2) { + | (operation, + `SymbolicDist(d1), + `SymbolicDist(d2), + ) => + switch (SymbolicDist.T.tryAnalyticalSimplification(d1, d2, operation)) { + | `AnalyticalSolution(symbolicDist) => Ok(`SymbolicDist(symbolicDist)) + | `Error(er) => Error(er) + | `NoSolution => Ok(`AlgebraicCombination(operation, t1, t2)) + } + | _ => Ok(`AlgebraicCombination(operation, t1, t2)) + }; + + let combineAsShapes = (toLeaf, renderParams, algebraicOp, t1, t2) => { + let renderShape = r => toLeaf(renderParams, `Render(r)); + switch (renderShape(t1), renderShape(t2)) { + | (Ok(`RenderedDist(s1)), Ok(`RenderedDist(s2))) => + Ok( + `RenderedDist( + Distributions.Shape.combineAlgebraically(algebraicOp, s1, s2), + ), + ) + | (Error(e1), _) => Error(e1) + | (_, Error(e2)) => Error(e2) + | _ => Error("Algebraic combination: rendering failed.") + }; + }; + + let operationToLeaf = + ( + toLeaf, + renderParams: renderParams, + algebraicOp: ExpressionTypes.algebraicOperation, + t1: t, + t2: t, + ) + : result(node, string) => + + algebraicOp + |> tryAnalyticalSimplification(_, t1, t2) + |> E.R.bind( + _, + fun + | `SymbolicDist(d) as t => Ok(t) + | _ => combineAsShapes(toLeaf, renderParams, algebraicOp, t1, t2) + ); +}; + +module VerticalScaling = { + let operationToLeaf = (toLeaf, renderParams, scaleOp, t, scaleBy) => { + // scaleBy has to be a single float, otherwise we'll return an error. + let fn = Operation.Scale.toFn(scaleOp); + let knownIntegralSumFn = Operation.Scale.toKnownIntegralSumFn(scaleOp); + let renderedShape = toLeaf(renderParams, `Render(t)); + + switch (renderedShape, scaleBy) { + | (Ok(`RenderedDist(rs)), `SymbolicDist(`Float(sm))) => + Ok( + `RenderedDist( + Distributions.Shape.T.mapY( + ~knownIntegralSumFn=knownIntegralSumFn(sm), + fn(sm), + rs, + ), + ), + ) + | (Error(e1), _) => Error(e1) + | (_, _) => Error("Can only scale by float values.") + }; + }; +}; + +module PointwiseCombination = { + let pointwiseAdd = (toLeaf, renderParams, t1, t2) => { + let renderShape = r => toLeaf(renderParams, `Render(r)); + switch (renderShape(t1), renderShape(t2)) { + | (Ok(`RenderedDist(rs1)), Ok(`RenderedDist(rs2))) => + Ok( + `RenderedDist( + Distributions.Shape.combinePointwise( + ~knownIntegralSumsFn=(a, b) => Some(a +. b), + (+.), + rs1, + rs2, + ), + ), + ) + | (Error(e1), _) => Error(e1) + | (_, Error(e2)) => Error(e2) + | _ => Error("Pointwise combination: rendering failed.") + }; + }; + + let pointwiseMultiply = (toLeaf, renderParams, t1, t2) => { + // TODO: construct a function that we can easily sample from, to construct + // a RenderedDist. Use the xMin and xMax of the rendered shapes to tell the sampling function where to look. + Error( + "Pointwise multiplication not yet supported.", + ); + }; + + let operationToLeaf = (toLeaf, renderParams, pointwiseOp, t1, t2) => { + switch (pointwiseOp) { + | `Add => pointwiseAdd(toLeaf, renderParams, t1, t2) + | `Multiply => pointwiseMultiply(toLeaf, renderParams, t1, t2) + }; + }; +}; + +module Truncate = { + let trySimplification = (leftCutoff, rightCutoff, t) => { + switch (leftCutoff, rightCutoff, t) { + | (None, None, t) => Ok(t) + | (lc, rc, `SymbolicDist(`Uniform(u))) => { + // just create a new Uniform distribution + let nu: SymbolicTypes.uniform = u; + let newLow = max(E.O.default(neg_infinity, lc), nu.low); + let newHigh = min(E.O.default(infinity, rc), nu.high); + Ok(`SymbolicDist(`Uniform({low: newLow, high: newHigh}))); + } + | (_, _, t) => Ok(t) + }; + }; + + let truncateAsShape = (toLeaf, renderParams, leftCutoff, rightCutoff, t) => { + // TODO: use named args in renderToShape; if we're lucky we can at least get the tail + // of a distribution we otherwise wouldn't get at all + let renderedShape = toLeaf(renderParams, `Render(t)); + + switch (renderedShape) { + | Ok(`RenderedDist(rs)) => { + let truncatedShape = + rs |> Distributions.Shape.T.truncate(leftCutoff, rightCutoff); + Ok(`RenderedDist(rs)); + } + | Error(e1) => Error(e1) + | _ => Error("Could not truncate distribution.") + }; + }; + + let operationToLeaf = + ( + toLeaf, + renderParams, + leftCutoff: option(float), + rightCutoff: option(float), + t: node, + ) + : result(node, string) => { + t + |> trySimplification(leftCutoff, rightCutoff) + |> E.R.bind( + _, + fun + | `SymbolicDist(d) as t => Ok(t) + | _ => truncateAsShape(toLeaf, renderParams, leftCutoff, rightCutoff, t), + ); + }; +}; + +module Normalize = { + let rec operationToLeaf = (toLeaf, renderParams, t: node): result(node, string) => { + switch (t) { + | `RenderedDist(s) => + Ok(`RenderedDist(Distributions.Shape.T.normalize(s))) + | `SymbolicDist(_) => Ok(t) + | _ => t |> toLeaf(renderParams) |> E.R.bind(_, operationToLeaf(toLeaf, renderParams)) + }; + }; +}; + +module FloatFromDist = { + let symbolicToLeaf = (distToFloatOp: distToFloatOperation, s) => { + SymbolicDist.T.operate(distToFloatOp, s) + |> E.R.bind(_, v => Ok(`SymbolicDist(`Float(v)))); + }; + let renderedToLeaf = + (distToFloatOp: distToFloatOperation, rs: DistTypes.shape) + : result(node, string) => { + Distributions.Shape.operate(distToFloatOp, rs) + |> (v => Ok(`SymbolicDist(`Float(v)))); + }; + let rec operationToLeaf = + (toLeaf, renderParams, distToFloatOp: distToFloatOperation, t: node) + : result(node, string) => { + switch (t) { + | `SymbolicDist(s) => symbolicToLeaf(distToFloatOp, s) + | `RenderedDist(rs) => renderedToLeaf(distToFloatOp, rs) + | _ => t |> toLeaf(renderParams) |> E.R.bind(_, operationToLeaf(toLeaf, renderParams, distToFloatOp)) + }; + }; +}; + +module Render = { + let rec operationToLeaf = + ( + toLeaf, + renderParams, + t: node, + ) + : result(t, string) => { + switch (t) { + | `SymbolicDist(d) => + Ok(`RenderedDist(SymbolicDist.T.toShape(renderParams.sampleCount, d))) + | `RenderedDist(_) as t => Ok(t) // already a rendered shape, we're done here + | _ => t |> toLeaf(renderParams) |> E.R.bind(_, operationToLeaf(toLeaf, renderParams)) + }; + }; +}; + +/* This function recursively goes through the nodes of the parse tree, + replacing each Operation node and its subtree with a Data node. + Whenever possible, the replacement produces a new Symbolic Data node, + but most often it will produce a RenderedDist. + This function is used mainly to turn a parse tree into a single RenderedDist + that can then be displayed to the user. */ +let rec toLeaf = (renderParams, node: t): result(t, string) => { + switch (node) { + // Leaf nodes just stay leaf nodes + | `SymbolicDist(_) + | `RenderedDist(_) => Ok(node) + // Operations need to be turned into leaves + | `AlgebraicCombination(algebraicOp, t1, t2) => + AlgebraicCombination.operationToLeaf( + toLeaf, + renderParams, + algebraicOp, + t1, + t2 + ) + | `PointwiseCombination(pointwiseOp, t1, t2) => + PointwiseCombination.operationToLeaf( + toLeaf, + renderParams, + pointwiseOp, + t1, + t2, + ) + | `VerticalScaling(scaleOp, t, scaleBy) => + VerticalScaling.operationToLeaf( + toLeaf, renderParams, scaleOp, t, scaleBy + ) + | `Truncate(leftCutoff, rightCutoff, t) => + Truncate.operationToLeaf(toLeaf, renderParams, leftCutoff, rightCutoff, t) + | `FloatFromDist(distToFloatOp, t) => + FloatFromDist.operationToLeaf(toLeaf, renderParams, distToFloatOp, t) + | `Normalize(t) => Normalize.operationToLeaf(toLeaf, renderParams, t) + | `Render(t) => Render.operationToLeaf(toLeaf, renderParams, t) + }; +}; diff --git a/src/distPlus/expressionTree/ExpressionTypes.re b/src/distPlus/expressionTree/ExpressionTypes.re new file mode 100644 index 00000000..06be9967 --- /dev/null +++ b/src/distPlus/expressionTree/ExpressionTypes.re @@ -0,0 +1,20 @@ +type algebraicOperation = [ | `Add | `Multiply | `Subtract | `Divide]; +type pointwiseOperation = [ | `Add | `Multiply]; +type scaleOperation = [ | `Multiply | `Exponentiate | `Log]; +type distToFloatOperation = [ | `Pdf(float) | `Inv(float) | `Mean | `Sample]; + +module ExpressionTree = { + type node = [ + // leaf nodes: + | `SymbolicDist(SymbolicTypes.symbolicDist) + | `RenderedDist(DistTypes.shape) + // operations: + | `AlgebraicCombination(algebraicOperation, node, node) + | `PointwiseCombination(pointwiseOperation, node, node) + | `VerticalScaling(scaleOperation, node, node) + | `Render(node) + | `Truncate(option(float), option(float), node) + | `Normalize(node) + | `FloatFromDist(distToFloatOperation, node) + ]; +}; diff --git a/src/distPlus/expressionTree/MathJsParser.re b/src/distPlus/expressionTree/MathJsParser.re new file mode 100644 index 00000000..42ebb3ec --- /dev/null +++ b/src/distPlus/expressionTree/MathJsParser.re @@ -0,0 +1,368 @@ +module MathJsonToMathJsAdt = { + type arg = + | Symbol(string) + | Value(float) + | Fn(fn) + | Array(array(arg)) + | Object(Js.Dict.t(arg)) + and fn = { + name: string, + args: array(arg), + }; + + let rec run = (j: Js.Json.t) => + Json.Decode.( + switch (field("mathjs", string, j)) { + | "FunctionNode" => + let args = j |> field("args", array(run)); + Some( + Fn({ + name: j |> field("fn", field("name", string)), + args: args |> E.A.O.concatSomes, + }), + ); + | "OperatorNode" => + let args = j |> field("args", array(run)); + Some( + Fn({ + name: j |> field("fn", string), + args: args |> E.A.O.concatSomes, + }), + ); + | "ConstantNode" => + optional(field("value", Json.Decode.float), j) + |> E.O.fmap(r => Value(r)) + | "ParenthesisNode" => j |> field("content", run) + | "ObjectNode" => + let properties = j |> field("properties", dict(run)); + Js.Dict.entries(properties) + |> E.A.fmap(((key, value)) => value |> E.O.fmap(v => (key, v))) + |> E.A.O.concatSomes + |> Js.Dict.fromArray + |> (r => Some(Object(r))); + | "ArrayNode" => + let items = field("items", array(run), j); + Some(Array(items |> E.A.O.concatSomes)); + | "SymbolNode" => Some(Symbol(field("name", string, j))) + | n => + Js.log3("Couldn't parse mathjs node", j, n); + None; + } + ); +}; + +module MathAdtToDistDst = { + open MathJsonToMathJsAdt; + + module MathAdtCleaner = { + let transformWithSymbol = (f: float, s: string) => + switch (s) { + | "K" + | "k" => f *. 1000. + | "M" + | "m" => f *. 1000000. + | "B" + | "b" => f *. 1000000000. + | "T" + | "t" => f *. 1000000000000. + | _ => f + }; + + let rec run = + fun + | Fn({name: "multiply", args: [|Value(f), Symbol(s)|]}) => + Value(transformWithSymbol(f, s)) + | Fn({name: "unaryMinus", args: [|Value(f)|]}) => Value((-1.0) *. f) + | Fn({name, args}) => Fn({name, args: args |> E.A.fmap(run)}) + | Array(args) => Array(args |> E.A.fmap(run)) + | Symbol(s) => Symbol(s) + | Value(v) => Value(v) + | Object(v) => + Object( + v + |> Js.Dict.entries + |> E.A.fmap(((key, value)) => (key, run(value))) + |> Js.Dict.fromArray, + ); + }; + + let normal: + array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = + fun + | [|Value(mean), Value(stdev)|] => + Ok(`SymbolicDist(`Normal({mean, stdev}))) + | _ => Error("Wrong number of variables in normal distribution"); + + let lognormal: + array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = + fun + | [|Value(mu), Value(sigma)|] => + Ok(`SymbolicDist(`Lognormal({mu, sigma}))) + | [|Object(o)|] => { + let g = Js.Dict.get(o); + switch (g("mean"), g("stdev"), g("mu"), g("sigma")) { + | (Some(Value(mean)), Some(Value(stdev)), _, _) => + Ok( + `SymbolicDist( + SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev), + ), + ) + | (_, _, Some(Value(mu)), Some(Value(sigma))) => + Ok(`SymbolicDist(`Lognormal({mu, sigma}))) + | _ => Error("Lognormal distribution would need mean and stdev") + }; + } + | _ => Error("Wrong number of variables in lognormal distribution"); + + let to_: array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = + fun + | [|Value(low), Value(high)|] when low <= 0.0 && low < high => { + Ok(`SymbolicDist(SymbolicDist.Normal.from90PercentCI(low, high))); + } + | [|Value(low), Value(high)|] when low < high => { + Ok( + `SymbolicDist(SymbolicDist.Lognormal.from90PercentCI(low, high)), + ); + } + | [|Value(_), Value(_)|] => + Error("Low value must be less than high value.") + | _ => Error("Wrong number of variables in lognormal distribution"); + + let uniform: + array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = + fun + | [|Value(low), Value(high)|] => + Ok(`SymbolicDist(`Uniform({low, high}))) + | _ => Error("Wrong number of variables in lognormal distribution"); + + let beta: array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = + fun + | [|Value(alpha), Value(beta)|] => + Ok(`SymbolicDist(`Beta({alpha, beta}))) + | _ => Error("Wrong number of variables in lognormal distribution"); + + let exponential: + array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = + fun + | [|Value(rate)|] => Ok(`SymbolicDist(`Exponential({rate: rate}))) + | _ => Error("Wrong number of variables in Exponential distribution"); + + let cauchy: + array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = + fun + | [|Value(local), Value(scale)|] => + Ok(`SymbolicDist(`Cauchy({local, scale}))) + | _ => Error("Wrong number of variables in cauchy distribution"); + + let triangular: + array(arg) => result(ExpressionTypes.ExpressionTree.node, string) = + fun + | [|Value(low), Value(medium), Value(high)|] => + Ok(`SymbolicDist(`Triangular({low, medium, high}))) + | _ => Error("Wrong number of variables in triangle distribution"); + + let multiModal = + ( + args: array(result(ExpressionTypes.ExpressionTree.node, string)), + weights: option(array(float)), + ) => { + let weights = weights |> E.O.default([||]); + + /*let dists: = + args + |> E.A.fmap( + fun + | Ok(a) => a + | Error(e) => Error(e) + );*/ + + let firstWithError = args |> Belt.Array.getBy(_, Belt.Result.isError); + let withoutErrors = args |> E.A.fmap(E.R.toOption) |> E.A.O.concatSomes; + + switch (firstWithError) { + | Some(Error(e)) => Error(e) + | None when withoutErrors |> E.A.length == 0 => + Error("Multimodals need at least one input") + | _ => + let components = + withoutErrors + |> E.A.fmapi((index, t) => { + let w = weights |> E.A.get(_, index) |> E.O.default(1.0); + + `VerticalScaling((`Multiply, t, `SymbolicDist(`Float(w)))); + }); + + let pointwiseSum = + components + |> Js.Array.sliceFrom(1) + |> E.A.fold_left( + (acc, x) => {`PointwiseCombination((`Add, acc, x))}, + E.A.unsafe_get(components, 0), + ); + + Ok(`Normalize(pointwiseSum)); + }; + }; + + let arrayParser = + (args: array(arg)) + : result(ExpressionTypes.ExpressionTree.node, string) => { + let samples = + args + |> E.A.fmap( + fun + | Value(n) => Some(n) + | _ => None, + ) + |> E.A.O.concatSomes; + let outputs = Samples.T.fromSamples(samples); + let pdf = + outputs.shape |> E.O.bind(_, Distributions.Shape.T.toContinuous); + let shape = + pdf + |> E.O.fmap(pdf => { + let _pdf = Distributions.Continuous.T.normalize(pdf); + let cdf = Distributions.Continuous.T.integral(~cache=None, _pdf); + SymbolicDist.ContinuousShape.make(_pdf, cdf); + }); + switch (shape) { + | Some(s) => Ok(`SymbolicDist(`ContinuousShape(s))) + | None => Error("Rendering did not work") + }; + }; + + let operationParser = + ( + name: string, + args: array(result(ExpressionTypes.ExpressionTree.node, string)), + ) => { + let toOkAlgebraic = r => Ok(`AlgebraicCombination(r)); + let toOkTrunctate = r => Ok(`Truncate(r)); + switch (name, args) { + | ("add", [|Ok(l), Ok(r)|]) => toOkAlgebraic((`Add, l, r)) + | ("add", _) => Error("Addition needs two operands") + | ("subtract", [|Ok(l), Ok(r)|]) => toOkAlgebraic((`Subtract, l, r)) + | ("subtract", _) => Error("Subtraction needs two operands") + | ("multiply", [|Ok(l), Ok(r)|]) => toOkAlgebraic((`Multiply, l, r)) + | ("multiply", _) => Error("Multiplication needs two operands") + | ("divide", [|Ok(l), Ok(r)|]) => toOkAlgebraic((`Divide, l, r)) + | ("divide", _) => Error("Division needs two operands") + | ("pow", _) => Error("Exponentiation is not yet supported.") + | ("leftTruncate", [|Ok(d), Ok(`SymbolicDist(`Float(lc)))|]) => + toOkTrunctate((Some(lc), None, d)) + | ("leftTruncate", _) => + Error("leftTruncate needs two arguments: the expression and the cutoff") + | ("rightTruncate", [|Ok(d), Ok(`SymbolicDist(`Float(rc)))|]) => + toOkTrunctate((None, Some(rc), d)) + | ("rightTruncate", _) => + Error( + "rightTruncate needs two arguments: the expression and the cutoff", + ) + | ( + "truncate", + [| + Ok(d), + Ok(`SymbolicDist(`Float(lc))), + Ok(`SymbolicDist(`Float(rc))), + |], + ) => + toOkTrunctate((Some(lc), Some(rc), d)) + | ("truncate", _) => + Error("truncate needs three arguments: the expression and both cutoffs") + | _ => Error("This type not currently supported") + }; + }; + + let functionParser = (nodeParser, name, args) => { + let parseArgs = () => args |> E.A.fmap(nodeParser); + switch (name) { + | "normal" => normal(args) + | "lognormal" => lognormal(args) + | "uniform" => uniform(args) + | "beta" => beta(args) + | "to" => to_(args) + | "exponential" => exponential(args) + | "cauchy" => cauchy(args) + | "triangular" => triangular(args) + | "mm" => + let weights = + args + |> E.A.last + |> E.O.bind( + _, + fun + | Array(values) => Some(values) + | _ => None, + ) + |> E.O.fmap(o => + o + |> E.A.fmap( + fun + | Value(r) => Some(r) + | _ => None, + ) + |> E.A.O.concatSomes + ); + let possibleDists = + E.O.isSome(weights) + ? Belt.Array.slice(args, ~offset=0, ~len=E.A.length(args) - 1) + : args; + let dists = possibleDists |> E.A.fmap(nodeParser); + multiModal(dists, weights); + | "add" + | "subtract" + | "multiply" + | "divide" + | "pow" + | "leftTruncate" + | "rightTruncate" + | "truncate" => operationParser(name, parseArgs()) + | "mean" as n + | "inv" as n + | "sample" as n + | "pdf" as n + | n => Error(n ++ "(...) is not currently supported") + }; + }; + + let rec nodeParser = + fun + | Value(f) => Ok(`SymbolicDist(`Float(f))) + | Fn({name, args}) => functionParser(nodeParser, name, args) + | _ => { + Error("This type not currently supported"); + }; + + let topLevel = + fun + | Array(r) => arrayParser(r) + | Value(_) as r => nodeParser(r) + | Fn(_) as r => nodeParser(r) + | Symbol(_) => Error("Symbol not valid as top level") + | Object(_) => Error("Object not valid as top level"); + + let run = (r): result(ExpressionTypes.ExpressionTree.node, string) => + r |> MathAdtCleaner.run |> topLevel; +}; + +let fromString = str => { + /* We feed the user-typed string into Mathjs.parseMath, + which returns a JSON with (hopefully) a single-element array. + This array element is the top-level node of a nested-object tree + representing the functions/arguments/values/etc. in the string. + + The function MathJsonToMathJsAdt then recursively unpacks this JSON into a typed data structure we can use. + Inside of this function, MathAdtToDistDst is called whenever a distribution function is encountered. + */ + let mathJsToJson = Mathjs.parseMath(str); + let mathJsParse = + E.R.bind(mathJsToJson, r => { + switch (MathJsonToMathJsAdt.run(r)) { + | Some(r) => Ok(r) + | None => Error("MathJsParse Error") + } + }); + + let value = E.R.bind(mathJsParse, MathAdtToDistDst.run); + value; +}; diff --git a/src/distPlus/symbolic/Mathjs.re b/src/distPlus/expressionTree/Mathjs.re similarity index 100% rename from src/distPlus/symbolic/Mathjs.re rename to src/distPlus/expressionTree/Mathjs.re diff --git a/src/distPlus/expressionTree/MathjsWrapper.js b/src/distPlus/expressionTree/MathjsWrapper.js new file mode 100644 index 00000000..3546ba42 --- /dev/null +++ b/src/distPlus/expressionTree/MathjsWrapper.js @@ -0,0 +1,9 @@ +const math = require("mathjs"); + +function parseMath(f) { + return JSON.parse(JSON.stringify(math.parse(f))) +}; + +module.exports = { + parseMath, +}; \ No newline at end of file diff --git a/src/distPlus/expressionTree/Operation.re b/src/distPlus/expressionTree/Operation.re new file mode 100644 index 00000000..33c05461 --- /dev/null +++ b/src/distPlus/expressionTree/Operation.re @@ -0,0 +1,94 @@ +open ExpressionTypes; + +module Algebraic = { + type t = algebraicOperation; + let toFn: (t, float, float) => float = + fun + | `Add => (+.) + | `Subtract => (-.) + | `Multiply => ( *. ) + | `Divide => (/.); + + let applyFn = (t, f1, f2) => { + switch (t, f1, f2) { + | (`Divide, _, 0.) => Error("Cannot divide $v1 by zero.") + | _ => Ok(toFn(t, f1, f2)) + }; + }; + + let toString = + fun + | `Add => "+" + | `Subtract => "-" + | `Multiply => "*" + | `Divide => "/"; + + let format = (a, b, c) => b ++ " " ++ toString(a) ++ " " ++ c; +}; + +module Pointwise = { + type t = pointwiseOperation; + let toString = + fun + | `Add => "+" + | `Multiply => "*"; + + let format = (a, b, c) => b ++ " " ++ toString(a) ++ " " ++ c; +}; + +module DistToFloat = { + type t = distToFloatOperation; + + let format = (operation, value) => + switch (operation) { + | `Pdf(f) => {j|pdf(x=$f,$value)|j} + | `Inv(f) => {j|inv(x=$f,$value)|j} + | `Sample => "sample($value)" + | `Mean => "mean($value)" + }; +}; + +module Scale = { + type t = scaleOperation; + let toFn = + fun + | `Multiply => ( *. ) + | `Exponentiate => ( ** ) + | `Log => ((a, b) => log(a) /. log(b)); + + let format = (operation: t, value, scaleBy) => + switch (operation) { + | `Multiply => {j|verticalMultiply($value, $scaleBy) |j} + | `Exponentiate => {j|verticalExponentiate($value, $scaleBy) |j} + | `Log => {j|verticalLog($value, $scaleBy) |j} + }; + + let toKnownIntegralSumFn = + fun + | `Multiply => ((a, b) => Some(a *. b)) + | `Exponentiate => ((_, _) => None) + | `Log => ((_, _) => None); +}; + +module T = { + let truncateToString = + (left: option(float), right: option(float), nodeToString) => { + let left = left |> E.O.dimap(Js.Float.toString, () => "-inf"); + let right = right |> E.O.dimap(Js.Float.toString, () => "inf"); + {j|truncate($nodeToString, $left, $right)|j}; + }; + let toString = nodeToString => + fun + | `AlgebraicCombination(op, t1, t2) => + Algebraic.format(op, nodeToString(t1), nodeToString(t2)) + | `PointwiseCombination(op, t1, t2) => + Pointwise.format(op, nodeToString(t1), nodeToString(t2)) + | `VerticalScaling(scaleOp, t, scaleBy) => + Scale.format(scaleOp, nodeToString(t), nodeToString(scaleBy)) + | `Normalize(t) => "normalize(k" ++ nodeToString(t) ++ ")" + | `FloatFromDist(floatFromDistOp, t) => + DistToFloat.format(floatFromDistOp, nodeToString(t)) + | `Truncate(lc, rc, t) => truncateToString(lc, rc, nodeToString(t)) + | `Render(t) => nodeToString(t) + | _ => ""; // SymbolicDist and RenderedDist are handled in ExpressionTree.toString. +}; diff --git a/src/distPlus/renderers/DistPlusRenderer.re b/src/distPlus/renderers/DistPlusRenderer.re index c2bf8360..e141d83c 100644 --- a/src/distPlus/renderers/DistPlusRenderer.re +++ b/src/distPlus/renderers/DistPlusRenderer.re @@ -1,13 +1,13 @@ -let truncateIfShould = +let downsampleIfShould = ( - {recommendedLength, shouldTruncate}: RenderTypes.DistPlusRenderer.inputs, + {recommendedLength, shouldDownsample}: RenderTypes.DistPlusRenderer.inputs, outputs: RenderTypes.ShapeRenderer.Combined.outputs, dist, ) => { - let willTruncate = - shouldTruncate + let willDownsample = + shouldDownsample && RenderTypes.ShapeRenderer.Combined.methodUsed(outputs) == `Sampling; - willTruncate ? dist |> Distributions.DistPlus.T.truncate(recommendedLength) : dist; + willDownsample ? dist |> Distributions.DistPlus.T.downsample(recommendedLength) : dist; }; let run = @@ -21,7 +21,7 @@ let run = ~guesstimatorString=Some(inputs.distPlusIngredients.guesstimatorString), (), ) - |> Distributions.DistPlus.T.scaleToIntegralSum(~intendedSum=1.0); + |> Distributions.DistPlus.T.normalize; let outputs = ShapeRenderer.run({ samplingInputs: inputs.samplingInputs, @@ -32,6 +32,6 @@ let run = }); let shape = outputs |> RenderTypes.ShapeRenderer.Combined.getShape; let dist = - shape |> E.O.fmap(toDist) |> E.O.fmap(truncateIfShould(inputs, outputs)); + shape |> E.O.fmap(toDist) |> E.O.fmap(downsampleIfShould(inputs, outputs)); RenderTypes.DistPlusRenderer.Outputs.make(outputs, dist); -}; \ No newline at end of file +}; diff --git a/src/distPlus/renderers/RenderTypes.re b/src/distPlus/renderers/RenderTypes.re index 95a36204..9b37503f 100644 --- a/src/distPlus/renderers/RenderTypes.re +++ b/src/distPlus/renderers/RenderTypes.re @@ -43,7 +43,7 @@ module ShapeRenderer = { module Symbolic = { type inputs = {length: int}; type outputs = { - graph: SymbolicDist.bigDist, + graph: ExpressionTypes.ExpressionTree.node, shape: DistTypes.shape, }; let make = (graph, shape) => {graph, shape}; @@ -75,7 +75,7 @@ module ShapeRenderer = { module DistPlusRenderer = { let defaultRecommendedLength = 10000; - let defaultShouldTruncate = true; + let defaultShouldDownsample = true; type ingredients = { guesstimatorString: string, domain: DistTypes.domain, @@ -85,7 +85,7 @@ module DistPlusRenderer = { distPlusIngredients: ingredients, samplingInputs: ShapeRenderer.Sampling.inputs, recommendedLength: int, - shouldTruncate: bool, + shouldDownsample: bool, }; module Ingredients = { let make = @@ -105,7 +105,7 @@ module DistPlusRenderer = { ( ~samplingInputs=ShapeRenderer.Sampling.Inputs.empty, ~recommendedLength=defaultRecommendedLength, - ~shouldTruncate=defaultShouldTruncate, + ~shouldDownsample=defaultShouldDownsample, ~distPlusIngredients, (), ) @@ -113,7 +113,7 @@ module DistPlusRenderer = { distPlusIngredients, samplingInputs, recommendedLength, - shouldTruncate, + shouldDownsample, }; type outputs = { shapeRenderOutputs: ShapeRenderer.Combined.outputs, @@ -124,4 +124,4 @@ module DistPlusRenderer = { let shapeRenderOutputs = (t:outputs) => t.shapeRenderOutputs let make = (shapeRenderOutputs, distPlus) => {shapeRenderOutputs, distPlus}; } -}; \ No newline at end of file +}; diff --git a/src/distPlus/renderers/ShapeRenderer.re b/src/distPlus/renderers/ShapeRenderer.re index c6f3dc0e..b439240b 100644 --- a/src/distPlus/renderers/ShapeRenderer.re +++ b/src/distPlus/renderers/ShapeRenderer.re @@ -21,7 +21,7 @@ let runSymbolic = (guesstimatorString, length) => { |> E.R.fmap(g => RenderTypes.ShapeRenderer.Symbolic.make( g, - SymbolicDist.toShape(length, g), + ExpressionTree.toShape(length, g), ) ); }; @@ -43,4 +43,4 @@ let run = }; Js.log3("IS SOME?", symbolic |> E.R.toOption |> E.O.isSome, symbolic); {symbolic: Some(symbolic), sampling}; -}; \ No newline at end of file +}; diff --git a/src/distPlus/renderers/samplesRenderer/Guesstimator.re b/src/distPlus/renderers/samplesRenderer/Guesstimator.re index e099889f..a08fb591 100644 --- a/src/distPlus/renderers/samplesRenderer/Guesstimator.re +++ b/src/distPlus/renderers/samplesRenderer/Guesstimator.re @@ -4,10 +4,10 @@ type discrete = { ys: array(float), }; -let jsToDistDiscrete = (d: discrete): DistTypes.discreteShape => { +let jsToDistDiscrete = (d: discrete): DistTypes.discreteShape => {xyShape: { xs: xsGet(d), ys: ysGet(d), -}; +}, knownIntegralSum: None}; [@bs.module "./GuesstimatorLibrary.js"] -external stringToSamples: (string, int) => array(float) = "stringToSamples"; \ No newline at end of file +external stringToSamples: (string, int) => array(float) = "stringToSamples"; diff --git a/src/distPlus/renderers/samplesRenderer/Samples.re b/src/distPlus/renderers/samplesRenderer/Samples.re index 7318a9dd..28f7bdce 100644 --- a/src/distPlus/renderers/samplesRenderer/Samples.re +++ b/src/distPlus/renderers/samplesRenderer/Samples.re @@ -115,11 +115,12 @@ module T = { Array.fast_sort(compare, samples); let (continuousPart, discretePart) = E.A.Sorted.Floats.split(samples); let length = samples |> E.A.length |> float_of_int; - let discrete: DistTypes.xyShape = + let discrete: DistTypes.discreteShape = discretePart |> E.FloatFloatMap.fmap(r => r /. length) |> E.FloatFloatMap.toArray - |> XYShape.T.fromZippedArray; + |> XYShape.T.fromZippedArray + |> Distributions.Discrete.make(_, None); let pdf = continuousPart |> E.A.length > 5 @@ -149,14 +150,14 @@ module T = { ~outputXYPoints=samplingInputs.outputXYPoints, formatUnitWidth(usedUnitWidth), ) - |> Distributions.Continuous.make(`Linear) + |> Distributions.Continuous.make(`Linear, _, None) |> (r => Some((r, foo))); } : None; let shape = MixedShapeBuilder.buildSimple( ~continuous=pdf |> E.O.fmap(fst), - ~discrete, + ~discrete=Some(discrete), ); let samplesParse: RenderTypes.ShapeRenderer.Sampling.outputs = { continuousParseParams: pdf |> E.O.fmap(snd), @@ -196,4 +197,4 @@ module T = { Some(fromSamples(~samplingInputs, samples)); }; }; -}; \ No newline at end of file +}; diff --git a/src/distPlus/symbolic/MathJsParser.re b/src/distPlus/symbolic/MathJsParser.re deleted file mode 100644 index 07d24cb4..00000000 --- a/src/distPlus/symbolic/MathJsParser.re +++ /dev/null @@ -1,273 +0,0 @@ -// todo: rename to SymbolicParser - -module MathJsonToMathJsAdt = { - type arg = - | Symbol(string) - | Value(float) - | Fn(fn) - | Array(array(arg)) - | Object(Js.Dict.t(arg)) - and fn = { - name: string, - args: array(arg), - }; - - let rec run = (j: Js.Json.t) => - Json.Decode.( - switch (field("mathjs", string, j)) { - | "FunctionNode" => - let args = j |> field("args", array(run)); - Some( - Fn({ - name: j |> field("fn", field("name", string)), - args: args |> E.A.O.concatSomes, - }), - ); - | "OperatorNode" => - let args = j |> field("args", array(run)); - Some( - Fn({ - name: j |> field("fn", string), - args: args |> E.A.O.concatSomes, - }), - ); - | "ConstantNode" => - optional(field("value", Json.Decode.float), j) - |> E.O.fmap(r => Value(r)) - | "ParenthesisNode" => j |> field("content", run) - | "ObjectNode" => - let properties = j |> field("properties", dict(run)); - Js.Dict.entries(properties) - |> E.A.fmap(((key, value)) => value |> E.O.fmap(v => (key, v))) - |> E.A.O.concatSomes - |> Js.Dict.fromArray - |> (r => Some(Object(r))); - | "ArrayNode" => - let items = field("items", array(run), j); - Some(Array(items |> E.A.O.concatSomes)); - | "SymbolNode" => Some(Symbol(field("name", string, j))) - | n => - Js.log3("Couldn't parse mathjs node", j, n); - None; - } - ); -}; - -module MathAdtToDistDst = { - open MathJsonToMathJsAdt; - - module MathAdtCleaner = { - let transformWithSymbol = (f: float, s: string) => - switch (s) { - | "K" - | "k" => f *. 1000. - | "M" - | "m" => f *. 1000000. - | "B" - | "b" => f *. 1000000000. - | "T" - | "t" => f *. 1000000000000. - | _ => f - }; - - let rec run = - fun - | Fn({name: "multiply", args: [|Value(f), Symbol(s)|]}) => - Value(transformWithSymbol(f, s)) - | Fn({name: "unaryMinus", args: [|Value(f)|]}) => Value((-1.0) *. f) - | Fn({name, args}) => Fn({name, args: args |> E.A.fmap(run)}) - | Array(args) => Array(args |> E.A.fmap(run)) - | Symbol(s) => Symbol(s) - | Value(v) => Value(v) - | Object(v) => - Object( - v - |> Js.Dict.entries - |> E.A.fmap(((key, value)) => (key, run(value))) - |> Js.Dict.fromArray, - ); - }; - - let normal: array(arg) => result(SymbolicDist.bigDist, string) = - fun - | [|Value(mean), Value(stdev)|] => - Ok(`Simple(`Normal({mean, stdev}))) - | _ => Error("Wrong number of variables in normal distribution"); - - let lognormal: array(arg) => result(SymbolicDist.bigDist, string) = - fun - | [|Value(mu), Value(sigma)|] => Ok(`Simple(`Lognormal({mu, sigma}))) - | [|Object(o)|] => { - let g = Js.Dict.get(o); - switch (g("mean"), g("stdev"), g("mu"), g("sigma")) { - | (Some(Value(mean)), Some(Value(stdev)), _, _) => - Ok(`Simple(SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev))) - | (_, _, Some(Value(mu)), Some(Value(sigma))) => - Ok(`Simple(`Lognormal({mu, sigma}))) - | _ => Error("Lognormal distribution would need mean and stdev") - }; - } - | _ => Error("Wrong number of variables in lognormal distribution"); - - let to_: array(arg) => result(SymbolicDist.bigDist, string) = - fun - | [|Value(low), Value(high)|] when low <= 0.0 && low < high=> { - Ok(`Simple(SymbolicDist.Normal.from90PercentCI(low, high))); - } - | [|Value(low), Value(high)|] when low < high => { - Ok(`Simple(SymbolicDist.Lognormal.from90PercentCI(low, high))); - } - | [|Value(_), Value(_)|] => - Error("Low value must be less than high value.") - | _ => Error("Wrong number of variables in lognormal distribution"); - - let uniform: array(arg) => result(SymbolicDist.bigDist, string) = - fun - | [|Value(low), Value(high)|] => Ok(`Simple(`Uniform({low, high}))) - | _ => Error("Wrong number of variables in lognormal distribution"); - - let beta: array(arg) => result(SymbolicDist.bigDist, string) = - fun - | [|Value(alpha), Value(beta)|] => Ok(`Simple(`Beta({alpha, beta}))) - | _ => Error("Wrong number of variables in lognormal distribution"); - - let exponential: array(arg) => result(SymbolicDist.bigDist, string) = - fun - | [|Value(rate)|] => Ok(`Simple(`Exponential({rate: rate}))) - | _ => Error("Wrong number of variables in Exponential distribution"); - - let cauchy: array(arg) => result(SymbolicDist.bigDist, string) = - fun - | [|Value(local), Value(scale)|] => - Ok(`Simple(`Cauchy({local, scale}))) - | _ => Error("Wrong number of variables in cauchy distribution"); - - let triangular: array(arg) => result(SymbolicDist.bigDist, string) = - fun - | [|Value(low), Value(medium), Value(high)|] => - Ok(`Simple(`Triangular({low, medium, high}))) - | _ => Error("Wrong number of variables in triangle distribution"); - - let multiModal = - ( - args: array(result(SymbolicDist.bigDist, string)), - weights: option(array(float)), - ) => { - let weights = weights |> E.O.default([||]); - let dists = - args - |> E.A.fmap( - fun - | Ok(`Simple(n)) => Ok(n) - | Error(e) => Error(e) - | Ok(k) => Error(SymbolicDist.toString(k)), - ); - let firstWithError = dists |> Belt.Array.getBy(_, Belt.Result.isError); - let withoutErrors = dists |> E.A.fmap(E.R.toOption) |> E.A.O.concatSomes; - switch (firstWithError) { - | Some(Error(e)) => Error(e) - | None when withoutErrors |> E.A.length == 0 => - Error("Multimodals need at least one input") - | _ => - withoutErrors - |> E.A.fmapi((index, item) => - (item, weights |> E.A.get(_, index) |> E.O.default(1.0)) - ) - |> (r => Ok(`PointwiseCombination(r))) - }; - }; - - let arrayParser = (args:array(arg)):result(SymbolicDist.bigDist, string) => { - let samples = args - |> E.A.fmap( - fun - | Value(n) => Some(n) - | _ => None - ) - |> E.A.O.concatSomes - let outputs = Samples.T.fromSamples(samples); - let pdf = outputs.shape |> E.O.bind(_,Distributions.Shape.T.toContinuous) - let shape = pdf |> E.O.fmap(pdf => { - let _pdf = Distributions.Continuous.T.scaleToIntegralSum(~cache=None, ~intendedSum=1.0, pdf); - let cdf = Distributions.Continuous.T.integral(~cache=None, _pdf); - SymbolicDist.ContinuousShape.make(_pdf, cdf) - }) - switch(shape){ - | Some(s) => Ok(`Simple(`ContinuousShape(s))) - | None => Error("Rendering did not work") - } - } - - - let rec functionParser = (r): result(SymbolicDist.bigDist, string) => - r - |> ( - fun - | Fn({name: "normal", args}) => normal(args) - | Fn({name: "lognormal", args}) => lognormal(args) - | Fn({name: "uniform", args}) => uniform(args) - | Fn({name: "beta", args}) => beta(args) - | Fn({name: "to", args}) => to_(args) - | Fn({name: "exponential", args}) => exponential(args) - | Fn({name: "cauchy", args}) => cauchy(args) - | Fn({name: "triangular", args}) => triangular(args) - | Value(f) => Ok(`Simple(`Float(f))) - | Fn({name: "mm", args}) => { - let weights = - args - |> E.A.last - |> E.O.bind( - _, - fun - | Array(values) => Some(values) - | _ => None, - ) - |> E.O.fmap(o => - o - |> E.A.fmap( - fun - | Value(r) => Some(r) - | _ => None, - ) - |> E.A.O.concatSomes - ); - let possibleDists = - E.O.isSome(weights) - ? Belt.Array.slice(args, ~offset=0, ~len=E.A.length(args) - 1) - : args; - let dists = possibleDists |> E.A.fmap(functionParser); - multiModal(dists, weights); - } - | Fn({name}) => Error(name ++ ": function not supported") - | _ => { - Error("This type not currently supported"); - } - ); - - let topLevel = (r): result(SymbolicDist.bigDist, string) => - r - |> ( - fun - | Fn(_) => functionParser(r) - | Value(r) => Ok(`Simple(`Float(r))) - | Array(r) => arrayParser(r) - | Symbol(_) => Error("Symbol not valid as top level") - | Object(_) => Error("Object not valid as top level") - ); - - let run = (r): result(SymbolicDist.bigDist, string) => - r |> MathAdtCleaner.run |> topLevel; -}; - -let fromString = str => { - let mathJsToJson = Mathjs.parseMath(str); - let mathJsParse = - E.R.bind(mathJsToJson, r => - switch (MathJsonToMathJsAdt.run(r)) { - | Some(r) => Ok(r) - | None => Error("MathJsParse Error") - } - ); - let value = E.R.bind(mathJsParse, MathAdtToDistDst.run); - value; -}; \ No newline at end of file diff --git a/src/distPlus/symbolic/MathjsWrapper.js b/src/distPlus/symbolic/MathjsWrapper.js deleted file mode 100644 index 01fd4994..00000000 --- a/src/distPlus/symbolic/MathjsWrapper.js +++ /dev/null @@ -1,8 +0,0 @@ - -const math = require("mathjs"); - -function parseMath(f){ return JSON.parse(JSON.stringify(math.parse(f))) }; - -module.exports = { - parseMath, -}; diff --git a/src/distPlus/symbolic/SymbolicDist.re b/src/distPlus/symbolic/SymbolicDist.re index d867eb22..96ecf0c1 100644 --- a/src/distPlus/symbolic/SymbolicDist.re +++ b/src/distPlus/symbolic/SymbolicDist.re @@ -1,70 +1,18 @@ -type normal = { - mean: float, - stdev: float, -}; - -type lognormal = { - mu: float, - sigma: float, -}; - -type uniform = { - low: float, - high: float, -}; - -type beta = { - alpha: float, - beta: float, -}; - -type exponential = {rate: float}; - -type cauchy = { - local: float, - scale: float, -}; - -type triangular = { - low: float, - medium: float, - high: float, -}; - -type continuousShape = { - pdf: DistTypes.continuousShape, - cdf: DistTypes.continuousShape, -}; - -type contType = [ | `Continuous | `Discrete]; - -type dist = [ - | `Normal(normal) - | `Beta(beta) - | `Lognormal(lognormal) - | `Uniform(uniform) - | `Exponential(exponential) - | `Cauchy(cauchy) - | `Triangular(triangular) - | `ContinuousShape(continuousShape) - | `Float(float) -]; - -type pointwiseAdd = array((dist, float)); - -type bigDist = [ | `Simple(dist) | `PointwiseCombination(pointwiseAdd)]; +open SymbolicTypes; module ContinuousShape = { type t = continuousShape; let make = (pdf, cdf): t => {pdf, cdf}; let pdf = (x, t: t) => Distributions.Continuous.T.xToY(x, t.pdf).continuous; + // TODO: pdf and inv are currently the same, this seems broken. let inv = (p, t: t) => Distributions.Continuous.T.xToY(p, t.pdf).continuous; // TODO: Fix the sampling, to have it work correctly. let sample = (t: t) => 3.0; + // TODO: Fix the mean, to have it work correctly. + let mean = (t: t) => Ok(0.0); let toString = t => {j|CustomContinuousShape|j}; - let contType: contType = `Continuous; }; module Exponential = { @@ -72,8 +20,8 @@ module Exponential = { let pdf = (x, t: t) => Jstat.exponential##pdf(x, t.rate); let inv = (p, t: t) => Jstat.exponential##inv(p, t.rate); let sample = (t: t) => Jstat.exponential##sample(t.rate); + let mean = (t: t) => Ok(Jstat.exponential##mean(t.rate)); let toString = ({rate}: t) => {j|Exponential($rate)|j}; - let contType: contType = `Continuous; }; module Cauchy = { @@ -81,8 +29,8 @@ module Cauchy = { let pdf = (x, t: t) => Jstat.cauchy##pdf(x, t.local, t.scale); let inv = (p, t: t) => Jstat.cauchy##inv(p, t.local, t.scale); let sample = (t: t) => Jstat.cauchy##sample(t.local, t.scale); + let mean = (_: t) => Error("Cauchy distributions have no mean value."); let toString = ({local, scale}: t) => {j|Cauchy($local, $scale)|j}; - let contType: contType = `Continuous; }; module Triangular = { @@ -90,8 +38,8 @@ module Triangular = { let pdf = (x, t: t) => Jstat.triangular##pdf(x, t.low, t.high, t.medium); let inv = (p, t: t) => Jstat.triangular##inv(p, t.low, t.high, t.medium); let sample = (t: t) => Jstat.triangular##sample(t.low, t.high, t.medium); + let mean = (t: t) => Ok(Jstat.triangular##mean(t.low, t.high, t.medium)); let toString = ({low, medium, high}: t) => {j|Triangular($low, $medium, $high)|j}; - let contType: contType = `Continuous; }; module Normal = { @@ -105,8 +53,35 @@ module Normal = { }; let inv = (p, t: t) => Jstat.normal##inv(p, t.mean, t.stdev); let sample = (t: t) => Jstat.normal##sample(t.mean, t.stdev); + let mean = (t: t) => Ok(Jstat.normal##mean(t.mean, t.stdev)); let toString = ({mean, stdev}: t) => {j|Normal($mean,$stdev)|j}; - let contType: contType = `Continuous; + + let add = (n1: t, n2: t) => { + let mean = n1.mean +. n2.mean; + let stdev = sqrt(n1.stdev ** 2. +. n2.stdev ** 2.); + `Normal({mean, stdev}); + }; + let subtract = (n1: t, n2: t) => { + let mean = n1.mean -. n2.mean; + let stdev = sqrt(n1.stdev ** 2. +. n2.stdev ** 2.); + `Normal({mean, stdev}); + }; + + // TODO: is this useful here at all? would need the integral as well ... + let pointwiseProduct = (n1: t, n2: t) => { + let mean = + (n1.mean *. n2.stdev ** 2. +. n2.mean *. n1.stdev ** 2.) + /. (n1.stdev ** 2. +. n2.stdev ** 2.); + let stdev = 1. /. (1. /. n1.stdev ** 2. +. 1. /. n2.stdev ** 2.); + `Normal({mean, stdev}); + }; + + let operate = (operation: Operation.Algebraic.t, n1: t, n2: t) => + switch (operation) { + | `Add => Some(add(n1, n2)) + | `Subtract => Some(subtract(n1, n2)) + | _ => None + }; }; module Beta = { @@ -114,17 +89,17 @@ module Beta = { let pdf = (x, t: t) => Jstat.beta##pdf(x, t.alpha, t.beta); let inv = (p, t: t) => Jstat.beta##inv(p, t.alpha, t.beta); let sample = (t: t) => Jstat.beta##sample(t.alpha, t.beta); + let mean = (t: t) => Ok(Jstat.beta##mean(t.alpha, t.beta)); let toString = ({alpha, beta}: t) => {j|Beta($alpha,$beta)|j}; - let contType: contType = `Continuous; }; module Lognormal = { type t = lognormal; let pdf = (x, t: t) => Jstat.lognormal##pdf(x, t.mu, t.sigma); let inv = (p, t: t) => Jstat.lognormal##inv(p, t.mu, t.sigma); + let mean = (t: t) => Ok(Jstat.lognormal##mean(t.mu, t.sigma)); let sample = (t: t) => Jstat.lognormal##sample(t.mu, t.sigma); let toString = ({mu, sigma}: t) => {j|Lognormal($mu,$sigma)|j}; - let contType: contType = `Continuous; let from90PercentCI = (low, high) => { let logLow = Js.Math.log(low); let logHigh = Js.Math.log(high); @@ -144,6 +119,23 @@ module Lognormal = { ); `Lognormal({mu, sigma}); }; + + let multiply = (l1, l2) => { + let mu = l1.mu +. l2.mu; + let sigma = l1.sigma +. l2.sigma; + `Lognormal({mu, sigma}); + }; + let divide = (l1, l2) => { + let mu = l1.mu -. l2.mu; + let sigma = l1.sigma +. l2.sigma; + `Lognormal({mu, sigma}); + }; + let operate = (operation: Operation.Algebraic.t, n1: t, n2: t) => + switch (operation) { + | `Multiply => Some(multiply(n1, n2)) + | `Divide => Some(divide(n1, n2)) + | _ => None + }; }; module Uniform = { @@ -151,20 +143,20 @@ module Uniform = { let pdf = (x, t: t) => Jstat.uniform##pdf(x, t.low, t.high); let inv = (p, t: t) => Jstat.uniform##inv(p, t.low, t.high); let sample = (t: t) => Jstat.uniform##sample(t.low, t.high); + let mean = (t: t) => Ok(Jstat.uniform##mean(t.low, t.high)); let toString = ({low, high}: t) => {j|Uniform($low,$high)|j}; - let contType: contType = `Continuous; }; module Float = { type t = float; let pdf = (x, t: t) => x == t ? 1.0 : 0.0; let inv = (p, t: t) => p < t ? 0.0 : 1.0; + let mean = (t: t) => Ok(t); let sample = (t: t) => t; let toString = Js.Float.toString; - let contType: contType = `Discrete; }; -module GenericSimple = { +module T = { let minCdfValue = 0.0001; let maxCdfValue = 0.9999; @@ -181,19 +173,6 @@ module GenericSimple = { | `ContinuousShape(n) => ContinuousShape.pdf(x, n) }; - let contType = (dist: dist): contType => - switch (dist) { - | `Normal(_) => Normal.contType - | `Triangular(_) => Triangular.contType - | `Exponential(_) => Exponential.contType - | `Cauchy(_) => Cauchy.contType - | `Lognormal(_) => Lognormal.contType - | `Uniform(_) => Uniform.contType - | `Beta(_) => Beta.contType - | `Float(_) => Float.contType - | `ContinuousShape(_) => ContinuousShape.contType - }; - let inv = (x, dist) => switch (dist) { | `Normal(n) => Normal.inv(x, n) @@ -207,7 +186,7 @@ module GenericSimple = { | `ContinuousShape(n) => ContinuousShape.inv(x, n) }; - let sample: dist => float = + let sample: symbolicDist => float = fun | `Normal(n) => Normal.sample(n) | `Triangular(n) => Triangular.sample(n) @@ -219,7 +198,7 @@ module GenericSimple = { | `Float(n) => Float.sample(n) | `ContinuousShape(n) => ContinuousShape.sample(n); - let toString: dist => string = + let toString: symbolicDist => string = fun | `Triangular(n) => Triangular.toString(n) | `Exponential(n) => Exponential.toString(n) @@ -231,7 +210,7 @@ module GenericSimple = { | `Float(n) => Float.toString(n) | `ContinuousShape(n) => ContinuousShape.toString(n); - let min: dist => float = + let min: symbolicDist => float = fun | `Triangular({low}) => low | `Exponential(n) => Exponential.inv(minCdfValue, n) @@ -243,7 +222,7 @@ module GenericSimple = { | `ContinuousShape(n) => ContinuousShape.inv(minCdfValue, n) | `Float(n) => n; - let max: dist => float = + let max: symbolicDist => float = fun | `Triangular(n) => n.high | `Exponential(n) => Exponential.inv(maxCdfValue, n) @@ -255,144 +234,84 @@ module GenericSimple = { | `Uniform({high}) => high | `Float(n) => n; - - /* This function returns a list of x's at which to evaluate the overall distribution (for rendering). - This function is called separately for each individual distribution. - - When called with xSelection=`Linear, this function will return (sampleCount) x's, evenly - distributed between the min and max of the distribution (whatever those are defined to be above). - - When called with xSelection=`ByWeight, this function will distribute the x's such as to - match the cumulative shape of the distribution. This is slower but may give better results. - */ - let interpolateXs = - (~xSelection: [ | `Linear | `ByWeight]=`Linear, dist: dist, sampleCount) => { - - switch (xSelection, dist) { - | (`Linear, _) => E.A.Floats.range(min(dist), max(dist), sampleCount) - | (`ByWeight, `Uniform(n)) => - // In `ByWeight mode, uniform distributions get special treatment because we need two x's - // on either side for proper rendering (just left and right of the discontinuities). - let dx = 0.00001 *. (n.high -. n.low); - [|n.low -. dx, n.low +. dx, n.high -. dx, n.high +. dx|] - | (`ByWeight, _) => - let ys = E.A.Floats.range(minCdfValue, maxCdfValue, sampleCount) - ys |> E.A.fmap(y => inv(y, dist)) - }; - }; - - let toShape = - (~xSelection: [ | `Linear | `ByWeight]=`Linear, dist: dist, sampleCount) - : DistTypes.shape => { - switch (dist) { - | `ContinuousShape(n) => n.pdf |> Distributions.Continuous.T.toShape - | dist => - let xs = interpolateXs(~xSelection, dist, sampleCount); - let ys = xs |> E.A.fmap(r => pdf(r, dist)); - XYShape.T.fromArrays(xs, ys) - |> Distributions.Continuous.make(`Linear, _) - |> Distributions.Continuous.T.toShape; - }; - }; -}; - -module PointwiseAddDistributionsWeighted = { - type t = pointwiseAdd; - - let normalizeWeights = (dists: t) => { - let total = dists |> E.A.fmap(snd) |> E.A.Floats.sum; - dists |> E.A.fmap(((a, b)) => (a, b /. total)); - }; - - let pdf = (x: float, dists: t) => - dists - |> E.A.fmap(((e, w)) => GenericSimple.pdf(x, e) *. w) - |> E.A.Floats.sum; - - let min = (dists: t) => - dists |> E.A.fmap(d => d |> fst |> GenericSimple.min) |> E.A.min; - - let max = (dists: t) => - dists |> E.A.fmap(d => d |> fst |> GenericSimple.max) |> E.A.max; - - let discreteShape = (dists: t, sampleCount: int) => { - let discrete = - dists - |> E.A.fmap(((r, e)) => - r - |> ( - fun - | `Float(r) => Some((r, e)) - | _ => None - ) - ) - |> E.A.O.concatSomes - |> E.A.fmap(((x, y)) => - ({xs: [|x|], ys: [|y|]}: DistTypes.xyShape) - ) - |> Distributions.Discrete.reduce((+.)); - discrete; - }; - - let continuousShape = (dists: t, sampleCount: int) => { - let xs = - dists - |> E.A.fmap(r => - r - |> fst - |> GenericSimple.interpolateXs( - ~xSelection=`ByWeight, - _, - sampleCount / (dists |> E.A.length), - ) - ) - |> E.A.concatMany; - xs |> Array.fast_sort(compare); - let ys = xs |> E.A.fmap(pdf(_, dists)); - XYShape.T.fromArrays(xs, ys) |> Distributions.Continuous.make(`Linear, _); - }; - - let toShape = (dists: t, sampleCount: int) => { - let normalized = normalizeWeights(dists); - let continuous = - normalized - |> E.A.filter(((r, _)) => GenericSimple.contType(r) == `Continuous) - |> continuousShape(_, sampleCount); - let discrete = - normalized - |> E.A.filter(((r, _)) => GenericSimple.contType(r) == `Discrete) - |> discreteShape(_, sampleCount); - let shape = - MixedShapeBuilder.buildSimple(~continuous=Some(continuous), ~discrete); - shape |> E.O.toExt(""); - }; - - let toString = (dists: t) => { - let distString = - dists - |> E.A.fmap(d => GenericSimple.toString(fst(d))) - |> Js.Array.joinWith(","); - let weights = - dists - |> E.A.fmap(d => - snd(d) |> Js.Float.toPrecisionWithPrecision(~digits=2) - ) - |> Js.Array.joinWith(","); - {j|multimodal($distString, [$weights])|j}; - }; -}; - -let toString = (r: bigDist) => - r - |> ( + let mean: symbolicDist => result(float, string) = fun - | `Simple(d) => GenericSimple.toString(d) - | `PointwiseCombination(d) => - PointwiseAddDistributionsWeighted.toString(d) - ); + | `Triangular(n) => Triangular.mean(n) + | `Exponential(n) => Exponential.mean(n) + | `Cauchy(n) => Cauchy.mean(n) + | `Normal(n) => Normal.mean(n) + | `Lognormal(n) => Lognormal.mean(n) + | `Beta(n) => Beta.mean(n) + | `ContinuousShape(n) => ContinuousShape.mean(n) + | `Uniform(n) => Uniform.mean(n) + | `Float(n) => Float.mean(n); -let toShape = n => - fun - | `Simple(d) => GenericSimple.toShape(~xSelection=`ByWeight, d, n) - | `PointwiseCombination(d) => - PointwiseAddDistributionsWeighted.toShape(d, n); + let operate = (distToFloatOp: ExpressionTypes.distToFloatOperation, s) => + switch (distToFloatOp) { + | `Pdf(f) => Ok(pdf(f, s)) + | `Inv(f) => Ok(inv(f, s)) + | `Sample => Ok(sample(s)) + | `Mean => mean(s) + }; + + let interpolateXs = + (~xSelection: [ | `Linear | `ByWeight]=`Linear, dist: symbolicDist, n) => { + switch (xSelection, dist) { + | (`Linear, _) => E.A.Floats.range(min(dist), max(dist), n) + /* | (`ByWeight, `Uniform(n)) => + // In `ByWeight mode, uniform distributions get special treatment because we need two x's + // on either side for proper rendering (just left and right of the discontinuities). + let dx = 0.00001 *. (n.high -. n.low); + [|n.low -. dx, n.low +. dx, n.high -. dx, n.high +. dx|]; */ + | (`ByWeight, _) => + let ys = E.A.Floats.range(minCdfValue, maxCdfValue, n); + ys |> E.A.fmap(y => inv(y, dist)); + }; + }; + + /* Calling e.g. "Normal.operate" returns an optional that wraps a result. + If the optional is None, there is no valid analytic solution. If it Some, it + can still return an error if there is a serious problem, + like in the case of a divide by 0. + */ + type analyticalSimplificationResult = [ + | `AnalyticalSolution(SymbolicTypes.symbolicDist) + | `Error(string) + | `NoSolution + ]; + let tryAnalyticalSimplification = + ( + d1: symbolicDist, + d2: symbolicDist, + op: ExpressionTypes.algebraicOperation, + ) + : analyticalSimplificationResult => + switch (d1, d2) { + | (`Float(v1), `Float(v2)) => + switch (Operation.Algebraic.applyFn(op, v1, v2)) { + | Ok(r) => `AnalyticalSolution(`Float(r)) + | Error(n) => `Error(n) + } + | (`Normal(v1), `Normal(v2)) => + Normal.operate(op, v1, v2) + |> E.O.dimap(r => `AnalyticalSolution(r), () => `NoSolution) + | (`Lognormal(v1), `Lognormal(v2)) => + Lognormal.operate(op, v1, v2) + |> E.O.dimap(r => `AnalyticalSolution(r), () => `NoSolution) + | _ => `NoSolution + }; + + let toShape = (sampleCount, d: symbolicDist): DistTypes.shape => + switch (d) { + | `Float(v) => + Discrete( + Distributions.Discrete.make({xs: [|v|], ys: [|1.0|]}, Some(1.0)), + ) + | _ => + let xs = interpolateXs(~xSelection=`ByWeight, d, sampleCount); + let ys = xs |> E.A.fmap(x => pdf(x, d)); + Continuous( + Distributions.Continuous.make(`Linear, {xs, ys}, Some(1.0)), + ); + }; +}; diff --git a/src/distPlus/symbolic/SymbolicTypes.re b/src/distPlus/symbolic/SymbolicTypes.re new file mode 100644 index 00000000..b372a00f --- /dev/null +++ b/src/distPlus/symbolic/SymbolicTypes.re @@ -0,0 +1,49 @@ +type normal = { + mean: float, + stdev: float, +}; + +type lognormal = { + mu: float, + sigma: float, +}; + +type uniform = { + low: float, + high: float, +}; + +type beta = { + alpha: float, + beta: float, +}; + +type exponential = {rate: float}; + +type cauchy = { + local: float, + scale: float, +}; + +type triangular = { + low: float, + medium: float, + high: float, +}; + +type continuousShape = { + pdf: DistTypes.continuousShape, + cdf: DistTypes.continuousShape, +}; + +type symbolicDist = [ + | `Normal(normal) + | `Beta(beta) + | `Lognormal(lognormal) + | `Uniform(uniform) + | `Exponential(exponential) + | `Cauchy(cauchy) + | `Triangular(triangular) + | `ContinuousShape(continuousShape) + | `Float(float) // Dirac delta at x. Practically useful only in the context of multimodals. +]; \ No newline at end of file diff --git a/src/distPlus/utility/Jstat.re b/src/distPlus/utility/Jstat.re index 5f1c6c51..0a2cc13f 100644 --- a/src/distPlus/utility/Jstat.re +++ b/src/distPlus/utility/Jstat.re @@ -5,6 +5,7 @@ type normal = { [@bs.meth] "cdf": (float, float, float) => float, [@bs.meth] "inv": (float, float, float) => float, [@bs.meth] "sample": (float, float) => float, + [@bs.meth] "mean": (float, float) => float, }; type lognormal = { . @@ -12,6 +13,7 @@ type lognormal = { [@bs.meth] "cdf": (float, float, float) => float, [@bs.meth] "inv": (float, float, float) => float, [@bs.meth] "sample": (float, float) => float, + [@bs.meth] "mean": (float, float) => float, }; type uniform = { . @@ -19,6 +21,7 @@ type uniform = { [@bs.meth] "cdf": (float, float, float) => float, [@bs.meth] "inv": (float, float, float) => float, [@bs.meth] "sample": (float, float) => float, + [@bs.meth] "mean": (float, float) => float, }; type beta = { . @@ -26,6 +29,7 @@ type beta = { [@bs.meth] "cdf": (float, float, float) => float, [@bs.meth] "inv": (float, float, float) => float, [@bs.meth] "sample": (float, float) => float, + [@bs.meth] "mean": (float, float) => float, }; type exponential = { . @@ -33,6 +37,7 @@ type exponential = { [@bs.meth] "cdf": (float, float) => float, [@bs.meth] "inv": (float, float) => float, [@bs.meth] "sample": float => float, + [@bs.meth] "mean": float => float, }; type cauchy = { . @@ -47,6 +52,7 @@ type triangular = { [@bs.meth] "cdf": (float, float, float, float) => float, [@bs.meth] "inv": (float, float, float, float) => float, [@bs.meth] "sample": (float, float, float) => float, + [@bs.meth] "mean": (float, float, float) => float, }; // Pareto doesn't have sample for some reason @@ -61,6 +67,7 @@ type poisson = { [@bs.meth] "pdf": (float, float) => float, [@bs.meth] "cdf": (float, float) => float, [@bs.meth] "sample": float => float, + [@bs.meth] "mean": float => float, }; type weibull = { . @@ -68,6 +75,7 @@ type weibull = { [@bs.meth] "cdf": (float, float, float) => float, [@bs.meth] "inv": (float, float, float) => float, [@bs.meth] "sample": (float, float) => float, + [@bs.meth] "mean": (float, float) => float, }; type binomial = { . @@ -101,4 +109,4 @@ external quartiles: (array(float)) => array(float) = "quartiles"; [@bs.module "jstat"] external quantiles: (array(float), array(float)) => array(float) = "quantiles"; [@bs.module "jstat"] -external percentile: (array(float), float, bool) => float = "percentile"; \ No newline at end of file +external percentile: (array(float), float, bool) => float = "percentile"; diff --git a/src/interface/FormBuilder.re b/src/interface/FormBuilder.re index 55c4f071..7556a82f 100644 --- a/src/interface/FormBuilder.re +++ b/src/interface/FormBuilder.re @@ -22,7 +22,7 @@ let propValue = (t: Prop.Value.t) => { RenderTypes.DistPlusRenderer.make( ~distPlusIngredients=r, ~recommendedLength=10000, - ~shouldTruncate=true, + ~shouldDownsample=true, (), ) |> DistPlusRenderer.run @@ -105,4 +105,4 @@ module ModelForm = {
; }; -}; \ No newline at end of file +};