Minor formatting and name changes

This commit is contained in:
Ozzie Gooen 2020-07-01 20:26:39 +01:00
parent 502481e345
commit acdd3dfe7a
5 changed files with 227 additions and 232 deletions

View File

@ -1,62 +1,75 @@
type algebraicOperation = [
| `Add
| `Multiply
| `Subtract
| `Divide
];
type algebraicOperation = [ | `Add | `Multiply | `Subtract | `Divide];
type pointMassesWithMoments = {
n: int,
masses: array(float),
means: array(float),
variances: array(float)
n: int,
masses: array(float),
means: array(float),
variances: array(float),
};
let operationToFn: (algebraicOperation, float, float) => float =
module Operation = {
type t = algebraicOperation;
let toFn: (t, float, float) => float =
fun
| `Add => (+.)
| `Subtract => (-.)
| `Multiply => ( *. )
| `Divide => (/.);
let toString =
fun
| `Add => " + "
| `Subtract => " - "
| `Multiply => " * "
| `Divide => " / ";
};
/* This function takes a continuous distribution and efficiently approximates it as
/* This function takes a continuous distribution and efficiently approximates it as
point masses that have variances associated with them.
We estimate the means and variances from overlapping triangular distributions which we imagine are making up the
XYShape.
We can then use the algebra of random variables to "convolve" the point masses and their variances,
and finally reconstruct a new distribution from them, e.g. using a Fast Gauss Transform or Raykar et al. (2007). */
let toDiscretePointMassesFromTriangulars = (~inverse=false, s: XYShape.T.t): pointMassesWithMoments => {
// TODO: what if there is only one point in the distribution?
let n = s |> XYShape.T.length;
// first, double up the leftmost and rightmost points:
let {xs, ys}: XYShape.T.t = s;
let _ = Js.Array.unshift(xs[0], xs);
let _ = Js.Array.unshift(ys[0], ys);
let _ = Js.Array.push(xs[n - 1], xs);
let _ = Js.Array.push(ys[n - 1], ys);
let n = E.A.length(xs);
// squares and neighbourly products of the xs
let xsSq: array(float) = Belt.Array.makeUninitializedUnsafe(n);
let xsProdN1: array(float) = Belt.Array.makeUninitializedUnsafe(n - 1);
let xsProdN2: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2);
for (i in 0 to n - 1) {
let _ = Belt.Array.set(xsSq, i, xs[i] *. xs[i]); ();
};
for (i in 0 to n - 2) {
let _ = Belt.Array.set(xsProdN1, i, xs[i] *. xs[i + 1]); ();
};
for (i in 0 to n - 3) {
let _ = Belt.Array.set(xsProdN2, i, xs[i] *. xs[i + 2]); ();
};
// means and variances
let masses: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2); // doesn't include the fake first and last points
let means: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2);
let variances: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2);
let toDiscretePointMassesFromTriangulars =
(~inverse=false, s: XYShape.T.t): pointMassesWithMoments => {
// TODO: what if there is only one point in the distribution?
let n = s |> XYShape.T.length;
// first, double up the leftmost and rightmost points:
let {xs, ys}: XYShape.T.t = s;
let _ = Js.Array.unshift(xs[0], xs);
let _ = Js.Array.unshift(ys[0], ys);
let _ = Js.Array.push(xs[n - 1], xs);
let _ = Js.Array.push(ys[n - 1], ys);
let n = E.A.length(xs);
// squares and neighbourly products of the xs
let xsSq: array(float) = Belt.Array.makeUninitializedUnsafe(n);
let xsProdN1: array(float) = Belt.Array.makeUninitializedUnsafe(n - 1);
let xsProdN2: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2);
for (i in 0 to n - 1) {
let _ = Belt.Array.set(xsSq, i, xs[i] *. xs[i]);
();
};
for (i in 0 to n - 2) {
let _ = Belt.Array.set(xsProdN1, i, xs[i] *. xs[i + 1]);
();
};
for (i in 0 to n - 3) {
let _ = Belt.Array.set(xsProdN2, i, xs[i] *. xs[i + 2]);
();
};
// means and variances
let masses: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2); // doesn't include the fake first and last points
let means: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2);
let variances: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2);
if (inverse) {
if (inverse) {
for (i in 1 to n - 2) {
let _ = Belt.Array.set(masses, i - 1, (xs[i + 1] -. xs[i - 1]) *. ys[i] /. 2.);
let _ =
Belt.Array.set(
masses,
i - 1,
(xs[i + 1] -. xs[i - 1]) *. ys[i] /. 2.,
);
// this only works when the whole triange is either on the left or on the right of zero
let a = xs[i - 1];
@ -66,8 +79,16 @@ if (inverse) {
// These are the moments of the reciprocal of a triangular distribution, as symbolically integrated by Mathematica.
// They're probably pretty close to invMean ~ 1/mean = 3/(a+b+c) and invVar. But I haven't worked out
// the worst case error, so for now let's use these monster equations
let inverseMean = 2. *. ((a *. log(a/.c) /. (a-.c)) +. ((b *. log(c/.b))/.(b-.c))) /. (a -. b);
let inverseVar = 2. *. ((log(c/.a) /. (a-.c)) +. ((b *. log(b/.c))/.(b-.c))) /. (a -. b) -. inverseMean ** 2.;
let inverseMean =
2.
*. (a *. log(a /. c) /. (a -. c) +. b *. log(c /. b) /. (b -. c))
/. (a -. b);
let inverseVar =
2.
*. (log(c /. a) /. (a -. c) +. b *. log(b /. c) /. (b -. c))
/. (a -. b)
-. inverseMean
** 2.;
let _ = Belt.Array.set(means, i - 1, inverseMean);
@ -76,21 +97,40 @@ if (inverse) {
};
{n: n - 2, masses, means, variances};
} else {
} else {
for (i in 1 to n - 2) {
let _ = Belt.Array.set(masses, i - 1, (xs[i + 1] -. xs[i - 1]) *. ys[i] /. 2.);
let _ = Belt.Array.set(means, i - 1, (xs[i - 1] +. xs[i] +. xs[i + 1]) /. 3.);
let _ =
Belt.Array.set(
masses,
i - 1,
(xs[i + 1] -. xs[i - 1]) *. ys[i] /. 2.,
);
let _ =
Belt.Array.set(means, i - 1, (xs[i - 1] +. xs[i] +. xs[i + 1]) /. 3.);
let _ = Belt.Array.set(variances, i - 1,
(xsSq[i-1] +. xsSq[i] +. xsSq[i+1] -. xsProdN1[i-1] -. xsProdN1[i] -. xsProdN2[i-1]) /. 18.);
let _ =
Belt.Array.set(
variances,
i - 1,
(
xsSq[i - 1]
+. xsSq[i]
+. xsSq[i + 1]
-. xsProdN1[i - 1]
-. xsProdN1[i]
-. xsProdN2[i - 1]
)
/. 18.,
);
();
};
{n: n - 2, masses, means, variances};
};
};
let combineShapesContinuousContinuous = (op: algebraicOperation, s1: DistTypes.xyShape, s2: DistTypes.xyShape): DistTypes.xyShape => {
let combineShapesContinuousContinuous =
(op: algebraicOperation, s1: DistTypes.xyShape, s2: DistTypes.xyShape)
: DistTypes.xyShape => {
let t1n = s1 |> XYShape.T.length;
let t2n = s2 |> XYShape.T.length;
@ -99,26 +139,36 @@ let combineShapesContinuousContinuous = (op: algebraicOperation, s1: DistTypes.x
let t1m = toDiscretePointMassesFromTriangulars(s1);
let t2m = toDiscretePointMassesFromTriangulars(s2);
let combineMeansFn = switch (op) {
| `Add => (m1, m2) => m1 +. m2
| `Subtract => (m1, m2) => m1 -. m2
| `Multiply => (m1, m2) => m1 *. m2
| `Divide => (m1, mInv2) => m1 *. mInv2
let combineMeansFn =
switch (op) {
| `Add => ((m1, m2) => m1 +. m2)
| `Subtract => ((m1, m2) => m1 -. m2)
| `Multiply => ((m1, m2) => m1 *. m2)
| `Divide => ((m1, mInv2) => m1 *. mInv2)
}; // note: here, mInv2 = mean(1 / t2) ~= 1 / mean(t2)
// converts the variances and means of the two inputs into the variance of the output
let combineVariancesFn = switch (op) {
| `Add => (v1, v2, m1, m2) => v1 +. v2
| `Subtract => (v1, v2, m1, m2) => v1 +. v2
| `Multiply => (v1, v2, m1, m2) => (v1 *. v2) +. (v1 *. m1**2.) +. (v2 *. m1**2.)
| `Divide => (v1, vInv2, m1, mInv2) => (v1 *. vInv2) +. (v1 *. mInv2**2.) +. (vInv2 *. m1**2.)
let combineVariancesFn =
switch (op) {
| `Add => ((v1, v2, m1, m2) => v1 +. v2)
| `Subtract => ((v1, v2, m1, m2) => v1 +. v2)
| `Multiply => (
(v1, v2, m1, m2) => v1 *. v2 +. v1 *. m1 ** 2. +. v2 *. m1 ** 2.
)
| `Divide => (
(v1, vInv2, m1, mInv2) =>
v1 *. vInv2 +. v1 *. mInv2 ** 2. +. vInv2 *. m1 ** 2.
)
};
let outputMinX: ref(float) = ref(infinity);
let outputMaxX: ref(float) = ref(neg_infinity);
let masses: array(float) = Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n);
let means: array(float) = Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n);
let variances: array(float) = Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n);
let masses: array(float) =
Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n);
let means: array(float) =
Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n);
let variances: array(float) =
Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n);
// then convolve the two sets of pointMassesWithMoments
for (i in 0 to t1m.n - 1) {
for (j in 0 to t2m.n - 1) {
@ -126,7 +176,13 @@ let combineShapesContinuousContinuous = (op: algebraicOperation, s1: DistTypes.x
let _ = Belt.Array.set(masses, k, t1m.masses[i] *. t2m.masses[j]);
let mean = combineMeansFn(t1m.means[i], t2m.means[j]);
let variance = combineVariancesFn(t1m.variances[i], t2m.variances[j], t1m.means[i], t2m.means[j]);
let variance =
combineVariancesFn(
t1m.variances[i],
t2m.variances[j],
t1m.means[i],
t2m.means[j],
);
let _ = Belt.Array.set(means, k, mean);
let _ = Belt.Array.set(variances, k, variance);
// update bounds
@ -134,23 +190,25 @@ let combineShapesContinuousContinuous = (op: algebraicOperation, s1: DistTypes.x
let maxX = mean +. variance *. 1.644854;
if (minX < outputMinX^) {
outputMinX := minX;
}
};
if (maxX > outputMaxX^) {
outputMaxX := maxX;
}
};
};
};
// we now want to create a set of target points. For now, let's just evenly distribute 200 points between
// between the outputMinX and outputMaxX
let outputXs: array(float) = E.A.Floats.range(outputMinX^, outputMaxX^, 200);
let outputXs: array(float) =
E.A.Floats.range(outputMinX^, outputMaxX^, 200);
let outputYs: array(float) = Belt.Array.make(200, 0.0);
// now, for each of the outputYs, accumulate from a Gaussian kernel over each input point.
for (i in 0 to E.A.length(outputXs) - 1) {
let x = outputXs[i];
for (j in 0 to E.A.length(masses) - 1) {
let dx = outputXs[i] -. means[j];
let contribution = masses[j] *. exp(-.(dx**2.) /. (2. *. variances[j]));
let contribution =
masses[j] *. exp(-. (dx ** 2.) /. (2. *. variances[j]));
let _ = Belt.Array.set(outputYs, i, outputYs[i] +. contribution);
();
};

View File

@ -285,7 +285,7 @@ module Continuous = {
let t1n = t1s |> XYShape.T.length;
let t2n = t2s |> XYShape.T.length;
let fn = AlgebraicCombinations.operationToFn(op);
let fn = AlgebraicCombinations.Operation.toFn(op);
let outXYShapes: array(array((float, float))) =
Belt.Array.makeUninitializedUnsafe(t2n);
@ -402,7 +402,7 @@ module Discrete = {
t2.knownIntegralSum,
);
let fn = AlgebraicCombinations.operationToFn(op);
let fn = AlgebraicCombinations.Operation.toFn(op);
let xToYMap = E.FloatFloatMap.empty();
for (i in 0 to t1n - 1) {

View File

@ -34,67 +34,3 @@ let buildSimple = (~continuous: option(DistTypes.continuousShape), ~discrete: op
Some(Mixed(mixedDist));
};
};
// TODO: Delete, only being used in tests
/*let build = (~continuous, ~discrete, ~assumptions) =>
switch (assumptions) {
| {
continuous: ADDS_TO_CORRECT_PROBABILITY,
discrete: ADDS_TO_CORRECT_PROBABILITY,
discreteProbabilityMass: Some(r),
} =>
// TODO: Fix this, it's wrong :(
Some(
Distributions.Mixed.make(
~continuous,
~discrete,
~discreteProbabilityMassFraction=r,
),
)
| {
continuous: ADDS_TO_1,
discrete: ADDS_TO_1,
discreteProbabilityMass: Some(r),
} =>
Some(
Distributions.Mixed.make(
~continuous,
~discrete,
~discreteProbabilityMassFraction=r,
),
)
| {
continuous: ADDS_TO_1,
discrete: ADDS_TO_1,
discreteProbabilityMass: None,
} =>
None
| {
continuous: ADDS_TO_CORRECT_PROBABILITY,
discrete: ADDS_TO_1,
discreteProbabilityMass: None,
} =>
None
| {
continuous: ADDS_TO_1,
discrete: ADDS_TO_CORRECT_PROBABILITY,
discreteProbabilityMass: None,
} =>
let discreteProbabilityMassFraction =
Distributions.Discrete.T.Integral.sum(~cache=None, discrete);
let discrete =
Distributions.Discrete.T.scaleToIntegralSum(~intendedSum=1.0, discrete);
Some(
Distributions.Mixed.make(
~continuous,
~discrete,
~discreteProbabilityMassFraction,
),
);
| _ => None
};*/

View File

@ -36,7 +36,6 @@ type continuousShape = {
cdf: DistTypes.continuousShape,
};
type dist = [
| `Normal(normal)
| `Beta(beta)
@ -54,6 +53,7 @@ module ContinuousShape = {
let make = (pdf, cdf): t => {pdf, cdf};
let pdf = (x, t: t) =>
Distributions.Continuous.T.xToY(x, t.pdf).continuous;
// TODO: pdf and inv are currently the same, this seems broken.
let inv = (p, t: t) =>
Distributions.Continuous.T.xToY(p, t.pdf).continuous;
// TODO: Fix the sampling, to have it work correctly.
@ -77,7 +77,7 @@ module Cauchy = {
let pdf = (x, t: t) => Jstat.cauchy##pdf(x, t.local, t.scale);
let inv = (p, t: t) => Jstat.cauchy##inv(p, t.local, t.scale);
let sample = (t: t) => Jstat.cauchy##sample(t.local, t.scale);
let mean = (t: t) => Error("Cauchy distributions have no mean value.")
let mean = (_: t) => Error("Cauchy distributions have no mean value.");
let toString = ({local, scale}: t) => {j|Cauchy($local, $scale)|j};
};
@ -117,8 +117,10 @@ module Normal = {
// TODO: is this useful here at all? would need the integral as well ...
let pointwiseProduct = (n1: t, n2: t) => {
let mean = (n1.mean *. n2.stdev**2. +. n2.mean *. n1.stdev**2.) /. (n1.stdev**2. +. n2.stdev**2.);
let stdev = 1. /. ((1. /. n1.stdev**2.) +. (1. /. n2.stdev**2.));
let mean =
(n1.mean *. n2.stdev ** 2. +. n2.mean *. n1.stdev ** 2.)
/. (n1.stdev ** 2. +. n2.stdev ** 2.);
let stdev = 1. /. (1. /. n1.stdev ** 2. +. 1. /. n2.stdev ** 2.);
`Normal({mean, stdev});
};
};
@ -162,12 +164,12 @@ module Lognormal = {
let multiply = (l1, l2) => {
let mu = l1.mu +. l2.mu;
let sigma = l1.sigma +. l2.sigma;
`Lognormal({mu, sigma})
`Lognormal({mu, sigma});
};
let divide = (l1, l2) => {
let mu = l1.mu -. l2.mu;
let sigma = l1.sigma +. l2.sigma;
`Lognormal({mu, sigma})
`Lognormal({mu, sigma});
};
};
@ -277,13 +279,13 @@ module GenericDistFunctions = {
| `Beta(n) => Beta.mean(n)
| `ContinuousShape(n) => ContinuousShape.mean(n)
| `Uniform(n) => Uniform.mean(n)
| `Float(n) => Float.mean(n)
| `Float(n) => Float.mean(n);
let interpolateXs =
(~xSelection: [ | `Linear | `ByWeight]=`Linear, dist: dist, n) => {
switch (xSelection, dist) {
| (`Linear, _) => E.A.Floats.range(min(dist), max(dist), n)
/* | (`ByWeight, `Uniform(n)) =>
/* | (`ByWeight, `Uniform(n)) =>
// In `ByWeight mode, uniform distributions get special treatment because we need two x's
// on either side for proper rendering (just left and right of the discontinuities).
let dx = 0.00001 *. (n.high -. n.low);
@ -294,4 +296,3 @@ module GenericDistFunctions = {
};
};
};

View File

@ -1,5 +1,6 @@
/* This module represents a tree node. */
// todo: Symbolic already has an arbitrary continuousShape option. It seems messy to have both.
type distData = [
| `Symbolic(SymbolicDist.dist)
| `RenderedShape(DistTypes.shape)
@ -46,7 +47,7 @@ and operation = [
module TreeNode = {
type t = treeNode;
type simplifier = treeNode => result(treeNode, string);
type tResult = treeNode => result(treeNode, string);
let rec toString = (t: t): string => {
let stringFromAlgebraicCombination =
@ -63,16 +64,15 @@ module TreeNode = {
let stringFromFloatFromDistOperation =
fun
| `Pdf(f) => "pdf(x=$f, "
| `Inv(f) => "inv(c=$f, "
| `Pdf(f) => {j|pdf(x=$f, |j}
| `Inv(f) => {j|inv(x=$f, |j}
| `Sample => "sample("
| `Mean => "mean(";
switch (t) {
| `DistData(`Symbolic(d)) =>
SymbolicDist.GenericDistFunctions.toString(d)
| `DistData(`RenderedShape(s)) => "[shape]"
| `DistData(`RenderedShape(_)) => "[shape]"
| `Operation(`AlgebraicCombination(op, t1, t2)) =>
toString(t1) ++ stringFromAlgebraicCombination(op) ++ toString(t2)
| `Operation(`PointwiseCombination(op, t1, t2)) =>
@ -102,12 +102,12 @@ module TreeNode = {
In general, this is implemented via convolution. */
module AlgebraicCombination = {
let simplify = (algebraicOp, t1: t, t2: t): result(treeNode, string) => {
let tryCombiningFloats: simplifier =
let tryCombiningFloats: tResult =
fun
| `Operation(
`AlgebraicCombination(
`Divide,
`DistData(`Symbolic(`Float(v1))),
`DistData(`Symbolic(`Float(_))),
`DistData(`Symbolic(`Float(0.))),
),
) =>
@ -119,12 +119,12 @@ module TreeNode = {
`DistData(`Symbolic(`Float(v2))),
),
) => {
let func = AlgebraicCombinations.operationToFn(algebraicOp);
let func = AlgebraicCombinations.Operation.toFn(algebraicOp);
Ok(`DistData(`Symbolic(`Float(func(v1, v2)))));
}
| t => Ok(t);
let tryCombiningNormals: simplifier =
let tryCombiningNormals: tResult =
fun
| `Operation(
`AlgebraicCombination(
@ -144,7 +144,7 @@ module TreeNode = {
Ok(`DistData(`Symbolic(SymbolicDist.Normal.subtract(n1, n2))))
| t => Ok(t);
let tryCombiningLognormals: simplifier =
let tryCombiningLognormals: tResult =
fun
| `Operation(
`AlgebraicCombination(
@ -281,13 +281,13 @@ module TreeNode = {
module Truncate = {
module Simplify = {
let tryTruncatingNothing: simplifier =
let tryTruncatingNothing: tResult =
fun
| `Operation(`Truncate(None, None, `DistData(d))) =>
Ok(`DistData(d))
| t => Ok(t);
let tryTruncatingUniform: simplifier =
let tryTruncatingUniform: tResult =
fun
| `Operation(`Truncate(lc, rc, `DistData(`Symbolic(`Uniform(u))))) => {
// just create a new Uniform distribution
@ -508,7 +508,7 @@ module TreeNode = {
but most often it will produce a RenderedShape.
This function is used mainly to turn a parse tree into a single RenderedShape
that can then be displayed to the user. */
let rec toDistData = (treeNode: t, sampleCount: int): result(t, string) => {
let toDistData = (treeNode: t, sampleCount: int): result(t, string) => {
switch (treeNode) {
| `DistData(d) => Ok(`DistData(d))
| `Operation(op) => operationToDistData(sampleCount, op)