Minor formatting and name changes

This commit is contained in:
Ozzie Gooen 2020-07-01 20:26:39 +01:00
parent 502481e345
commit acdd3dfe7a
5 changed files with 227 additions and 232 deletions

View File

@ -1,24 +1,28 @@
type algebraicOperation = [
| `Add
| `Multiply
| `Subtract
| `Divide
];
type algebraicOperation = [ | `Add | `Multiply | `Subtract | `Divide];
type pointMassesWithMoments = {
n: int,
masses: array(float),
means: array(float),
variances: array(float)
variances: array(float),
};
let operationToFn: (algebraicOperation, float, float) => float =
module Operation = {
type t = algebraicOperation;
let toFn: (t, float, float) => float =
fun
| `Add => (+.)
| `Subtract => (-.)
| `Multiply => ( *. )
| `Divide => (/.);
let toString =
fun
| `Add => " + "
| `Subtract => " - "
| `Multiply => " * "
| `Divide => " / ";
};
/* This function takes a continuous distribution and efficiently approximates it as
point masses that have variances associated with them.
@ -26,7 +30,8 @@ let operationToFn: (algebraicOperation, float, float) => float =
XYShape.
We can then use the algebra of random variables to "convolve" the point masses and their variances,
and finally reconstruct a new distribution from them, e.g. using a Fast Gauss Transform or Raykar et al. (2007). */
let toDiscretePointMassesFromTriangulars = (~inverse=false, s: XYShape.T.t): pointMassesWithMoments => {
let toDiscretePointMassesFromTriangulars =
(~inverse=false, s: XYShape.T.t): pointMassesWithMoments => {
// TODO: what if there is only one point in the distribution?
let n = s |> XYShape.T.length;
// first, double up the leftmost and rightmost points:
@ -41,13 +46,16 @@ let xsSq: array(float) = Belt.Array.makeUninitializedUnsafe(n);
let xsProdN1: array(float) = Belt.Array.makeUninitializedUnsafe(n - 1);
let xsProdN2: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2);
for (i in 0 to n - 1) {
let _ = Belt.Array.set(xsSq, i, xs[i] *. xs[i]); ();
let _ = Belt.Array.set(xsSq, i, xs[i] *. xs[i]);
();
};
for (i in 0 to n - 2) {
let _ = Belt.Array.set(xsProdN1, i, xs[i] *. xs[i + 1]); ();
let _ = Belt.Array.set(xsProdN1, i, xs[i] *. xs[i + 1]);
();
};
for (i in 0 to n - 3) {
let _ = Belt.Array.set(xsProdN2, i, xs[i] *. xs[i + 2]); ();
let _ = Belt.Array.set(xsProdN2, i, xs[i] *. xs[i + 2]);
();
};
// means and variances
let masses: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2); // doesn't include the fake first and last points
@ -56,7 +64,12 @@ let variances: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2);
if (inverse) {
for (i in 1 to n - 2) {
let _ = Belt.Array.set(masses, i - 1, (xs[i + 1] -. xs[i - 1]) *. ys[i] /. 2.);
let _ =
Belt.Array.set(
masses,
i - 1,
(xs[i + 1] -. xs[i - 1]) *. ys[i] /. 2.,
);
// this only works when the whole triange is either on the left or on the right of zero
let a = xs[i - 1];
@ -66,8 +79,16 @@ if (inverse) {
// These are the moments of the reciprocal of a triangular distribution, as symbolically integrated by Mathematica.
// They're probably pretty close to invMean ~ 1/mean = 3/(a+b+c) and invVar. But I haven't worked out
// the worst case error, so for now let's use these monster equations
let inverseMean = 2. *. ((a *. log(a/.c) /. (a-.c)) +. ((b *. log(c/.b))/.(b-.c))) /. (a -. b);
let inverseVar = 2. *. ((log(c/.a) /. (a-.c)) +. ((b *. log(b/.c))/.(b-.c))) /. (a -. b) -. inverseMean ** 2.;
let inverseMean =
2.
*. (a *. log(a /. c) /. (a -. c) +. b *. log(c /. b) /. (b -. c))
/. (a -. b);
let inverseVar =
2.
*. (log(c /. a) /. (a -. c) +. b *. log(b /. c) /. (b -. c))
/. (a -. b)
-. inverseMean
** 2.;
let _ = Belt.Array.set(means, i - 1, inverseMean);
@ -78,19 +99,38 @@ if (inverse) {
{n: n - 2, masses, means, variances};
} else {
for (i in 1 to n - 2) {
let _ = Belt.Array.set(masses, i - 1, (xs[i + 1] -. xs[i - 1]) *. ys[i] /. 2.);
let _ = Belt.Array.set(means, i - 1, (xs[i - 1] +. xs[i] +. xs[i + 1]) /. 3.);
let _ =
Belt.Array.set(
masses,
i - 1,
(xs[i + 1] -. xs[i - 1]) *. ys[i] /. 2.,
);
let _ =
Belt.Array.set(means, i - 1, (xs[i - 1] +. xs[i] +. xs[i + 1]) /. 3.);
let _ = Belt.Array.set(variances, i - 1,
(xsSq[i-1] +. xsSq[i] +. xsSq[i+1] -. xsProdN1[i-1] -. xsProdN1[i] -. xsProdN2[i-1]) /. 18.);
let _ =
Belt.Array.set(
variances,
i - 1,
(
xsSq[i - 1]
+. xsSq[i]
+. xsSq[i + 1]
-. xsProdN1[i - 1]
-. xsProdN1[i]
-. xsProdN2[i - 1]
)
/. 18.,
);
();
};
{n: n - 2, masses, means, variances};
};
};
let combineShapesContinuousContinuous = (op: algebraicOperation, s1: DistTypes.xyShape, s2: DistTypes.xyShape): DistTypes.xyShape => {
let combineShapesContinuousContinuous =
(op: algebraicOperation, s1: DistTypes.xyShape, s2: DistTypes.xyShape)
: DistTypes.xyShape => {
let t1n = s1 |> XYShape.T.length;
let t2n = s2 |> XYShape.T.length;
@ -99,26 +139,36 @@ let combineShapesContinuousContinuous = (op: algebraicOperation, s1: DistTypes.x
let t1m = toDiscretePointMassesFromTriangulars(s1);
let t2m = toDiscretePointMassesFromTriangulars(s2);
let combineMeansFn = switch (op) {
| `Add => (m1, m2) => m1 +. m2
| `Subtract => (m1, m2) => m1 -. m2
| `Multiply => (m1, m2) => m1 *. m2
| `Divide => (m1, mInv2) => m1 *. mInv2
let combineMeansFn =
switch (op) {
| `Add => ((m1, m2) => m1 +. m2)
| `Subtract => ((m1, m2) => m1 -. m2)
| `Multiply => ((m1, m2) => m1 *. m2)
| `Divide => ((m1, mInv2) => m1 *. mInv2)
}; // note: here, mInv2 = mean(1 / t2) ~= 1 / mean(t2)
// converts the variances and means of the two inputs into the variance of the output
let combineVariancesFn = switch (op) {
| `Add => (v1, v2, m1, m2) => v1 +. v2
| `Subtract => (v1, v2, m1, m2) => v1 +. v2
| `Multiply => (v1, v2, m1, m2) => (v1 *. v2) +. (v1 *. m1**2.) +. (v2 *. m1**2.)
| `Divide => (v1, vInv2, m1, mInv2) => (v1 *. vInv2) +. (v1 *. mInv2**2.) +. (vInv2 *. m1**2.)
let combineVariancesFn =
switch (op) {
| `Add => ((v1, v2, m1, m2) => v1 +. v2)
| `Subtract => ((v1, v2, m1, m2) => v1 +. v2)
| `Multiply => (
(v1, v2, m1, m2) => v1 *. v2 +. v1 *. m1 ** 2. +. v2 *. m1 ** 2.
)
| `Divide => (
(v1, vInv2, m1, mInv2) =>
v1 *. vInv2 +. v1 *. mInv2 ** 2. +. vInv2 *. m1 ** 2.
)
};
let outputMinX: ref(float) = ref(infinity);
let outputMaxX: ref(float) = ref(neg_infinity);
let masses: array(float) = Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n);
let means: array(float) = Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n);
let variances: array(float) = Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n);
let masses: array(float) =
Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n);
let means: array(float) =
Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n);
let variances: array(float) =
Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n);
// then convolve the two sets of pointMassesWithMoments
for (i in 0 to t1m.n - 1) {
for (j in 0 to t2m.n - 1) {
@ -126,7 +176,13 @@ let combineShapesContinuousContinuous = (op: algebraicOperation, s1: DistTypes.x
let _ = Belt.Array.set(masses, k, t1m.masses[i] *. t2m.masses[j]);
let mean = combineMeansFn(t1m.means[i], t2m.means[j]);
let variance = combineVariancesFn(t1m.variances[i], t2m.variances[j], t1m.means[i], t2m.means[j]);
let variance =
combineVariancesFn(
t1m.variances[i],
t2m.variances[j],
t1m.means[i],
t2m.means[j],
);
let _ = Belt.Array.set(means, k, mean);
let _ = Belt.Array.set(variances, k, variance);
// update bounds
@ -134,23 +190,25 @@ let combineShapesContinuousContinuous = (op: algebraicOperation, s1: DistTypes.x
let maxX = mean +. variance *. 1.644854;
if (minX < outputMinX^) {
outputMinX := minX;
}
};
if (maxX > outputMaxX^) {
outputMaxX := maxX;
}
};
};
};
// we now want to create a set of target points. For now, let's just evenly distribute 200 points between
// between the outputMinX and outputMaxX
let outputXs: array(float) = E.A.Floats.range(outputMinX^, outputMaxX^, 200);
let outputXs: array(float) =
E.A.Floats.range(outputMinX^, outputMaxX^, 200);
let outputYs: array(float) = Belt.Array.make(200, 0.0);
// now, for each of the outputYs, accumulate from a Gaussian kernel over each input point.
for (i in 0 to E.A.length(outputXs) - 1) {
let x = outputXs[i];
for (j in 0 to E.A.length(masses) - 1) {
let dx = outputXs[i] -. means[j];
let contribution = masses[j] *. exp(-.(dx**2.) /. (2. *. variances[j]));
let contribution =
masses[j] *. exp(-. (dx ** 2.) /. (2. *. variances[j]));
let _ = Belt.Array.set(outputYs, i, outputYs[i] +. contribution);
();
};

View File

@ -285,7 +285,7 @@ module Continuous = {
let t1n = t1s |> XYShape.T.length;
let t2n = t2s |> XYShape.T.length;
let fn = AlgebraicCombinations.operationToFn(op);
let fn = AlgebraicCombinations.Operation.toFn(op);
let outXYShapes: array(array((float, float))) =
Belt.Array.makeUninitializedUnsafe(t2n);
@ -402,7 +402,7 @@ module Discrete = {
t2.knownIntegralSum,
);
let fn = AlgebraicCombinations.operationToFn(op);
let fn = AlgebraicCombinations.Operation.toFn(op);
let xToYMap = E.FloatFloatMap.empty();
for (i in 0 to t1n - 1) {

View File

@ -34,67 +34,3 @@ let buildSimple = (~continuous: option(DistTypes.continuousShape), ~discrete: op
Some(Mixed(mixedDist));
};
};
// TODO: Delete, only being used in tests
/*let build = (~continuous, ~discrete, ~assumptions) =>
switch (assumptions) {
| {
continuous: ADDS_TO_CORRECT_PROBABILITY,
discrete: ADDS_TO_CORRECT_PROBABILITY,
discreteProbabilityMass: Some(r),
} =>
// TODO: Fix this, it's wrong :(
Some(
Distributions.Mixed.make(
~continuous,
~discrete,
~discreteProbabilityMassFraction=r,
),
)
| {
continuous: ADDS_TO_1,
discrete: ADDS_TO_1,
discreteProbabilityMass: Some(r),
} =>
Some(
Distributions.Mixed.make(
~continuous,
~discrete,
~discreteProbabilityMassFraction=r,
),
)
| {
continuous: ADDS_TO_1,
discrete: ADDS_TO_1,
discreteProbabilityMass: None,
} =>
None
| {
continuous: ADDS_TO_CORRECT_PROBABILITY,
discrete: ADDS_TO_1,
discreteProbabilityMass: None,
} =>
None
| {
continuous: ADDS_TO_1,
discrete: ADDS_TO_CORRECT_PROBABILITY,
discreteProbabilityMass: None,
} =>
let discreteProbabilityMassFraction =
Distributions.Discrete.T.Integral.sum(~cache=None, discrete);
let discrete =
Distributions.Discrete.T.scaleToIntegralSum(~intendedSum=1.0, discrete);
Some(
Distributions.Mixed.make(
~continuous,
~discrete,
~discreteProbabilityMassFraction,
),
);
| _ => None
};*/

View File

@ -36,7 +36,6 @@ type continuousShape = {
cdf: DistTypes.continuousShape,
};
type dist = [
| `Normal(normal)
| `Beta(beta)
@ -54,6 +53,7 @@ module ContinuousShape = {
let make = (pdf, cdf): t => {pdf, cdf};
let pdf = (x, t: t) =>
Distributions.Continuous.T.xToY(x, t.pdf).continuous;
// TODO: pdf and inv are currently the same, this seems broken.
let inv = (p, t: t) =>
Distributions.Continuous.T.xToY(p, t.pdf).continuous;
// TODO: Fix the sampling, to have it work correctly.
@ -77,7 +77,7 @@ module Cauchy = {
let pdf = (x, t: t) => Jstat.cauchy##pdf(x, t.local, t.scale);
let inv = (p, t: t) => Jstat.cauchy##inv(p, t.local, t.scale);
let sample = (t: t) => Jstat.cauchy##sample(t.local, t.scale);
let mean = (t: t) => Error("Cauchy distributions have no mean value.")
let mean = (_: t) => Error("Cauchy distributions have no mean value.");
let toString = ({local, scale}: t) => {j|Cauchy($local, $scale)|j};
};
@ -117,8 +117,10 @@ module Normal = {
// TODO: is this useful here at all? would need the integral as well ...
let pointwiseProduct = (n1: t, n2: t) => {
let mean = (n1.mean *. n2.stdev**2. +. n2.mean *. n1.stdev**2.) /. (n1.stdev**2. +. n2.stdev**2.);
let stdev = 1. /. ((1. /. n1.stdev**2.) +. (1. /. n2.stdev**2.));
let mean =
(n1.mean *. n2.stdev ** 2. +. n2.mean *. n1.stdev ** 2.)
/. (n1.stdev ** 2. +. n2.stdev ** 2.);
let stdev = 1. /. (1. /. n1.stdev ** 2. +. 1. /. n2.stdev ** 2.);
`Normal({mean, stdev});
};
};
@ -162,12 +164,12 @@ module Lognormal = {
let multiply = (l1, l2) => {
let mu = l1.mu +. l2.mu;
let sigma = l1.sigma +. l2.sigma;
`Lognormal({mu, sigma})
`Lognormal({mu, sigma});
};
let divide = (l1, l2) => {
let mu = l1.mu -. l2.mu;
let sigma = l1.sigma +. l2.sigma;
`Lognormal({mu, sigma})
`Lognormal({mu, sigma});
};
};
@ -277,7 +279,7 @@ module GenericDistFunctions = {
| `Beta(n) => Beta.mean(n)
| `ContinuousShape(n) => ContinuousShape.mean(n)
| `Uniform(n) => Uniform.mean(n)
| `Float(n) => Float.mean(n)
| `Float(n) => Float.mean(n);
let interpolateXs =
(~xSelection: [ | `Linear | `ByWeight]=`Linear, dist: dist, n) => {
@ -294,4 +296,3 @@ module GenericDistFunctions = {
};
};
};

View File

@ -1,5 +1,6 @@
/* This module represents a tree node. */
// todo: Symbolic already has an arbitrary continuousShape option. It seems messy to have both.
type distData = [
| `Symbolic(SymbolicDist.dist)
| `RenderedShape(DistTypes.shape)
@ -46,7 +47,7 @@ and operation = [
module TreeNode = {
type t = treeNode;
type simplifier = treeNode => result(treeNode, string);
type tResult = treeNode => result(treeNode, string);
let rec toString = (t: t): string => {
let stringFromAlgebraicCombination =
@ -63,16 +64,15 @@ module TreeNode = {
let stringFromFloatFromDistOperation =
fun
| `Pdf(f) => "pdf(x=$f, "
| `Inv(f) => "inv(c=$f, "
| `Pdf(f) => {j|pdf(x=$f, |j}
| `Inv(f) => {j|inv(x=$f, |j}
| `Sample => "sample("
| `Mean => "mean(";
switch (t) {
| `DistData(`Symbolic(d)) =>
SymbolicDist.GenericDistFunctions.toString(d)
| `DistData(`RenderedShape(s)) => "[shape]"
| `DistData(`RenderedShape(_)) => "[shape]"
| `Operation(`AlgebraicCombination(op, t1, t2)) =>
toString(t1) ++ stringFromAlgebraicCombination(op) ++ toString(t2)
| `Operation(`PointwiseCombination(op, t1, t2)) =>
@ -102,12 +102,12 @@ module TreeNode = {
In general, this is implemented via convolution. */
module AlgebraicCombination = {
let simplify = (algebraicOp, t1: t, t2: t): result(treeNode, string) => {
let tryCombiningFloats: simplifier =
let tryCombiningFloats: tResult =
fun
| `Operation(
`AlgebraicCombination(
`Divide,
`DistData(`Symbolic(`Float(v1))),
`DistData(`Symbolic(`Float(_))),
`DistData(`Symbolic(`Float(0.))),
),
) =>
@ -119,12 +119,12 @@ module TreeNode = {
`DistData(`Symbolic(`Float(v2))),
),
) => {
let func = AlgebraicCombinations.operationToFn(algebraicOp);
let func = AlgebraicCombinations.Operation.toFn(algebraicOp);
Ok(`DistData(`Symbolic(`Float(func(v1, v2)))));
}
| t => Ok(t);
let tryCombiningNormals: simplifier =
let tryCombiningNormals: tResult =
fun
| `Operation(
`AlgebraicCombination(
@ -144,7 +144,7 @@ module TreeNode = {
Ok(`DistData(`Symbolic(SymbolicDist.Normal.subtract(n1, n2))))
| t => Ok(t);
let tryCombiningLognormals: simplifier =
let tryCombiningLognormals: tResult =
fun
| `Operation(
`AlgebraicCombination(
@ -281,13 +281,13 @@ module TreeNode = {
module Truncate = {
module Simplify = {
let tryTruncatingNothing: simplifier =
let tryTruncatingNothing: tResult =
fun
| `Operation(`Truncate(None, None, `DistData(d))) =>
Ok(`DistData(d))
| t => Ok(t);
let tryTruncatingUniform: simplifier =
let tryTruncatingUniform: tResult =
fun
| `Operation(`Truncate(lc, rc, `DistData(`Symbolic(`Uniform(u))))) => {
// just create a new Uniform distribution
@ -508,7 +508,7 @@ module TreeNode = {
but most often it will produce a RenderedShape.
This function is used mainly to turn a parse tree into a single RenderedShape
that can then be displayed to the user. */
let rec toDistData = (treeNode: t, sampleCount: int): result(t, string) => {
let toDistData = (treeNode: t, sampleCount: int): result(t, string) => {
switch (treeNode) {
| `DistData(d) => Ok(`DistData(d))
| `Operation(op) => operationToDistData(sampleCount, op)