Added mean and variance functions, and tests for those.

- A getMean and getVariance in each module of src/distPlus/distribution/Distributions.re
    - They get the exact answer for the functions in Distributions.re, according to the approximation used.
    - There is now an XYShape.Analysis.integrateContinuousShape function.
- Tests in the __tests__/Distributions__Test.re file.
    - Calculation of the mean and variance for the normal and lognormal distributions, at the end.
- I also added some reduce array functions to the E.A. module.
This commit is contained in:
Nuno Sempere 2020-04-18 23:20:59 +02:00
parent 2e5f285a9e
commit 2f45f92552
4 changed files with 177 additions and 10 deletions

View File

@ -12,7 +12,17 @@ let makeTest = (~only=false, str, item1, item2) =>
expect(item1) |> toEqual(item2)
);
// Registers a test named str asserting that item1 is close to item2 at the
// given number of digits. With ~only=true the test is registered through
// Only.test instead of test.
let makeTestCloseEquality = (~only=false, str, item1, item2, ~digits) => {
  // The assertion is identical in both modes; only the registrar differs.
  let assertion = () => expect(item1) |> toBeSoCloseTo(item2, ~digits);
  only ? Only.test(str, assertion) : test(str, assertion);
};
describe("Shape", () => {
describe("Continuous", () => {
open Distributions.Continuous;
let continuous = make(`Linear, shape);
@ -119,7 +129,7 @@ describe("Shape", () => {
1.0,
);
});
describe("Discrete", () => {
open Distributions.Discrete;
let shape: DistTypes.xyShape = {
@ -185,6 +195,7 @@ describe("Shape", () => {
0.9,
);
makeTest("integralEndY", T.Integral.sum(~cache=None, discrete), 1.0);
});
describe("Mixed", () => {
@ -289,9 +300,10 @@ describe("Shape", () => {
},
),
);
});
describe("Mixed", () => {
describe("Distplus", () => {
open Distributions.DistPlus;
let discrete: DistTypes.xyShape = {
xs: [|1., 4., 8.|],
@ -362,4 +374,39 @@ describe("Shape", () => {
),
);
});
});
describe("Shape", () => {
  // Moments that both shapes below are expected to reproduce.
  let mean = 10.0;
  let stdev = 4.0;
  let variance = stdev ** 2.0;
  let numSamples = 10000;
  open Distributions.Shape;
  // Turns a symbolic distribution into a shape, passing numSamples through.
  let shapeOf = dist => SymbolicDist.GenericSimple.toShape(dist, numSamples);
  let normal: SymbolicDist.dist = `Normal({mean, stdev});
  let normalShape = shapeOf(normal);
  let lognormalShape =
    shapeOf(SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev));

  // Each check uses its own tolerance; the variance checks are looser than
  // the mean checks.
  makeTestCloseEquality(
    "Mean of a normal",
    T.getMean(normalShape),
    mean,
    ~digits=2,
  );
  makeTestCloseEquality(
    "Variance of a normal",
    T.getVariance(normalShape),
    variance,
    ~digits=1,
  );
  makeTestCloseEquality(
    "Mean of a lognormal",
    T.getMean(lognormalShape),
    mean,
    ~digits=2,
  );
  makeTestCloseEquality(
    "Variance of a lognormal",
    T.getVariance(lognormalShape),
    variance,
    ~digits=0,
  );
});
});

View File

@ -17,6 +17,9 @@ module type dist = {
let integralEndY: (~cache: option(integral), t) => float;
let integralXtoY: (~cache: option(integral), float, t) => float;
let integralYtoX: (~cache: option(integral), float, t) => float;
// Mean of the distribution represented by t.
let getMean: t => float;
// Variance of the distribution represented by t.
let getVariance: t => float;
};
module Dist = (T: dist) => {
@ -35,6 +38,8 @@ module Dist = (T: dist) => {
let toDiscrete = T.toDiscrete;
let toScaledContinuous = T.toScaledContinuous;
let toScaledDiscrete = T.toScaledDiscrete;
// Both moments are taken unchanged from the wrapped module T.
let getMean = T.getMean;
let getVariance = T.getVariance;
// TODO: Move this to each class, have use integral to produce integral in DistPlus class.
// Multiplies every y value of t by scale (1.0 when ~scale is omitted).
let scaleBy = (~scale=1.0, t: t) => {
  let scaleY = (r: float) => r *. scale;
  mapY(scaleY, t);
};
@ -99,7 +104,7 @@ module Continuous = {
)
|> DistTypes.MixedPoint.makeContinuous;
};
// let combineWithFn = (t1: t, t2: t, fn: (float, float) => float) => {
// switch(t1, t2){
// | ({interpolation: `Stepwise}, {interpolation: `Stepwise}) => 3.0
@ -135,6 +140,9 @@ module Continuous = {
// Always None, whatever the input (presumably because a continuous shape has
// no discrete part; confirm against the other modules implementing dist).
let toDiscrete = _ => None;
// The shape is returned as-is, wrapped in Some.
let toScaledContinuous = t => Some(t);
// Always None, mirroring toDiscrete.
let toScaledDiscrete = _ => None;
// Mean of a continuous shape: integrateContinuousShape with its default
// antiderivatives.
let getMean = (t: t) => XYShape.Analysis.integrateContinuousShape(t);
// Variance as (mean of squares) - (mean squared); see getVarianceDangerously.
let getVariance = (t: t): float => {
  let getMeanOfSquares = XYShape.Analysis.getMeanOfSquaresContinuousShape;
  XYShape.Analysis.getVarianceDangerously(t, getMean, getMeanOfSquares);
};
});
};
@ -144,11 +152,22 @@ module Discrete = {
let sortedByX = (t: DistTypes.discreteShape) =>
t |> XYShape.T.zip |> XYShape.Zipped.sortByX;
let empty = XYShape.T.empty;
let combine = (fn, t1: DistTypes.discreteShape, t2: DistTypes.discreteShape): DistTypes.discreteShape => {
XYShape.Combine.combine(~xsSelection=ALL_XS, ~xToYSelection=XYShape.XtoY.stepwiseIfAtX, ~fn, t1, t2)
}
let _default0 = ((fn, a,b) => fn(E.O.default(0.0, a), E.O.default(0.0, b)));
let reduce = (fn, items) => items |> E.A.fold_left(combine(_default0((fn))), empty);
// Combines two discrete shapes point-wise with fn, over every x present in
// either shape (ALL_XS); y values are looked up with stepwiseIfAtX.
let combine =
    (fn, t1: DistTypes.discreteShape, t2: DistTypes.discreteShape)
    : DistTypes.discreteShape =>
  XYShape.Combine.combine(
    ~xsSelection=ALL_XS,
    ~xToYSelection=XYShape.XtoY.stepwiseIfAtX,
    ~fn,
    t1,
    t2,
  );
// Lifts fn to optional arguments, substituting 0.0 for a missing value.
let _default0 = (fn, a, b) => {
  let orZero = E.O.default(0.0);
  fn(orZero(a), orZero(b));
};
// Folds a collection of discrete shapes into one, starting from the empty
// shape and combining with fn (missing y values count as 0.0).
let reduce = (fn, items) => {
  let step = combine(_default0(fn));
  E.A.fold_left(step, empty, items);
};
module T =
Dist({
type t = DistTypes.discreteShape;
@ -195,7 +214,14 @@ module Discrete = {
|> integral(~cache)
|> Continuous.getShape
|> XYShape.YtoX.linear(f);
// Sum of x_i * y_i over the shape. This is the mean only if the ys are
// probability masses summing to 1 (assumed here, not checked).
let getMean = (t: t): float => {
  let weighted = (i, x) => x *. t.ys[i];
  E.A.reducei(t.xs, 0.0, (total, x, i) => total +. weighted(i, x));
};
// Variance as (mean of squares) - (mean squared); the mean of squares is the
// mean of the same shape with every x replaced by x^2.
let getVariance = (t: t): float => {
  let meanOfSquares = shape =>
    shape |> XYShape.Analysis.squareXYShape |> getMean;
  XYShape.Analysis.getVarianceDangerously(t, getMean, meanOfSquares);
};
});
};
// TODO: I think this shouldn't assume continuous/discrete are normalized to 1.0, and thus should not need the discreteProbabilityMassFraction being separate.
@ -366,6 +392,30 @@ module Mixed = {
discreteProbabilityMassFraction,
};
};
// Mean of the mixture: the discrete and continuous means weighted by
// discreteProbabilityMassFraction and its complement. When the fraction is
// exactly 1.0 or 0.0 only the relevant component is evaluated.
let getMean = (t: t): float => {
  let discreteShare = t.discreteProbabilityMassFraction;
  let continuousShare = 1.0 -. discreteShare;
  switch (discreteShare) {
  | 1.0 => Discrete.T.getMean(t.discrete)
  | 0.0 => Continuous.T.getMean(t.continuous)
  | _ =>
    Discrete.T.getMean(t.discrete)
    *. discreteShare
    +. Continuous.T.getMean(t.continuous)
    *. continuousShare
  };
};
// Variance of the mixture. Pure discrete / pure continuous cases delegate to
// the component; otherwise it is (mean of squares) - (mean squared), where
// the mean of squares is weighted the same way as the mean.
let getVariance = (t: t): float => {
  let mixtureMeanOfSquares = (m: t) => {
    let discretePart =
      Discrete.T.getMean(XYShape.Analysis.squareXYShape(m.discrete))
      *. m.discreteProbabilityMassFraction;
    let continuousPart =
      XYShape.Analysis.getMeanOfSquaresContinuousShape(m.continuous)
      *. (1.0 -. m.discreteProbabilityMassFraction);
    discretePart +. continuousPart;
  };
  switch (t.discreteProbabilityMassFraction) {
  | 1.0 => Discrete.T.getVariance(t.discrete)
  | 0.0 => Continuous.T.getVariance(t.continuous)
  | _ =>
    XYShape.Analysis.getVarianceDangerously(t, getMean, mixtureMeanOfSquares)
  };
};
});
};
@ -470,6 +520,18 @@ module Shape = {
Discrete.T.mapY(fn),
Continuous.T.mapY(fn),
));
// Mean of a shape, dispatched to the module matching its constructor.
let getMean = (t: t): float =>
  switch (t) {
  | Continuous(c) => Continuous.T.getMean(c)
  | Discrete(d) => Discrete.T.getMean(d)
  | Mixed(mx) => Mixed.T.getMean(mx)
  };
// Variance of a shape, dispatched the same way as getMean.
let getVariance = (t: t): float =>
  switch (t) {
  | Continuous(c) => Continuous.T.getVariance(c)
  | Discrete(d) => Discrete.T.getVariance(d)
  | Mixed(mx) => Mixed.T.getVariance(mx)
  };
});
};
@ -584,6 +646,8 @@ module DistPlus = {
// Looks up x for the integral value f on the underlying shape. The ~cache
// argument is ignored; the distribution's own integralCache is used instead.
let integralYtoX = (~cache as _, f, t: t) => {
  let ownCache = Some(t.integralCache);
  Shape.T.Integral.yToX(~cache=ownCache, f, toShape(t));
};
// Mean and variance of a DistPlus are those of its underlying shape.
let getMean = (t: t) => t.shape |> Shape.T.getMean;
let getVariance = (t: t) => t.shape |> Shape.T.getVariance;
});
};

View File

@ -297,4 +297,57 @@ let logScorePoint = (sampleCount, t1, t2) =>
|> Range.integrateWithTriangles
|> E.O.fmap(T.accumulateYs((+.)))
|> E.O.fmap(Pairs.last)
|> E.O.fmap(Pairs.y);
|> E.O.fmap(Pairs.y);
// Moment calculations (mean, mean of squares, variance) over xy shapes.
module Analysis = {
  // Integrates g(x) * f(x) over a continuous shape, one segment at a time,
  // where f is the shape's density (stepwise or linear between consecutive
  // points) and g is fixed by the two antiderivative arguments:
  //   ~indefiniteIntegralStepwise(p, h): antiderivative at p of g(x) * h
  //   ~indefiniteIntegralLinear(p, a, b): antiderivative at p of
  //     g(x) * (a + b * x)
  // The defaults use g(x) = x, so the default result is the integral of
  // x * f(x), i.e. the mean when f integrates to 1.
  // For stepwise shapes the height of segment [x_(i-1), x_i] is y_(i-1).
  let integrateContinuousShape =
      (
        ~indefiniteIntegralStepwise=(p, h1) => (h1 *. (p ** 2.0) /. 2.0),
        ~indefiniteIntegralLinear=(p, a, b) =>
                                    (a *. (p ** 2.0) /. 2.0)
                                    +. (b *. (p ** 3.0) /. 3.0),
        t: DistTypes.continuousShape,
      )
      : float => {
    let xs = t.xyShape.xs;
    let ys = t.xyShape.ys;
    E.A.reducei(xs, 0.0, (acc, _x, i) => {
      let areaUnderIntegral =
        switch (t.interpolation, i) {
        // The first point has no segment to its left.
        | (_, 0) => 0.0
        | (`Stepwise, _) =>
          indefiniteIntegralStepwise(xs[i], ys[i - 1])
          -. indefiniteIntegralStepwise(xs[i - 1], ys[i - 1])
        | (`Linear, _) => {
            let x1 = xs[i - 1];
            let x2 = xs[i];
            let h1 = ys[i - 1];
            let h2 = ys[i];
            if (x1 == x2) {
              // Zero-width segment (repeated x): the slope below would be a
              // division by zero and poison the whole sum with NaN. Such a
              // segment has no area, so it contributes nothing.
              0.0;
            } else {
              // Line through (x1, h1) and (x2, h2): f(x) = a + b * x.
              let b = (h1 -. h2) /. (x1 -. x2);
              let a = h1 -. b *. x1;
              indefiniteIntegralLinear(x2, a, b)
              -. indefiniteIntegralLinear(x1, a, b);
            };
          }
        };
      acc +. areaUnderIntegral;
    });
  };

  // Variance as getMeanOfSquares(t) - getMean(t)^2.
  // NOTE(review): presumably "Dangerously" because subtracting two nearly
  // equal floats can lose precision (and can even come out slightly
  // negative); confirm with the author.
  let getVarianceDangerously =
      (t: 't, getMean: 't => float, getMeanOfSquares: 't => float): float => {
    let meanSquared = getMean(t) ** 2.0;
    let meanOfSquares = getMeanOfSquares(t);
    meanOfSquares -. meanSquared;
  };

  // Same shape with every x replaced by x^2; ys are untouched. The resulting
  // xs are not re-sorted.
  let squareXYShape = (t: DistTypes.xyShape): DistTypes.xyShape => {
    ...t,
    xs: E.A.fmap(x => x ** 2.0, t.xs),
  };

  // Integral of x^2 * f(x) over a continuous shape: integrateContinuousShape
  // with the antiderivatives for g(x) = x^2.
  let getMeanOfSquaresContinuousShape = (t: DistTypes.continuousShape) => {
    let indefiniteIntegralLinear = (p, a, b) =>
      (a *. (p ** 3.0) /. 3.0) +. (b *. (p ** 4.0) /. 4.0);
    let indefiniteIntegralStepwise = (p, h1) => h1 *. (p ** 3.0) /. 3.0;
    integrateContinuousShape(
      ~indefiniteIntegralStepwise,
      ~indefiniteIntegralLinear,
      t,
    );
  };
};

View File

@ -259,6 +259,9 @@ module A = {
let fold_right = Array.fold_right;
let concatMany = Belt.Array.concatMany;
let keepMap = Belt.Array.keepMap;
// Alias for Array.init.
let init = Array.init;
// Alias for Belt.Array.reduce, called as reduce(array, initial, (acc, item) => ...).
let reduce = Belt.Array.reduce;
// Alias for Belt.Array.reduceWithIndex; the callback also receives the
// element index: (acc, item, index) => ...
let reducei = Belt.Array.reduceWithIndex;
// Smallest element of a (by <), or None when there is no element at index 0.
let min = a => {
  let smaller = (i, j) => i < j ? i : j;
  get(a, 0) |> O.fmap(first => Belt.Array.reduce(a, first, smaller));
};