Merge pull request #384 from quantified-uncertainty/normalize-improvements

Minor Math improvements for pointSet distributions
This commit is contained in:
Ozzie Gooen 2022-04-27 13:11:47 -04:00 committed by GitHub
commit 079e8f6c8d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 65 additions and 34 deletions

View File

@ -65,7 +65,7 @@ describe("(Algebraic) addition of distributions", () => {
| None => "algebraicAdd has"->expect->toBe("failed") | None => "algebraicAdd has"->expect->toBe("failed")
// This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad. // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad.
// sometimes it works with ~digits=2. // sometimes it works with ~digits=2.
| Some(x) => x->expect->toBeSoCloseTo(9.78655777150074, ~digits=1) // (uniformMean +. betaMean) | Some(x) => x->expect->toBeSoCloseTo(9.786831807237022, ~digits=1) // (uniformMean +. betaMean)
} }
}) })
test("beta(alpha=2, beta=5) + uniform(low=9, high=10)", () => { test("beta(alpha=2, beta=5) + uniform(low=9, high=10)", () => {
@ -82,7 +82,7 @@ describe("(Algebraic) addition of distributions", () => {
| None => "algebraicAdd has"->expect->toBe("failed") | None => "algebraicAdd has"->expect->toBe("failed")
// This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad. // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad.
// sometimes it works with ~digits=2. // sometimes it works with ~digits=2.
| Some(x) => x->expect->toBeSoCloseTo(9.786753454457116, ~digits=1) // (uniformMean +. betaMean) | Some(x) => x->expect->toBeSoCloseTo(9.784290207736126, ~digits=1) // (uniformMean +. betaMean)
} }
}) })
}) })
@ -162,6 +162,7 @@ describe("(Algebraic) addition of distributions", () => {
switch received { switch received {
| None => "algebraicAdd has"->expect->toBe("failed") | None => "algebraicAdd has"->expect->toBe("failed")
// This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad. // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad.
// sometimes it works with ~digits=4.
// This value was calculated by a python script // This value was calculated by a python script
| Some(x) => x->expect->toBeSoCloseTo(0.979023, ~digits=0) | Some(x) => x->expect->toBeSoCloseTo(0.979023, ~digits=0)
} }
@ -360,7 +361,7 @@ describe("(Algebraic) addition of distributions", () => {
| None => "algebraicAdd has"->expect->toBe("failed") | None => "algebraicAdd has"->expect->toBe("failed")
// This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad. // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad.
// sometimes it works with ~digits=2. // sometimes it works with ~digits=2.
| Some(x) => x->expect->toBeSoCloseTo(9.174267267465632, ~digits=0) | Some(x) => x->expect->toBeSoCloseTo(9.190872365862756, ~digits=0)
} }
}) })
}) })

View File

@ -37,4 +37,5 @@ describe("Continuous and discrete splits", () => {
) )
let toArr2 = discrete2 |> E.FloatFloatMap.toArray let toArr2 = discrete2 |> E.FloatFloatMap.toArray
makeTest("splitMedium at count=500", toArr2 |> Belt.Array.length, 500) makeTest("splitMedium at count=500", toArr2 |> Belt.Array.length, 500)
// makeTest("foo", [] |> Belt.Array.length, 500)
}) })

View File

@ -58,14 +58,15 @@ describe("Distribution", () => {
); );
test("mean", () => { test("mean", () => {
expect(dist.mean().value).toBeCloseTo(8.704375514292865); expect(dist.mean().value).toBeCloseTo(9.5555555);
}); });
test("pdf", () => { test("pdf", () => {
expect(dist.pdf(5.0).value).toBeCloseTo(0.052007455285386944, 1); expect(dist.pdf(5.0).value).toBeCloseTo(0.10499097598222966, 1);
}); });
test("cdf", () => { test("cdf", () => {
expect(dist.cdf(5.0).value).toBeCloseTo( expect(dist.cdf(5.0).value).toBeCloseTo(
dist1Samples.filter((x) => x <= 5).length / dist1SampleCount dist1Samples.filter((x) => x <= 5).length / dist1SampleCount,
1
); );
}); });
test("inv", () => { test("inv", () => {
@ -77,7 +78,7 @@ describe("Distribution", () => {
).toEqual(Ok("Point Set Distribution")); ).toEqual(Ok("Point Set Distribution"));
}); });
test("toSparkline", () => { test("toSparkline", () => {
expect(dist.toSparkline(20).value).toEqual("▁▁▃▅███▆▄▃▂▁▁▂▂▃▂▁▁▁"); expect(dist.toSparkline(20).value).toEqual("▁▁▃▇█▇▄▂▂▂▁▁▁▁▁▂▂▁▁▁");
}); });
test("algebraicAdd", () => { test("algebraicAdd", () => {
expect( expect(
@ -91,6 +92,6 @@ describe("Distribution", () => {
resultMap(dist.pointwiseAdd(dist2), (r: Distribution) => resultMap(dist.pointwiseAdd(dist2), (r: Distribution) =>
r.toSparkline(20) r.toSparkline(20)
).value ).value
).toEqual(Ok("▁▂▅██▅▅▅▆▆▇▅▄▃▃▂▂▁▁▁")); ).toEqual(Ok("▁▂██▃▃▃▃▄▅▄▃▃▂▂▂▁▁▁▁"));
}); });
}); });

View File

@ -59,13 +59,7 @@ describe("cumulative density function", () => {
{ sampleCount: n, xyPointLength: 100 } { sampleCount: n, xyPointLength: 100 }
); );
let cdfValue = dist.cdf(max).value; let cdfValue = dist.cdf(max).value;
let min = Math.min(...xs); expect(cdfValue).toBeCloseTo(1.0, 2);
let epsilon = 5e-3;
if (max - min < epsilon) {
expect(cdfValue).toBeLessThan(1 - epsilon);
} else {
expect(dist.cdf(max).value).toBeGreaterThan(1 - epsilon);
}
}) })
); );
}); });

View File

@ -46,18 +46,25 @@ let toFloatOperation = (
~toPointSetFn: toPointSetFn, ~toPointSetFn: toPointSetFn,
~distToFloatOperation: Operation.distToFloatOperation, ~distToFloatOperation: Operation.distToFloatOperation,
) => { ) => {
let symbolicSolution = switch (t: t) { let trySymbolicSolution = switch (t: t) {
| Symbolic(r) => | Symbolic(r) => SymbolicDist.T.operate(distToFloatOperation, r)->E.R.toOption
switch SymbolicDist.T.operate(distToFloatOperation, r) {
| Ok(f) => Some(f)
| _ => None
}
| _ => None | _ => None
} }
switch symbolicSolution { let trySampleSetSolution = switch ((t: t), distToFloatOperation) {
| (SampleSet(sampleSet), #Mean) => SampleSetDist.mean(sampleSet)->Some
| (SampleSet(sampleSet), #Sample) => SampleSetDist.sample(sampleSet)->Some
| (SampleSet(sampleSet), #Inv(r)) => SampleSetDist.percentile(sampleSet, r)->Some
| _ => None
}
switch trySymbolicSolution {
| Some(r) => Ok(r) | Some(r) => Ok(r)
| None => toPointSetFn(t)->E.R2.fmap(PointSetDist.operate(distToFloatOperation)) | None =>
switch trySampleSetSolution {
| Some(r) => Ok(r)
| None => toPointSetFn(t)->E.R2.fmap(PointSetDist.operate(distToFloatOperation))
}
} }
} }

View File

@ -98,3 +98,13 @@ let map2 = (~fn: (float, float) => result<float, Operation.Error.t>, ~t1: t, ~t2
E.R.toExn("Input of samples should be larger than 5", make(x)) E.R.toExn("Input of samples should be larger than 5", make(x))
) )
} }
let mean = t => T.get(t)->E.A.Floats.mean
let geomean = t => T.get(t)->E.A.Floats.geomean
let mode = t => T.get(t)->E.A.Floats.mode
let sum = t => T.get(t)->E.A.Floats.sum
let min = t => T.get(t)->E.A.Floats.min
let max = t => T.get(t)->E.A.Floats.max
let stdev = t => T.get(t)->E.A.Floats.stdev
let variance = t => T.get(t)->E.A.Floats.variance
let percentile = (t, f) => T.get(t)->E.A.Floats.percentile(f)

View File

@ -133,9 +133,17 @@ let toPointSetDist = (
~discrete=Some(discrete), ~discrete=Some(discrete),
) )
/*
I'm surprised that this doesn't come out normalized. My guess is that the KDE library
we're using is standardizing on something else. If we ever change that library, we should
check to see if we still need to do this.
*/
let normalizedPointSet = pointSetDist->E.O2.fmap(PointSetDist.T.normalize)
let samplesParse: Internals.Types.outputs = { let samplesParse: Internals.Types.outputs = {
continuousParseParams: pdf |> E.O.fmap(snd), continuousParseParams: pdf |> E.O.fmap(snd),
pointSetDist: pointSetDist, pointSetDist: normalizedPointSet,
} }
samplesParse samplesParse

View File

@ -521,14 +521,15 @@ module A = {
let split = (sortedArray: array<float>) => { let split = (sortedArray: array<float>) => {
let continuous = [] let continuous = []
let discrete = FloatFloatMap.empty() let discrete = FloatFloatMap.empty()
Belt.Array.forEachWithIndex(sortedArray, (index, element) => { Belt.Array.forEachWithIndex(sortedArray, (_, element) => {
let maxIndex = (sortedArray |> Array.length) - 1 // let maxIndex = (sortedArray |> Array.length) - 1
let possiblySimilarElements = switch index { // let possiblySimilarElements = switch index {
| 0 => [index + 1] // | 0 => [index + 1]
| n if n == maxIndex => [index - 1] // | n if n == maxIndex => [index - 1]
| _ => [index - 1, index + 1] // | _ => [index - 1, index + 1]
} |> Belt.Array.map(_, r => sortedArray[r]) // } |> Belt.Array.map(_, r => sortedArray[r])
let hasSimilarElement = Belt.Array.some(possiblySimilarElements, r => r == element) // let hasSimilarElement = Belt.Array.some(possiblySimilarElements, r => r == element)
let hasSimilarElement = false
hasSimilarElement hasSimilarElement
? FloatFloatMap.increment(element, discrete) ? FloatFloatMap.increment(element, discrete)
: { : {
@ -544,10 +545,18 @@ module A = {
} }
module Floats = { module Floats = {
let sum = Belt.Array.reduce(_, 0., (i, j) => i +. j) let mean = Jstat.mean
let mean = a => sum(a) /. (Array.length(a) |> float_of_int) let geomean = Jstat.geomean
let mode = Jstat.mode
let variance = Jstat.variance
let stdev = Jstat.stdev
let sum = Jstat.sum
let random = Js.Math.random_int let random = Js.Math.random_int
//Passing true for the exclusive parameter excludes both endpoints of the range.
//https://jstat.github.io/all.html
let percentile = (a, b) => Jstat.percentile(a, b, false)
// Gives an array with all the differences between values // Gives an array with all the differences between values
// diff([1,5,3,7]) = [4,-2,4] // diff([1,5,3,7]) = [4,-2,4]
let diff = (arr: array<float>): array<float> => let diff = (arr: array<float>): array<float> =>