Merge pull request #384 from quantified-uncertainty/normalize-improvements

Minor Math improvements for pointSet distributions
This commit is contained in:
Ozzie Gooen 2022-04-27 13:11:47 -04:00 committed by GitHub
commit 079e8f6c8d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 65 additions and 34 deletions

View File

@ -65,7 +65,7 @@ describe("(Algebraic) addition of distributions", () => {
| None => "algebraicAdd has"->expect->toBe("failed")
// This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad.
// sometimes it works with ~digits=2.
| Some(x) => x->expect->toBeSoCloseTo(9.78655777150074, ~digits=1) // (uniformMean +. betaMean)
| Some(x) => x->expect->toBeSoCloseTo(9.786831807237022, ~digits=1) // (uniformMean +. betaMean)
}
})
test("beta(alpha=2, beta=5) + uniform(low=9, high=10)", () => {
@ -82,7 +82,7 @@ describe("(Algebraic) addition of distributions", () => {
| None => "algebraicAdd has"->expect->toBe("failed")
// This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad.
// sometimes it works with ~digits=2.
| Some(x) => x->expect->toBeSoCloseTo(9.786753454457116, ~digits=1) // (uniformMean +. betaMean)
| Some(x) => x->expect->toBeSoCloseTo(9.784290207736126, ~digits=1) // (uniformMean +. betaMean)
}
})
})
@ -162,6 +162,7 @@ describe("(Algebraic) addition of distributions", () => {
switch received {
| None => "algebraicAdd has"->expect->toBe("failed")
// This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad.
// sometimes it works with ~digits=4.
// This value was calculated by a python script
| Some(x) => x->expect->toBeSoCloseTo(0.979023, ~digits=0)
}
@ -360,7 +361,7 @@ describe("(Algebraic) addition of distributions", () => {
| None => "algebraicAdd has"->expect->toBe("failed")
// This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad.
// sometimes it works with ~digits=2.
| Some(x) => x->expect->toBeSoCloseTo(9.174267267465632, ~digits=0)
| Some(x) => x->expect->toBeSoCloseTo(9.190872365862756, ~digits=0)
}
})
})

View File

@ -37,4 +37,5 @@ describe("Continuous and discrete splits", () => {
)
let toArr2 = discrete2 |> E.FloatFloatMap.toArray
makeTest("splitMedium at count=500", toArr2 |> Belt.Array.length, 500)
// makeTest("foo", [] |> Belt.Array.length, 500)
})

View File

@ -58,14 +58,15 @@ describe("Distribution", () => {
);
test("mean", () => {
expect(dist.mean().value).toBeCloseTo(8.704375514292865);
expect(dist.mean().value).toBeCloseTo(9.5555555);
});
test("pdf", () => {
expect(dist.pdf(5.0).value).toBeCloseTo(0.052007455285386944, 1);
expect(dist.pdf(5.0).value).toBeCloseTo(0.10499097598222966, 1);
});
test("cdf", () => {
expect(dist.cdf(5.0).value).toBeCloseTo(
dist1Samples.filter((x) => x <= 5).length / dist1SampleCount
dist1Samples.filter((x) => x <= 5).length / dist1SampleCount,
1
);
});
test("inv", () => {
@ -77,7 +78,7 @@ describe("Distribution", () => {
).toEqual(Ok("Point Set Distribution"));
});
test("toSparkline", () => {
expect(dist.toSparkline(20).value).toEqual("▁▁▃▅███▆▄▃▂▁▁▂▂▃▂▁▁▁");
expect(dist.toSparkline(20).value).toEqual("▁▁▃▇█▇▄▂▂▂▁▁▁▁▁▂▂▁▁▁");
});
test("algebraicAdd", () => {
expect(
@ -91,6 +92,6 @@ describe("Distribution", () => {
resultMap(dist.pointwiseAdd(dist2), (r: Distribution) =>
r.toSparkline(20)
).value
).toEqual(Ok("▁▂▅██▅▅▅▆▆▇▅▄▃▃▂▂▁▁▁"));
).toEqual(Ok("▁▂██▃▃▃▃▄▅▄▃▃▂▂▂▁▁▁▁"));
});
});

View File

@ -59,13 +59,7 @@ describe("cumulative density function", () => {
{ sampleCount: n, xyPointLength: 100 }
);
let cdfValue = dist.cdf(max).value;
let min = Math.min(...xs);
let epsilon = 5e-3;
if (max - min < epsilon) {
expect(cdfValue).toBeLessThan(1 - epsilon);
} else {
expect(dist.cdf(max).value).toBeGreaterThan(1 - epsilon);
}
expect(cdfValue).toBeCloseTo(1.0, 2);
})
);
});

View File

@ -46,18 +46,25 @@ let toFloatOperation = (
~toPointSetFn: toPointSetFn,
~distToFloatOperation: Operation.distToFloatOperation,
) => {
let symbolicSolution = switch (t: t) {
| Symbolic(r) =>
switch SymbolicDist.T.operate(distToFloatOperation, r) {
| Ok(f) => Some(f)
| _ => None
}
let trySymbolicSolution = switch (t: t) {
| Symbolic(r) => SymbolicDist.T.operate(distToFloatOperation, r)->E.R.toOption
| _ => None
}
switch symbolicSolution {
let trySampleSetSolution = switch ((t: t), distToFloatOperation) {
| (SampleSet(sampleSet), #Mean) => SampleSetDist.mean(sampleSet)->Some
| (SampleSet(sampleSet), #Sample) => SampleSetDist.sample(sampleSet)->Some
| (SampleSet(sampleSet), #Inv(r)) => SampleSetDist.percentile(sampleSet, r)->Some
| _ => None
}
switch trySymbolicSolution {
| Some(r) => Ok(r)
| None => toPointSetFn(t)->E.R2.fmap(PointSetDist.operate(distToFloatOperation))
| None =>
switch trySampleSetSolution {
| Some(r) => Ok(r)
| None => toPointSetFn(t)->E.R2.fmap(PointSetDist.operate(distToFloatOperation))
}
}
}

View File

@ -98,3 +98,13 @@ let map2 = (~fn: (float, float) => result<float, Operation.Error.t>, ~t1: t, ~t2
E.R.toExn("Input of samples should be larger than 5", make(x))
)
}
let mean = t => T.get(t)->E.A.Floats.mean
let geomean = t => T.get(t)->E.A.Floats.geomean
let mode = t => T.get(t)->E.A.Floats.mode
let sum = t => T.get(t)->E.A.Floats.sum
let min = t => T.get(t)->E.A.Floats.min
let max = t => T.get(t)->E.A.Floats.max
let stdev = t => T.get(t)->E.A.Floats.stdev
let variance = t => T.get(t)->E.A.Floats.variance
let percentile = (t, f) => T.get(t)->E.A.Floats.percentile(f)

View File

@ -133,9 +133,17 @@ let toPointSetDist = (
~discrete=Some(discrete),
)
/*
I'm surprised that this doesn't come out normalized. My guess is that the KDE library
we're using is standardizing on something else. If we ever change that library, we should
check to see if we still need to do this.
*/
let normalizedPointSet = pointSetDist->E.O2.fmap(PointSetDist.T.normalize)
let samplesParse: Internals.Types.outputs = {
continuousParseParams: pdf |> E.O.fmap(snd),
pointSetDist: pointSetDist,
pointSetDist: normalizedPointSet,
}
samplesParse

View File

@ -521,14 +521,15 @@ module A = {
let split = (sortedArray: array<float>) => {
let continuous = []
let discrete = FloatFloatMap.empty()
Belt.Array.forEachWithIndex(sortedArray, (index, element) => {
let maxIndex = (sortedArray |> Array.length) - 1
let possiblySimilarElements = switch index {
| 0 => [index + 1]
| n if n == maxIndex => [index - 1]
| _ => [index - 1, index + 1]
} |> Belt.Array.map(_, r => sortedArray[r])
let hasSimilarElement = Belt.Array.some(possiblySimilarElements, r => r == element)
Belt.Array.forEachWithIndex(sortedArray, (_, element) => {
// let maxIndex = (sortedArray |> Array.length) - 1
// let possiblySimilarElements = switch index {
// | 0 => [index + 1]
// | n if n == maxIndex => [index - 1]
// | _ => [index - 1, index + 1]
// } |> Belt.Array.map(_, r => sortedArray[r])
// let hasSimilarElement = Belt.Array.some(possiblySimilarElements, r => r == element)
let hasSimilarElement = false
hasSimilarElement
? FloatFloatMap.increment(element, discrete)
: {
@ -544,10 +545,18 @@ module A = {
}
module Floats = {
let sum = Belt.Array.reduce(_, 0., (i, j) => i +. j)
let mean = a => sum(a) /. (Array.length(a) |> float_of_int)
let mean = Jstat.mean
let geomean = Jstat.geomean
let mode = Jstat.mode
let variance = Jstat.variance
let stdev = Jstat.stdev
let sum = Jstat.sum
let random = Js.Math.random_int
//Passing true for the exclusive parameter excludes both endpoints of the range.
//https://jstat.github.io/all.html
let percentile = (a, b) => Jstat.percentile(a, b, false)
// Gives an array with all the differences between values
// diff([1,5,3,7]) = [4,-2,4]
let diff = (arr: array<float>): array<float> =>