Merge pull request #384 from quantified-uncertainty/normalize-improvements
Minor Math improvements for pointSet distributions
This commit is contained in:
commit
079e8f6c8d
|
@ -65,7 +65,7 @@ describe("(Algebraic) addition of distributions", () => {
|
|||
| None => "algebraicAdd has"->expect->toBe("failed")
|
||||
// This is nondeterministic: CI could fail and then pass when you click rerun, which is bad.
|
||||
// sometimes it works with ~digits=2.
|
||||
| Some(x) => x->expect->toBeSoCloseTo(9.78655777150074, ~digits=1) // (uniformMean +. betaMean)
|
||||
| Some(x) => x->expect->toBeSoCloseTo(9.786831807237022, ~digits=1) // (uniformMean +. betaMean)
|
||||
}
|
||||
})
|
||||
test("beta(alpha=2, beta=5) + uniform(low=9, high=10)", () => {
|
||||
|
@ -82,7 +82,7 @@ describe("(Algebraic) addition of distributions", () => {
|
|||
| None => "algebraicAdd has"->expect->toBe("failed")
|
||||
// This is nondeterministic: CI could fail and then pass when you click rerun, which is bad.
|
||||
// sometimes it works with ~digits=2.
|
||||
| Some(x) => x->expect->toBeSoCloseTo(9.786753454457116, ~digits=1) // (uniformMean +. betaMean)
|
||||
| Some(x) => x->expect->toBeSoCloseTo(9.784290207736126, ~digits=1) // (uniformMean +. betaMean)
|
||||
}
|
||||
})
|
||||
})
|
||||
|
@ -162,6 +162,7 @@ describe("(Algebraic) addition of distributions", () => {
|
|||
switch received {
|
||||
| None => "algebraicAdd has"->expect->toBe("failed")
|
||||
// This is nondeterministic: CI could fail and then pass when you click rerun, which is bad.
|
||||
// sometimes it works with ~digits=4.
|
||||
// This value was calculated by a python script
|
||||
| Some(x) => x->expect->toBeSoCloseTo(0.979023, ~digits=0)
|
||||
}
|
||||
|
@ -360,7 +361,7 @@ describe("(Algebraic) addition of distributions", () => {
|
|||
| None => "algebraicAdd has"->expect->toBe("failed")
|
||||
// This is nondeterministic: CI could fail and then pass when you click rerun, which is bad.
|
||||
// sometimes it works with ~digits=2.
|
||||
| Some(x) => x->expect->toBeSoCloseTo(9.174267267465632, ~digits=0)
|
||||
| Some(x) => x->expect->toBeSoCloseTo(9.190872365862756, ~digits=0)
|
||||
}
|
||||
})
|
||||
})
|
||||
|
|
|
@ -37,4 +37,5 @@ describe("Continuous and discrete splits", () => {
|
|||
)
|
||||
let toArr2 = discrete2 |> E.FloatFloatMap.toArray
|
||||
makeTest("splitMedium at count=500", toArr2 |> Belt.Array.length, 500)
|
||||
// makeTest("foo", [] |> Belt.Array.length, 500)
|
||||
})
|
||||
|
|
|
@ -58,14 +58,15 @@ describe("Distribution", () => {
|
|||
);
|
||||
|
||||
test("mean", () => {
|
||||
expect(dist.mean().value).toBeCloseTo(8.704375514292865);
|
||||
expect(dist.mean().value).toBeCloseTo(9.5555555);
|
||||
});
|
||||
test("pdf", () => {
|
||||
expect(dist.pdf(5.0).value).toBeCloseTo(0.052007455285386944, 1);
|
||||
expect(dist.pdf(5.0).value).toBeCloseTo(0.10499097598222966, 1);
|
||||
});
|
||||
test("cdf", () => {
|
||||
expect(dist.cdf(5.0).value).toBeCloseTo(
|
||||
dist1Samples.filter((x) => x <= 5).length / dist1SampleCount
|
||||
dist1Samples.filter((x) => x <= 5).length / dist1SampleCount,
|
||||
1
|
||||
);
|
||||
});
|
||||
test("inv", () => {
|
||||
|
@ -77,7 +78,7 @@ describe("Distribution", () => {
|
|||
).toEqual(Ok("Point Set Distribution"));
|
||||
});
|
||||
test("toSparkline", () => {
|
||||
expect(dist.toSparkline(20).value).toEqual("▁▁▃▅███▆▄▃▂▁▁▂▂▃▂▁▁▁");
|
||||
expect(dist.toSparkline(20).value).toEqual("▁▁▃▇█▇▄▂▂▂▁▁▁▁▁▂▂▁▁▁");
|
||||
});
|
||||
test("algebraicAdd", () => {
|
||||
expect(
|
||||
|
@ -91,6 +92,6 @@ describe("Distribution", () => {
|
|||
resultMap(dist.pointwiseAdd(dist2), (r: Distribution) =>
|
||||
r.toSparkline(20)
|
||||
).value
|
||||
).toEqual(Ok("▁▂▅██▅▅▅▆▆▇▅▄▃▃▂▂▁▁▁"));
|
||||
).toEqual(Ok("▁▂██▃▃▃▃▄▅▄▃▃▂▂▂▁▁▁▁"));
|
||||
});
|
||||
});
|
||||
|
|
|
@ -59,13 +59,7 @@ describe("cumulative density function", () => {
|
|||
{ sampleCount: n, xyPointLength: 100 }
|
||||
);
|
||||
let cdfValue = dist.cdf(max).value;
|
||||
let min = Math.min(...xs);
|
||||
let epsilon = 5e-3;
|
||||
if (max - min < epsilon) {
|
||||
expect(cdfValue).toBeLessThan(1 - epsilon);
|
||||
} else {
|
||||
expect(dist.cdf(max).value).toBeGreaterThan(1 - epsilon);
|
||||
}
|
||||
expect(cdfValue).toBeCloseTo(1.0, 2);
|
||||
})
|
||||
);
|
||||
});
|
||||
|
|
|
@ -46,20 +46,27 @@ let toFloatOperation = (
|
|||
~toPointSetFn: toPointSetFn,
|
||||
~distToFloatOperation: Operation.distToFloatOperation,
|
||||
) => {
|
||||
let symbolicSolution = switch (t: t) {
|
||||
| Symbolic(r) =>
|
||||
switch SymbolicDist.T.operate(distToFloatOperation, r) {
|
||||
| Ok(f) => Some(f)
|
||||
| _ => None
|
||||
}
|
||||
let trySymbolicSolution = switch (t: t) {
|
||||
| Symbolic(r) => SymbolicDist.T.operate(distToFloatOperation, r)->E.R.toOption
|
||||
| _ => None
|
||||
}
|
||||
|
||||
switch symbolicSolution {
|
||||
let trySampleSetSolution = switch ((t: t), distToFloatOperation) {
|
||||
| (SampleSet(sampleSet), #Mean) => SampleSetDist.mean(sampleSet)->Some
|
||||
| (SampleSet(sampleSet), #Sample) => SampleSetDist.sample(sampleSet)->Some
|
||||
| (SampleSet(sampleSet), #Inv(r)) => SampleSetDist.percentile(sampleSet, r)->Some
|
||||
| _ => None
|
||||
}
|
||||
|
||||
switch trySymbolicSolution {
|
||||
| Some(r) => Ok(r)
|
||||
| None =>
|
||||
switch trySampleSetSolution {
|
||||
| Some(r) => Ok(r)
|
||||
| None => toPointSetFn(t)->E.R2.fmap(PointSetDist.operate(distToFloatOperation))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: If it's a pointSet, but the xyPointLength is different from the one the pointSet already has, the pointSet should change.
|
||||
// This is tricky because of the case of discrete distributions.
|
||||
|
|
|
@ -98,3 +98,13 @@ let map2 = (~fn: (float, float) => result<float, Operation.Error.t>, ~t1: t, ~t2
|
|||
E.R.toExn("Input of samples should be larger than 5", make(x))
|
||||
)
|
||||
}
|
||||
|
||||
let mean = t => T.get(t)->E.A.Floats.mean
|
||||
let geomean = t => T.get(t)->E.A.Floats.geomean
|
||||
let mode = t => T.get(t)->E.A.Floats.mode
|
||||
let sum = t => T.get(t)->E.A.Floats.sum
|
||||
let min = t => T.get(t)->E.A.Floats.min
|
||||
let max = t => T.get(t)->E.A.Floats.max
|
||||
let stdev = t => T.get(t)->E.A.Floats.stdev
|
||||
let variance = t => T.get(t)->E.A.Floats.variance
|
||||
let percentile = (t, f) => T.get(t)->E.A.Floats.percentile(f)
|
||||
|
|
|
@ -133,9 +133,17 @@ let toPointSetDist = (
|
|||
~discrete=Some(discrete),
|
||||
)
|
||||
|
||||
/*
|
||||
I'm surprised that this doesn't come out normalized. My guess is that the KDE library
|
||||
we're using is standardizing on something else. If we ever change that library, we should
|
||||
check to see if we still need to do this.
|
||||
*/
|
||||
|
||||
let normalizedPointSet = pointSetDist->E.O2.fmap(PointSetDist.T.normalize)
|
||||
|
||||
let samplesParse: Internals.Types.outputs = {
|
||||
continuousParseParams: pdf |> E.O.fmap(snd),
|
||||
pointSetDist: pointSetDist,
|
||||
pointSetDist: normalizedPointSet,
|
||||
}
|
||||
|
||||
samplesParse
|
||||
|
|
|
@ -521,14 +521,15 @@ module A = {
|
|||
let split = (sortedArray: array<float>) => {
|
||||
let continuous = []
|
||||
let discrete = FloatFloatMap.empty()
|
||||
Belt.Array.forEachWithIndex(sortedArray, (index, element) => {
|
||||
let maxIndex = (sortedArray |> Array.length) - 1
|
||||
let possiblySimilarElements = switch index {
|
||||
| 0 => [index + 1]
|
||||
| n if n == maxIndex => [index - 1]
|
||||
| _ => [index - 1, index + 1]
|
||||
} |> Belt.Array.map(_, r => sortedArray[r])
|
||||
let hasSimilarElement = Belt.Array.some(possiblySimilarElements, r => r == element)
|
||||
Belt.Array.forEachWithIndex(sortedArray, (_, element) => {
|
||||
// let maxIndex = (sortedArray |> Array.length) - 1
|
||||
// let possiblySimilarElements = switch index {
|
||||
// | 0 => [index + 1]
|
||||
// | n if n == maxIndex => [index - 1]
|
||||
// | _ => [index - 1, index + 1]
|
||||
// } |> Belt.Array.map(_, r => sortedArray[r])
|
||||
// let hasSimilarElement = Belt.Array.some(possiblySimilarElements, r => r == element)
|
||||
let hasSimilarElement = false
|
||||
hasSimilarElement
|
||||
? FloatFloatMap.increment(element, discrete)
|
||||
: {
|
||||
|
@ -544,10 +545,18 @@ module A = {
|
|||
}
|
||||
|
||||
module Floats = {
|
||||
let sum = Belt.Array.reduce(_, 0., (i, j) => i +. j)
|
||||
let mean = a => sum(a) /. (Array.length(a) |> float_of_int)
|
||||
let mean = Jstat.mean
|
||||
let geomean = Jstat.geomean
|
||||
let mode = Jstat.mode
|
||||
let variance = Jstat.variance
|
||||
let stdev = Jstat.stdev
|
||||
let sum = Jstat.sum
|
||||
let random = Js.Math.random_int
|
||||
|
||||
//Passing true for the exclusive parameter excludes both endpoints of the range.
|
||||
//https://jstat.github.io/all.html
|
||||
let percentile = (a, b) => Jstat.percentile(a, b, false)
|
||||
|
||||
// Gives an array with all the differences between values
|
||||
// diff([1,5,3,7]) = [4,-2,4]
|
||||
let diff = (arr: array<float>): array<float> =>
|
||||
|
|
Loading…
Reference in New Issue
Block a user