Merge pull request #384 from quantified-uncertainty/normalize-improvements
Minor Math improvements for pointSet distributions
commit 079e8f6c8d

@@ -65,7 +65,7 @@ describe("(Algebraic) addition of distributions", () => {
 | None => "algebraicAdd has"->expect->toBe("failed")
 // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad.
 // sometimes it works with ~digits=2.
-| Some(x) => x->expect->toBeSoCloseTo(9.78655777150074, ~digits=1) // (uniformMean +. betaMean)
+| Some(x) => x->expect->toBeSoCloseTo(9.786831807237022, ~digits=1) // (uniformMean +. betaMean)
 }
 })
 test("beta(alpha=2, beta=5) + uniform(low=9, high=10)", () => {

@@ -82,7 +82,7 @@ describe("(Algebraic) addition of distributions", () => {
 | None => "algebraicAdd has"->expect->toBe("failed")
 // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad.
 // sometimes it works with ~digits=2.
-| Some(x) => x->expect->toBeSoCloseTo(9.786753454457116, ~digits=1) // (uniformMean +. betaMean)
+| Some(x) => x->expect->toBeSoCloseTo(9.784290207736126, ~digits=1) // (uniformMean +. betaMean)
 }
 })
 })

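For reference, the analytic mean these sampled sums should hover around is just the sum of the component means (expectation is additive), using the beta(alpha=2, beta=5) and uniform(low=9, high=10) parameters from the test name:

$$
\mathbb{E}[X + Y] = \mathbb{E}[X] + \mathbb{E}[Y]
= \frac{\alpha}{\alpha + \beta} + \frac{\text{low} + \text{high}}{2}
= \frac{2}{7} + 9.5 \approx 9.7857
$$

Monte Carlo estimates such as 9.786831807237022 and 9.784290207736126 scatter around this value, which is why the assertions above only require agreement to ~digits=1.
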
@@ -162,6 +162,7 @@ describe("(Algebraic) addition of distributions", () => {
 switch received {
 | None => "algebraicAdd has"->expect->toBe("failed")
 // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad.
+// sometimes it works with ~digits=4.
 // This value was calculated by a python script
 | Some(x) => x->expect->toBeSoCloseTo(0.979023, ~digits=0)
 }

@@ -360,7 +361,7 @@ describe("(Algebraic) addition of distributions", () => {
 | None => "algebraicAdd has"->expect->toBe("failed")
 // This is nondeterministic, we could be in a situation where ci fails but you click rerun and it passes, which is bad.
 // sometimes it works with ~digits=2.
-| Some(x) => x->expect->toBeSoCloseTo(9.174267267465632, ~digits=0)
+| Some(x) => x->expect->toBeSoCloseTo(9.190872365862756, ~digits=0)
 }
 })
 })

@@ -37,4 +37,5 @@ describe("Continuous and discrete splits", () => {
 )
 let toArr2 = discrete2 |> E.FloatFloatMap.toArray
 makeTest("splitMedium at count=500", toArr2 |> Belt.Array.length, 500)
+// makeTest("foo", [] |> Belt.Array.length, 500)
 })

@@ -58,14 +58,15 @@ describe("Distribution", () => {
 );

 test("mean", () => {
-expect(dist.mean().value).toBeCloseTo(8.704375514292865);
+expect(dist.mean().value).toBeCloseTo(9.5555555);
 });
 test("pdf", () => {
-expect(dist.pdf(5.0).value).toBeCloseTo(0.052007455285386944, 1);
+expect(dist.pdf(5.0).value).toBeCloseTo(0.10499097598222966, 1);
 });
 test("cdf", () => {
 expect(dist.cdf(5.0).value).toBeCloseTo(
-dist1Samples.filter((x) => x <= 5).length / dist1SampleCount
+dist1Samples.filter((x) => x <= 5).length / dist1SampleCount,
+1
 );
 });
 test("inv", () => {

@@ -77,7 +78,7 @@ describe("Distribution", () => {
 ).toEqual(Ok("Point Set Distribution"));
 });
 test("toSparkline", () => {
-expect(dist.toSparkline(20).value).toEqual("▁▁▃▅███▆▄▃▂▁▁▂▂▃▂▁▁▁");
+expect(dist.toSparkline(20).value).toEqual("▁▁▃▇█▇▄▂▂▂▁▁▁▁▁▂▂▁▁▁");
 });
 test("algebraicAdd", () => {
 expect(

@@ -91,6 +92,6 @@ describe("Distribution", () => {
 resultMap(dist.pointwiseAdd(dist2), (r: Distribution) =>
 r.toSparkline(20)
 ).value
-).toEqual(Ok("▁▂▅██▅▅▅▆▆▇▅▄▃▃▂▂▁▁▁"));
+).toEqual(Ok("▁▂██▃▃▃▃▄▅▄▃▃▂▂▂▁▁▁▁"));
 });
 });

@@ -59,13 +59,7 @@ describe("cumulative density function", () => {
 { sampleCount: n, xyPointLength: 100 }
 );
 let cdfValue = dist.cdf(max).value;
-let min = Math.min(...xs);
-let epsilon = 5e-3;
-if (max - min < epsilon) {
-expect(cdfValue).toBeLessThan(1 - epsilon);
-} else {
-expect(dist.cdf(max).value).toBeGreaterThan(1 - epsilon);
-}
+expect(cdfValue).toBeCloseTo(1.0, 2);
 })
 );
 });

@@ -46,20 +46,27 @@ let toFloatOperation = (
 ~toPointSetFn: toPointSetFn,
 ~distToFloatOperation: Operation.distToFloatOperation,
 ) => {
-let symbolicSolution = switch (t: t) {
-| Symbolic(r) =>
-switch SymbolicDist.T.operate(distToFloatOperation, r) {
-| Ok(f) => Some(f)
-| _ => None
-}
+let trySymbolicSolution = switch (t: t) {
+| Symbolic(r) => SymbolicDist.T.operate(distToFloatOperation, r)->E.R.toOption
 | _ => None
 }

-switch symbolicSolution {
+let trySampleSetSolution = switch ((t: t), distToFloatOperation) {
+| (SampleSet(sampleSet), #Mean) => SampleSetDist.mean(sampleSet)->Some
+| (SampleSet(sampleSet), #Sample) => SampleSetDist.sample(sampleSet)->Some
+| (SampleSet(sampleSet), #Inv(r)) => SampleSetDist.percentile(sampleSet, r)->Some
+| _ => None
+}
+
+switch trySymbolicSolution {
+| Some(r) => Ok(r)
+| None =>
+switch trySampleSetSolution {
 | Some(r) => Ok(r)
 | None => toPointSetFn(t)->E.R2.fmap(PointSetDist.operate(distToFloatOperation))
 }
 }
+}

 //Todo: If it's a pointSet, but the xyPointLength is different from what it has, it should change.
 // This is tricky because the case of discrete distributions.

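The reshaped toFloatOperation above now resolves a float operation in three stages: an analytic answer when the distribution is symbolic, a direct computation from the stored samples when it is a sample set and the operation is #Mean, #Sample, or #Inv, and only then the slower point-set conversion. A minimal, self-contained sketch of that option-based fallback chain (resolve, tryA, tryB, and slowPath are illustrative stand-ins, not Squiggle APIs):

```rescript
// Stand-in for the resolution order above: prefer tryA, then tryB, then the slow path.
let resolve = (
  tryA: option<float>, // e.g. an analytic (symbolic) answer
  tryB: option<float>, // e.g. a value computed straight from the samples
  slowPath: unit => result<float, string>, // e.g. convert to a point set and operate on it
): result<float, string> =>
  switch tryA {
  | Some(r) => Ok(r)
  | None =>
    switch tryB {
    | Some(r) => Ok(r)
    | None => slowPath()
    }
  }

// resolve(None, Some(3.6), () => Ok(0.)) == Ok(3.6): the slow path never runs.
```
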
@@ -98,3 +98,13 @@ let map2 = (~fn: (float, float) => result<float, Operation.Error.t>, ~t1: t, ~t2
 E.R.toExn("Input of samples should be larger than 5", make(x))
 )
 }
+
+let mean = t => T.get(t)->E.A.Floats.mean
+let geomean = t => T.get(t)->E.A.Floats.geomean
+let mode = t => T.get(t)->E.A.Floats.mode
+let sum = t => T.get(t)->E.A.Floats.sum
+let min = t => T.get(t)->E.A.Floats.min
+let max = t => T.get(t)->E.A.Floats.max
+let stdev = t => T.get(t)->E.A.Floats.stdev
+let variance = t => T.get(t)->E.A.Floats.variance
+let percentile = (t, f) => T.get(t)->E.A.Floats.percentile(f)

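These new SampleSetDist helpers expose summary statistics computed directly from the stored samples, via E.A.Floats (which a later hunk in this commit switches to jStat). A rough illustration of what that means, using plain Belt arithmetic rather than the real Jstat-backed functions:

```rescript
// Illustrative only: sample-based statistics on a tiny array, with plain Belt
// rather than the Jstat-backed E.A.Floats functions the helpers above delegate to.
let samples = [1., 2., 2., 3., 10.]
let n = Belt.Array.length(samples)

// mean = (1 + 2 + 2 + 3 + 10) / 5 = 3.6
let sampleMean = Belt.Array.reduce(samples, 0., (acc, x) => acc +. x) /. Belt.Int.toFloat(n)

// a crude median: middle element of the sorted samples (2. here);
// jStat's percentile interpolates, so its 50th percentile can differ slightly.
let sorted = Belt.SortArray.stableSortBy(samples, (a, b) => a < b ? -1 : a > b ? 1 : 0)
let crudeMedian = sorted[n / 2]
```
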
@@ -133,9 +133,17 @@ let toPointSetDist = (
 ~discrete=Some(discrete),
 )

+/*
+I'm surprised that this doesn't come out normalized. My guess is that the KDE library
+we're using is standardizing on something else. If we ever change that library, we should
+check to see if we still need to do this.
+*/
+
+let normalizedPointSet = pointSetDist->E.O2.fmap(PointSetDist.T.normalize)
+
 let samplesParse: Internals.Types.outputs = {
 continuousParseParams: pdf |> E.O.fmap(snd),
-pointSetDist: pointSetDist,
+pointSetDist: normalizedPointSet,
 }

 samplesParse

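The fix here is to renormalize the KDE output before returning it, because (per the new comment) the density coming back from the KDE step does not have total mass 1. A minimal sketch of the idea on a bare array of weights; PointSetDist.T.normalize itself handles full mixed continuous/discrete shapes, which this does not:

```rescript
// Sketch: rescale weights so they sum to 1. The real PointSetDist.T.normalize
// normalizes a whole point-set shape (continuous + discrete), not a bare array.
let normalizeWeights = (ys: array<float>): array<float> => {
  let total = Belt.Array.reduce(ys, 0., (acc, y) => acc +. y)
  total == 0. ? ys : Belt.Array.map(ys, y => y /. total)
}

// normalizeWeights([2., 2., 4.]) == [0.25, 0.25, 0.5]
```
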
@@ -521,14 +521,15 @@ module A = {
 let split = (sortedArray: array<float>) => {
 let continuous = []
 let discrete = FloatFloatMap.empty()
-Belt.Array.forEachWithIndex(sortedArray, (index, element) => {
-let maxIndex = (sortedArray |> Array.length) - 1
-let possiblySimilarElements = switch index {
-| 0 => [index + 1]
-| n if n == maxIndex => [index - 1]
-| _ => [index - 1, index + 1]
-} |> Belt.Array.map(_, r => sortedArray[r])
-let hasSimilarElement = Belt.Array.some(possiblySimilarElements, r => r == element)
+Belt.Array.forEachWithIndex(sortedArray, (_, element) => {
+// let maxIndex = (sortedArray |> Array.length) - 1
+// let possiblySimilarElements = switch index {
+// | 0 => [index + 1]
+// | n if n == maxIndex => [index - 1]
+// | _ => [index - 1, index + 1]
+// } |> Belt.Array.map(_, r => sortedArray[r])
+// let hasSimilarElement = Belt.Array.some(possiblySimilarElements, r => r == element)
+let hasSimilarElement = false
 hasSimilarElement
 ? FloatFloatMap.increment(element, discrete)
 : {

@@ -544,10 +545,18 @@ module A = {
 }

 module Floats = {
-let sum = Belt.Array.reduce(_, 0., (i, j) => i +. j)
-let mean = a => sum(a) /. (Array.length(a) |> float_of_int)
+let mean = Jstat.mean
+let geomean = Jstat.geomean
+let mode = Jstat.mode
+let variance = Jstat.variance
+let stdev = Jstat.stdev
+let sum = Jstat.sum
 let random = Js.Math.random_int

+//Passing true for the exclusive parameter excludes both endpoints of the range.
+//https://jstat.github.io/all.html
+let percentile = (a, b) => Jstat.percentile(a, b, false)
+
 // Gives an array with all the differences between values
 // diff([1,5,3,7]) = [4,-2,4]
 let diff = (arr: array<float>): array<float> =>

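With E.A.Floats now delegating to jStat, callers such as SampleSetDist get the extra statistics for free. A hedged usage sketch; the mean is exact, while the percentile value depends on jStat's interpolation (exclusive=false only means both endpoints are included), so it is indicative only:

```rescript
// Assumes the E.A.Floats module shown above is in scope via the project's E module.
let xs = [1., 2., 3., 4.]
let m = E.A.Floats.mean(xs) // 2.5
// percentile passes exclusive=false to jStat, so endpoints are included;
// the precise interpolated value is whatever jStat returns.
let p50 = E.A.Floats.percentile(xs, 0.5)
```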