Messing with possible distribution split

This commit is contained in:
Ozzie Gooen 2020-03-16 10:41:44 +00:00
parent cebca141bd
commit 80d040e138
7 changed files with 154 additions and 28 deletions

View File

@ -18,6 +18,7 @@ module FormConfig = [%lenses
sampleCount: string,
outputXYPoints: string,
truncateTo: string,
kernelWidth: string,
}
];
@ -25,6 +26,7 @@ type options = {
sampleCount: int,
outputXYPoints: int,
truncateTo: option(int),
kernelWidth: int,
};
module Form = ReForm.Make(FormConfig);
@ -123,6 +125,7 @@ module DemoDist = {
~sampleCount=options.sampleCount,
~outputXYPoints=options.outputXYPoints,
~truncateTo=options.truncateTo,
~kernelWidth=options.kernelWidth,
);
switch (distPlus) {
| Some(distPlus) => <DistPlusPlot distPlus />
@ -148,7 +151,7 @@ let make = () => {
~schema,
~onSubmit=({state}) => {None},
~initialState={
guesstimatorString: "50 to 50000",
guesstimatorString: "40 to 50",
domainType: "Complete",
xPoint: "50.0",
xPoint2: "60.0",
@ -157,9 +160,10 @@ let make = () => {
unitType: "UnspecifiedDistribution",
zero: MomentRe.momentNow(),
unit: "days",
sampleCount: "1000",
outputXYPoints: "1000",
truncateTo: "500",
sampleCount: "10000",
outputXYPoints: "500",
truncateTo: "100",
kernelWidth: "5",
},
(),
);
@ -187,6 +191,7 @@ let make = () => {
let outputXYPoints =
reform.state.values.outputXYPoints |> Js.Float.fromString;
let truncateTo = reform.state.values.truncateTo |> Js.Float.fromString;
let kernelWidth = reform.state.values.kernelWidth |> Js.Float.fromString;
let domain =
switch (domainType) {
@ -240,6 +245,7 @@ let make = () => {
sampleCount: sampleCount |> int_of_float,
outputXYPoints: outputXYPoints |> int_of_float,
truncateTo: truncateTo |> int_of_float |> E.O.some,
kernelWidth: kernelWidth |> int_of_float,
})
| _ => None
};
@ -261,6 +267,7 @@ let make = () => {
reform.state.values.sampleCount,
reform.state.values.outputXYPoints,
reform.state.values.truncateTo,
reform.state.values.kernelWidth,
reloader |> string_of_int,
|],
);
@ -455,6 +462,9 @@ let make = () => {
<Col span=4>
<FieldFloat field=FormConfig.TruncateTo label="Truncate To" />
</Col>
<Col span=4>
<FieldFloat field=FormConfig.KernelWidth label="Kernel Width" />
</Col>
</Row>
<Antd.Button
_type=`primary icon=Antd.IconName.reload onClick=onRealod>

View File

@ -173,6 +173,21 @@ let percentiles = distPlus => {
|> Distributions.DistPlus.T.Integral.yToX(~cache=None, 0.99)
|> showFloat}
</td>
<td className="px-4 py-2 border">
{distPlus
|> Distributions.DistPlus.T.Integral.yToX(~cache=None, 0.999)
|> showFloat}
</td>
<td className="px-4 py-2 border">
{distPlus
|> Distributions.DistPlus.T.Integral.yToX(~cache=None, 0.9999)
|> showFloat}
</td>
<td className="px-4 py-2 border">
{distPlus
|> Distributions.DistPlus.T.Integral.yToX(~cache=None, 0.99999)
|> showFloat}
</td>
</tr>
</tbody>
</table>;

View File

@ -13,21 +13,16 @@ let toDistPlus =
~sampleCount=2000,
~outputXYPoints=1500,
~truncateTo=Some(300),
~kernelWidth=5,
t: distPlusIngredients,
)
: option(distPlus) => {
let test =
Guesstimator.toMixed(
t.guesstimatorString,
sampleCount,
outputXYPoints,
30,
);
let shape =
Guesstimator.stringToMixedShape(
Guesstimator.toMixed(
~string=t.guesstimatorString,
~sampleCount,
~outputXYPoints,
~kernelWidth,
~truncateTo,
(),
);

View File

@ -8,8 +8,11 @@ module T = {
{"xs": t.xs, "ys": t.ys};
};
// Raw x coordinates of the shape.
let xs = (t: t) => t.xs;
// Raw y coordinates of the shape.
let ys = (t: t) => t.ys;
// First and last x values (as returned by E.A.first / E.A.last).
let minX = (t: t) => E.A.first(xs(t));
let maxX = (t: t) => E.A.last(xs(t));
// NOTE: despite the names, these are the FIRST and LAST y values, not the
// numeric minimum/maximum of ys.
let minY = (t: t) => E.A.first(ys(t));
let maxY = (t: t) => E.A.last(ys(t));
let xTotalRange = (t: t) =>
switch (minX(t), maxX(t)) {
| (Some(min), Some(max)) => Some(max -. min)
@ -45,6 +48,31 @@ module T = {
previousIndex |> Belt.Option.flatMap(_, Belt.Array.get(zipped));
};
/**
 * Looks up the y value of shape `t` at position `x`.
 *
 * - If no stored x is >= `x`, returns the last y (via `maxY`), or 0.0 when
 *   the shape has no points.
 * - If the very first stored x is already >= `x`, returns the first y (via
 *   `minY`), or 0.0 when the shape has no points.
 * - If a stored x equals `x` exactly, returns the matching y unchanged.
 * - Otherwise calls `Functions.interpolate` with the two neighbouring points
 *   (lower x, upper x, lower y, upper y, x).
 *
 * NOTE(review): assumes `xs(t)` is sorted ascending - confirm with callers.
 */
let findY = (x: float, t: t): float => {
  let xValues = xs(t);
  let yValues = ys(t);
  switch (Belt.Array.getIndexBy(xValues, e => e >= x)) {
  | None => maxY(t) |> E.O.default(0.0)
  | Some(0) => minY(t) |> E.O.default(0.0)
  | Some(upperIndex) =>
    // `upperIndex` is the first index whose x is >= `x`, so the element just
    // before it can never equal `x`. The exact-match shortcut therefore has
    // to be tested against the upper neighbour, not the lower one.
    if (xValues[upperIndex] == x) {
      yValues[upperIndex];
    } else {
      // `Some(0)` is handled above, so `upperIndex - 1` is always >= 0.
      let lowerIndex = upperIndex - 1;
      Functions.interpolate(
        xValues[lowerIndex],
        xValues[upperIndex],
        yValues[lowerIndex],
        yValues[upperIndex],
        x,
      );
    }
  };
};
let findX = CdfLibrary.Distribution.findX;
module XtoY = {
let stepwiseIncremental = (f, t: t) =>
firstPairAtOrBeforeValue(f, t) |> E.O.fmap(((_, y)) => y);
@ -54,7 +82,7 @@ module T = {
};
// TODO: When Roman's PR comes in, fix this bit. This depends on interpolation, obviously.
let linear = (f, t: t) => t |> CdfLibrary.Distribution.findY(f);
let linear = (f, t: t) => t |> findY(f);
};
let pointwiseMap = (fn, t: t): t => {xs: t.xs, ys: t.ys |> E.A.fmap(fn)};
@ -173,9 +201,6 @@ module T = {
let accumulateYs = _transverseShape((aCurrent, aLast) => aCurrent +. aLast);
let subtractYs = _transverseShape((aCurrent, aLast) => aCurrent -. aLast);
let findY = CdfLibrary.Distribution.findY;
let findX = CdfLibrary.Distribution.findX;
};
// I'm really not sure this part is actually what we want at this point.

View File

@ -62,22 +62,93 @@ let stringToMixedShape =
~string,
~sampleCount=3000,
~outputXYPoints=3000,
~width=3000,
~width=10,
~truncateTo=Some(500),
(),
) =>
Internals.toCombinedFormat(string, sampleCount, outputXYPoints, width)
|> Internals.toMixedShape(~truncateTo);
let toMixed = (string, sampleCount, returnLength, width) => {
// Helpers that turn an array of samples into a continuous shape. Both feed
// the samples to Internals.samplesToContinuousPdf and convert the result
// with CdfLibrary.JS.jsToDist.
module KDE = {
// Builds the shape from all samples in a single call, using
// `outputXYPoints` and `kernelWidth` for the whole range.
let normalSampling = (samples, outputXYPoints, kernelWidth) => {
samples
|> Internals.samplesToContinuousPdf(_, outputXYPoints, kernelWidth)
|> CdfLibrary.JS.jsToDist;
};
// Experimental: splits `samples` into a leading part (the first `cuttoff`
// share of the array) and a trailing part, and builds a shape for each.
// NOTE(review): only `part1` is returned; `part2` is computed and logged but
// otherwise unused, because the combine step below is commented out.
// NOTE(review): presumably `samples` is expected to be sorted ascending so
// that the trailing part is the upper tail - confirm against the caller.
// The label `~cuttoff` (sic) is part of the call interface.
let inGroups = (samples, outputXYPoints, kernelWidth, ~cuttoff=0.9, ()) => {
// Index at which the samples are split: length * cuttoff, truncated by
// int_of_float.
let partitionAt =
samples
|> E.A.length
|> float_of_int
|> (e => e *. cuttoff)
|> int_of_float;
// Output points given to the leading part: the same `cuttoff` share of
// `outputXYPoints`.
let part1XYPoints =
outputXYPoints |> float_of_int |> (e => e *. cuttoff) |> int_of_float;
// The trailing part gets the remaining points, but never fewer than 30.
// The subtraction binds tighter than `|>`, so the max applies to the
// difference.
let part2XYPoints = outputXYPoints - part1XYPoints |> Js.Math.max_int(30);
let part1Data =
samples |> Belt.Array.slice(_, ~offset=0, ~len=partitionAt);
let part2DataLength = (samples |> E.A.length) - partitionAt;
// A negative offset makes Belt.Array.slice count from the end of the
// array, so this takes the last `part2DataLength` samples.
let part2Data =
samples
|> Belt.Array.slice(
_,
~offset=(-1) * part2DataLength,
~len=part2DataLength,
);
let part1 =
part1Data
|> Internals.samplesToContinuousPdf(_, part1XYPoints, kernelWidth)
|> CdfLibrary.JS.jsToDist;
// NOTE(review): the trailing part uses a hard-coded width of 3 instead of
// `kernelWidth` - confirm this is intentional.
let part2 =
part2Data
|> Internals.samplesToContinuousPdf(_, part2XYPoints, 3)
|> CdfLibrary.JS.jsToDist;
// Debug output.
Js.log4(partitionAt, part1XYPoints, part1, part2);
// Only referenced by the commented-out combine step below.
let opp = 1.0 -. cuttoff;
// let result =
// XYShape.T.Combine.combineLinear(
// part1,
// part2,
// (a, b) => {
// let aa = a *. cuttoff;
// let bb = b *. opp;
// aa +. bb;
// },
// );
// Js.log2("HI", result);
// result;
part1;
};
};
let toMixed =
(
~string,
~sampleCount=3000,
~outputXYPoints=3000,
~kernelWidth=10,
~truncateTo=Some(500),
~cuttoff=0.995,
(),
) => {
let truncateTo = None;
let start = Js.Date.now();
let timeMessage = message => Js.log2(message, Js.Date.now() -. start);
timeMessage("Starting");
let samples = Internals.stringToSamples(string, sampleCount);
timeMessage("Finished sampling");
let length = samples |> E.A.length;
Array.sort(compare, samples);
let items =
E.A.uniq(samples)
|> E.A.fmap(r => (r, samples |> E.A.filter(n => n == r) |> E.A.length));
let (discretePart, continuousPart) =
Belt.Array.partition(items, ((_, count)) => count > 1);
Array.fast_sort(compare, samples);
Js.log2("SORTED?", samples);
// let items =
// E.A.uniq(samples)
// |> E.A.fmap(r => (r, samples |> E.A.filter(n => n == r) |> E.A.length));
// let (discretePart, continuousPart) =
// Belt.Array.partition(items, ((_, count)) => count > 1);
let discretePart = [||];
let continuousPart = samples;
let discrete: DistTypes.xyShape =
discretePart
|> E.A.fmap(((x, count)) =>
@ -87,11 +158,21 @@ let toMixed = (string, sampleCount, returnLength, width) => {
let pdf: DistTypes.xyShape =
continuousPart |> E.A.length > 20
? {
Internals.samplesToContinuousPdf(samples, returnLength, width)
|> CdfLibrary.JS.jsToDist;
samples |> KDE.inGroups(_, outputXYPoints, kernelWidth, ~cuttoff, ());
// samples |> KDE.normalSampling(_, outputXYPoints, kernelWidth);
}
: {xs: [||], ys: [||]};
timeMessage("Finished pdf");
let continuous = pdf |> Distributions.Continuous.fromShape;
let shape = MixedShapeBuilder.buildSimple(~continuous, ~discrete);
timeMessage("Finished shape");
let shape =
switch (truncateTo, shape) {
| (Some(trunctate), Some(shape)) =>
Some(shape |> Distributions.Shape.T.truncate(trunctate))
| (None, Some(shape)) => Some(shape)
| _ => None
};
timeMessage("Finished truncation");
shape;
};

View File

@ -113,7 +113,7 @@ const samplesToContinuousPdf = (
if (_.isFinite(min)) { _samples = _.filter(_samples, r => r > min) };
if (_.isFinite(max)) { _samples = _.filter(_samples, r => r < max) };
let pdf = pdfast.create(_samples, { size, width });
return {xs: pdf.map(r => r.x), ys: pdf.map(r => r.x)};
return {xs: pdf.map(r => r.x), ys: pdf.map(r => r.y)};
};
module.exports = {