Messing with possible distribution split

This commit is contained in:
Ozzie Gooen 2020-03-16 10:41:44 +00:00
parent cebca141bd
commit 80d040e138
7 changed files with 154 additions and 28 deletions

View File

@ -18,6 +18,7 @@ module FormConfig = [%lenses
sampleCount: string, sampleCount: string,
outputXYPoints: string, outputXYPoints: string,
truncateTo: string, truncateTo: string,
kernelWidth: string,
} }
]; ];
@ -25,6 +26,7 @@ type options = {
sampleCount: int, sampleCount: int,
outputXYPoints: int, outputXYPoints: int,
truncateTo: option(int), truncateTo: option(int),
kernelWidth: int,
}; };
module Form = ReForm.Make(FormConfig); module Form = ReForm.Make(FormConfig);
@ -123,6 +125,7 @@ module DemoDist = {
~sampleCount=options.sampleCount, ~sampleCount=options.sampleCount,
~outputXYPoints=options.outputXYPoints, ~outputXYPoints=options.outputXYPoints,
~truncateTo=options.truncateTo, ~truncateTo=options.truncateTo,
~kernelWidth=options.kernelWidth,
); );
switch (distPlus) { switch (distPlus) {
| Some(distPlus) => <DistPlusPlot distPlus /> | Some(distPlus) => <DistPlusPlot distPlus />
@ -148,7 +151,7 @@ let make = () => {
~schema, ~schema,
~onSubmit=({state}) => {None}, ~onSubmit=({state}) => {None},
~initialState={ ~initialState={
guesstimatorString: "50 to 50000", guesstimatorString: "40 to 50",
domainType: "Complete", domainType: "Complete",
xPoint: "50.0", xPoint: "50.0",
xPoint2: "60.0", xPoint2: "60.0",
@ -157,9 +160,10 @@ let make = () => {
unitType: "UnspecifiedDistribution", unitType: "UnspecifiedDistribution",
zero: MomentRe.momentNow(), zero: MomentRe.momentNow(),
unit: "days", unit: "days",
sampleCount: "1000", sampleCount: "10000",
outputXYPoints: "1000", outputXYPoints: "500",
truncateTo: "500", truncateTo: "100",
kernelWidth: "5",
}, },
(), (),
); );
@ -187,6 +191,7 @@ let make = () => {
let outputXYPoints = let outputXYPoints =
reform.state.values.outputXYPoints |> Js.Float.fromString; reform.state.values.outputXYPoints |> Js.Float.fromString;
let truncateTo = reform.state.values.truncateTo |> Js.Float.fromString; let truncateTo = reform.state.values.truncateTo |> Js.Float.fromString;
let kernelWidth = reform.state.values.kernelWidth |> Js.Float.fromString;
let domain = let domain =
switch (domainType) { switch (domainType) {
@ -240,6 +245,7 @@ let make = () => {
sampleCount: sampleCount |> int_of_float, sampleCount: sampleCount |> int_of_float,
outputXYPoints: outputXYPoints |> int_of_float, outputXYPoints: outputXYPoints |> int_of_float,
truncateTo: truncateTo |> int_of_float |> E.O.some, truncateTo: truncateTo |> int_of_float |> E.O.some,
kernelWidth: kernelWidth |> int_of_float,
}) })
| _ => None | _ => None
}; };
@ -261,6 +267,7 @@ let make = () => {
reform.state.values.sampleCount, reform.state.values.sampleCount,
reform.state.values.outputXYPoints, reform.state.values.outputXYPoints,
reform.state.values.truncateTo, reform.state.values.truncateTo,
reform.state.values.kernelWidth,
reloader |> string_of_int, reloader |> string_of_int,
|], |],
); );
@ -455,6 +462,9 @@ let make = () => {
<Col span=4> <Col span=4>
<FieldFloat field=FormConfig.TruncateTo label="Truncate To" /> <FieldFloat field=FormConfig.TruncateTo label="Truncate To" />
</Col> </Col>
<Col span=4>
<FieldFloat field=FormConfig.KernelWidth label="Kernel Width" />
</Col>
</Row> </Row>
<Antd.Button <Antd.Button
_type=`primary icon=Antd.IconName.reload onClick=onRealod> _type=`primary icon=Antd.IconName.reload onClick=onRealod>

View File

@ -173,6 +173,21 @@ let percentiles = distPlus => {
|> Distributions.DistPlus.T.Integral.yToX(~cache=None, 0.99) |> Distributions.DistPlus.T.Integral.yToX(~cache=None, 0.99)
|> showFloat} |> showFloat}
</td> </td>
<td className="px-4 py-2 border">
{distPlus
|> Distributions.DistPlus.T.Integral.yToX(~cache=None, 0.999)
|> showFloat}
</td>
<td className="px-4 py-2 border">
{distPlus
|> Distributions.DistPlus.T.Integral.yToX(~cache=None, 0.9999)
|> showFloat}
</td>
<td className="px-4 py-2 border">
{distPlus
|> Distributions.DistPlus.T.Integral.yToX(~cache=None, 0.99999)
|> showFloat}
</td>
</tr> </tr>
</tbody> </tbody>
</table>; </table>;

View File

@ -13,21 +13,16 @@ let toDistPlus =
~sampleCount=2000, ~sampleCount=2000,
~outputXYPoints=1500, ~outputXYPoints=1500,
~truncateTo=Some(300), ~truncateTo=Some(300),
~kernelWidth=5,
t: distPlusIngredients, t: distPlusIngredients,
) )
: option(distPlus) => { : option(distPlus) => {
let test =
Guesstimator.toMixed(
t.guesstimatorString,
sampleCount,
outputXYPoints,
30,
);
let shape = let shape =
Guesstimator.stringToMixedShape( Guesstimator.toMixed(
~string=t.guesstimatorString, ~string=t.guesstimatorString,
~sampleCount, ~sampleCount,
~outputXYPoints, ~outputXYPoints,
~kernelWidth,
~truncateTo, ~truncateTo,
(), (),
); );

View File

@ -8,8 +8,11 @@ module T = {
{"xs": t.xs, "ys": t.ys}; {"xs": t.xs, "ys": t.ys};
}; };
let xs = (t: t) => t.xs; let xs = (t: t) => t.xs;
let ys = (t: t) => t.ys;
let minX = (t: t) => t |> xs |> E.A.first; let minX = (t: t) => t |> xs |> E.A.first;
let maxX = (t: t) => t |> xs |> E.A.last; let maxX = (t: t) => t |> xs |> E.A.last;
let minY = (t: t) => t |> ys |> E.A.first;
let maxY = (t: t) => t |> ys |> E.A.last;
let xTotalRange = (t: t) => let xTotalRange = (t: t) =>
switch (minX(t), maxX(t)) { switch (minX(t), maxX(t)) {
| (Some(min), Some(max)) => Some(max -. min) | (Some(min), Some(max)) => Some(max -. min)
@ -45,6 +48,31 @@ module T = {
previousIndex |> Belt.Option.flatMap(_, Belt.Array.get(zipped)); previousIndex |> Belt.Option.flatMap(_, Belt.Array.get(zipped));
}; };
// Looks up the y value of shape `t` at `x`, interpolating between the two
// surrounding points when `x` falls strictly between them.
//
// - `x` greater than every x in the shape: returns the last y.
// - `x` at or before the first x: returns the first y.
// - `x` equal to one of the xs: returns that point's y directly.
// - Falls back to 0.0 when the first/last y is unavailable (the Option default).
//
// NOTE(review): assumes `t.xs` is sorted ascending and that
// `Functions.interpolate(xLow, xHigh, yLow, yHigh, x)` is linear -- confirm.
let findY = (x: float, t: t): float => {
  let xValues = xs(t);
  let yValues = ys(t);
  // Index of the first point whose x is at or above the query.
  switch (Belt.Array.getIndexBy(xValues, e => e >= x)) {
  | None => maxY(t) |> E.O.default(0.0)
  | Some(0) => minY(t) |> E.O.default(0.0)
  | Some(upperIndex) =>
    // The exact-match test has to look at `upperIndex`: the point just below
    // it is strictly less than `x` by construction, so comparing against that
    // one (as the previous version did) could never succeed.
    if (xValues[upperIndex] == x) {
      yValues[upperIndex];
    } else {
      // `upperIndex` is at least 1 here because `Some(0)` is handled above.
      let lowerIndex = upperIndex - 1;
      Functions.interpolate(
        xValues[lowerIndex],
        xValues[upperIndex],
        yValues[lowerIndex],
        yValues[upperIndex],
        x,
      );
    }
  };
};
let findX = CdfLibrary.Distribution.findX;
module XtoY = { module XtoY = {
let stepwiseIncremental = (f, t: t) => let stepwiseIncremental = (f, t: t) =>
firstPairAtOrBeforeValue(f, t) |> E.O.fmap(((_, y)) => y); firstPairAtOrBeforeValue(f, t) |> E.O.fmap(((_, y)) => y);
@ -54,7 +82,7 @@ module T = {
}; };
// TODO: When Roman's PR comes in, fix this bit. This depends on interpolation, obviously. // TODO: When Roman's PR comes in, fix this bit. This depends on interpolation, obviously.
let linear = (f, t: t) => t |> CdfLibrary.Distribution.findY(f); let linear = (f, t: t) => t |> findY(f);
}; };
let pointwiseMap = (fn, t: t): t => {xs: t.xs, ys: t.ys |> E.A.fmap(fn)}; let pointwiseMap = (fn, t: t): t => {xs: t.xs, ys: t.ys |> E.A.fmap(fn)};
@ -173,9 +201,6 @@ module T = {
let accumulateYs = _transverseShape((aCurrent, aLast) => aCurrent +. aLast); let accumulateYs = _transverseShape((aCurrent, aLast) => aCurrent +. aLast);
let subtractYs = _transverseShape((aCurrent, aLast) => aCurrent -. aLast); let subtractYs = _transverseShape((aCurrent, aLast) => aCurrent -. aLast);
let findY = CdfLibrary.Distribution.findY;
let findX = CdfLibrary.Distribution.findX;
}; };
// I'm really not sure this part is actually what we want at this point. // I'm really not sure this part is actually what we want at this point.

View File

@ -62,22 +62,93 @@ let stringToMixedShape =
~string, ~string,
~sampleCount=3000, ~sampleCount=3000,
~outputXYPoints=3000, ~outputXYPoints=3000,
~width=3000, ~width=10,
~truncateTo=Some(500), ~truncateTo=Some(500),
(), (),
) => ) =>
Internals.toCombinedFormat(string, sampleCount, outputXYPoints, width) Internals.toCombinedFormat(string, sampleCount, outputXYPoints, width)
|> Internals.toMixedShape(~truncateTo); |> Internals.toMixedShape(~truncateTo);
let toMixed = (string, sampleCount, returnLength, width) => { module KDE = {
// Single-pass density estimate: feeds all `samples` to
// `Internals.samplesToContinuousPdf` with the requested number of output
// points and kernel width, then converts the JS result with
// `CdfLibrary.JS.jsToDist`.
let normalSampling = (samples, outputXYPoints, kernelWidth) => {
  let jsPdf =
    Internals.samplesToContinuousPdf(samples, outputXYPoints, kernelWidth);
  CdfLibrary.JS.jsToDist(jsPdf);
};
// Experimental two-part density estimate. The samples are split at the
// `cuttoff` fraction of their count: the leading part is estimated with
// `kernelWidth`, the trailing part with a fixed kernel width of 3 and at
// least 30 output points.
//
// NOTE(review): only the leading part's estimate is returned. The trailing
// estimate is computed and logged but otherwise discarded, because the code
// that would combine the two parts is still commented out below.
// NOTE(review): presumably `samples` must already be sorted ascending for the
// split to separate body from tail -- confirm against callers.
let inGroups = (samples, outputXYPoints, kernelWidth, ~cuttoff=0.9, ()) => {
  // Scales an integer count by `cuttoff`, converting back with int_of_float.
  let scaleByCutoff = count => int_of_float(float_of_int(count) *. cuttoff);
  let sampleTotal = E.A.length(samples);
  let partitionAt = scaleByCutoff(sampleTotal);
  let part1XYPoints = scaleByCutoff(outputXYPoints);
  let part2XYPoints = Js.Math.max_int(30, outputXYPoints - part1XYPoints);

  let headSamples = Belt.Array.slice(samples, ~offset=0, ~len=partitionAt);
  let tailLength = sampleTotal - partitionAt;
  // A negative offset makes Belt.Array.slice count from the end of the array.
  let tailSamples =
    Belt.Array.slice(
      samples,
      ~offset=(-1) * tailLength,
      ~len=tailLength,
    );

  let part1 =
    CdfLibrary.JS.jsToDist(
      Internals.samplesToContinuousPdf(
        headSamples,
        part1XYPoints,
        kernelWidth,
      ),
    );
  let part2 =
    CdfLibrary.JS.jsToDist(
      Internals.samplesToContinuousPdf(tailSamples, part2XYPoints, 3),
    );
  Js.log4(partitionAt, part1XYPoints, part1, part2);

  // Intended combination of the two parts, weighted by their sample share.
  // Not enabled yet:
  // let opp = 1.0 -. cuttoff;
  // let result =
  //   XYShape.T.Combine.combineLinear(
  //     part1,
  //     part2,
  //     (a, b) => {
  //       let aa = a *. cuttoff;
  //       let bb = b *. opp;
  //       aa +. bb;
  //     },
  //   );
  // Js.log2("HI", result);
  // result;
  part1;
};
};
let toMixed =
(
~string,
~sampleCount=3000,
~outputXYPoints=3000,
~kernelWidth=10,
~truncateTo=Some(500),
~cuttoff=0.995,
(),
) => {
let truncateTo = None;
let start = Js.Date.now();
let timeMessage = message => Js.log2(message, Js.Date.now() -. start);
timeMessage("Starting");
let samples = Internals.stringToSamples(string, sampleCount); let samples = Internals.stringToSamples(string, sampleCount);
timeMessage("Finished sampling");
let length = samples |> E.A.length; let length = samples |> E.A.length;
Array.sort(compare, samples); Array.fast_sort(compare, samples);
let items = Js.log2("SORTED?", samples);
E.A.uniq(samples) // let items =
|> E.A.fmap(r => (r, samples |> E.A.filter(n => n == r) |> E.A.length)); // E.A.uniq(samples)
let (discretePart, continuousPart) = // |> E.A.fmap(r => (r, samples |> E.A.filter(n => n == r) |> E.A.length));
Belt.Array.partition(items, ((_, count)) => count > 1); // let (discretePart, continuousPart) =
// Belt.Array.partition(items, ((_, count)) => count > 1);
let discretePart = [||];
let continuousPart = samples;
let discrete: DistTypes.xyShape = let discrete: DistTypes.xyShape =
discretePart discretePart
|> E.A.fmap(((x, count)) => |> E.A.fmap(((x, count)) =>
@ -87,11 +158,21 @@ let toMixed = (string, sampleCount, returnLength, width) => {
let pdf: DistTypes.xyShape = let pdf: DistTypes.xyShape =
continuousPart |> E.A.length > 20 continuousPart |> E.A.length > 20
? { ? {
Internals.samplesToContinuousPdf(samples, returnLength, width) samples |> KDE.inGroups(_, outputXYPoints, kernelWidth, ~cuttoff, ());
|> CdfLibrary.JS.jsToDist; // samples |> KDE.normalSampling(_, outputXYPoints, kernelWidth);
} }
: {xs: [||], ys: [||]}; : {xs: [||], ys: [||]};
timeMessage("Finished pdf");
let continuous = pdf |> Distributions.Continuous.fromShape; let continuous = pdf |> Distributions.Continuous.fromShape;
let shape = MixedShapeBuilder.buildSimple(~continuous, ~discrete); let shape = MixedShapeBuilder.buildSimple(~continuous, ~discrete);
timeMessage("Finished shape");
let shape =
switch (truncateTo, shape) {
| (Some(trunctate), Some(shape)) =>
Some(shape |> Distributions.Shape.T.truncate(trunctate))
| (None, Some(shape)) => Some(shape)
| _ => None
};
timeMessage("Finished truncation");
shape; shape;
}; };

View File

@ -113,7 +113,7 @@ const samplesToContinuousPdf = (
if (_.isFinite(min)) { _samples = _.filter(_samples, r => r > min) }; if (_.isFinite(min)) { _samples = _.filter(_samples, r => r > min) };
if (_.isFinite(max)) { _samples = _.filter(_samples, r => r < max) }; if (_.isFinite(max)) { _samples = _.filter(_samples, r => r < max) };
let pdf = pdfast.create(_samples, { size, width }); let pdf = pdfast.create(_samples, { size, width });
return {xs: pdf.map(r => r.x), ys: pdf.map(r => r.x)}; return {xs: pdf.map(r => r.x), ys: pdf.map(r => r.y)};
}; };
module.exports = { module.exports = {