Messing with possible distribution split
This commit is contained in:
parent
cebca141bd
commit
80d040e138
|
@ -18,6 +18,7 @@ module FormConfig = [%lenses
|
||||||
sampleCount: string,
|
sampleCount: string,
|
||||||
outputXYPoints: string,
|
outputXYPoints: string,
|
||||||
truncateTo: string,
|
truncateTo: string,
|
||||||
|
kernelWidth: string,
|
||||||
}
|
}
|
||||||
];
|
];
|
||||||
|
|
||||||
|
@ -25,6 +26,7 @@ type options = {
|
||||||
sampleCount: int,
|
sampleCount: int,
|
||||||
outputXYPoints: int,
|
outputXYPoints: int,
|
||||||
truncateTo: option(int),
|
truncateTo: option(int),
|
||||||
|
kernelWidth: int,
|
||||||
};
|
};
|
||||||
|
|
||||||
module Form = ReForm.Make(FormConfig);
|
module Form = ReForm.Make(FormConfig);
|
||||||
|
@ -123,6 +125,7 @@ module DemoDist = {
|
||||||
~sampleCount=options.sampleCount,
|
~sampleCount=options.sampleCount,
|
||||||
~outputXYPoints=options.outputXYPoints,
|
~outputXYPoints=options.outputXYPoints,
|
||||||
~truncateTo=options.truncateTo,
|
~truncateTo=options.truncateTo,
|
||||||
|
~kernelWidth=options.kernelWidth,
|
||||||
);
|
);
|
||||||
switch (distPlus) {
|
switch (distPlus) {
|
||||||
| Some(distPlus) => <DistPlusPlot distPlus />
|
| Some(distPlus) => <DistPlusPlot distPlus />
|
||||||
|
@ -148,7 +151,7 @@ let make = () => {
|
||||||
~schema,
|
~schema,
|
||||||
~onSubmit=({state}) => {None},
|
~onSubmit=({state}) => {None},
|
||||||
~initialState={
|
~initialState={
|
||||||
guesstimatorString: "50 to 50000",
|
guesstimatorString: "40 to 50",
|
||||||
domainType: "Complete",
|
domainType: "Complete",
|
||||||
xPoint: "50.0",
|
xPoint: "50.0",
|
||||||
xPoint2: "60.0",
|
xPoint2: "60.0",
|
||||||
|
@ -157,9 +160,10 @@ let make = () => {
|
||||||
unitType: "UnspecifiedDistribution",
|
unitType: "UnspecifiedDistribution",
|
||||||
zero: MomentRe.momentNow(),
|
zero: MomentRe.momentNow(),
|
||||||
unit: "days",
|
unit: "days",
|
||||||
sampleCount: "1000",
|
sampleCount: "10000",
|
||||||
outputXYPoints: "1000",
|
outputXYPoints: "500",
|
||||||
truncateTo: "500",
|
truncateTo: "100",
|
||||||
|
kernelWidth: "5",
|
||||||
},
|
},
|
||||||
(),
|
(),
|
||||||
);
|
);
|
||||||
|
@ -187,6 +191,7 @@ let make = () => {
|
||||||
let outputXYPoints =
|
let outputXYPoints =
|
||||||
reform.state.values.outputXYPoints |> Js.Float.fromString;
|
reform.state.values.outputXYPoints |> Js.Float.fromString;
|
||||||
let truncateTo = reform.state.values.truncateTo |> Js.Float.fromString;
|
let truncateTo = reform.state.values.truncateTo |> Js.Float.fromString;
|
||||||
|
let kernelWidth = reform.state.values.kernelWidth |> Js.Float.fromString;
|
||||||
|
|
||||||
let domain =
|
let domain =
|
||||||
switch (domainType) {
|
switch (domainType) {
|
||||||
|
@ -240,6 +245,7 @@ let make = () => {
|
||||||
sampleCount: sampleCount |> int_of_float,
|
sampleCount: sampleCount |> int_of_float,
|
||||||
outputXYPoints: outputXYPoints |> int_of_float,
|
outputXYPoints: outputXYPoints |> int_of_float,
|
||||||
truncateTo: truncateTo |> int_of_float |> E.O.some,
|
truncateTo: truncateTo |> int_of_float |> E.O.some,
|
||||||
|
kernelWidth: kernelWidth |> int_of_float,
|
||||||
})
|
})
|
||||||
| _ => None
|
| _ => None
|
||||||
};
|
};
|
||||||
|
@ -261,6 +267,7 @@ let make = () => {
|
||||||
reform.state.values.sampleCount,
|
reform.state.values.sampleCount,
|
||||||
reform.state.values.outputXYPoints,
|
reform.state.values.outputXYPoints,
|
||||||
reform.state.values.truncateTo,
|
reform.state.values.truncateTo,
|
||||||
|
reform.state.values.kernelWidth,
|
||||||
reloader |> string_of_int,
|
reloader |> string_of_int,
|
||||||
|],
|
|],
|
||||||
);
|
);
|
||||||
|
@ -455,6 +462,9 @@ let make = () => {
|
||||||
<Col span=4>
|
<Col span=4>
|
||||||
<FieldFloat field=FormConfig.TruncateTo label="Truncate To" />
|
<FieldFloat field=FormConfig.TruncateTo label="Truncate To" />
|
||||||
</Col>
|
</Col>
|
||||||
|
<Col span=4>
|
||||||
|
<FieldFloat field=FormConfig.KernelWidth label="Kernel Width" />
|
||||||
|
</Col>
|
||||||
</Row>
|
</Row>
|
||||||
<Antd.Button
|
<Antd.Button
|
||||||
_type=`primary icon=Antd.IconName.reload onClick=onRealod>
|
_type=`primary icon=Antd.IconName.reload onClick=onRealod>
|
||||||
|
|
|
@ -173,6 +173,21 @@ let percentiles = distPlus => {
|
||||||
|> Distributions.DistPlus.T.Integral.yToX(~cache=None, 0.99)
|
|> Distributions.DistPlus.T.Integral.yToX(~cache=None, 0.99)
|
||||||
|> showFloat}
|
|> showFloat}
|
||||||
</td>
|
</td>
|
||||||
|
<td className="px-4 py-2 border">
|
||||||
|
{distPlus
|
||||||
|
|> Distributions.DistPlus.T.Integral.yToX(~cache=None, 0.999)
|
||||||
|
|> showFloat}
|
||||||
|
</td>
|
||||||
|
<td className="px-4 py-2 border">
|
||||||
|
{distPlus
|
||||||
|
|> Distributions.DistPlus.T.Integral.yToX(~cache=None, 0.9999)
|
||||||
|
|> showFloat}
|
||||||
|
</td>
|
||||||
|
<td className="px-4 py-2 border">
|
||||||
|
{distPlus
|
||||||
|
|> Distributions.DistPlus.T.Integral.yToX(~cache=None, 0.99999)
|
||||||
|
|> showFloat}
|
||||||
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
</tbody>
|
</tbody>
|
||||||
</table>;
|
</table>;
|
||||||
|
|
|
@ -13,21 +13,16 @@ let toDistPlus =
|
||||||
~sampleCount=2000,
|
~sampleCount=2000,
|
||||||
~outputXYPoints=1500,
|
~outputXYPoints=1500,
|
||||||
~truncateTo=Some(300),
|
~truncateTo=Some(300),
|
||||||
|
~kernelWidth=5,
|
||||||
t: distPlusIngredients,
|
t: distPlusIngredients,
|
||||||
)
|
)
|
||||||
: option(distPlus) => {
|
: option(distPlus) => {
|
||||||
let test =
|
|
||||||
Guesstimator.toMixed(
|
|
||||||
t.guesstimatorString,
|
|
||||||
sampleCount,
|
|
||||||
outputXYPoints,
|
|
||||||
30,
|
|
||||||
);
|
|
||||||
let shape =
|
let shape =
|
||||||
Guesstimator.stringToMixedShape(
|
Guesstimator.toMixed(
|
||||||
~string=t.guesstimatorString,
|
~string=t.guesstimatorString,
|
||||||
~sampleCount,
|
~sampleCount,
|
||||||
~outputXYPoints,
|
~outputXYPoints,
|
||||||
|
~kernelWidth,
|
||||||
~truncateTo,
|
~truncateTo,
|
||||||
(),
|
(),
|
||||||
);
|
);
|
||||||
|
|
|
@ -8,8 +8,11 @@ module T = {
|
||||||
{"xs": t.xs, "ys": t.ys};
|
{"xs": t.xs, "ys": t.ys};
|
||||||
};
|
};
|
||||||
let xs = (t: t) => t.xs;
|
let xs = (t: t) => t.xs;
|
||||||
|
let ys = (t: t) => t.ys;
|
||||||
let minX = (t: t) => t |> xs |> E.A.first;
|
let minX = (t: t) => t |> xs |> E.A.first;
|
||||||
let maxX = (t: t) => t |> xs |> E.A.last;
|
let maxX = (t: t) => t |> xs |> E.A.last;
|
||||||
|
let minY = (t: t) => t |> ys |> E.A.first;
|
||||||
|
let maxY = (t: t) => t |> ys |> E.A.last;
|
||||||
let xTotalRange = (t: t) =>
|
let xTotalRange = (t: t) =>
|
||||||
switch (minX(t), maxX(t)) {
|
switch (minX(t), maxX(t)) {
|
||||||
| (Some(min), Some(max)) => Some(max -. min)
|
| (Some(min), Some(max)) => Some(max -. min)
|
||||||
|
@ -45,6 +48,31 @@ module T = {
|
||||||
previousIndex |> Belt.Option.flatMap(_, Belt.Array.get(zipped));
|
previousIndex |> Belt.Option.flatMap(_, Belt.Array.get(zipped));
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let findY = (x: float, t: t): float => {
|
||||||
|
let firstHigherIndex = Belt.Array.getIndexBy(xs(t), e => e >= x);
|
||||||
|
switch (firstHigherIndex) {
|
||||||
|
| None => maxY(t) |> E.O.default(0.0)
|
||||||
|
| Some(0) => minY(t) |> E.O.default(0.0)
|
||||||
|
| Some(firstHigherIndex) =>
|
||||||
|
let lowerOrEqualIndex =
|
||||||
|
firstHigherIndex - 1 < 0 ? 0 : firstHigherIndex - 1;
|
||||||
|
let needsInterpolation = xs(t)[lowerOrEqualIndex] != x;
|
||||||
|
if (needsInterpolation) {
|
||||||
|
Functions.interpolate(
|
||||||
|
xs(t)[lowerOrEqualIndex],
|
||||||
|
xs(t)[firstHigherIndex],
|
||||||
|
ys(t)[lowerOrEqualIndex],
|
||||||
|
ys(t)[firstHigherIndex],
|
||||||
|
x,
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
ys(t)[lowerOrEqualIndex];
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
let findX = CdfLibrary.Distribution.findX;
|
||||||
|
|
||||||
module XtoY = {
|
module XtoY = {
|
||||||
let stepwiseIncremental = (f, t: t) =>
|
let stepwiseIncremental = (f, t: t) =>
|
||||||
firstPairAtOrBeforeValue(f, t) |> E.O.fmap(((_, y)) => y);
|
firstPairAtOrBeforeValue(f, t) |> E.O.fmap(((_, y)) => y);
|
||||||
|
@ -54,7 +82,7 @@ module T = {
|
||||||
};
|
};
|
||||||
|
|
||||||
// TODO: When Roman's PR comes in, fix this bit. This depends on interpolation, obviously.
|
// TODO: When Roman's PR comes in, fix this bit. This depends on interpolation, obviously.
|
||||||
let linear = (f, t: t) => t |> CdfLibrary.Distribution.findY(f);
|
let linear = (f, t: t) => t |> findY(f);
|
||||||
};
|
};
|
||||||
|
|
||||||
let pointwiseMap = (fn, t: t): t => {xs: t.xs, ys: t.ys |> E.A.fmap(fn)};
|
let pointwiseMap = (fn, t: t): t => {xs: t.xs, ys: t.ys |> E.A.fmap(fn)};
|
||||||
|
@ -173,9 +201,6 @@ module T = {
|
||||||
|
|
||||||
let accumulateYs = _transverseShape((aCurrent, aLast) => aCurrent +. aLast);
|
let accumulateYs = _transverseShape((aCurrent, aLast) => aCurrent +. aLast);
|
||||||
let subtractYs = _transverseShape((aCurrent, aLast) => aCurrent -. aLast);
|
let subtractYs = _transverseShape((aCurrent, aLast) => aCurrent -. aLast);
|
||||||
|
|
||||||
let findY = CdfLibrary.Distribution.findY;
|
|
||||||
let findX = CdfLibrary.Distribution.findX;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// I'm really not sure this part is actually what we want at this point.
|
// I'm really not sure this part is actually what we want at this point.
|
||||||
|
|
|
@ -62,22 +62,93 @@ let stringToMixedShape =
|
||||||
~string,
|
~string,
|
||||||
~sampleCount=3000,
|
~sampleCount=3000,
|
||||||
~outputXYPoints=3000,
|
~outputXYPoints=3000,
|
||||||
~width=3000,
|
~width=10,
|
||||||
~truncateTo=Some(500),
|
~truncateTo=Some(500),
|
||||||
(),
|
(),
|
||||||
) =>
|
) =>
|
||||||
Internals.toCombinedFormat(string, sampleCount, outputXYPoints, width)
|
Internals.toCombinedFormat(string, sampleCount, outputXYPoints, width)
|
||||||
|> Internals.toMixedShape(~truncateTo);
|
|> Internals.toMixedShape(~truncateTo);
|
||||||
|
|
||||||
let toMixed = (string, sampleCount, returnLength, width) => {
|
module KDE = {
|
||||||
|
let normalSampling = (samples, outputXYPoints, kernelWidth) => {
|
||||||
|
samples
|
||||||
|
|> Internals.samplesToContinuousPdf(_, outputXYPoints, kernelWidth)
|
||||||
|
|> CdfLibrary.JS.jsToDist;
|
||||||
|
};
|
||||||
|
|
||||||
|
let inGroups = (samples, outputXYPoints, kernelWidth, ~cuttoff=0.9, ()) => {
|
||||||
|
let partitionAt =
|
||||||
|
samples
|
||||||
|
|> E.A.length
|
||||||
|
|> float_of_int
|
||||||
|
|> (e => e *. cuttoff)
|
||||||
|
|> int_of_float;
|
||||||
|
let part1XYPoints =
|
||||||
|
outputXYPoints |> float_of_int |> (e => e *. cuttoff) |> int_of_float;
|
||||||
|
let part2XYPoints = outputXYPoints - part1XYPoints |> Js.Math.max_int(30);
|
||||||
|
let part1Data =
|
||||||
|
samples |> Belt.Array.slice(_, ~offset=0, ~len=partitionAt);
|
||||||
|
let part2DataLength = (samples |> E.A.length) - partitionAt;
|
||||||
|
let part2Data =
|
||||||
|
samples
|
||||||
|
|> Belt.Array.slice(
|
||||||
|
_,
|
||||||
|
~offset=(-1) * part2DataLength,
|
||||||
|
~len=part2DataLength,
|
||||||
|
);
|
||||||
|
let part1 =
|
||||||
|
part1Data
|
||||||
|
|> Internals.samplesToContinuousPdf(_, part1XYPoints, kernelWidth)
|
||||||
|
|> CdfLibrary.JS.jsToDist;
|
||||||
|
let part2 =
|
||||||
|
part2Data
|
||||||
|
|> Internals.samplesToContinuousPdf(_, part2XYPoints, 3)
|
||||||
|
|> CdfLibrary.JS.jsToDist;
|
||||||
|
Js.log4(partitionAt, part1XYPoints, part1, part2);
|
||||||
|
let opp = 1.0 -. cuttoff;
|
||||||
|
// let result =
|
||||||
|
// XYShape.T.Combine.combineLinear(
|
||||||
|
// part1,
|
||||||
|
// part2,
|
||||||
|
// (a, b) => {
|
||||||
|
// let aa = a *. cuttoff;
|
||||||
|
// let bb = b *. opp;
|
||||||
|
// aa +. bb;
|
||||||
|
// },
|
||||||
|
// );
|
||||||
|
// Js.log2("HI", result);
|
||||||
|
// result;
|
||||||
|
part1;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
let toMixed =
|
||||||
|
(
|
||||||
|
~string,
|
||||||
|
~sampleCount=3000,
|
||||||
|
~outputXYPoints=3000,
|
||||||
|
~kernelWidth=10,
|
||||||
|
~truncateTo=Some(500),
|
||||||
|
~cuttoff=0.995,
|
||||||
|
(),
|
||||||
|
) => {
|
||||||
|
let truncateTo = None;
|
||||||
|
let start = Js.Date.now();
|
||||||
|
let timeMessage = message => Js.log2(message, Js.Date.now() -. start);
|
||||||
|
timeMessage("Starting");
|
||||||
let samples = Internals.stringToSamples(string, sampleCount);
|
let samples = Internals.stringToSamples(string, sampleCount);
|
||||||
|
timeMessage("Finished sampling");
|
||||||
|
|
||||||
let length = samples |> E.A.length;
|
let length = samples |> E.A.length;
|
||||||
Array.sort(compare, samples);
|
Array.fast_sort(compare, samples);
|
||||||
let items =
|
Js.log2("SORTED?", samples);
|
||||||
E.A.uniq(samples)
|
// let items =
|
||||||
|> E.A.fmap(r => (r, samples |> E.A.filter(n => n == r) |> E.A.length));
|
// E.A.uniq(samples)
|
||||||
let (discretePart, continuousPart) =
|
// |> E.A.fmap(r => (r, samples |> E.A.filter(n => n == r) |> E.A.length));
|
||||||
Belt.Array.partition(items, ((_, count)) => count > 1);
|
// let (discretePart, continuousPart) =
|
||||||
|
// Belt.Array.partition(items, ((_, count)) => count > 1);
|
||||||
|
let discretePart = [||];
|
||||||
|
let continuousPart = samples;
|
||||||
let discrete: DistTypes.xyShape =
|
let discrete: DistTypes.xyShape =
|
||||||
discretePart
|
discretePart
|
||||||
|> E.A.fmap(((x, count)) =>
|
|> E.A.fmap(((x, count)) =>
|
||||||
|
@ -87,11 +158,21 @@ let toMixed = (string, sampleCount, returnLength, width) => {
|
||||||
let pdf: DistTypes.xyShape =
|
let pdf: DistTypes.xyShape =
|
||||||
continuousPart |> E.A.length > 20
|
continuousPart |> E.A.length > 20
|
||||||
? {
|
? {
|
||||||
Internals.samplesToContinuousPdf(samples, returnLength, width)
|
samples |> KDE.inGroups(_, outputXYPoints, kernelWidth, ~cuttoff, ());
|
||||||
|> CdfLibrary.JS.jsToDist;
|
// samples |> KDE.normalSampling(_, outputXYPoints, kernelWidth);
|
||||||
}
|
}
|
||||||
: {xs: [||], ys: [||]};
|
: {xs: [||], ys: [||]};
|
||||||
|
timeMessage("Finished pdf");
|
||||||
let continuous = pdf |> Distributions.Continuous.fromShape;
|
let continuous = pdf |> Distributions.Continuous.fromShape;
|
||||||
let shape = MixedShapeBuilder.buildSimple(~continuous, ~discrete);
|
let shape = MixedShapeBuilder.buildSimple(~continuous, ~discrete);
|
||||||
|
timeMessage("Finished shape");
|
||||||
|
let shape =
|
||||||
|
switch (truncateTo, shape) {
|
||||||
|
| (Some(trunctate), Some(shape)) =>
|
||||||
|
Some(shape |> Distributions.Shape.T.truncate(trunctate))
|
||||||
|
| (None, Some(shape)) => Some(shape)
|
||||||
|
| _ => None
|
||||||
|
};
|
||||||
|
timeMessage("Finished truncation");
|
||||||
shape;
|
shape;
|
||||||
};
|
};
|
|
@ -113,7 +113,7 @@ const samplesToContinuousPdf = (
|
||||||
if (_.isFinite(min)) { _samples = _.filter(_samples, r => r > min) };
|
if (_.isFinite(min)) { _samples = _.filter(_samples, r => r > min) };
|
||||||
if (_.isFinite(max)) { _samples = _.filter(_samples, r => r < max) };
|
if (_.isFinite(max)) { _samples = _.filter(_samples, r => r < max) };
|
||||||
let pdf = pdfast.create(_samples, { size, width });
|
let pdf = pdfast.create(_samples, { size, width });
|
||||||
return {xs: pdf.map(r => r.x), ys: pdf.map(r => r.x)};
|
return {xs: pdf.map(r => r.x), ys: pdf.map(r => r.y)};
|
||||||
};
|
};
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
|
|
Loading…
Reference in New Issue
Block a user