diff --git a/src/components/DistBuilder.re b/src/components/DistBuilder.re index 785a9d31..95bd0529 100644 --- a/src/components/DistBuilder.re +++ b/src/components/DistBuilder.re @@ -18,6 +18,7 @@ module FormConfig = [%lenses sampleCount: string, outputXYPoints: string, truncateTo: string, + kernelWidth: string, } ]; @@ -25,6 +26,7 @@ type options = { sampleCount: int, outputXYPoints: int, truncateTo: option(int), + kernelWidth: int, }; module Form = ReForm.Make(FormConfig); @@ -123,6 +125,7 @@ module DemoDist = { ~sampleCount=options.sampleCount, ~outputXYPoints=options.outputXYPoints, ~truncateTo=options.truncateTo, + ~kernelWidth=options.kernelWidth, ); switch (distPlus) { | Some(distPlus) => @@ -148,7 +151,7 @@ let make = () => { ~schema, ~onSubmit=({state}) => {None}, ~initialState={ - guesstimatorString: "50 to 50000", + guesstimatorString: "40 to 50", domainType: "Complete", xPoint: "50.0", xPoint2: "60.0", @@ -157,9 +160,10 @@ let make = () => { unitType: "UnspecifiedDistribution", zero: MomentRe.momentNow(), unit: "days", - sampleCount: "1000", - outputXYPoints: "1000", - truncateTo: "500", + sampleCount: "10000", + outputXYPoints: "500", + truncateTo: "100", + kernelWidth: "5", }, (), ); @@ -187,6 +191,7 @@ let make = () => { let outputXYPoints = reform.state.values.outputXYPoints |> Js.Float.fromString; let truncateTo = reform.state.values.truncateTo |> Js.Float.fromString; + let kernelWidth = reform.state.values.kernelWidth |> Js.Float.fromString; let domain = switch (domainType) { @@ -240,6 +245,7 @@ let make = () => { sampleCount: sampleCount |> int_of_float, outputXYPoints: outputXYPoints |> int_of_float, truncateTo: truncateTo |> int_of_float |> E.O.some, + kernelWidth: kernelWidth |> int_of_float, }) | _ => None }; @@ -261,6 +267,7 @@ let make = () => { reform.state.values.sampleCount, reform.state.values.outputXYPoints, reform.state.values.truncateTo, + reform.state.values.kernelWidth, reloader |> string_of_int, |], ); @@ -455,6 +462,9 @@ let make = () => { + + + diff --git a/src/components/charts/DistPlusPlot.re b/src/components/charts/DistPlusPlot.re index 2dfc5d81..2ad1755c 100644 --- a/src/components/charts/DistPlusPlot.re +++ b/src/components/charts/DistPlusPlot.re @@ -173,6 +173,21 @@ let percentiles = distPlus => { |> Distributions.DistPlus.T.Integral.yToX(~cache=None, 0.99) |> showFloat} + + {distPlus + |> Distributions.DistPlus.T.Integral.yToX(~cache=None, 0.999) + |> showFloat} + + + {distPlus + |> Distributions.DistPlus.T.Integral.yToX(~cache=None, 0.9999) + |> showFloat} + + + {distPlus + |> Distributions.DistPlus.T.Integral.yToX(~cache=None, 0.99999) + |> showFloat} + ; diff --git a/src/distributions/DistPlusIngredients.re b/src/distributions/DistPlusIngredients.re index 4636017a..c2a8b882 100644 --- a/src/distributions/DistPlusIngredients.re +++ b/src/distributions/DistPlusIngredients.re @@ -13,21 +13,16 @@ let toDistPlus = ~sampleCount=2000, ~outputXYPoints=1500, ~truncateTo=Some(300), + ~kernelWidth=5, t: distPlusIngredients, ) : option(distPlus) => { - let test = - Guesstimator.toMixed( - t.guesstimatorString, - sampleCount, - outputXYPoints, - 30, - ); let shape = - Guesstimator.stringToMixedShape( + Guesstimator.toMixed( ~string=t.guesstimatorString, ~sampleCount, ~outputXYPoints, + ~kernelWidth, ~truncateTo, (), ); diff --git a/src/distributions/XYShape.re b/src/distributions/XYShape.re index f299580e..41161efe 100644 --- a/src/distributions/XYShape.re +++ b/src/distributions/XYShape.re @@ -8,8 +8,11 @@ module T = { {"xs": t.xs, "ys": t.ys}; }; let xs = (t: t) => t.xs; + let ys = (t: t) => t.ys; let minX = (t: t) => t |> xs |> E.A.first; let maxX = (t: t) => t |> xs |> E.A.last; + let minY = (t: t) => t |> ys |> E.A.first; + let maxY = (t: t) => t |> ys |> E.A.last; let xTotalRange = (t: t) => switch (minX(t), maxX(t)) { | (Some(min), Some(max)) => Some(max -. min) @@ -45,6 +48,31 @@ module T = { previousIndex |> Belt.Option.flatMap(_, Belt.Array.get(zipped)); }; + let findY = (x: float, t: t): float => { + let firstHigherIndex = Belt.Array.getIndexBy(xs(t), e => e >= x); + switch (firstHigherIndex) { + | None => maxY(t) |> E.O.default(0.0) + | Some(0) => minY(t) |> E.O.default(0.0) + | Some(firstHigherIndex) => + let lowerOrEqualIndex = + firstHigherIndex - 1 < 0 ? 0 : firstHigherIndex - 1; + let needsInterpolation = xs(t)[lowerOrEqualIndex] != x; + if (needsInterpolation) { + Functions.interpolate( + xs(t)[lowerOrEqualIndex], + xs(t)[firstHigherIndex], + ys(t)[lowerOrEqualIndex], + ys(t)[firstHigherIndex], + x, + ); + } else { + ys(t)[lowerOrEqualIndex]; + }; + }; + }; + + let findX = CdfLibrary.Distribution.findX; + module XtoY = { let stepwiseIncremental = (f, t: t) => firstPairAtOrBeforeValue(f, t) |> E.O.fmap(((_, y)) => y); @@ -54,7 +82,7 @@ module T = { }; // TODO: When Roman's PR comes in, fix this bit. This depends on interpolation, obviously. - let linear = (f, t: t) => t |> CdfLibrary.Distribution.findY(f); + let linear = (f, t: t) => t |> findY(f); }; let pointwiseMap = (fn, t: t): t => {xs: t.xs, ys: t.ys |> E.A.fmap(fn)}; @@ -173,9 +201,6 @@ module T = { let accumulateYs = _transverseShape((aCurrent, aLast) => aCurrent +. aLast); let subtractYs = _transverseShape((aCurrent, aLast) => aCurrent -. aLast); - - let findY = CdfLibrary.Distribution.findY; - let findX = CdfLibrary.Distribution.findX; }; // I'm really not sure this part is actually what we want at this point. diff --git a/src/utility/Guesstimator.re b/src/utility/Guesstimator.re index 6923e83f..45103024 100644 --- a/src/utility/Guesstimator.re +++ b/src/utility/Guesstimator.re @@ -62,22 +62,93 @@ let stringToMixedShape = ~string, ~sampleCount=3000, ~outputXYPoints=3000, - ~width=3000, + ~width=10, ~truncateTo=Some(500), (), ) => Internals.toCombinedFormat(string, sampleCount, outputXYPoints, width) |> Internals.toMixedShape(~truncateTo); -let toMixed = (string, sampleCount, returnLength, width) => { +module KDE = { + let normalSampling = (samples, outputXYPoints, kernelWidth) => { + samples + |> Internals.samplesToContinuousPdf(_, outputXYPoints, kernelWidth) + |> CdfLibrary.JS.jsToDist; + }; + + let inGroups = (samples, outputXYPoints, kernelWidth, ~cuttoff=0.9, ()) => { + let partitionAt = + samples + |> E.A.length + |> float_of_int + |> (e => e *. cuttoff) + |> int_of_float; + let part1XYPoints = + outputXYPoints |> float_of_int |> (e => e *. cuttoff) |> int_of_float; + let part2XYPoints = outputXYPoints - part1XYPoints |> Js.Math.max_int(30); + let part1Data = + samples |> Belt.Array.slice(_, ~offset=0, ~len=partitionAt); + let part2DataLength = (samples |> E.A.length) - partitionAt; + let part2Data = + samples + |> Belt.Array.slice( + _, + ~offset=(-1) * part2DataLength, + ~len=part2DataLength, + ); + let part1 = + part1Data + |> Internals.samplesToContinuousPdf(_, part1XYPoints, kernelWidth) + |> CdfLibrary.JS.jsToDist; + let part2 = + part2Data + |> Internals.samplesToContinuousPdf(_, part2XYPoints, 3) + |> CdfLibrary.JS.jsToDist; + Js.log4(partitionAt, part1XYPoints, part1, part2); + let opp = 1.0 -. cuttoff; + // let result = + // XYShape.T.Combine.combineLinear( + // part1, + // part2, + // (a, b) => { + // let aa = a *. cuttoff; + // let bb = b *. opp; + // aa +. bb; + // }, + // ); + // Js.log2("HI", result); + // result; + part1; + }; +}; + +let toMixed = + ( + ~string, + ~sampleCount=3000, + ~outputXYPoints=3000, + ~kernelWidth=10, + ~truncateTo=Some(500), + ~cuttoff=0.995, + (), + ) => { + let truncateTo = None; + let start = Js.Date.now(); + let timeMessage = message => Js.log2(message, Js.Date.now() -. start); + timeMessage("Starting"); let samples = Internals.stringToSamples(string, sampleCount); + timeMessage("Finished sampling"); + let length = samples |> E.A.length; - Array.sort(compare, samples); - let items = - E.A.uniq(samples) - |> E.A.fmap(r => (r, samples |> E.A.filter(n => n == r) |> E.A.length)); - let (discretePart, continuousPart) = - Belt.Array.partition(items, ((_, count)) => count > 1); + Array.fast_sort(compare, samples); + Js.log2("SORTED?", samples); + // let items = + // E.A.uniq(samples) + // |> E.A.fmap(r => (r, samples |> E.A.filter(n => n == r) |> E.A.length)); + // let (discretePart, continuousPart) = + // Belt.Array.partition(items, ((_, count)) => count > 1); + let discretePart = [||]; + let continuousPart = samples; let discrete: DistTypes.xyShape = discretePart |> E.A.fmap(((x, count)) => @@ -87,11 +158,21 @@ let toMixed = (string, sampleCount, returnLength, width) => { let pdf: DistTypes.xyShape = continuousPart |> E.A.length > 20 ? { - Internals.samplesToContinuousPdf(samples, returnLength, width) - |> CdfLibrary.JS.jsToDist; + samples |> KDE.inGroups(_, outputXYPoints, kernelWidth, ~cuttoff, ()); + // samples |> KDE.normalSampling(_, outputXYPoints, kernelWidth); } : {xs: [||], ys: [||]}; + timeMessage("Finished pdf"); let continuous = pdf |> Distributions.Continuous.fromShape; let shape = MixedShapeBuilder.buildSimple(~continuous, ~discrete); + timeMessage("Finished shape"); + let shape = + switch (truncateTo, shape) { + | (Some(trunctate), Some(shape)) => + Some(shape |> Distributions.Shape.T.truncate(trunctate)) + | (None, Some(shape)) => Some(shape) + | _ => None + }; + timeMessage("Finished truncation"); shape; }; \ No newline at end of file diff --git a/src/utility/GuesstimatorLibrary.js b/src/utility/GuesstimatorLibrary.js index be8c2045..c8f754de 100644 --- a/src/utility/GuesstimatorLibrary.js +++ b/src/utility/GuesstimatorLibrary.js @@ -113,7 +113,7 @@ const samplesToContinuousPdf = ( if (_.isFinite(min)) { _samples = _.filter(_samples, r => r > min) }; if (_.isFinite(max)) { _samples = _.filter(_samples, r => r < max) }; let pdf = pdfast.create(_samples, { size, width }); - return {xs: pdf.map(r => r.x), ys: pdf.map(r => r.x)}; + return {xs: pdf.map(r => r.x), ys: pdf.map(r => r.y)}; }; module.exports = { diff --git a/src/utility/lib/CDF.re b/src/utility/lib/CDF.re index cbfefa6c..e0e5f114 100644 --- a/src/utility/lib/CDF.re +++ b/src/utility/lib/CDF.re @@ -104,4 +104,4 @@ module Make = (Config: Config) => { } }); }; -}; +}; \ No newline at end of file