From 2c83c68d9535e529705dd9cf505779c145084a39 Mon Sep 17 00:00:00 2001 From: Ozzie Gooen Date: Tue, 17 Mar 2020 18:38:16 +0000 Subject: [PATCH] Performance gains and added back mixtures --- __tests__/Lodash__test.re | 107 ++++++++++++++++++++++++++ src/components/DistBuilder.re | 11 +-- src/components/charts/DistPlusPlot.re | 14 +++- src/distributions/Distributions.re | 4 +- src/distributions/XYShape.re | 48 ++++-------- src/utility/Guesstimator.re | 74 +++++++++++++++--- src/utility/Lodash.re | 5 ++ 7 files changed, 207 insertions(+), 56 deletions(-) create mode 100644 __tests__/Lodash__test.re create mode 100644 src/utility/Lodash.re diff --git a/__tests__/Lodash__test.re b/__tests__/Lodash__test.re new file mode 100644 index 00000000..661e62eb --- /dev/null +++ b/__tests__/Lodash__test.re @@ -0,0 +1,107 @@ +open Jest; +open Expect; + +let makeTest = (~only=false, str, item1, item2) => + only + ? Only.test(str, () => + expect(item1) |> toEqual(item2) + ) + : test(str, () => + expect(item1) |> toEqual(item2) + ); + +module FloatFloatMap = { + module Id = + Belt.Id.MakeComparable({ + type t = float; + let cmp: (float, float) => int = Pervasives.compare; + }); + + type t = Belt.MutableMap.t(Id.t, float, Id.identity); + + let fromArray = (ar: array((float, float))) => + Belt.MutableMap.fromArray(ar, ~id=(module Id)); + let toArray = (t: t) => Belt.MutableMap.toArray(t); + let empty = () => Belt.MutableMap.make(~id=(module Id)); + let increment = (el, t: t) => + Belt.MutableMap.update( + t, + el, + fun + | Some(n) => Some(n +. 1.0) + | None => Some(1.0), + ); + + let get = (el, t: t) => Belt.MutableMap.get(t, el); + let fmap = (fn, t: t) => Belt.MutableMap.map(t, fn); +}; + +let split = (sortedArray: array(float)) => { + let continuous = [||]; + let discrete = FloatFloatMap.empty(); + Belt.Array.forEachWithIndex( + sortedArray, + (index, element) => { + let maxIndex = (sortedArray |> Array.length) - 1; + let possiblySimilarElements = + ( + switch (index) { + | 0 => [|index + 1|] + | n when n == maxIndex => [|index - 1|] + | _ => [|index - 1, index + 1|] + } + ) + |> Belt.Array.map(_, r => sortedArray[r]); + let hasSimilarElement = + Belt.Array.some(possiblySimilarElements, r => r == element); + hasSimilarElement + ? FloatFloatMap.increment(element, discrete) + : { + let _ = Js.Array.push(element, continuous); + (); + }; + (); + }, + ); + + (continuous, discrete); +}; + +describe("Lodash", () => { + describe("Lodash", () => { + makeTest("min", Lodash.min([|1, 3, 4|]), 1); + makeTest("max", Lodash.max([|1, 3, 4|]), 4); + makeTest("uniq", Lodash.uniq([|1, 3, 4, 4|]), [|1, 3, 4|]); + makeTest( + "countBy", + Lodash.countBy([|1, 3, 4, 4|], r => r), + Js.Dict.fromArray([|("1", 1), ("3", 1), ("4", 2)|]), + ); + makeTest( + "split", + split([|1.432, 1.33455, 2.0|]), + ([|1.432, 1.33455, 2.0|], FloatFloatMap.empty()), + ); + makeTest( + "split", + split([|1.432, 1.33455, 2.0, 2.0, 2.0, 2.0|]) + |> (((c, disc)) => (c, disc |> FloatFloatMap.toArray)), + ([|1.432, 1.33455|], [|(2.0, 4.0)|]), + ); + + let makeDuplicatedArray = count => { + let arr = Belt.Array.range(1, count) |> E.A.fmap(float_of_int); + let sorted = arr |> Belt.SortArray.stableSortBy(_, compare); + E.A.concatMany([|sorted, sorted, sorted, sorted|]) + |> Belt.SortArray.stableSortBy(_, compare); + }; + + let (_, discrete) = split(makeDuplicatedArray(10)); + let toArr = discrete |> FloatFloatMap.toArray; + makeTest("splitMedium", toArr |> Belt.Array.length, 10); + + let (c, discrete) = split(makeDuplicatedArray(500)); + let toArr = discrete |> FloatFloatMap.toArray; + makeTest("splitMedium", toArr |> Belt.Array.length, 500); + }) +}); \ No newline at end of file diff --git a/src/components/DistBuilder.re b/src/components/DistBuilder.re index 95bd0529..abc02f74 100644 --- a/src/components/DistBuilder.re +++ b/src/components/DistBuilder.re @@ -151,7 +151,7 @@ let make = () => { ~schema, ~onSubmit=({state}) => {None}, ~initialState={ - guesstimatorString: "40 to 50", + guesstimatorString: "mm(40 to 80, floor(50 to 80), [.5,.5])", domainType: "Complete", xPoint: "50.0", xPoint2: "60.0", @@ -162,7 +162,7 @@ let make = () => { unit: "days", sampleCount: "10000", outputXYPoints: "500", - truncateTo: "100", + truncateTo: "0", kernelWidth: "5", }, (), @@ -239,12 +239,13 @@ let make = () => { && !Js.Float.isNaN(outputXYPoints) && !Js.Float.isNaN(truncateTo) && sampleCount > 10. - && outputXYPoints > 10. - && truncateTo > 10. => + && outputXYPoints > 10. => Some({ sampleCount: sampleCount |> int_of_float, outputXYPoints: outputXYPoints |> int_of_float, - truncateTo: truncateTo |> int_of_float |> E.O.some, + truncateTo: + int_of_float(truncateTo) > 0 + ? Some(int_of_float(truncateTo)) : None, kernelWidth: kernelWidth |> int_of_float, }) | _ => None diff --git a/src/components/charts/DistPlusPlot.re b/src/components/charts/DistPlusPlot.re index 4b65ff60..bf041b89 100644 --- a/src/components/charts/DistPlusPlot.re +++ b/src/components/charts/DistPlusPlot.re @@ -215,13 +215,21 @@ module DistPlusChart = { |> T.toScaledContinuous |> E.O.fmap(Distributions.Continuous.getShape); let range = T.xTotalRange(distPlus); + + // We subtract a bit from the range to make sure that it fits. Maybe this should be done in d3 instead. let minX = - switch (T.minX(distPlus), range) { - | (Some(min), Some(range)) => Some(min -. range *. 0.001) + switch ( + distPlus |> Distributions.DistPlus.T.Integral.yToX(~cache=None, 0.01), + range, + ) { + | (min, Some(range)) => Some(min -. range *. 0.001) | _ => None }; - let maxX = T.maxX(distPlus); + let maxX = { + distPlus |> Distributions.DistPlus.T.Integral.yToX(~cache=None, 0.99); + }; + let timeScale = distPlus.unit |> DistTypes.DistributionUnit.toJson; let toDiscreteProbabilityMass = distPlus |> Distributions.DistPlus.T.toDiscreteProbabilityMass; diff --git a/src/distributions/Distributions.re b/src/distributions/Distributions.re index 1af451d9..4e5bdc3a 100644 --- a/src/distributions/Distributions.re +++ b/src/distributions/Distributions.re @@ -347,11 +347,11 @@ module Mixed = { }; let integralXtoY = (~cache, f, t) => { - t |> integral(~cache) |> Continuous.getShape |> XYShape.T.findX(f); + t |> integral(~cache) |> Continuous.getShape |> XYShape.T.findY(f); }; let integralYtoX = (~cache, f, t) => { - t |> integral(~cache) |> Continuous.getShape |> XYShape.T.findY(f); + t |> integral(~cache) |> Continuous.getShape |> XYShape.T.findX(f); }; // TODO: This functionality is kinda weird, because it seems to assume the cdf adds to 1.0 elsewhere, which wouldn't happen here. diff --git a/src/distributions/XYShape.re b/src/distributions/XYShape.re index 67226d35..6198617d 100644 --- a/src/distributions/XYShape.re +++ b/src/distributions/XYShape.re @@ -49,7 +49,6 @@ module T = { }; let findY = (x: float, t: t): float => { - // todo: change getIndexBy to realize it's sorted let firstHigherIndex = E.A.Sorted.binarySearchFirstElementGreaterIndex(xs(t), x); let n = @@ -75,37 +74,14 @@ module T = { n; }; - let findYA = (x: float, t: t): float => { - // todo: change getIndexBy to realize it's sorted - let firstHigherIndex = Belt.Array.getIndexBy(xs(t), e => e >= x); - switch (firstHigherIndex) { - | None => maxY(t) |> E.O.default(0.0) - | Some(0) => minY(t) |> E.O.default(0.0) - | Some(firstHigherIndex) => - let lowerOrEqualIndex = - firstHigherIndex - 1 < 0 ? 0 : firstHigherIndex - 1; - let needsInterpolation = xs(t)[lowerOrEqualIndex] != x; - if (needsInterpolation) { - Functions.interpolate( - xs(t)[lowerOrEqualIndex], - xs(t)[firstHigherIndex], - ys(t)[lowerOrEqualIndex], - ys(t)[firstHigherIndex], - x, - ); - } else { - ys(t)[lowerOrEqualIndex]; - }; - }; - }; - let findX = (y: float, t: t): float => { - let firstHigherIndex = Belt.Array.getIndexBy(ys(t), e => e >= y); - let f: float = + let firstHigherIndex = + E.A.Sorted.binarySearchFirstElementGreaterIndex(ys(t), y); + let foundX = switch (firstHigherIndex) { - | None => maxX(t) |> E.O.default(0.0) - | Some(0) => minX(t) |> E.O.default(0.0) - | Some(firstHigherIndex) => + | `overMax => maxX(t) |> E.O.default(0.0) + | `underMin => minX(t) |> E.O.default(0.0) + | `firstHigher(firstHigherIndex) => let lowerOrEqualIndex = firstHigherIndex - 1 < 0 ? 0 : firstHigherIndex - 1; let needsInterpolation = ys(t)[lowerOrEqualIndex] != y; @@ -113,15 +89,15 @@ module T = { Functions.interpolate( ys(t)[lowerOrEqualIndex], ys(t)[firstHigherIndex], - ys(t)[lowerOrEqualIndex], - ys(t)[firstHigherIndex], + xs(t)[lowerOrEqualIndex], + xs(t)[firstHigherIndex], y, ); } else { xs(t)[lowerOrEqualIndex]; }; }; - f; + foundX; }; let convertWithAlternativeXs = (newXs: array(float), t: t): t => { @@ -284,13 +260,17 @@ module T = { } ); - let _transverseShape = (fn, p: t) => { + let _transverseShape2 = (fn, p: t) => { Belt.Array.zip(p.xs, p.ys) |> _transverseB(fn) |> Belt.Array.unzip |> fromArray; }; + let _transverseShape = (fn, p: t) => { + fromArray((p.xs, _transverse2(fn, p.ys))); + }; + let filter = (fn, t: t) => t |> zip |> E.A.filter(fn) |> Belt.Array.unzip |> fromArray; diff --git a/src/utility/Guesstimator.re b/src/utility/Guesstimator.re index a4dce3da..02e45353 100644 --- a/src/utility/Guesstimator.re +++ b/src/utility/Guesstimator.re @@ -121,6 +121,63 @@ module KDE = { }; }; +module FloatFloatMap = { + module Id = + Belt.Id.MakeComparable({ + type t = float; + let cmp: (float, float) => int = Pervasives.compare; + }); + + type t = Belt.MutableMap.t(Id.t, float, Id.identity); + + let fromArray = (ar: array((float, float))) => + Belt.MutableMap.fromArray(ar, ~id=(module Id)); + let toArray = (t: t) => Belt.MutableMap.toArray(t); + let empty = () => Belt.MutableMap.make(~id=(module Id)); + let increment = (el, t: t) => + Belt.MutableMap.update( + t, + el, + fun + | Some(n) => Some(n +. 1.0) + | None => Some(1.0), + ); + + let get = (el, t: t) => Belt.MutableMap.get(t, el); + let fmap = (fn, t: t) => Belt.MutableMap.map(t, fn); +}; + +let split = (sortedArray: array(float)) => { + let continuous = [||]; + let discrete = FloatFloatMap.empty(); + Belt.Array.forEachWithIndex( + sortedArray, + (index, element) => { + let maxIndex = (sortedArray |> Array.length) - 1; + let possiblySimilarElements = + ( + switch (index) { + | 0 => [|index + 1|] + | n when n == maxIndex => [|index - 1|] + | _ => [|index - 1, index + 1|] + } + ) + |> Belt.Array.map(_, r => sortedArray[r]); + let hasSimilarElement = + Belt.Array.some(possiblySimilarElements, r => r == element); + hasSimilarElement + ? FloatFloatMap.increment(element, discrete) + : { + let _ = Js.Array.push(element, continuous); + (); + }; + (); + }, + ); + + (continuous, discrete); +}; + let toMixed = ( ~string, @@ -140,18 +197,12 @@ let toMixed = let length = samples |> E.A.length; Array.fast_sort(compare, samples); - // let items = - // E.A.uniq(samples) - // |> E.A.fmap(r => (r, samples |> E.A.filter(n => n == r) |> E.A.length)); - // let (discretePart, continuousPart) = - // Belt.Array.partition(items, ((_, count)) => count > 1); - let discretePart = [||]; - let continuousPart = samples; + let (continuousPart, disc) = split(samples); + let lengthFloat = float_of_int(length); let discrete: DistTypes.xyShape = - discretePart - |> E.A.fmap(((x, count)) => - (x, float_of_int(count) /. float_of_int(length)) - ) + disc + |> FloatFloatMap.fmap(r => r /. lengthFloat) + |> FloatFloatMap.toArray |> XYShape.T.fromZippedArray; let pdf: DistTypes.xyShape = continuousPart |> E.A.length > 20 @@ -171,6 +222,5 @@ let toMixed = | (None, Some(shape)) => Some(shape) | _ => None }; - timeMessage("Finished truncation"); shape; }; \ No newline at end of file diff --git a/src/utility/Lodash.re b/src/utility/Lodash.re new file mode 100644 index 00000000..57eab32e --- /dev/null +++ b/src/utility/Lodash.re @@ -0,0 +1,5 @@ +[@bs.module "lodash"] external min: array('a) => 'a = "min"; +[@bs.module "lodash"] external max: array('a) => 'a = "max"; +[@bs.module "lodash"] external uniq: array('a) => array('a) = "uniq"; +[@bs.module "lodash"] +external countBy: (array('a), 'a => 'b) => Js.Dict.t(int) = "countBy"; \ No newline at end of file