Performance gains and added back mixtures

This commit is contained in:
Ozzie Gooen 2020-03-17 18:38:16 +00:00
parent fff473b27c
commit 2c83c68d95
7 changed files with 207 additions and 56 deletions

107
__tests__/Lodash__test.re Normal file
View File

@ -0,0 +1,107 @@
open Jest;
open Expect;
// Registers a Jest test named `str` asserting that `item1` equals `item2`.
// Pass `~only=true` to register it through `Only.test` instead of `test`.
let makeTest = (~only=false, str, item1, item2) => {
  // The assertion is built lazily, inside the callback handed to Jest.
  let assertion = () => expect(item1) |> toEqual(item2);
  if (only) {
    Only.test(str, assertion);
  } else {
    test(str, assertion);
  };
};
// A Belt.MutableMap from float keys to float values, with keys ordered by
// `Pervasives.compare`.
module FloatFloatMap = {
  module Id =
    Belt.Id.MakeComparable({
      type t = float;
      let cmp: (float, float) => int = Pervasives.compare;
    });

  type t = Belt.MutableMap.t(Id.t, float, Id.identity);

  // Builds a map from an array of (key, value) pairs.
  let fromArray = (ar: array((float, float))) =>
    ar |> Belt.MutableMap.fromArray(_, ~id=(module Id));

  // Returns the map's (key, value) pairs as an array.
  let toArray = (t: t) => t |> Belt.MutableMap.toArray;

  // Creates a fresh, empty map.
  let empty = () => Belt.MutableMap.make(~id=(module Id));

  // Adds 1.0 to the value stored under `el`, storing 1.0 if `el` is absent.
  // Mutates `t` in place.
  let increment = (el, t: t) => {
    let bump = current =>
      switch (current) {
      | None => Some(1.0)
      | Some(n) => Some(n +. 1.0)
      };
    Belt.MutableMap.update(t, el, bump);
  };

  // Looks up `el`, returning None when it is not present.
  let get = (el, t: t) => t |> Belt.MutableMap.get(_, el);

  // Applies `fn` to every value via Belt.MutableMap.map and returns the result.
  let fmap = (fn, t: t) => t |> Belt.MutableMap.map(_, fn);
};
// Partitions `sortedArray` into its "continuous" and "discrete" parts.
//
// An element counts as discrete when at least one adjacent element (index - 1
// or index + 1) is equal to it; equal values are therefore only detected when
// they sit next to each other, which is why the input should be sorted.
//
// Returns `(continuous, discrete)`:
// - `continuous`: the elements with no equal neighbour, in input order;
// - `discrete`: a FloatFloatMap from each repeated value to its occurrence count.
//
// Empty and single-element arrays are handled: a lone element has no
// neighbours and is reported as continuous.
let split = (sortedArray: array(float)) => {
  let continuous = [||];
  let discrete = FloatFloatMap.empty();
  // Belt.Array.get yields None for out-of-range indices (including -1), so the
  // first and last elements need no special casing and a one-element array no
  // longer triggers an out-of-bounds read.
  let equalsNeighbour = (neighbourIndex, element) =>
    switch (Belt.Array.get(sortedArray, neighbourIndex)) {
    | Some(neighbour) => neighbour == element
    | None => false
    };
  Belt.Array.forEachWithIndex(sortedArray, (index, element) =>
    if (equalsNeighbour(index - 1, element)
        || equalsNeighbour(index + 1, element)) {
      FloatFloatMap.increment(element, discrete);
    } else {
      Js.Array.push(element, continuous) |> ignore;
    }
  );
  (continuous, discrete);
};
// Tests for the Lodash bindings and for the local `split` helper.
// Every test has a unique title so a failure can be traced to a single case.
describe("Lodash", () => {
  describe("Lodash", () => {
    makeTest("min", Lodash.min([|1, 3, 4|]), 1);
    makeTest("max", Lodash.max([|1, 3, 4|]), 4);
    makeTest("uniq", Lodash.uniq([|1, 3, 4, 4|]), [|1, 3, 4|]);
    makeTest(
      "countBy",
      Lodash.countBy([|1, 3, 4, 4|], r => r),
      Js.Dict.fromArray([|("1", 1), ("3", 1), ("4", 2)|]),
    );
    // No adjacent duplicates: everything stays in the continuous part.
    makeTest(
      "split without duplicates",
      split([|1.432, 1.33455, 2.0|]),
      ([|1.432, 1.33455, 2.0|], FloatFloatMap.empty()),
    );
    // Four adjacent 2.0s are counted in the discrete part.
    makeTest(
      "split with duplicates",
      split([|1.432, 1.33455, 2.0, 2.0, 2.0, 2.0|])
      |> (((c, disc)) => (c, disc |> FloatFloatMap.toArray)),
      ([|1.432, 1.33455|], [|(2.0, 4.0)|]),
    );
    // Builds a sorted array holding each of 1.0..count four times.
    let makeDuplicatedArray = count => {
      let arr = Belt.Array.range(1, count) |> E.A.fmap(float_of_int);
      let sorted = arr |> Belt.SortArray.stableSortBy(_, compare);
      E.A.concatMany([|sorted, sorted, sorted, sorted|])
      |> Belt.SortArray.stableSortBy(_, compare);
    };
    let (_, discrete) = split(makeDuplicatedArray(10));
    let toArr = discrete |> FloatFloatMap.toArray;
    makeTest("splitMedium", toArr |> Belt.Array.length, 10);
    // The continuous part is irrelevant here, so it is not bound.
    let (_, discrete) = split(makeDuplicatedArray(500));
    let toArr = discrete |> FloatFloatMap.toArray;
    makeTest("splitLarge", toArr |> Belt.Array.length, 500);
  })
});

View File

@ -151,7 +151,7 @@ let make = () => {
~schema,
~onSubmit=({state}) => {None},
~initialState={
guesstimatorString: "40 to 50",
guesstimatorString: "mm(40 to 80, floor(50 to 80), [.5,.5])",
domainType: "Complete",
xPoint: "50.0",
xPoint2: "60.0",
@ -162,7 +162,7 @@ let make = () => {
unit: "days",
sampleCount: "10000",
outputXYPoints: "500",
truncateTo: "100",
truncateTo: "0",
kernelWidth: "5",
},
(),
@ -239,12 +239,13 @@ let make = () => {
&& !Js.Float.isNaN(outputXYPoints)
&& !Js.Float.isNaN(truncateTo)
&& sampleCount > 10.
&& outputXYPoints > 10.
&& truncateTo > 10. =>
&& outputXYPoints > 10. =>
Some({
sampleCount: sampleCount |> int_of_float,
outputXYPoints: outputXYPoints |> int_of_float,
truncateTo: truncateTo |> int_of_float |> E.O.some,
truncateTo:
int_of_float(truncateTo) > 0
? Some(int_of_float(truncateTo)) : None,
kernelWidth: kernelWidth |> int_of_float,
})
| _ => None

View File

@ -215,13 +215,21 @@ module DistPlusChart = {
|> T.toScaledContinuous
|> E.O.fmap(Distributions.Continuous.getShape);
let range = T.xTotalRange(distPlus);
// We subtract a bit from the range to make sure that it fits. Maybe this should be done in d3 instead.
let minX =
switch (T.minX(distPlus), range) {
| (Some(min), Some(range)) => Some(min -. range *. 0.001)
switch (
distPlus |> Distributions.DistPlus.T.Integral.yToX(~cache=None, 0.01),
range,
) {
| (min, Some(range)) => Some(min -. range *. 0.001)
| _ => None
};
let maxX = T.maxX(distPlus);
let maxX = {
distPlus |> Distributions.DistPlus.T.Integral.yToX(~cache=None, 0.99);
};
let timeScale = distPlus.unit |> DistTypes.DistributionUnit.toJson;
let toDiscreteProbabilityMass =
distPlus |> Distributions.DistPlus.T.toDiscreteProbabilityMass;

View File

@ -347,11 +347,11 @@ module Mixed = {
};
let integralXtoY = (~cache, f, t) => {
t |> integral(~cache) |> Continuous.getShape |> XYShape.T.findX(f);
t |> integral(~cache) |> Continuous.getShape |> XYShape.T.findY(f);
};
let integralYtoX = (~cache, f, t) => {
t |> integral(~cache) |> Continuous.getShape |> XYShape.T.findY(f);
t |> integral(~cache) |> Continuous.getShape |> XYShape.T.findX(f);
};
// TODO: This functionality is kinda weird, because it seems to assume the cdf adds to 1.0 elsewhere, which wouldn't happen here.

View File

@ -49,7 +49,6 @@ module T = {
};
let findY = (x: float, t: t): float => {
// todo: change getIndexBy to realize it's sorted
let firstHigherIndex =
E.A.Sorted.binarySearchFirstElementGreaterIndex(xs(t), x);
let n =
@ -75,37 +74,14 @@ module T = {
n;
};
// Returns the y value at `x` using a linear scan over the xs:
// - no x is >= `x`: falls back to maxY (or 0.0 when unavailable);
// - the very first x is >= `x`: falls back to minY (or 0.0 when unavailable);
// - otherwise: interpolates between the surrounding points, unless the lower
//   point's x equals `x`, in which case its y is returned directly.
let findYA = (x: float, t: t): float => {
  let firstAtOrAbove = Belt.Array.getIndexBy(xs(t), e => e >= x);
  switch (firstAtOrAbove) {
  | None => maxY(t) |> E.O.default(0.0)
  | Some(0) => minY(t) |> E.O.default(0.0)
  | Some(upper) =>
    // Clamp kept from the original, although `upper` is at least 1 here.
    let lower = upper - 1 < 0 ? 0 : upper - 1;
    if (xs(t)[lower] != x) {
      Functions.interpolate(
        xs(t)[lower],
        xs(t)[upper],
        ys(t)[lower],
        ys(t)[upper],
        x,
      );
    } else {
      ys(t)[lower];
    };
  };
};
let findX = (y: float, t: t): float => {
let firstHigherIndex = Belt.Array.getIndexBy(ys(t), e => e >= y);
let f: float =
let firstHigherIndex =
E.A.Sorted.binarySearchFirstElementGreaterIndex(ys(t), y);
let foundX =
switch (firstHigherIndex) {
| None => maxX(t) |> E.O.default(0.0)
| Some(0) => minX(t) |> E.O.default(0.0)
| Some(firstHigherIndex) =>
| `overMax => maxX(t) |> E.O.default(0.0)
| `underMin => minX(t) |> E.O.default(0.0)
| `firstHigher(firstHigherIndex) =>
let lowerOrEqualIndex =
firstHigherIndex - 1 < 0 ? 0 : firstHigherIndex - 1;
let needsInterpolation = ys(t)[lowerOrEqualIndex] != y;
@ -113,15 +89,15 @@ module T = {
Functions.interpolate(
ys(t)[lowerOrEqualIndex],
ys(t)[firstHigherIndex],
ys(t)[lowerOrEqualIndex],
ys(t)[firstHigherIndex],
xs(t)[lowerOrEqualIndex],
xs(t)[firstHigherIndex],
y,
);
} else {
xs(t)[lowerOrEqualIndex];
};
};
f;
foundX;
};
let convertWithAlternativeXs = (newXs: array(float), t: t): t => {
@ -284,13 +260,17 @@ module T = {
}
);
let _transverseShape = (fn, p: t) => {
let _transverseShape2 = (fn, p: t) => {
Belt.Array.zip(p.xs, p.ys)
|> _transverseB(fn)
|> Belt.Array.unzip
|> fromArray;
};
let _transverseShape = (fn, p: t) => {
fromArray((p.xs, _transverse2(fn, p.ys)));
};
// Rebuilds the shape from the zipped pairs that pass through E.A.filter(fn)
// (presumably those for which `fn` returns true — confirm against E.A).
let filter = (fn, t: t) => {
  let keptPairs = zip(t) |> E.A.filter(fn);
  keptPairs |> Belt.Array.unzip |> fromArray;
};

View File

@ -121,6 +121,63 @@ module KDE = {
};
};
// A Belt.MutableMap from float keys to float values, with keys ordered by
// `Pervasives.compare`.
module FloatFloatMap = {
  module Id =
    Belt.Id.MakeComparable({
      type t = float;
      let cmp: (float, float) => int = Pervasives.compare;
    });

  type t = Belt.MutableMap.t(Id.t, float, Id.identity);

  // Builds a map from an array of (key, value) pairs.
  let fromArray = (ar: array((float, float))) =>
    ar |> Belt.MutableMap.fromArray(_, ~id=(module Id));

  // Returns the map's (key, value) pairs as an array.
  let toArray = (t: t) => t |> Belt.MutableMap.toArray;

  // Creates a fresh, empty map.
  let empty = () => Belt.MutableMap.make(~id=(module Id));

  // Adds 1.0 to the value stored under `el`, storing 1.0 if `el` is absent.
  // Mutates `t` in place.
  let increment = (el, t: t) => {
    let bump = current =>
      switch (current) {
      | None => Some(1.0)
      | Some(n) => Some(n +. 1.0)
      };
    Belt.MutableMap.update(t, el, bump);
  };

  // Looks up `el`, returning None when it is not present.
  let get = (el, t: t) => t |> Belt.MutableMap.get(_, el);

  // Applies `fn` to every value via Belt.MutableMap.map and returns the result.
  let fmap = (fn, t: t) => t |> Belt.MutableMap.map(_, fn);
};
// Partitions `sortedArray` into its "continuous" and "discrete" parts.
//
// An element counts as discrete when at least one adjacent element (index - 1
// or index + 1) is equal to it; equal values are therefore only detected when
// they sit next to each other, which is why the input should be sorted.
//
// Returns `(continuous, discrete)`:
// - `continuous`: the elements with no equal neighbour, in input order;
// - `discrete`: a FloatFloatMap from each repeated value to its occurrence count.
//
// Empty and single-element arrays are handled: a lone element has no
// neighbours and is reported as continuous.
let split = (sortedArray: array(float)) => {
  let continuous = [||];
  let discrete = FloatFloatMap.empty();
  // Belt.Array.get yields None for out-of-range indices (including -1), so the
  // first and last elements need no special casing and a one-element array no
  // longer triggers an out-of-bounds read.
  let equalsNeighbour = (neighbourIndex, element) =>
    switch (Belt.Array.get(sortedArray, neighbourIndex)) {
    | Some(neighbour) => neighbour == element
    | None => false
    };
  Belt.Array.forEachWithIndex(sortedArray, (index, element) =>
    if (equalsNeighbour(index - 1, element)
        || equalsNeighbour(index + 1, element)) {
      FloatFloatMap.increment(element, discrete);
    } else {
      Js.Array.push(element, continuous) |> ignore;
    }
  );
  (continuous, discrete);
};
let toMixed =
(
~string,
@ -140,18 +197,12 @@ let toMixed =
let length = samples |> E.A.length;
Array.fast_sort(compare, samples);
// let items =
// E.A.uniq(samples)
// |> E.A.fmap(r => (r, samples |> E.A.filter(n => n == r) |> E.A.length));
// let (discretePart, continuousPart) =
// Belt.Array.partition(items, ((_, count)) => count > 1);
let discretePart = [||];
let continuousPart = samples;
let (continuousPart, disc) = split(samples);
let lengthFloat = float_of_int(length);
let discrete: DistTypes.xyShape =
discretePart
|> E.A.fmap(((x, count)) =>
(x, float_of_int(count) /. float_of_int(length))
)
disc
|> FloatFloatMap.fmap(r => r /. lengthFloat)
|> FloatFloatMap.toArray
|> XYShape.T.fromZippedArray;
let pdf: DistTypes.xyShape =
continuousPart |> E.A.length > 20
@ -171,6 +222,5 @@ let toMixed =
| (None, Some(shape)) => Some(shape)
| _ => None
};
timeMessage("Finished truncation");
shape;
};

5
src/utility/Lodash.re Normal file
View File

@ -0,0 +1,5 @@
// Bindings to a handful of functions exported by the "lodash" JS module.

// Binds lodash `min`.
// NOTE(review): lodash documents `undefined` as the result for an empty array,
// which the `'a` return type does not model — confirm callers never pass [||].
[@bs.module "lodash"] external min: array('a) => 'a = "min";
// Binds lodash `max`.
// NOTE(review): same empty-array caveat as `min` — confirm against callers.
[@bs.module "lodash"] external max: array('a) => 'a = "max";
// Binds lodash `uniq`, typed as returning an array of the same element type.
[@bs.module "lodash"] external uniq: array('a) => array('a) = "uniq";
// Binds lodash `countBy`: takes an array and a function applied to its
// elements, and is typed as returning a string-keyed dictionary of int counts.
[@bs.module "lodash"]
external countBy: (array('a), 'a => 'b) => Js.Dict.t(int) = "countBy";