Merge pull request #20 from foretold-app/guesstimator-refactor

Guesstimator refactor
This commit is contained in:
Ozzie Gooen 2020-03-17 18:39:52 +00:00 committed by GitHub
commit 9dcef813a8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 1275 additions and 1219 deletions

View File

@ -0,0 +1,104 @@
open Jest;
open Expect;
/* Registers a Jest test named `str` that asserts `item1` equals `item2`
   (compared with `toEqual`). With `~only=true` the test is registered
   through `Only.test` instead of `test`. */
let makeTest = (~only=false, str, item1, item2) => {
  let assertion = () => expect(item1) |> toEqual(item2);
  if (only) {
    Only.test(str, assertion);
  } else {
    test(str, assertion);
  };
};
describe("DistTypes", () => {
describe("Domain", () => {
/* Registers a test checking that, for a `Complete` domain,
   `yPointToSubYPoint` maps `yPoint` to `expectation`. */
let makeComplete = (yPoint, expectation) => {
  let title = "With input: " ++ Js.Float.toString(yPoint);
  let actual = DistTypes.Domain.yPointToSubYPoint(Complete, yPoint);
  makeTest(title, actual, expectation);
};
/* Registers a test for a single-sided domain. `direction` selects
   `LeftLimited` or `RightLimited`; the limit's `xPoint` is fixed at 3.0 and
   its excluded mass is `excludingProbabilityMass`. The test asserts that
   `yPointToSubYPoint` maps `yPoint` to `expectation`. */
let makeSingle =
    (
      direction: [ | `left | `right],
      excludingProbabilityMass,
      yPoint,
      expectation,
    ) => {
  let title =
    "Excluding: "
    ++ Js.Float.toString(excludingProbabilityMass)
    ++ " and yPoint: "
    ++ Js.Float.toString(yPoint);
  let actual =
    DistTypes.Domain.yPointToSubYPoint(
      switch (direction) {
      | `left => LeftLimited({xPoint: 3.0, excludingProbabilityMass})
      | `right => RightLimited({xPoint: 3.0, excludingProbabilityMass})
      },
      yPoint,
    );
  makeTest(title, actual, expectation);
};
/* Registers a test checking that `yPointToSubYPoint` maps `yPoint` to
   `expectation` for the given (two-sided) `domain`.
   The yPoint is part of the test name so that several cases sharing the same
   domain remain distinguishable in the Jest report. */
let makeDouble = (domain, yPoint, expectation) =>
  makeTest(
    "Excluding: limits and yPoint: " ++ Js.Float.toString(yPoint),
    DistTypes.Domain.yPointToSubYPoint(domain, yPoint),
    expectation,
  );
// Complete domain: the yPoint is returned unchanged.
describe("With Complete Domain", () => {
makeComplete(0.0, Some(0.0));
makeComplete(0.6, Some(0.6));
makeComplete(1.0, Some(1.0));
});
// Left limit: arguments are (direction, excluded mass, yPoint, expected).
// A yPoint below the excluded mass yields None; otherwise the excluded mass
// is subtracted and the remainder is divided by the included mass.
describe("With Left Limit", () => {
makeSingle(`left, 0.5, 1.0, Some(1.0));
makeSingle(`left, 0.5, 0.75, Some(0.5));
makeSingle(`left, 0.8, 0.9, Some(0.5));
makeSingle(`left, 0.5, 0.4, None);
makeSingle(`left, 0.5, 0.5, Some(0.0));
});
// Right limit: a yPoint above (1 - excluded mass) yields None; otherwise the
// yPoint is divided by the included mass.
describe("With Right Limit", () => {
makeSingle(`right, 0.5, 1.0, None);
makeSingle(`right, 0.5, 0.25, Some(0.5));
makeSingle(`right, 0.8, 0.5, None);
makeSingle(`right, 0.2, 0.2, Some(0.25));
makeSingle(`right, 0.5, 0.5, Some(1.0));
makeSingle(`right, 0.5, 0.0, Some(0.0));
// NOTE(review): identical to the (`right, 0.5, 0.5) case two lines above.
makeSingle(`right, 0.5, 0.5, Some(1.0));
});
// Both limits: None outside [left excluded, 1 - right excluded]; inside, the
// left excluded mass is subtracted and the result divided by the included mass.
describe("With Left and Right Limit", () => {
makeDouble(
LeftAndRightLimited(
{excludingProbabilityMass: 0.25, xPoint: 3.0},
{excludingProbabilityMass: 0.25, xPoint: 10.0},
),
0.5,
Some(0.5),
);
makeDouble(
LeftAndRightLimited(
{excludingProbabilityMass: 0.1, xPoint: 3.0},
{excludingProbabilityMass: 0.1, xPoint: 10.0},
),
0.2,
Some(0.125),
);
// Exactly on the left boundary: maps to the start of the sub-range.
makeDouble(
LeftAndRightLimited(
{excludingProbabilityMass: 0.1, xPoint: 3.0},
{excludingProbabilityMass: 0.1, xPoint: 10.0},
),
0.1,
Some(0.0),
);
// Below the left boundary: no sub-point exists.
makeDouble(
LeftAndRightLimited(
{excludingProbabilityMass: 0.1, xPoint: 3.0},
{excludingProbabilityMass: 0.1, xPoint: 10.0},
),
0.05,
None,
);
});
})
});

View File

@ -0,0 +1,21 @@
open Jest;
open Expect;
/* Registers a Jest test named `str` asserting `item1` equals `item2`
   (via `toEqual`); `~only=true` routes the registration through `Only.test`. */
let makeTest = (~only=false, str, item1, item2) => {
  let body = () => expect(item1) |> toEqual(item2);
  switch (only) {
  | true => Only.test(str, body)
  | false => test(str, body)
  };
};
// These fail because of issues with Jest, Babel, and Bucklescript
// describe("XYShapes", () => {
//   describe("logScorePoint", () => {
//     makeTest(
//       "When identical",
//       Some(Guesstimator.stringToMixedShape(~string="5 to 20")),
//       None,
//     )
//   })
// })

107
__tests__/Lodash__test.re Normal file
View File

@ -0,0 +1,107 @@
open Jest;
open Expect;
/* Test helper: registers a Jest case called `str` whose single assertion is
   `expect(item1) |> toEqual(item2)`. `~only=true` uses `Only.test`. */
let makeTest = (~only=false, str, item1, item2) => {
  let check = () => expect(item1) |> toEqual(item2);
  if (only) {
    Only.test(str, check);
  } else {
    test(str, check);
  };
};
/* A mutable Belt map from float keys to float values, with keys ordered by
   `Pervasives.compare`. Used below to count occurrences of a value. */
module FloatFloatMap = {
  module Id =
    Belt.Id.MakeComparable({
      type t = float;
      let cmp: (float, float) => int = Pervasives.compare;
    });

  type t = Belt.MutableMap.t(Id.t, float, Id.identity);

  /* Builds a map from (key, value) pairs. */
  let fromArray = (ar: array((float, float))) =>
    Belt.MutableMap.fromArray(ar, ~id=(module Id));

  /* Returns the map's (key, value) pairs as an array. */
  let toArray = (t: t) => t |> Belt.MutableMap.toArray;

  /* Creates a fresh, empty map. */
  let empty = () => Belt.MutableMap.make(~id=(module Id));

  /* Adds 1.0 to the value stored under `el`, starting from 1.0 when `el`
     is not yet present. Mutates `t` in place. */
  let increment = (el, t: t) =>
    Belt.MutableMap.update(t, el, current =>
      switch (current) {
      | None => Some(1.0)
      | Some(n) => Some(n +. 1.0)
      }
    );

  /* Looks up the value stored under `el`, if any. */
  let get = (el, t: t) => Belt.MutableMap.get(t, el);

  /* Returns a map with `fn` applied to every value. */
  let fmap = (fn, t: t) => Belt.MutableMap.map(t, fn);
};
/* Splits `sortedArray` into a "continuous" part and a "discrete" part.
   An element equal to one of its immediate neighbours is counted in the
   `discrete` map (value => number of occurrences); every other element is
   appended to the `continuous` array, in input order.

   Neighbours are read with `Belt.Array.get`, which returns `None` outside
   the array, so the first and last elements (and a one-element array, which
   previously raised an out-of-bounds exception) need no special casing. */
let split = (sortedArray: array(float)) => {
  let continuous = [||];
  let discrete = FloatFloatMap.empty();
  let neighbourEquals = (index, element) =>
    switch (Belt.Array.get(sortedArray, index)) {
    | Some(neighbour) => neighbour == element
    | None => false
    };
  Belt.Array.forEachWithIndex(sortedArray, (index, element) =>
    if (neighbourEquals(index - 1, element)
        || neighbourEquals(index + 1, element)) {
      FloatFloatMap.increment(element, discrete);
    } else {
      // Js.Array.push returns the new length, which is not needed here.
      Js.Array.push(element, continuous) |> ignore;
    }
  );
  (continuous, discrete);
};
// Tests for the Lodash bindings and for the local `split` helper.
// Every test has a distinct name so failures can be told apart in the report.
describe("Lodash", () => {
  describe("Lodash", () => {
    makeTest("min", Lodash.min([|1, 3, 4|]), 1);
    makeTest("max", Lodash.max([|1, 3, 4|]), 4);
    makeTest("uniq", Lodash.uniq([|1, 3, 4, 4|]), [|1, 3, 4|]);
    makeTest(
      "countBy",
      Lodash.countBy([|1, 3, 4, 4|], r => r),
      Js.Dict.fromArray([|("1", 1), ("3", 1), ("4", 2)|]),
    );
    // No neighbouring duplicates: everything is continuous.
    makeTest(
      "split without duplicates",
      split([|1.432, 1.33455, 2.0|]),
      ([|1.432, 1.33455, 2.0|], FloatFloatMap.empty()),
    );
    // The run of four 2.0s is counted as one discrete value.
    makeTest(
      "split with duplicates",
      split([|1.432, 1.33455, 2.0, 2.0, 2.0, 2.0|])
      |> (((c, disc)) => (c, disc |> FloatFloatMap.toArray)),
      ([|1.432, 1.33455|], [|(2.0, 4.0)|]),
    );
    // Builds the sorted array 1.0..count with every value repeated four times.
    let makeDuplicatedArray = count => {
      let arr = Belt.Array.range(1, count) |> E.A.fmap(float_of_int);
      let sorted = arr |> Belt.SortArray.stableSortBy(_, compare);
      E.A.concatMany([|sorted, sorted, sorted, sorted|])
      |> Belt.SortArray.stableSortBy(_, compare);
    };
    let (_, discrete) = split(makeDuplicatedArray(10));
    let toArr = discrete |> FloatFloatMap.toArray;
    makeTest("splitMedium", toArr |> Belt.Array.length, 10);
    let (_, discrete) = split(makeDuplicatedArray(500));
    let toArr = discrete |> FloatFloatMap.toArray;
    makeTest("splitLarge", toArr |> Belt.Array.length, 500);
  })
});

View File

@ -0,0 +1,53 @@
open Jest;
open Expect;
/* Registers a Jest test `str` that expects `item1` to equal `item2`
   (`toEqual`). Set `~only=true` to register it with `Only.test`. */
let makeTest = (~only=false, str, item1, item2) => {
  let expectation = () => expect(item1) |> toEqual(item2);
  switch (only) {
  | true => Only.test(str, expectation)
  | false => test(str, expectation)
  };
};
// Fixture shapes for the logScorePoint tests below: shape1 is the baseline,
// shape2 has nearby xs/ys, and shape3 has a much wider x range.
let shape1: DistTypes.xyShape = {xs: [|1., 4., 8.|], ys: [|0.2, 0.4, 0.8|]};
let shape2: DistTypes.xyShape = {
xs: [|1., 5., 10.|],
ys: [|0.2, 0.5, 0.8|],
};
let shape3: DistTypes.xyShape = {
xs: [|1., 20., 50.|],
ys: [|0.2, 0.5, 0.8|],
};
describe("XYShapes", () => {
  // logScorePoint compares two shapes; identical shapes score 0.0.
  describe("logScorePoint", () => {
    makeTest(
      "When identical",
      XYShape.logScorePoint(30, shape1, shape1),
      Some(0.0),
    );
    makeTest(
      "When similar",
      XYShape.logScorePoint(30, shape1, shape2),
      Some(1.658971191043856),
    );
    makeTest(
      "When very different",
      XYShape.logScorePoint(30, shape1, shape3),
      Some(210.3721280423322),
    );
  });
  // _transverse2 folds each element with the previously produced result,
  // so with addition it yields running sums.
  describe("transverse", () => {
    makeTest(
      "When accumulating a running sum",
      XYShape.T._transverse2(
        (aCurrent, aLast) => aCurrent +. aLast,
        [|1.0, 2.0, 3.0, 4.0|],
      ),
      [|1.0, 3.0, 6.0, 10.0|],
    )
  });
});

View File

@ -29,10 +29,11 @@
"@foretold/cdf": "1.0.15",
"@foretold/components": "0.0.3",
"@foretold/guesstimator": "1.0.10",
"@glennsl/bs-jest": "^0.4.9",
"@glennsl/bs-jest": "^0.5.0",
"antd": "3.17.0",
"autoprefixer": "9.7.4",
"babel-jest": "25.1.0",
"babel-plugin-transform-es2015-modules-commonjs": "^6.26.2",
"binary-search-tree": "0.2.6",
"bs-ant-design-alt": "2.0.0-alpha.33",
"bs-css": "11.0.0",
"bs-moment": "0.4.4",
@ -42,23 +43,24 @@
"d3": "5.15.0",
"gh-pages": "2.2.0",
"jest": "^25.1.0",
"jstat": "1.9.2",
"lenses-ppx": "5.1.0",
"less": "3.10.3",
"lodash": "4.17.15",
"mathjs": "6.6.0",
"moduleserve": "0.9.1",
"moment": "2.24.0",
"parcel-bundler": "1.12.4",
"parcel-plugin-less-js-enabled": "1.0.2",
"pdfast": "^0.2.0",
"postcss-cli": "7.1.0",
"rationale": "0.2.0",
"react": "16.12.0",
"react-dom": "16.12.0",
"react": "^16.8.0",
"react-dom": "^16.8.0",
"react-use": "^13.27.0",
"reason-react": ">=0.7.0",
"reschema": "1.3.0",
"tailwindcss": "1.2.0",
"binary-search-tree": "0.2.6",
"jstat": "1.9.2",
"mathjs": "6.6.0"
"tailwindcss": "1.2.0"
},
"alias": {
"react": "./node_modules/react",

View File

@ -18,6 +18,7 @@ module FormConfig = [%lenses
sampleCount: string,
outputXYPoints: string,
truncateTo: string,
kernelWidth: string,
}
];
@ -25,6 +26,7 @@ type options = {
sampleCount: int,
outputXYPoints: int,
truncateTo: option(int),
kernelWidth: int,
};
module Form = ReForm.Make(FormConfig);
@ -123,6 +125,7 @@ module DemoDist = {
~sampleCount=options.sampleCount,
~outputXYPoints=options.outputXYPoints,
~truncateTo=options.truncateTo,
~kernelWidth=options.kernelWidth,
);
switch (distPlus) {
| Some(distPlus) => <DistPlusPlot distPlus />
@ -148,7 +151,7 @@ let make = () => {
~schema,
~onSubmit=({state}) => {None},
~initialState={
guesstimatorString: "mm(5 to 20, floor(normal(20,2)), [.5, .5])",
guesstimatorString: "mm(40 to 80, floor(50 to 80), [.5,.5])",
domainType: "Complete",
xPoint: "50.0",
xPoint2: "60.0",
@ -157,9 +160,10 @@ let make = () => {
unitType: "UnspecifiedDistribution",
zero: MomentRe.momentNow(),
unit: "days",
sampleCount: "1000",
outputXYPoints: "2000",
truncateTo: "500",
sampleCount: "10000",
outputXYPoints: "500",
truncateTo: "0",
kernelWidth: "5",
},
(),
);
@ -187,6 +191,7 @@ let make = () => {
let outputXYPoints =
reform.state.values.outputXYPoints |> Js.Float.fromString;
let truncateTo = reform.state.values.truncateTo |> Js.Float.fromString;
let kernelWidth = reform.state.values.kernelWidth |> Js.Float.fromString;
let domain =
switch (domainType) {
@ -234,12 +239,14 @@ let make = () => {
&& !Js.Float.isNaN(outputXYPoints)
&& !Js.Float.isNaN(truncateTo)
&& sampleCount > 10.
&& outputXYPoints > 10.
&& truncateTo > 10. =>
&& outputXYPoints > 10. =>
Some({
sampleCount: sampleCount |> int_of_float,
outputXYPoints: outputXYPoints |> int_of_float,
truncateTo: truncateTo |> int_of_float |> E.O.some,
truncateTo:
int_of_float(truncateTo) > 0
? Some(int_of_float(truncateTo)) : None,
kernelWidth: kernelWidth |> int_of_float,
})
| _ => None
};
@ -261,6 +268,7 @@ let make = () => {
reform.state.values.sampleCount,
reform.state.values.outputXYPoints,
reform.state.values.truncateTo,
reform.state.values.kernelWidth,
reloader |> string_of_int,
|],
);
@ -455,6 +463,9 @@ let make = () => {
<Col span=4>
<FieldFloat field=FormConfig.TruncateTo label="Truncate To" />
</Col>
<Col span=4>
<FieldFloat field=FormConfig.KernelWidth label="Kernel Width" />
</Col>
</Row>
<Antd.Button
_type=`primary icon=Antd.IconName.reload onClick=onRealod>

View File

@ -6,6 +6,9 @@ let showAsForm = (distPlus: DistTypes.distPlus) => {
</div>;
};
/* Renders `number` with the ForetoldComponents number shower, passing
   `precision` through (3 unless overridden). */
let showFloat = (~precision=3, number) =>
  <ForetoldComponents.NumberShower number=number precision=precision />;
let table = (distPlus, x) => {
<div>
<table className="table-auto text-sm">
@ -120,6 +123,75 @@ let table = (distPlus, x) => {
</table>
</div>;
};
/* Renders a one-row table of percentile values for `distPlus`.
   Each value is read from the distribution's integral with
   `Integral.yToX` at the given cumulative probability and displayed with
   `showFloat`. Every value cell has a matching header cell; previously the
   last three values (99.9, 99.99, 99.999) had no header. */
let percentiles = distPlus => {
  // Header cell showing a percentile label.
  let headerCell = label =>
    <td className="px-4 py-2"> {label |> ReasonReact.string} </td>;
  // Value cell showing the x at the given cumulative probability.
  let valueCell = probability =>
    <td className="px-4 py-2 border">
      {distPlus
       |> Distributions.DistPlus.T.Integral.yToX(~cache=None, probability)
       |> showFloat}
    </td>;
  <table className="table-auto text-sm">
    <thead>
      <tr>
        {headerCell("1")}
        {headerCell("5")}
        {headerCell("25")}
        {headerCell("50")}
        {headerCell("75")}
        {headerCell("95")}
        {headerCell("99")}
        {headerCell("99.9")}
        {headerCell("99.99")}
        {headerCell("99.999")}
      </tr>
    </thead>
    <tbody>
      <tr>
        {valueCell(0.01)}
        {valueCell(0.05)}
        {valueCell(0.25)}
        {valueCell(0.5)}
        {valueCell(0.75)}
        {valueCell(0.95)}
        {valueCell(0.99)}
        {valueCell(0.999)}
        {valueCell(0.9999)}
        {valueCell(0.99999)}
      </tr>
    </tbody>
  </table>;
};
let adjustBoth = discreteProbabilityMass => {
let yMaxDiscreteDomainFactor = discreteProbabilityMass;
@ -143,13 +215,21 @@ module DistPlusChart = {
|> T.toScaledContinuous
|> E.O.fmap(Distributions.Continuous.getShape);
let range = T.xTotalRange(distPlus);
// We subtract a bit from the range to make sure that it fits. Maybe this should be done in d3 instead.
let minX =
switch (T.minX(distPlus), range) {
| (Some(min), Some(range)) => Some(min -. range *. 0.001)
switch (
distPlus |> Distributions.DistPlus.T.Integral.yToX(~cache=None, 0.01),
range,
) {
| (min, Some(range)) => Some(min -. range *. 0.001)
| _ => None
};
let maxX = T.maxX(distPlus);
let maxX = {
distPlus |> Distributions.DistPlus.T.Integral.yToX(~cache=None, 0.99);
};
let timeScale = distPlus.unit |> DistTypes.DistributionUnit.toJson;
let toDiscreteProbabilityMass =
distPlus |> Distributions.DistPlus.T.toDiscreteProbabilityMass;
@ -176,9 +256,7 @@ module IntegralChart = {
[@react.component]
let make = (~distPlus: DistTypes.distPlus, ~config: chartConfig, ~onHover) => {
open Distributions.DistPlus;
let integral =
Distributions.DistPlus.T.toShape(distPlus)
|> Distributions.Shape.T.Integral.get(~cache=None);
let integral = distPlus.integralCache;
let continuous =
integral
|> Distributions.Continuous.toLinear
@ -287,8 +365,12 @@ let make = (~distPlus: DistTypes.distPlus) => {
|> E.L.toArray
|> ReasonReact.array}
<div className="inline-flex opacity-50 hover:opacity-100">
<button
className=button onClick={_ => dispatch(CHANGE_SHOW_PERCENTILES)}>
{"Percentiles" |> ReasonReact.string}
</button>
<button className=button onClick={_ => dispatch(CHANGE_SHOW_STATS)}>
{"Stats" |> ReasonReact.string}
{"Debug Stats" |> ReasonReact.string}
</button>
<button className=button onClick={_ => dispatch(CHANGE_SHOW_PARAMS)}>
{"Params" |> ReasonReact.string}
@ -299,5 +381,6 @@ let make = (~distPlus: DistTypes.distPlus) => {
</div>
{state.showParams ? showAsForm(distPlus) : ReasonReact.null}
{state.showStats ? table(distPlus, x) : ReasonReact.null}
{state.showPercentiles ? percentiles(distPlus) : ReasonReact.null}
</div>;
};

View File

@ -7,6 +7,7 @@ type chartConfig = {
type state = {
showStats: bool,
showPercentiles: bool,
showParams: bool,
distributions: list(chartConfig),
};
@ -14,6 +15,7 @@ type state = {
type action =
| CHANGE_SHOW_STATS
| CHANGE_SHOW_PARAMS
| CHANGE_SHOW_PERCENTILES
| REMOVE_DIST(int)
| ADD_DIST
| CHANGE_X_LOG(int)
@ -90,11 +92,16 @@ let reducer = (state: state, action: action) =>
}
| CHANGE_SHOW_STATS => {...state, showStats: !state.showStats}
| CHANGE_SHOW_PARAMS => {...state, showParams: !state.showParams}
| CHANGE_SHOW_PERCENTILES => {
...state,
showPercentiles: !state.showPercentiles,
}
};
let init = {
showStats: false,
showParams: false,
showPercentiles: true,
distributions: [
{yLog: false, xLog: false, isCumulative: false, height: 2},
{yLog: false, xLog: false, isCumulative: true, height: 1},

View File

@ -134,12 +134,12 @@ let make =
?xScale
?yScale
?timeScale
discrete={discrete |> E.O.fmap(XYShape.toJs)}
discrete={discrete |> E.O.fmap(XYShape.T.toJs)}
height
marginBottom=50
marginTop=0
onHover
continuous={continuous |> E.O.fmap(XYShape.toJs)}
continuous={continuous |> E.O.fmap(XYShape.T.toJs)}
showDistributionLines
showDistributionYAxis
showVerticalLine

View File

@ -13,14 +13,16 @@ let toDistPlus =
~sampleCount=2000,
~outputXYPoints=1500,
~truncateTo=Some(300),
~kernelWidth=5,
t: distPlusIngredients,
)
: option(distPlus) => {
let shape =
Guesstimator.stringToMixedShape(
Guesstimator.toMixed(
~string=t.guesstimatorString,
~sampleCount,
~outputXYPoints,
~kernelWidth,
~truncateTo,
(),
);

View File

@ -106,6 +106,34 @@ module Domain = {
let normalizeProbabilityMass = (t: domain) => {
1. /. excludedProbabilityMass(t);
};
/* Maps `yPoint`, a cumulative probability over the whole distribution, to
   the corresponding cumulative probability inside the domain `t`.
   Returns `None` when `yPoint` lies in mass excluded by a limit.
   Comparisons are written so that a NaN `yPoint` gives the same result as
   before: `None` for single-sided limits, `Some(nan)` for two-sided ones. */
let yPointToSubYPoint = (t: domain, yPoint) => {
  // Drop the mass excluded on the left, then rescale by the included mass.
  let rescaleFrom = excludedOnLeft =>
    Some((yPoint -. excludedOnLeft) /. includedProbabilityMass(t));
  switch (t) {
  | Complete => Some(yPoint)
  | LeftLimited({excludingProbabilityMass: excluded}) =>
    if (yPoint >= excluded) {
      rescaleFrom(excluded);
    } else {
      None;
    }
  | RightLimited({excludingProbabilityMass: excluded}) =>
    if (yPoint <= 1. -. excluded) {
      Some(yPoint /. includedProbabilityMass(t));
    } else {
      None;
    }
  | LeftAndRightLimited(
      {excludingProbabilityMass: l},
      {excludingProbabilityMass: r},
    ) =>
    if (yPoint < l || yPoint > 1.0 -. r) {
      None;
    } else {
      rescaleFrom(l);
    }
  };
};
};
type mixedPoint = {

View File

@ -19,6 +19,7 @@ module type dist = {
let minX: t => option(float);
let maxX: t => option(float);
let pointwiseFmap: (float => float, t) => t;
let truncate: (int, t) => t;
let xToY: (float, t) => DistTypes.mixedPoint;
let toShape: t => DistTypes.shape;
let toContinuous: t => option(DistTypes.continuousShape);
@ -31,6 +32,7 @@ module type dist = {
let integral: (~cache: option(integral), t) => integral;
let integralEndY: (~cache: option(integral), t) => float;
let integralXtoY: (~cache: option(integral), float, t) => float;
let integralYtoX: (~cache: option(integral), float, t) => float;
};
module Dist = (T: dist) => {
@ -45,6 +47,7 @@ module Dist = (T: dist) => {
};
let pointwiseFmap = T.pointwiseFmap;
let xToY = T.xToY;
let truncate = T.truncate;
let toShape = T.toShape;
let toDiscreteProbabilityMass = T.toDiscreteProbabilityMass;
let toContinuous = T.toContinuous;
@ -58,6 +61,7 @@ module Dist = (T: dist) => {
type t = T.integral;
let get = T.integral;
let xToY = T.integralXtoY;
let yToX = T.integralYtoX;
let sum = T.integralEndY;
};
@ -80,7 +84,7 @@ module Continuous = {
interpolation,
};
let lastY = (t: t) =>
t |> xyShape |> XYShape.unsafeLast |> (((_, y)) => y);
t |> xyShape |> XYShape.T.unsafeLast |> (((_, y)) => y);
let oShapeMap =
(fn, {xyShape, interpolation}: t): option(DistTypes.continuousShape) =>
fn(xyShape) |> E.O.fmap(make(_, interpolation));
@ -103,22 +107,23 @@ module Continuous = {
Dist({
type t = DistTypes.continuousShape;
type integral = DistTypes.continuousShape;
let minX = shapeFn(XYShape.minX);
let maxX = shapeFn(XYShape.maxX);
let toDiscreteProbabilityMass = t => 0.0;
let minX = shapeFn(XYShape.T.minX);
let maxX = shapeFn(XYShape.T.maxX);
let toDiscreteProbabilityMass = _ => 0.0;
let pointwiseFmap = (fn, t: t) =>
t |> xyShape |> XYShape.pointwiseMap(fn) |> fromShape;
t |> xyShape |> XYShape.T.pointwiseMap(fn) |> fromShape;
let truncate = i => shapeMap(XYShape.T.convertToNewLength(i));
let toShape = (t: t): DistTypes.shape => Continuous(t);
let xToY = (f, {interpolation, xyShape}: t) =>
switch (interpolation) {
| `Stepwise =>
xyShape
|> XYShape.XtoY.stepwiseIncremental(f)
|> XYShape.T.XtoY.stepwiseIncremental(f)
|> E.O.default(0.0)
|> DistTypes.MixedPoint.makeContinuous
| `Linear =>
xyShape
|> XYShape.XtoY.linear(f)
|> XYShape.T.XtoY.linear(f)
|> DistTypes.MixedPoint.makeContinuous
};
@ -141,7 +146,9 @@ module Continuous = {
};
let integralEndY = (~cache, t) => t |> integral(~cache) |> lastY;
let integralXtoY = (~cache, f, t) =>
t |> integral(~cache) |> shapeFn(CdfLibrary.Distribution.findY(f));
t |> integral(~cache) |> shapeFn(XYShape.T.findY(f));
let integralYtoX = (~cache, f, t) =>
t |> integral(~cache) |> shapeFn(XYShape.T.findX(f));
let toContinuous = t => Some(t);
let toDiscrete = _ => None;
let toScaledContinuous = t => Some(t);
@ -150,6 +157,14 @@ module Continuous = {
};
module Discrete = {
// Returns the shape's (x, y) pairs stably sorted by ascending y.
// The comparator is a proper three-way comparison; the previous `? 1 : 0`
// form never reported "less than", which violates the comparator contract.
let sortedByY = (t: DistTypes.discreteShape) =>
  t
  |> XYShape.T.zip
  |> E.A.stableSortBy(_, ((_, y1), (_, y2)) => compare(y1, y2));
// Returns the shape's (x, y) pairs stably sorted by ascending x.
// Uses a proper three-way comparison instead of the previous `? 1 : 0`
// comparator, which never reported "less than".
let sortedByX = (t: DistTypes.discreteShape) =>
  t
  |> XYShape.T.zip
  |> E.A.stableSortBy(_, ((x1, _), (x2, _)) => compare(x1, x2));
module T =
Dist({
type t = DistTypes.discreteShape;
@ -157,31 +172,39 @@ module Discrete = {
let integral = (~cache, t) =>
switch (cache) {
| Some(c) => c
| None => Continuous.make(XYShape.accumulateYs(t), `Stepwise)
| None => Continuous.make(XYShape.T.accumulateYs(t), `Stepwise)
};
let integralEndY = (~cache, t) =>
t |> integral(~cache) |> Continuous.lastY;
let minX = XYShape.minX;
let maxX = XYShape.maxX;
let minX = XYShape.T.minX;
let maxX = XYShape.T.maxX;
let toDiscreteProbabilityMass = t => 1.0;
let pointwiseFmap = XYShape.pointwiseMap;
let pointwiseFmap = XYShape.T.pointwiseMap;
let toShape = (t: t): DistTypes.shape => Discrete(t);
let toContinuous = _ => None;
let toDiscrete = t => Some(t);
let toScaledContinuous = _ => None;
let toScaledDiscrete = t => Some(t);
// Keeps at most `i` points of the discrete shape, then restores x order.
// `Zipped.sortByY` sorts ascending by y, so the array is reversed before
// slicing in order to keep the `i` points with the LARGEST y; slicing the
// ascending array kept the smallest ones.
// NOTE(review): assumes truncation is meant to retain the highest-mass
// points — confirm against callers.
let truncate = (i, t: t): DistTypes.discreteShape =>
  t
  |> XYShape.T.zip
  |> XYShape.T.Zipped.sortByY
  |> Belt.Array.reverse
  |> Belt.Array.slice(_, ~offset=0, ~len=i)
  |> XYShape.T.Zipped.sortByX
  |> XYShape.T.fromZippedArray;
let xToY = (f, t) => {
XYShape.XtoY.stepwiseIfAtX(f, t)
XYShape.T.XtoY.stepwiseIfAtX(f, t)
|> E.O.default(0.0)
|> DistTypes.MixedPoint.makeDiscrete;
};
// todo: This should use cache and/or same code as above. FindingY is more complex, should use interpolationType.
let integralXtoY = (~cache, f, t) =>
t
|> integral(~cache)
|> Continuous.getShape
|> CdfLibrary.Distribution.findY(f);
t |> integral(~cache) |> Continuous.getShape |> XYShape.T.findY(f);
let integralYtoX = (~cache, f, t) =>
t |> integral(~cache) |> Continuous.getShape |> XYShape.T.findX(f);
});
};
@ -241,6 +264,32 @@ module Mixed = {
DistTypes.MixedPoint.add(c, d);
};
// Truncates both parts of a mixed shape, splitting the point budget `count`
// between them in proportion to `discreteProbabilityMassFraction`
// (each share is truncated toward zero by `int_of_float`).
// The fraction itself is carried over unchanged.
let truncate =
    (count, {discrete, continuous, discreteProbabilityMassFraction}: t): t => {
  // Number of points granted to a part holding `fraction` of the mass.
  let share = fraction => int_of_float(float_of_int(count) *. fraction);
  {
    discrete:
      Discrete.T.truncate(share(discreteProbabilityMassFraction), discrete),
    continuous:
      Continuous.T.truncate(
        share(1.0 -. discreteProbabilityMassFraction),
        continuous,
      ),
    discreteProbabilityMassFraction,
  };
};
let toScaledContinuous = ({continuous} as t: t) =>
Some(scaleContinuous(t, continuous));
@ -283,7 +332,7 @@ module Mixed = {
let result =
Continuous.make(
XYShape.Combine.combineLinear(
XYShape.T.Combine.combineLinear(
Continuous.getShape(cont), Continuous.getShape(dist), (a, b) =>
a +. b
),
@ -297,10 +346,12 @@ module Mixed = {
integral(~cache, t) |> Continuous.lastY;
};
let integralXtoY = (~cache, f, {discrete, continuous} as t: t) => {
let cont = Continuous.T.Integral.xToY(~cache, f, continuous);
let discrete = Discrete.T.Integral.xToY(~cache, f, discrete);
scaleDiscreteFn(t, discrete) +. scaleContinuousFn(t, cont);
let integralXtoY = (~cache, f, t) => {
t |> integral(~cache) |> Continuous.getShape |> XYShape.T.findY(f);
};
let integralYtoX = (~cache, f, t) => {
t |> integral(~cache) |> Continuous.getShape |> XYShape.T.findX(f);
};
// TODO: This functionality is kinda weird, because it seems to assume the cdf adds to 1.0 elsewhere, which wouldn't happen here.
@ -362,6 +413,16 @@ module Shape = {
),
);
// Truncates whichever variant `t` holds, using that variant's own
// `truncate` with the same `i`.
let truncate = (i, t: t) => {
  let perVariant = (
    Mixed.T.truncate(i),
    Discrete.T.truncate(i),
    Continuous.T.truncate(i),
  );
  fmap(t, perVariant);
};
let toDiscreteProbabilityMass = (t: t) =>
mapToAll(
t,
@ -421,6 +482,16 @@ module Shape = {
),
);
};
// Dispatches the integral's y-to-x lookup to the implementation for the
// variant held by `t`, forwarding `cache` and `f` unchanged.
let integralYtoX = (~cache, f, t) => {
  let perVariant = (
    Mixed.T.Integral.yToX(~cache, f),
    Discrete.T.Integral.yToX(~cache, f),
    Continuous.T.Integral.yToX(~cache, f),
  );
  mapToAll(t, perVariant);
};
let maxX = (t: t) =>
mapToAll(t, (Mixed.T.maxX, Discrete.T.maxX, Continuous.T.maxX));
let pointwiseFmap = (fn, t: t) =>
@ -531,6 +602,8 @@ module DistPlus = {
let integral = (~cache, t: t) =>
updateShape(Continuous(t.integralCache), t);
// Truncates the underlying shape to `i` and stores the result back on `t`.
let truncate = (i, t) => {
  let truncatedShape = Shape.T.truncate(i, toShape(t));
  updateShape(truncatedShape, t);
};
// todo: adjust for limit, maybe?
let pointwiseFmap = (fn, {shape, _} as t: t): t =>
Shape.T.pointwiseFmap(fn, shape) |> updateShape(_, t);
@ -543,6 +616,11 @@ module DistPlus = {
Shape.T.Integral.xToY(~cache=Some(t.integralCache), f, toShape(t))
|> domainIncludedProbabilityMassAdjustment(t);
};
// TODO: This part is broken when there is a limit, if this is supposed to be taken into account.
// Looks up the x for cumulative value `f` on the underlying shape.
// The caller's `cache` is ignored; the integral stored on `t` is used.
let integralYtoX = (~cache as _, f, t: t) =>
  t |> toShape |> Shape.T.Integral.yToX(~cache=Some(t.integralCache), f);
});
};

View File

@ -10,8 +10,11 @@ type assumptions = {
let buildSimple = (~continuous, ~discrete): option(DistTypes.shape) => {
let cLength =
continuous |> Distributions.Continuous.getShape |> XYShape.xs |> E.A.length;
let dLength = discrete |> XYShape.xs |> E.A.length;
continuous
|> Distributions.Continuous.getShape
|> XYShape.T.xs
|> E.A.length;
let dLength = discrete |> XYShape.T.xs |> E.A.length;
switch (cLength, dLength) {
| (0 | 1, 0) => None
| (0 | 1, _) => Some(Discrete(discrete))

View File

@ -1,36 +1,41 @@
open DistTypes;
type t = xyShape;
module T = {
type t = xyShape;
type ts = array(xyShape);
let toJs = (t: t) => {
let toJs = (t: t) => {
{"xs": t.xs, "ys": t.ys};
};
let xs = (t: t) => t.xs;
let minX = (t: t) => t |> xs |> E.A.first;
let maxX = (t: t) => t |> xs |> E.A.last;
let xTotalRange = (t: t) =>
};
let xs = (t: t) => t.xs;
let ys = (t: t) => t.ys;
let minX = (t: t) => t |> xs |> E.A.first;
let maxX = (t: t) => t |> xs |> E.A.last;
let minY = (t: t) => t |> ys |> E.A.first;
let maxY = (t: t) => t |> ys |> E.A.last;
let xTotalRange = (t: t) =>
switch (minX(t), maxX(t)) {
| (Some(min), Some(max)) => Some(max -. min)
| _ => None
};
let first = ({xs, ys}: t) =>
let first = ({xs, ys}: t) =>
switch (xs |> E.A.first, ys |> E.A.first) {
| (Some(x), Some(y)) => Some((x, y))
| _ => None
};
let last = ({xs, ys}: t) =>
let last = ({xs, ys}: t) =>
switch (xs |> E.A.last, ys |> E.A.last) {
| (Some(x), Some(y)) => Some((x, y))
| _ => None
};
let unsafeFirst = (t: t) => first(t) |> E.O.toExn("Unsafe operation");
let unsafeLast = (t: t) => last(t) |> E.O.toExn("Unsafe operation");
let unsafeFirst = (t: t) => first(t) |> E.O.toExn("Unsafe operation");
let unsafeLast = (t: t) => last(t) |> E.O.toExn("Unsafe operation");
let zip = ({xs, ys}: t) => Belt.Array.zip(xs, ys);
let getBy = (t: t, fn) => t |> zip |> Belt.Array.getBy(_, fn);
let zip = ({xs, ys}: t) => Belt.Array.zip(xs, ys);
let getBy = (t: t, fn) => t |> zip |> Belt.Array.getBy(_, fn);
let firstPairAtOrBeforeValue = (xValue, t: t) => {
let firstPairAtOrBeforeValue = (xValue, t: t) => {
let zipped = zip(t);
let firstIndex =
zipped |> Belt.Array.getIndexBy(_, ((x, y)) => x > xValue);
@ -41,9 +46,73 @@ let firstPairAtOrBeforeValue = (xValue, t: t) => {
| Some(n) => Some(n - 1)
};
previousIndex |> Belt.Option.flatMap(_, Belt.Array.get(zipped));
};
};
module XtoY = {
// Shared lookup behind findY and findX: finds `key` in `keys` and returns
// the matching entry of `values`, linearly interpolating between the two
// surrounding points when `key` is not exactly the lower key.
// Outside the key range the last (`overMax`) or first (`underMin`) value is
// returned, or 0.0 when `values` is empty.
// NOTE(review): the binary search presumably requires `keys` to be sorted
// ascending — confirm for the `ys` used by findX.
let _findInterpolated =
    (~keys: array(float), ~values: array(float), key: float): float =>
  switch (E.A.Sorted.binarySearchFirstElementGreaterIndex(keys, key)) {
  | `overMax => values |> E.A.last |> E.O.default(0.0)
  | `underMin => values |> E.A.first |> E.O.default(0.0)
  | `firstHigher(firstHigherIndex) =>
    // Clamp so a first-higher index of 0 does not produce index -1.
    let lowerOrEqualIndex =
      firstHigherIndex - 1 < 0 ? 0 : firstHigherIndex - 1;
    let lowerKey = keys[lowerOrEqualIndex];
    if (lowerKey != key) {
      Functions.interpolate(
        lowerKey,
        keys[firstHigherIndex],
        values[lowerOrEqualIndex],
        values[firstHigherIndex],
        key,
      );
    } else {
      values[lowerOrEqualIndex];
    };
  };

// Returns the y at `x`, interpolating linearly between neighbouring points.
let findY = (x: float, t: t): float =>
  _findInterpolated(~keys=xs(t), ~values=ys(t), x);

// Returns the x at `y`, interpolating linearly between neighbouring points.
let findX = (y: float, t: t): float =>
  _findInterpolated(~keys=ys(t), ~values=xs(t), y);
// Resamples `t` at `newXs`: each new y is `findY` of the new x on `t`.
let convertWithAlternativeXs = (newXs: array(float), t: t): t => {
  xs: newXs,
  ys: newXs |> Belt.Array.map(_, x => findY(x, t)),
};
// Resamples `t` onto `newLength` xs produced by `Functions.range` between
// the smallest and largest x of `t`.
let convertToNewLength = (newLength: int, t: t): DistTypes.xyShape => {
  let currentXs = xs(t);
  let newXs =
    Functions.range(
      Functions.min(currentXs),
      Functions.max(currentXs),
      newLength,
    );
  convertWithAlternativeXs(newXs, t);
};
module XtoY = {
let stepwiseIncremental = (f, t: t) =>
firstPairAtOrBeforeValue(f, t) |> E.O.fmap(((_, y)) => y);
@ -52,15 +121,25 @@ module XtoY = {
};
// TODO: When Roman's PR comes in, fix this bit. This depends on interpolation, obviously.
let linear = (f, t: t) => t |> CdfLibrary.Distribution.findY(f);
};
let linear = (f, t: t) => t |> findY(f);
};
let pointwiseMap = (fn, t: t): t => {xs: t.xs, ys: t.ys |> E.A.fmap(fn)};
let xMap = (fn, t: t): t => {xs: E.A.fmap(fn, t.xs), ys: t.ys};
let fromArray = ((xs, ys)): t => {xs, ys};
let fromArrays = (xs, ys): t => {xs, ys};
let pointwiseMap = (fn, t: t): t => {xs: t.xs, ys: t.ys |> E.A.fmap(fn)};
let xMap = (fn, t: t): t => {xs: E.A.fmap(fn, t.xs), ys: t.ys};
let fromArray = ((xs, ys)): t => {xs, ys};
let fromArrays = (xs, ys): t => {xs, ys};
let fromZippedArray = (is: array((float, float))): t =>
is |> Belt.Array.unzip |> fromArray;
module Combine = {
// Helpers for shapes represented as an array of (x, y) pairs.
module Zipped = {
  type zipped = array((float, float));
  // Stable sort by ascending y. The comparator is a proper three-way
  // comparison; the previous `? 1 : 0` form never returned a negative value.
  let sortByY = (t: zipped) =>
    t |> E.A.stableSortBy(_, ((_, y1), (_, y2)) => compare(y1, y2));
  // Stable sort by ascending x, with the same three-way comparison.
  let sortByX = (t: zipped) =>
    t |> E.A.stableSortBy(_, ((x1, _), (x2, _)) => compare(x1, x2));
};
module Combine = {
let combineLinear = (t1: t, t2: t, fn: (float, float) => float) => {
let allXs = Belt.Array.concat(xs(t1), xs(t2));
allXs |> Array.sort(compare);
@ -101,27 +180,26 @@ module Combine = {
});
fromArrays(allXs, allYs);
};
};
};
// todo: maybe not needed?
// let comparePoint = (a: float, b: float) => a > b ? 1 : (-1);
// todo: maybe not needed?
// let comparePoint = (a: float, b: float) => a > b ? 1 : (-1);
let comparePoints = ((x1: float, y1: float), (x2: float, y2: float)) =>
let comparePoints = ((x1: float, y1: float), (x2: float, y2: float)) =>
switch (x1 == x2, y1 == y2) {
| (false, _) => compare(x1, x2)
| (true, false) => compare(y1, y2)
| (true, true) => (-1)
};
// todo: This is broken :(
let combine = (t1: t, t2: t) => {
let totalLength = E.A.length(t1.xs) + E.A.length(t2.xs);
// todo: This is broken :(
let combine = (t1: t, t2: t) => {
let array = Belt.Array.concat(zip(t1), zip(t2));
Array.sort(comparePoints, array);
array |> Belt.Array.unzip |> fromArray;
};
};
let intersperce = (t1: t, t2: t) => {
let intersperce = (t1: t, t2: t) => {
let items: ref(array((float, float))) = ref([||]);
let t1 = zip(t1);
let t2 = zip(t2);
@ -133,37 +211,72 @@ let intersperce = (t1: t, t2: t) => {
}
});
items^ |> Belt.Array.unzip |> fromArray;
};
};
let yFold = (fn, t: t) => {
let yFold = (fn, t: t) => {
E.A.fold_left(fn, 0., t.ys);
};
};
let ySum = yFold((a, b) => a +. b);
let ySum = yFold((a, b) => a +. b);
let _transverse = fn =>
let _transverseSimple = fn =>
Belt.Array.reduce(_, [||], (items, y) =>
switch (E.A.last(items)) {
| Some(yLast) => Belt.Array.concat(items, [|fn(y, yLast)|])
| None => [|y|]
}
);
// Running fold over `items`: the first output is the first element, and each
// later output is `fn(element, previousOutput)`. Returns a new array of the
// same length (empty input gives an empty array).
let _transverse2 = (fn, items) => {
  let previous = ref(None);
  Belt.Array.map(
    items,
    element => {
      let output =
        switch (previous^) {
        | None => element
        | Some(last) => fn(element, last)
        };
      previous := Some(output);
      output;
    },
  );
};
// Applies `_transverse2(fn)` to the y components of (x, y) pairs, leaving
// the x components as they are.
let _transverseB = (fn, items) => {
  let (xValues, yValues) = Belt.Array.unzip(items);
  yValues |> _transverse2(fn) |> Belt.Array.zip(xValues);
};
let _transverse = fn =>
Belt.Array.reduce(_, [||], (items, (x, y)) =>
switch (E.A.last(items)) {
| Some((_, yLast)) => Belt.Array.concat(items, [|(x, fn(y, yLast))|])
| Some((_, yLast)) =>
Belt.Array.concat(items, [|(x, fn(y, yLast))|])
| None => [|(x, y)|]
}
);
let _transverseShape = (fn, p: t) => {
let _transverseShape2 = (fn, p: t) => {
Belt.Array.zip(p.xs, p.ys)
|> _transverse(fn)
|> _transverseB(fn)
|> Belt.Array.unzip
|> fromArray;
};
};
let filter = (fn, t: t) =>
let _transverseShape = (fn, p: t) => {
fromArray((p.xs, _transverse2(fn, p.ys)));
};
let filter = (fn, t: t) =>
t |> zip |> E.A.filter(fn) |> Belt.Array.unzip |> fromArray;
let accumulateYs = _transverseShape((aCurrent, aLast) => aCurrent +. aLast);
let subtractYs = _transverseShape((aCurrent, aLast) => aCurrent -. aLast);
let findY = CdfLibrary.Distribution.findY;
let findX = CdfLibrary.Distribution.findX;
let accumulateYs = _transverseShape((aCurrent, aLast) => aCurrent +. aLast);
let subtractYs = _transverseShape((aCurrent, aLast) => aCurrent -. aLast);
};
// I'm really not sure this part is actually what we want at this point.
module Range = {
@ -171,7 +284,7 @@ module Range = {
type zippedRange = ((float, float), (float, float));
let floatSum = Belt.Array.reduce(_, 0., (a, b) => a +. b);
let toT = r => r |> Belt.Array.unzip |> fromArray;
let toT = r => r |> Belt.Array.unzip |> T.fromArray;
let nextX = ((_, (nextX, _)): zippedRange) => nextX;
let rangePointAssumingSteps =
@ -197,21 +310,21 @@ module Range = {
let integrateWithTriangles = z => {
let rangeItems = mapYsBasedOnRanges(rangeAreaAssumingTriangles, z);
(
switch (rangeItems, z |> first) {
switch (rangeItems, z |> T.first) {
| (Some(r), Some((firstX, _))) =>
Some(Belt.Array.concat([|(firstX, 0.0)|], r))
| _ => None
}
)
|> E.O.fmap(toT)
|> E.O.fmap(accumulateYs);
|> E.O.fmap(T.accumulateYs);
};
let derivative = mapYsBasedOnRanges(delta_y_over_delta_x);
// TODO: It would be nicer if the diff didn't change the first element, and also if there were a more elegant way of doing this.
let stepsToContinuous = t => {
let diff = xTotalRange(t) |> E.O.fmap(r => r *. 0.00001);
let diff = T.xTotalRange(t) |> E.O.fmap(r => r *. 0.00001);
let items =
switch (diff, E.A.toRanges(Belt.Array.zip(t.xs, t.ys))) {
| (Some(diff), Ok(items)) =>
@ -219,21 +332,57 @@ module Range = {
items
|> Belt.Array.map(_, rangePointAssumingSteps)
|> Belt.Array.unzip
|> fromArray
|> intersperce(t |> xMap(e => e +. diff)),
|> T.fromArray
|> T.intersperce(t |> T.xMap(e => e +. diff)),
)
| _ => Some(t)
};
let bar = items |> E.O.fmap(zip) |> E.O.bind(_, E.A.get(_, 0));
let bar = items |> E.O.fmap(T.zip) |> E.O.bind(_, E.A.get(_, 0));
let items =
switch (items, bar) {
| (Some(items), Some((0.0, _))) => Some(items)
| (Some(items), Some((firstX, _))) =>
let all = E.A.append([|(firstX, 0.0)|], items |> zip);
let foo = all |> Belt.Array.unzip |> fromArray;
let all = E.A.append([|(firstX, 0.0)|], items |> T.zip);
let foo = all |> Belt.Array.unzip |> T.fromArray;
Some(foo);
| _ => None
};
items;
};
};
// Helpers that operate on a collection of shapes (`T.ts`) at once.
module Ts = {
  type t = T.ts;

  // Minimum of the per-shape `T.minX` values; shapes for which `T.minX`
  // yields `None` are dropped by `concatSomes` before taking the minimum.
  let minX = (t: t) =>
    Functions.min(E.A.O.concatSomes(E.A.fmap(T.minX, t)));

  // Maximum of the per-shape `T.maxX` values, with the same `None` handling.
  let maxX = (t: t) =>
    Functions.max(E.A.O.concatSomes(E.A.fmap(T.maxX, t)));

  // TODO/Warning: This will break if the shapes are empty.
  // Builds `newLength` x values via `Functions.range` between the overall
  // minimum and maximum x of the given shapes.
  let equallyDividedXs = (t: t, newLength) => {
    let lowest = minX(t);
    let highest = maxX(t);
    Functions.range(lowest, highest, newLength);
  };
};
// Combines two shapes point by point.
// A shared grid of `sampleCount` x values is built over both shapes with
// `Ts.equallyDividedXs`; at each x both shapes are read with
// `T.XtoY.linear` and the two ys are merged with `fn(yFromT1, yFromT2)`.
let combinePointwise = (fn, sampleCount, t1: xyShape, t2: xyShape) => {
  let combineAt = x => fn(T.XtoY.linear(x, t1), T.XtoY.linear(x, t2));
  let xs = Ts.equallyDividedXs([|t1, t2|], sampleCount);
  T.fromArrays(xs, E.A.fmap(combineAt, xs));
};
// Pointwise log-score shape of a prediction against an answer.
// Each point is `answer * log2(|prediction / answer|)`; points where the
// answer is exactly zero contribute 0 instead of dividing by zero.
// Partially applied: still expects `(sampleCount, t1, t2)`.
let logScoreDist =
  combinePointwise((prediction, answer) =>
    answer == 0.
      ? 0.0
      : answer *. Js.Math.log2(Js.Math.abs_float(prediction /. answer))
  );
// Scalar log score: the integral of `logScoreDist(sampleCount, t1, t2)`.
//
// `Range.integrateWithTriangles` already returns the running (accumulated)
// integral, so the y of its last point is the total area. The previous
// version applied `T.accumulateYs` a second time, which made the last y a sum
// of running totals (and therefore dependent on `sampleCount`) rather than
// the integral itself.
//
// Returns `None` when the integration fails or the integrated shape has no
// last point.
let logScorePoint = (sampleCount, t1, t2) =>
  logScoreDist(sampleCount, t1, t2)
  |> Range.integrateWithTriangles
  |> E.O.bind(_, T.last)
  |> E.O.fmap(((_, y)) => y);

View File

@ -134,13 +134,10 @@ module S = {
module J = {
let toString = Js.Json.decodeString ||> O.default("");
let toMoment = toString ||> MomentRe.moment;
let fromString = Js.Json.string;
let fromNumber = Js.Json.number;
module O = {
let toMoment = O.fmap(toMoment);
let fromString = (str: string) =>
switch (str) {
| "" => None
@ -289,6 +286,18 @@ module A = {
bringErrorUp |> Belt.Result.map(_, forceOpen);
};
};
// Helpers for arrays that are already sorted in ascending order.
module Sorted = {
  // Binary-searches `ar` for `el` and classifies the resulting position:
  //   `overMax        - the position is at or past the end of the array
  //   `underMin       - the position is index 0
  //   `firstHigher(i) - any other position `i`
  // A non-negative search result (exact match) is used as-is; a negative one
  // is decoded as `-result - 1`.
  // NOTE(review): this decoding assumes Belt's "insertion point" encoding for
  // misses - confirm against the Belt.SortArray documentation.
  let binarySearchFirstElementGreaterIndex = (ar: array('a), el: 'a) => {
    let searchResult = Belt.SortArray.binarySearchBy(ar, el, compare);
    let index = searchResult < 0 ? (-1) - searchResult : searchResult;
    // The end-of-array check must come first so an empty array yields `overMax.
    if (index >= length(ar)) {
      `overMax;
    } else if (index == 0) {
      `underMin;
    } else {
      `firstHigher(index);
    };
  };
};
};
module JsArray = {

View File

@ -26,7 +26,15 @@ module Internals = {
discreteGet(r) |> jsToDistDiscrete;
[@bs.module "./GuesstimatorLibrary.js"]
external toCombinedFormat: (string, int, int) => combined = "run";
external toCombinedFormat: (string, int, int, int) => combined = "run";
[@bs.module "./GuesstimatorLibrary.js"]
external stringToSamples: (string, int) => array(float) = "stringToSamples";
[@bs.module "./GuesstimatorLibrary.js"]
external samplesToContinuousPdf:
(array(float), int, int) => CdfLibrary.JS.distJs =
"samplesToContinuousPdf";
// todo: Format to correct mass, also normalize the pdf.
let toMixedShape =
@ -44,8 +52,8 @@ module Internals = {
// let discreteProb =
// d |> Distributions.Discrete.T.Integral.sum(~cache=None);
let foo = MixedShapeBuilder.buildSimple(~continuous, ~discrete);
foo;
let shape = MixedShapeBuilder.buildSimple(~continuous, ~discrete);
shape;
};
};
@ -54,8 +62,165 @@ let stringToMixedShape =
~string,
~sampleCount=3000,
~outputXYPoints=3000,
~width=10,
~truncateTo=Some(500),
(),
) =>
Internals.toCombinedFormat(string, sampleCount, outputXYPoints)
Internals.toCombinedFormat(string, sampleCount, outputXYPoints, width)
|> Internals.toMixedShape(~truncateTo);
// Kernel-density helpers that turn raw samples into a continuous pdf shape
// by delegating to the JS binding `Internals.samplesToContinuousPdf`.
module KDE = {
// Converts `samples` into a pdf with `outputXYPoints` points using
// `kernelWidth`, then converts the JS result with `CdfLibrary.JS.jsToDist`.
let normalSampling = (samples, outputXYPoints, kernelWidth) => {
samples
|> Internals.samplesToContinuousPdf(_, outputXYPoints, kernelWidth)
|> CdfLibrary.JS.jsToDist;
};
// Work in progress: splits `samples` into a leading part (the first
// `cuttoff` fraction of the elements) and a trailing part, and builds a
// separate pdf for each.
// Only the pdf of the leading part (`part1`) is returned; `part2` and `opp`
// are computed but used only by the commented-out combination code below.
// The trailing `()` lets the optional `~cuttoff` argument be omitted.
let inGroups = (samples, outputXYPoints, kernelWidth, ~cuttoff=0.9, ()) => {
// Number of samples that go into the first group.
let partitionAt =
samples
|> E.A.length
|> float_of_int
|> (e => e *. cuttoff)
|> int_of_float;
// Output points are divided between the groups in the same proportion.
let part1XYPoints =
outputXYPoints |> float_of_int |> (e => e *. cuttoff) |> int_of_float;
// The second group always gets at least 30 output points.
let part2XYPoints = outputXYPoints - part1XYPoints |> Js.Math.max_int(30);
let part1Data =
samples |> Belt.Array.slice(_, ~offset=0, ~len=partitionAt);
let part2DataLength = (samples |> E.A.length) - partitionAt;
// NOTE(review): relies on Belt.Array.slice treating a negative offset as
// counting from the end of the array - confirm against the Belt docs.
let part2Data =
samples
|> Belt.Array.slice(
_,
~offset=(-1) * part2DataLength,
~len=part2DataLength,
);
let part1 =
part1Data
|> Internals.samplesToContinuousPdf(_, part1XYPoints, kernelWidth)
|> CdfLibrary.JS.jsToDist;
// NOTE(review): the second group ignores `kernelWidth` and uses a
// hard-coded width of 3 - confirm this is intentional.
let part2 =
part2Data
|> Internals.samplesToContinuousPdf(_, part2XYPoints, 3)
|> CdfLibrary.JS.jsToDist;
// Weight intended for the second group in the disabled combination below.
let opp = 1.0 -. cuttoff;
// let result =
// XYShape.T.Combine.combineLinear(
// part1,
// part2,
// (a, b) => {
// let aa = a *. cuttoff;
// let bb = b *. opp;
// aa +. bb;
// },
// );
// Js.log2("HI", result);
// result;
part1;
};
};
// Mutable map from float keys to float values, used as a counter table.
module FloatFloatMap = {
  // Key ordering for the map: the generic `Pervasives.compare` on floats.
  module Id =
    Belt.Id.MakeComparable({
      type t = float;
      let cmp: (float, float) => int = Pervasives.compare;
    });

  type t = Belt.MutableMap.t(Id.t, float, Id.identity);

  // Creates a fresh, empty map.
  let empty = () => Belt.MutableMap.make(~id=(module Id));

  // Builds a map from an array of (key, value) pairs.
  let fromArray = (ar: array((float, float))) =>
    Belt.MutableMap.fromArray(ar, ~id=(module Id));

  // Returns the map's contents as an array of (key, value) pairs.
  let toArray = (t: t) => Belt.MutableMap.toArray(t);

  // Looks up the value stored for `el`, if any.
  let get = (el, t: t) => Belt.MutableMap.get(t, el);

  // Adds 1.0 to the value stored under `el`, starting at 1.0 when the key is
  // absent. Mutates `t` in place.
  let increment = (el, t: t) =>
    Belt.MutableMap.update(t, el, current =>
      switch (current) {
      | None => Some(1.0)
      | Some(count) => Some(count +. 1.0)
      }
    );

  // Returns a map with `fn` applied to every value.
  let fmap = (fn, t: t) => Belt.MutableMap.map(t, fn);
};
// Splits a sorted array of samples into a continuous part and a discrete part.
//
// An element counts as discrete when at least one of its immediate neighbours
// holds the same value; because the input is sorted, that is equivalent to
// the value occurring more than once. Discrete elements are tallied in a
// `FloatFloatMap` (value -> number of occurrences); all other elements are
// appended, in order, to the `continuous` array.
//
// Neighbour indices are bounds-checked before being read. The previous
// version always looked at `index + 1` for index 0, which read past the end
// of a one-element array.
//
// Returns `(continuous, discrete)`.
let split = (sortedArray: array(float)) => {
  let continuous = [||];
  let discrete = FloatFloatMap.empty();
  // Loop-invariant, so computed once rather than on every iteration.
  let maxIndex = (sortedArray |> Array.length) - 1;
  Belt.Array.forEachWithIndex(sortedArray, (index, element) => {
    let hasSimilarElement =
      [|index - 1, index + 1|]
      |> Belt.Array.keep(_, i => i >= 0 && i <= maxIndex)
      |> Belt.Array.some(_, i => sortedArray[i] == element);
    if (hasSimilarElement) {
      FloatFloatMap.increment(element, discrete);
    } else {
      // Js.Array.push returns the new length, which is not needed here.
      ignore(Js.Array.push(element, continuous));
    };
  });
  (continuous, discrete);
};
// Samples the expression in `~string` and builds a mixed (continuous +
// discrete) shape from the samples.
//
// Steps: draw `sampleCount` samples, sort them in place, split them into
// repeated values (discrete) and the rest (continuous) with `split`, turn the
// discrete counts into probabilities by dividing by the total sample count,
// build a pdf via `KDE.normalSampling`, combine both parts with
// `MixedShapeBuilder.buildSimple`, and optionally truncate the result.
//
// Elapsed-time messages are logged with `Js.log2` after each major step.
// NOTE(review): `~cuttoff` is only referenced by the commented-out
// `KDE.inGroups` call below - it currently has no effect.
// The trailing `()` lets the optional labelled arguments be omitted.
let toMixed =
(
~string,
~sampleCount=3000,
~outputXYPoints=3000,
~kernelWidth=10,
~truncateTo=Some(500),
~cuttoff=0.995,
(),
) => {
// let truncateTo = None;
let start = Js.Date.now();
// Logs `message` together with the time elapsed since `start`.
let timeMessage = message => Js.log2(message, Js.Date.now() -. start);
timeMessage("Starting");
let samples = Internals.stringToSamples(string, sampleCount);
timeMessage("Finished sampling");
let length = samples |> E.A.length;
// Sorts `samples` in place; `split` relies on equal values being adjacent.
Array.fast_sort(compare, samples);
let (continuousPart, disc) = split(samples);
let lengthFloat = float_of_int(length);
// Occurrence counts become probabilities relative to all samples.
let discrete: DistTypes.xyShape =
disc
|> FloatFloatMap.fmap(r => r /. lengthFloat)
|> FloatFloatMap.toArray
|> XYShape.T.fromZippedArray;
// A pdf is only built when more than 20 non-repeated samples exist;
// otherwise the continuous part is an empty shape.
// NOTE(review): the size check uses `continuousPart`, but the KDE is fed
// the full `samples` array (including the repeated values counted as
// discrete) - confirm whether `continuousPart` was intended here.
let pdf: DistTypes.xyShape =
continuousPart |> E.A.length > 20
? {
// samples |> KDE.inGroups(_, outputXYPoints, kernelWidth, ~cuttoff, ());
samples |> KDE.normalSampling(_, outputXYPoints, kernelWidth);
}
: {xs: [||], ys: [||]};
timeMessage("Finished pdf");
let continuous = pdf |> Distributions.Continuous.fromShape;
let shape = MixedShapeBuilder.buildSimple(~continuous, ~discrete);
timeMessage("Finished shape");
// Truncate only when a limit is given and a shape was actually built.
let shape =
switch (truncateTo, shape) {
| (Some(trunctate), Some(shape)) =>
Some(shape |> Distributions.Shape.T.truncate(trunctate))
| (None, Some(shape)) => Some(shape)
| _ => None
};
shape;
};

View File

@ -1,38 +1,9 @@
import { Guesstimator } from '@foretold/guesstimator/src';
import { Samples } from '@foretold/cdf/lib/samples';
import _ from 'lodash';
/**
*
* @param {number} minValue
* @param {number} maxValue
* @returns {string}
*/
const minMaxRatio = (minValue, maxValue) => {
if (minValue === 0 || maxValue === 0) {
return 'SMALL';
}
const ratio = maxValue / minValue;
if (ratio < 10000) {
return 'SMALL';
} else if (ratio < 1000000) {
return 'MEDIUM';
} else {
return 'LARGE';
}
};
/**
* @param samples
* @return {string}
*/
const ratioSize = samples => {
samples.sort();
const minValue = samples.getPercentile(2);
const maxValue = samples.getPercentile(98);
return minMaxRatio(minValue, maxValue);
};
const {
Samples,
} = require("@foretold/cdf/lib/samples");
const _ = require("lodash");
const { Guesstimator } = require('@foretold/guesstimator/src');
const pdfast = require('pdfast');
/**
* @param values
@ -41,7 +12,7 @@ const ratioSize = samples => {
* @param max
* @returns {{discrete: {ys: *, xs: *}, continuous: {ys: [], xs: []}}}
*/
const toPdf = (values, outputResolutionCount, min, max) => {
const toPdf = (values, outputResolutionCount, width, min, max) => {
let duplicateSamples = _(values).groupBy().pickBy(x => x.length > 1).keys().value();
let totalLength = _.size(values);
let frequencies = duplicateSamples.map(s => ({
@ -57,12 +28,13 @@ const toPdf = (values, outputResolutionCount, min, max) => {
let continuous = { ys: [], xs: [] };
if (continuousSamples.length > 20) {
const samples = new Samples(continuousSamples);
// let c = continuousSamples.map( r => (Math.log2(r)) * 1000);
let c = continuousSamples;
const samples = new Samples(c);
const ratioSize$ = ratioSize(samples);
const width = ratioSize$ === 'SMALL' ? 60 : 1;
const pdf = samples.toPdf({ size: outputResolutionCount, width, min, max });
// continuous = {xs: pdf.xs.map(r => Math.pow(2,r/1000)), ys: pdf.ys};
continuous = pdf;
}
@ -82,6 +54,7 @@ const run = (
text,
sampleCount,
outputResolutionCount,
width,
inputs = [],
min = false,
max = false,
@ -107,11 +80,47 @@ const run = (
} else if (values.length === 1) {
update = blankResponse;
} else {
update = toPdf(values, outputResolutionCount, min, max);
update = toPdf(values, outputResolutionCount, width, min, max);
}
return update;
};
/**
 * Parses a Guesstimator expression and draws samples from it.
 *
 * @param {string} text - expression text; "=" is prepended before parsing
 * @param {number} sampleCount - number of samples requested
 * @param {Array} inputs - forwarded unchanged to `Guesstimator#sample`
 * @returns {Array} the sampled values, or an empty array if sampling
 *   reported any errors
 */
const stringToSamples = (
  text,
  sampleCount,
  inputs = [],
) => {
  // Only the parsed input is needed; the parse error slots are not inspected.
  const [, { parsedInput }] = Guesstimator.parse({ text: "=" + text });
  const sampler = new Guesstimator({ parsedInput });
  const outcome = sampler.sample(sampleCount, inputs);
  return outcome.errors.length > 0 ? [] : outcome.values;
};
/**
 * Builds a continuous pdf from raw samples using pdfast.
 *
 * Non-finite samples are always discarded. When `min` / `max` are finite
 * numbers, only samples strictly above `min` / strictly below `max` are kept;
 * the default `false` disables the respective bound.
 *
 * @param {Array} samples - raw sample values
 * @param {number} size - forwarded to `pdfast.create` as `size`
 * @param {number} width - forwarded to `pdfast.create` as `width`
 * @param {number|boolean} min - exclusive lower bound, or false for none
 * @param {number|boolean} max - exclusive upper bound, or false for none
 * @returns {{xs: Array, ys: Array}} x and y coordinates of the pdf points
 */
const samplesToContinuousPdf = (
  samples,
  size,
  width,
  min = false,
  max = false,
) => {
  let kept = _.filter(samples, _.isFinite);
  if (_.isFinite(min)) {
    kept = _.filter(kept, value => value > min);
  }
  if (_.isFinite(max)) {
    kept = _.filter(kept, value => value < max);
  }
  const points = pdfast.create(kept, { size, width });
  const xs = points.map(({ x }) => x);
  const ys = points.map(({ y }) => y);
  return { xs, ys };
};
module.exports = {
run,
stringToSamples,
samplesToContinuousPdf
};

5
src/utility/Lodash.re Normal file
View File

@ -0,0 +1,5 @@
// Typed bindings to a handful of functions exported by the "lodash" module.

// Binding to lodash `min`.
// NOTE(review): the `'a` return type assumes a non-empty array - confirm
// what lodash returns for an empty array and that callers never pass one.
[@bs.module "lodash"] external min: array('a) => 'a = "min";
// Binding to lodash `max`; the same non-empty-array caveat applies.
[@bs.module "lodash"] external max: array('a) => 'a = "max";
// Binding to lodash `uniq`.
[@bs.module "lodash"] external uniq: array('a) => array('a) = "uniq";
// Binding to lodash `countBy`: takes an array and a key function, and is
// typed as returning a dictionary of integer counts.
[@bs.module "lodash"]
external countBy: (array('a), 'a => 'b) => Js.Dict.t(int) = "countBy";

View File

@ -35,11 +35,12 @@ module Make = (Config: Config) => {
let minY = () => get(ys, 0);
let maxY = () => get(ys, len(ys) - 1);
let findY = (x: float): float => {
let firstHigherIndex = Belt.Array.getIndexBy(xs, e => e >= x);
let firstHigherIndex =
E.A.Sorted.binarySearchFirstElementGreaterIndex(xs, x);
switch (firstHigherIndex) {
| None => maxY()
| Some(0) => minY()
| Some(firstHigherIndex) =>
| `overMax => maxY()
| `underMin => minY()
| `firstHigher(firstHigherIndex) =>
let lowerOrEqualIndex =
firstHigherIndex - 1 < 0 ? 0 : firstHigherIndex - 1;
let needsInterpolation = get(xs, lowerOrEqualIndex) != x;
@ -57,11 +58,12 @@ module Make = (Config: Config) => {
};
};
let findX = (y: float): float => {
let firstHigherIndex = Belt.Array.getIndexBy(ys, e => e >= y);
let firstHigherIndex =
E.A.Sorted.binarySearchFirstElementGreaterIndex(ys, y);
switch (firstHigherIndex) {
| None => maxX()
| Some(0) => minX()
| Some(firstHigherIndex) =>
| `overMax => maxX()
| `underMin => minX()
| `firstHigher(firstHigherIndex) =>
let lowerOrEqualIndex =
firstHigherIndex - 1 < 0 ? 0 : firstHigherIndex - 1;
let needsInterpolation = get(ys, lowerOrEqualIndex) != y;

1114
yarn.lock

File diff suppressed because it is too large Load Diff