Minor cleanup
This commit is contained in:
parent
831d1345c5
commit
8d1ab10e06
|
@ -1,21 +0,0 @@
|
|||
open Jest;
|
||||
open Expect;
|
||||
|
||||
/* Registers a Jest test called `str` which asserts that `item1` equals
   `item2` (via `toEqual`). Passing `~only=true` registers it through
   `Only.test` instead of `test`. */
let makeTest = (~only=false, str, item1, item2) => {
  let assertion = () => expect(item1) |> toEqual(item2);
  only ? Only.test(str, assertion) : test(str, assertion);
};
|
||||
|
||||
// These fail because of issues with Jest, Babel, and Bucklescript
|
||||
// describe("XYShapes", () => {
|
||||
// describe("logScorePoint", () => {
|
||||
// makeTest(
|
||||
// "When identical",
|
||||
// Some(Guesstimator.stringToMixedShape(~string="5 to 20")),
|
||||
// None,
|
||||
// )
|
||||
// })
|
|
@ -10,63 +10,6 @@ let makeTest = (~only=false, str, item1, item2) =>
|
|||
expect(item1) |> toEqual(item2)
|
||||
);
|
||||
|
||||
/* Mutable Belt map whose keys and values are both floats. */
module FloatFloatMap = {
  /* Key identity: floats ordered with `Pervasives.compare`. */
  module Id =
    Belt.Id.MakeComparable({
      type t = float;
      let cmp: (float, float) => int = Pervasives.compare;
    });

  type t = Belt.MutableMap.t(Id.t, float, Id.identity);

  /* Constructors and conversions. */
  let empty = () => Belt.MutableMap.make(~id=(module Id));
  let fromArray = (ar: array((float, float))) =>
    Belt.MutableMap.fromArray(ar, ~id=(module Id));
  let toArray = (t: t) => t |> Belt.MutableMap.toArray;

  /* Adds 1.0 to the value stored under `el`; a missing key starts at 1.0. */
  let increment = (el, t: t) => {
    let bump = current =>
      switch (current) {
      | None => Some(1.0)
      | Some(n) => Some(n +. 1.0)
      };
    Belt.MutableMap.update(t, el, bump);
  };

  let get = (el, t: t) => t |> Belt.MutableMap.get(_, el);
  /* Applies `fn` to every stored value through `Belt.MutableMap.map`. */
  let fmap = (fn, t: t) => t |> Belt.MutableMap.map(_, fn);
};
|
||||
|
||||
/* Partitions an ascending-sorted array of floats.

   A value equal to one of its immediate neighbours is tallied in the
   `discrete` map (value => number of occurrences); every other value is
   appended to `continuous`. Returns `(continuous, discrete)`.

   Neighbours are read with `Belt.Array.get`, which returns `None` outside the
   array bounds. The previous version indexed `sortedArray[index + 1]`
   unconditionally for index 0, which raised on a single-element array. */
let split = (sortedArray: array(float)) => {
  let continuous = [||];
  let discrete = FloatFloatMap.empty();
  let equalsAt = (i, element) =>
    switch (Belt.Array.get(sortedArray, i)) {
    | Some(neighbour) => neighbour == element
    | None => false
    };
  Belt.Array.forEachWithIndex(sortedArray, (index, element) =>
    if (equalsAt(index - 1, element) || equalsAt(index + 1, element)) {
      FloatFloatMap.increment(element, discrete);
    } else {
      let _ = Js.Array.push(element, continuous);
      ();
    }
  );

  (continuous, discrete);
};
|
||||
|
||||
describe("Lodash", () => {
|
||||
describe("Lodash", () => {
|
||||
makeTest("min", Lodash.min([|1, 3, 4|]), 1);
|
||||
|
@ -77,31 +20,5 @@ describe("Lodash", () => {
|
|||
Lodash.countBy([|1, 3, 4, 4|], r => r),
|
||||
Js.Dict.fromArray([|("1", 1), ("3", 1), ("4", 2)|]),
|
||||
);
|
||||
makeTest(
|
||||
"split",
|
||||
split([|1.432, 1.33455, 2.0|]),
|
||||
([|1.432, 1.33455, 2.0|], FloatFloatMap.empty()),
|
||||
);
|
||||
makeTest(
|
||||
"split",
|
||||
split([|1.432, 1.33455, 2.0, 2.0, 2.0, 2.0|])
|
||||
|> (((c, disc)) => (c, disc |> FloatFloatMap.toArray)),
|
||||
([|1.432, 1.33455|], [|(2.0, 4.0)|]),
|
||||
);
|
||||
|
||||
let makeDuplicatedArray = count => {
|
||||
let arr = Belt.Array.range(1, count) |> E.A.fmap(float_of_int);
|
||||
let sorted = arr |> Belt.SortArray.stableSortBy(_, compare);
|
||||
E.A.concatMany([|sorted, sorted, sorted, sorted|])
|
||||
|> Belt.SortArray.stableSortBy(_, compare);
|
||||
};
|
||||
|
||||
let (_, discrete) = split(makeDuplicatedArray(10));
|
||||
let toArr = discrete |> FloatFloatMap.toArray;
|
||||
makeTest("splitMedium", toArr |> Belt.Array.length, 10);
|
||||
|
||||
let (c, discrete) = split(makeDuplicatedArray(500));
|
||||
let toArr = discrete |> FloatFloatMap.toArray;
|
||||
makeTest("splitMedium", toArr |> Belt.Array.length, 500);
|
||||
})
|
||||
});
|
51
__tests__/Samples__test.re
Normal file
51
__tests__/Samples__test.re
Normal file
|
@ -0,0 +1,51 @@
|
|||
open Jest;
|
||||
open Expect;
|
||||
|
||||
/* Registers a Jest test called `str` which asserts that `item1` equals
   `item2` (via `toEqual`). Passing `~only=true` registers it through
   `Only.test` instead of `test`. */
let makeTest = (~only=false, str, item1, item2) => {
  let assertion = () => expect(item1) |> toEqual(item2);
  only ? Only.test(str, assertion) : test(str, assertion);
};
|
||||
|
||||
/* Tests for Samples.T.splitContinuousAndDiscrete. The suite used to be
   labelled "Lodash" and reused the names "split" / "splitMedium" for two
   tests each, which made a failing case impossible to tell apart. */
describe("Samples", () => {
  describe("splitContinuousAndDiscrete", () => {
    makeTest(
      "split without duplicates",
      Samples.T.splitContinuousAndDiscrete([|1.432, 1.33455, 2.0|]),
      ([|1.432, 1.33455, 2.0|], E.FloatFloatMap.empty()),
    );
    makeTest(
      "split with duplicates",
      Samples.T.splitContinuousAndDiscrete([|
        1.432,
        1.33455,
        2.0,
        2.0,
        2.0,
        2.0,
      |])
      |> (((c, disc)) => (c, disc |> E.FloatFloatMap.toArray)),
      ([|1.432, 1.33455|], [|(2.0, 4.0)|]),
    );

    /* Sorted array holding each of 1.0 .. count exactly four times. */
    let makeDuplicatedArray = count => {
      let arr = Belt.Array.range(1, count) |> E.A.fmap(float_of_int);
      let sorted = arr |> Belt.SortArray.stableSortBy(_, compare);
      E.A.concatMany([|sorted, sorted, sorted, sorted|])
      |> Belt.SortArray.stableSortBy(_, compare);
    };

    /* Number of distinct values classified as discrete for that input. */
    let discreteCount = count => {
      let (_, discrete) =
        Samples.T.splitContinuousAndDiscrete(makeDuplicatedArray(count));
      discrete |> E.FloatFloatMap.toArray |> Belt.Array.length;
    };

    makeTest("splitMedium", discreteCount(10), 10);
    makeTest("splitLarge", discreteCount(500), 500);
  })
});
|
|
@ -26,7 +26,6 @@
|
|||
"author": "",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@foretold/cdf": "1.0.15",
|
||||
"@foretold/components": "0.0.3",
|
||||
"@foretold/guesstimator": "1.0.10",
|
||||
"@glennsl/bs-jest": "^0.5.0",
|
||||
|
|
112
src/Samples.re
Normal file
112
src/Samples.re
Normal file
|
@ -0,0 +1,112 @@
|
|||
/* Bindings to the JavaScript kernel-density helper. */
module JS = {
  /* Abstract view of the `{xs, ys}` object returned by the JS side. */
  [@bs.deriving abstract]
  type distJs = {
    xs: array(float),
    ys: array(float),
  };

  /* Reads both coordinate arrays out of a JS object into an xyShape record. */
  let jsToDist = (d: distJs): DistTypes.xyShape => {
    xs: d |> xsGet,
    ys: d |> ysGet,
  };

  /* `samplesToContinuousPdf` as exported by ./utility/KdeLibrary.js; the JS
     function names its three leading parameters samples, size and width. */
  [@bs.module "./utility/KdeLibrary.js"]
  external samplesToContinuousPdf: (array(float), int, int) => distJs =
    "samplesToContinuousPdf";
};
|
||||
|
||||
/* Kernel density estimation over raw samples, delegated to JavaScript. */
module KDE = {
  /* Runs the JS estimator over every sample and converts the result into an
     xyShape. `outputXYPoints` and `kernelWidth` are forwarded unchanged. */
  let normalSampling = (samples, outputXYPoints, kernelWidth) =>
    samples
    |> JS.samplesToContinuousPdf(_, outputXYPoints, kernelWidth)
    |> JS.jsToDist;

  /* Estimates the density of the leading `cuttoff` fraction of `samples`,
     using the same fraction of `outputXYPoints`.

     The earlier body also ran a second estimate over the remaining tail
     (`part2`) and computed `opp = 1.0 -. cuttoff`, but threw both away and
     returned the first part only. That dead work (an extra pass through the
     JS estimator on every call) is removed; the returned value is unchanged.
     TODO: blend in the tail estimate once a way of combining the two shapes
     is settled. */
  let inGroups = (samples, outputXYPoints, kernelWidth, ~cuttoff=0.9, ()) => {
    let scaleByCutoff = n => int_of_float(float_of_int(n) *. cuttoff);
    let partitionAt = samples |> E.A.length |> scaleByCutoff;
    let part1XYPoints = scaleByCutoff(outputXYPoints);
    samples
    |> Belt.Array.slice(_, ~offset=0, ~len=partitionAt)
    |> JS.samplesToContinuousPdf(_, part1XYPoints, kernelWidth)
    |> JS.jsToDist;
  };
};
|
||||
|
||||
/* Operations on a raw array of float samples. */
module T = {
  type t = array(float);

  /* Partitions an ascending-sorted sample array.

     A value equal to one of its immediate neighbours is tallied in the
     `discrete` map (value => number of occurrences); every other value is
     appended to `continuous`. Returns `(continuous, discrete)`.

     Neighbours are read with `Belt.Array.get`, which returns `None` outside
     the array bounds. The previous version indexed `sortedArray[index + 1]`
     unconditionally for index 0, which raised on a single-element array. */
  let splitContinuousAndDiscrete = (sortedArray: t) => {
    let continuous = [||];
    let discrete = E.FloatFloatMap.empty();
    let equalsAt = (i, element) =>
      switch (Belt.Array.get(sortedArray, i)) {
      | Some(neighbour) => neighbour == element
      | None => false
      };
    Belt.Array.forEachWithIndex(sortedArray, (index, element) =>
      if (equalsAt(index - 1, element) || equalsAt(index + 1, element)) {
        E.FloatFloatMap.increment(element, discrete);
      } else {
        let _ = Js.Array.push(element, continuous);
        ();
      }
    );
    (continuous, discrete);
  };

  // todo: Figure out some way of doing this without having to integrate so many times.
  /* Builds a mixed shape from raw samples: repeated values become the
     discrete part (each weighted by count / total sample count) and, when
     more than 20 values are unique, a kernel density estimate becomes the
     continuous part.

     The samples are sorted on a private copy, so the caller's array is no
     longer reordered as a side effect. Splitting goes through this module's
     own `splitContinuousAndDiscrete` rather than the duplicate
     `E.A.Floats.split`.

     NOTE(review): the estimate is fed every sample, including the repeated
     ones that were already assigned to the discrete part; this mirrors the
     previous behaviour. Confirm whether `continuousPart` was intended. */
  let toShape = (~samples: t, ~outputXYPoints=3000, ~kernelWidth=10, ()) => {
    let sorted = Belt.Array.copy(samples);
    Array.fast_sort(compare, sorted);
    let (continuousPart, discretePart) = splitContinuousAndDiscrete(sorted);
    let lengthFloat = sorted |> E.A.length |> float_of_int;
    let discrete: DistTypes.xyShape =
      discretePart
      |> E.FloatFloatMap.fmap(r => r /. lengthFloat)
      |> E.FloatFloatMap.toArray
      |> XYShape.T.fromZippedArray;
    let pdf: DistTypes.xyShape =
      continuousPart |> E.A.length > 20
        ? sorted |> KDE.normalSampling(_, outputXYPoints, kernelWidth)
        : {xs: [||], ys: [||]};
    let continuous = pdf |> Distributions.Continuous.fromShape;
    MixedShapeBuilder.buildSimple(~continuous, ~discrete);
  };
};
|
|
@ -8,6 +8,13 @@ let make =
|
|||
unit,
|
||||
};
|
||||
|
||||
/* Truncates an optional distPlus when a truncation value is supplied.
   `None` input stays `None`; without `truncateTo` the distribution is
   passed through untouched. */
let applyTruncation = (truncateTo, distPlus) =>
  switch (distPlus, truncateTo) {
  | (None, _) => None
  | (Some(d), None) => Some(d)
  | (Some(d), Some(t)) => Some(d |> Distributions.DistPlus.T.truncate(t))
  };
|
||||
|
||||
let toDistPlus =
|
||||
(
|
||||
~sampleCount=2000,
|
||||
|
@ -17,34 +24,19 @@ let toDistPlus =
|
|||
t: distPlusIngredients,
|
||||
)
|
||||
: option(distPlus) => {
|
||||
let shape =
|
||||
Guesstimator.toMixed(
|
||||
~string=t.guesstimatorString,
|
||||
~sampleCount,
|
||||
~outputXYPoints,
|
||||
~kernelWidth,
|
||||
(),
|
||||
);
|
||||
// TODO:
|
||||
// Truncate before resizing.
|
||||
// Faster sort
|
||||
let distPlus =
|
||||
shape
|
||||
|> E.O.fmap(
|
||||
Distributions.DistPlus.make(
|
||||
~shape=_,
|
||||
~domain=t.domain,
|
||||
~unit=t.unit,
|
||||
~guesstimatorString=Some(t.guesstimatorString),
|
||||
(),
|
||||
),
|
||||
)
|
||||
|> E.O.fmap(
|
||||
Distributions.DistPlus.T.scaleToIntegralSum(~intendedSum=1.0),
|
||||
);
|
||||
switch (truncateTo, distPlus) {
|
||||
| (Some(t), Some(d)) => Some(d |> Distributions.DistPlus.T.truncate(t))
|
||||
| (None, Some(d)) => Some(d)
|
||||
| _ => None
|
||||
};
|
||||
let samples =
|
||||
Guesstimator.stringToSamples(t.guesstimatorString, sampleCount);
|
||||
let shape = Samples.T.toShape(~samples, ~outputXYPoints, ~kernelWidth, ());
|
||||
shape
|
||||
|> E.O.fmap(
|
||||
Distributions.DistPlus.make(
|
||||
~shape=_,
|
||||
~domain=t.domain,
|
||||
~unit=t.unit,
|
||||
~guesstimatorString=Some(t.guesstimatorString),
|
||||
(),
|
||||
),
|
||||
)
|
||||
|> E.O.fmap(Distributions.DistPlus.T.scaleToIntegralSum(~intendedSum=1.0))
|
||||
|> applyTruncation(truncateTo);
|
||||
};
|
|
@ -1,170 +0,0 @@
|
|||
const {
|
||||
Cdf,
|
||||
Pdf,
|
||||
ContinuousDistribution,
|
||||
ContinuousDistributionCombination,
|
||||
scoringFunctions,
|
||||
} = require("@foretold/cdf/lib");
|
||||
const _ = require("lodash");
|
||||
|
||||
/**
 * Wraps the coordinates in a `Cdf` and returns the coordinates of its PDF.
 *
 * @param xs
 * @param ys
 * @returns {{ys: *, xs: *}}
 */
function cdfToPdf({ xs, ys }) {
  const { xs: pdfXs, ys: pdfYs } = new Cdf(xs, ys).toPdf();
  return { xs: pdfXs, ys: pdfYs };
}
|
||||
|
||||
/**
 * Wraps the coordinates in a `Pdf` and returns the coordinates of its CDF.
 *
 * @param xs
 * @param ys
 * @returns {{ys: *, xs: *}}
 */
function pdfToCdf({ xs, ys }) {
  const { xs: cdfXs, ys: cdfYs } = new Pdf(xs, ys).toCdf();
  return { xs: cdfXs, ys: cdfYs };
}
|
||||
|
||||
/**
 * Combines several CDFs with `combineYsWithMean`.
 *
 * @param sampleCount forwarded to `combineYsWithMean`
 * @param vars array of `{xs, ys}` objects, each wrapped in a `Cdf`
 * @returns {{ys: *, xs: *}}
 */
function mean(sampleCount, vars) {
  const cdfs = vars.map(({ xs, ys }) => new Cdf(xs, ys));
  const combination = new ContinuousDistributionCombination(cdfs);
  const { xs, ys } = combination.combineYsWithMean(sampleCount);
  return { xs, ys };
}
|
||||
|
||||
/**
 * Scores a prediction CDF against a resolution CDF with
 * `scoringFunctions.distributionInputDistributionOutputMarketless`.
 *
 * When `resolutionUniformAdditionWeight` is a finite number (the default 0
 * is), the prediction is first passed through `combineWithUniformOfCdf`
 * together with the resolution CDF and that weight.
 *
 * @param sampleCount
 * @param predictionCdf `{xs, ys}` of the prediction
 * @param resolutionCdf `{xs, ys}` of the resolution
 * @param resolutionUniformAdditionWeight
 */
function scoreNonMarketCdfCdf(sampleCount, predictionCdf, resolutionCdf, resolutionUniformAdditionWeight = 0) {
  const toCdf = ({ xs, ys }) => new Cdf(xs, ys);
  const rawPrediction = toCdf(predictionCdf);
  const prediction = _.isFinite(resolutionUniformAdditionWeight)
    ? rawPrediction.combineWithUniformOfCdf({
      cdf: toCdf(resolutionCdf),
      uniformWeight: resolutionUniformAdditionWeight,
      sampleCount,
    })
    : rawPrediction;

  return scoringFunctions.distributionInputDistributionOutputMarketless({
    predictionCdf: prediction,
    resultCdf: toCdf(resolutionCdf),
    sampleCount,
  });
}
|
||||
|
||||
/**
 * Forwards a CDF to `scoringFunctions.differentialEntropy`.
 *
 * @param sampleCount
 * @param cdf `{xs, ys}` object, wrapped in a `Cdf` before the call
 */
function differentialEntropy(sampleCount, cdf) {
  const wrapped = new Cdf(cdf.xs, cdf.ys);
  return scoringFunctions.differentialEntropy({ cdf: wrapped, sampleCount });
}
|
||||
|
||||
/**
 * Looks up the y value at `x` through `Cdf#findY`.
 *
 * @param x
 * @param xs
 * @param ys
 * @returns {number}
 */
function findY(x, { xs, ys }) {
  return new Cdf(xs, ys).findY(x);
}
|
||||
|
||||
/**
 * Resamples a distribution through `ContinuousDistribution#convertToNewLength`
 * and returns whatever that method returns.
 *
 * @param n requested length, forwarded as-is
 * @param xs
 * @param ys
 */
function convertToNewLength(n, { xs, ys }) {
  return new ContinuousDistribution(xs, ys).convertToNewLength(n);
}
|
||||
|
||||
/**
 * Looks up the x value at `y` through `Cdf#findX`.
 *
 * @param y
 * @param xs
 * @param ys
 * @returns {number}
 */
function findX(y, { xs, ys }) {
  return new Cdf(xs, ys).findX(y);
}
|
||||
|
||||
/**
 * Trapezoidal integral of a curve given as parallel coordinate arrays.
 *
 * Special values in `ys` short-circuit the result: any NaN, or both
 * infinities together, give NaN; a lone Infinity / -Infinity is returned
 * as-is. Segments with a non-finite coordinate at either end are skipped.
 *
 * The JSDoc previously advertised `number[]`; a single number is returned.
 * The checks now use the native `Array#includes` (SameValueZero, so it finds
 * NaN) and `Number.isFinite` instead of their lodash equivalents.
 *
 * @param xs
 * @param ys
 * @returns {number}
 */
function integral({ xs, ys }) {
  const hasPositiveInfinity = ys.includes(Infinity);
  const hasNegativeInfinity = ys.includes(-Infinity);

  if (ys.includes(NaN) || (hasPositiveInfinity && hasNegativeInfinity)) {
    return NaN;
  }
  if (hasPositiveInfinity) {
    return Infinity;
  }
  if (hasNegativeInfinity) {
    return -Infinity;
  }

  let total = 0;
  for (let i = 1; i < ys.length; i++) {
    const segment = [ys[i], ys[i - 1], xs[i], xs[i - 1]];
    if (segment.every(Number.isFinite)) {
      const [thisY, lastY, thisX, lastX] = segment;
      // Area of the trapezoid between the two neighbouring points.
      total += ((thisY + lastY) / 2) * (thisX - lastX);
    }
  }
  return total;
}
|
||||
|
||||
module.exports = {
|
||||
cdfToPdf,
|
||||
pdfToCdf,
|
||||
findY,
|
||||
findX,
|
||||
convertToNewLength,
|
||||
mean,
|
||||
scoreNonMarketCdfCdf,
|
||||
differentialEntropy,
|
||||
integral,
|
||||
};
|
|
@ -1,56 +0,0 @@
|
|||
/* Bindings to ./CdfLibrary.js plus conversions between the JS object form
   and DistTypes.xyShape. */
module JS = {
  /* Abstract view of the `{xs, ys}` object exchanged with JavaScript. */
  [@bs.deriving abstract]
  type distJs = {
    xs: array(float),
    ys: array(float),
  };

  let distToJs = ({xs, ys}: DistTypes.xyShape) => distJs(~xs, ~ys);

  let jsToDist = (d: distJs): DistTypes.xyShape => {
    xs: d |> xsGet,
    ys: d |> ysGet,
  };

  /* Runs a distJs => distJs function on an xyShape, converting in and out. */
  let doAsDist = (f, d: DistTypes.xyShape) => jsToDist(f(distToJs(d)));

  [@bs.module "./CdfLibrary.js"]
  external cdfToPdf: distJs => distJs = "cdfToPdf";

  [@bs.module "./CdfLibrary.js"]
  external pdfToCdf: distJs => distJs = "pdfToCdf";

  [@bs.module "./CdfLibrary.js"]
  external findY: (float, distJs) => float = "findY";

  [@bs.module "./CdfLibrary.js"]
  external findX: (float, distJs) => float = "findX";

  [@bs.module "./CdfLibrary.js"]
  external integral: distJs => float = "integral";

  [@bs.module "./CdfLibrary.js"]
  external differentialEntropy: (int, distJs) => distJs =
    "differentialEntropy";

  [@bs.module "./CdfLibrary.js"]
  external convertToNewLength: (int, distJs) => distJs = "convertToNewLength";
};
|
||||
|
||||
/* xyShape-level wrappers around the JS bindings. */
module Distribution = {
  /* Shapes with fewer than two points are returned untouched; anything else
     is resampled on the JS side. */
  let convertToNewLength = (newLength, dist: DistTypes.xyShape) =>
    E.A.length(dist.xs) < 2
      ? dist : JS.doAsDist(JS.convertToNewLength(newLength), dist);

  let toPdf = dist => JS.doAsDist(JS.cdfToPdf, dist);
  let toCdf = dist => JS.doAsDist(JS.pdfToCdf, dist);

  let findX = (y, dist) => JS.findX(y, JS.distToJs(dist));
  let findY = (x, dist) => JS.findY(x, JS.distToJs(dist));
  let integral = dist => JS.integral(JS.distToJs(dist));

  /* Integral of the shape returned by the JS `differentialEntropy` call. */
  let differentialEntropy = (maxCalculationLength, dist) =>
    integral(
      JS.doAsDist(JS.differentialEntropy(maxCalculationLength), dist),
    );
};
|
|
@ -1,5 +1,31 @@
|
|||
open Rationale.Function.Infix;
|
||||
|
||||
/* Mutable Belt map whose keys and values are both floats. */
module FloatFloatMap = {
  /* Key identity: floats ordered with `Pervasives.compare`. */
  module Id =
    Belt.Id.MakeComparable({
      type t = float;
      let cmp: (float, float) => int = Pervasives.compare;
    });

  type t = Belt.MutableMap.t(Id.t, float, Id.identity);

  /* Constructors and conversions. */
  let empty = () => Belt.MutableMap.make(~id=(module Id));
  let fromArray = (ar: array((float, float))) =>
    Belt.MutableMap.fromArray(ar, ~id=(module Id));
  let toArray = (t: t) => t |> Belt.MutableMap.toArray;

  /* Adds 1.0 to the value stored under `el`; a missing key starts at 1.0. */
  let increment = (el, t: t) => {
    let bump = current =>
      switch (current) {
      | None => Some(1.0)
      | Some(n) => Some(n +. 1.0)
      };
    Belt.MutableMap.update(t, el, bump);
  };

  let get = (el, t: t) => t |> Belt.MutableMap.get(_, el);
  /* Applies `fn` to every stored value through `Belt.MutableMap.map`. */
  let fmap = (fn, t: t) => t |> Belt.MutableMap.map(_, fn);
};
|
||||
|
||||
/* Utils */
|
||||
module U = {
|
||||
let isEqual = (a, b) => a == b;
|
||||
|
@ -298,6 +324,39 @@ module A = {
|
|||
};
|
||||
};
|
||||
};
|
||||
|
||||
/* Helpers specific to arrays of floats. */
module Floats = {
  /* Partitions an ascending-sorted array of floats.

     A value equal to one of its immediate neighbours is tallied in the
     `discrete` map (value => number of occurrences); every other value is
     appended to `continuous`. Returns `(continuous, discrete)`.

     Neighbours are read with `Belt.Array.get`, which returns `None` outside
     the array bounds. The previous version indexed `sortedArray[index + 1]`
     unconditionally for index 0, which raised on a single-element array. */
  let split = (sortedArray: array(float)) => {
    let continuous = [||];
    let discrete = FloatFloatMap.empty();
    let equalsAt = (i, element) =>
      switch (Belt.Array.get(sortedArray, i)) {
      | Some(neighbour) => neighbour == element
      | None => false
      };
    Belt.Array.forEachWithIndex(sortedArray, (index, element) =>
      if (equalsAt(index - 1, element) || equalsAt(index + 1, element)) {
        FloatFloatMap.increment(element, discrete);
      } else {
        let _ = Js.Array.push(element, continuous);
        ();
      }
    );

    (continuous, discrete);
  };
};
|
||||
};
|
||||
|
||||
module JsArray = {
|
||||
|
|
|
@ -1,186 +1,13 @@
|
|||
module Internals = {
|
||||
[@bs.deriving abstract]
|
||||
type discrete = {
|
||||
xs: array(float),
|
||||
ys: array(float),
|
||||
};
|
||||
|
||||
let jsToDistDiscrete = (d: discrete): DistTypes.discreteShape => {
|
||||
xs: xsGet(d),
|
||||
ys: ysGet(d),
|
||||
};
|
||||
|
||||
[@bs.deriving abstract]
|
||||
type combined = {
|
||||
continuous: CdfLibrary.JS.distJs,
|
||||
discrete,
|
||||
};
|
||||
|
||||
// todo: Force to be fewer samples
|
||||
let toContinous = (r: combined) =>
|
||||
continuousGet(r)
|
||||
|> CdfLibrary.JS.jsToDist
|
||||
|> Distributions.Continuous.fromShape;
|
||||
|
||||
let toDiscrete = (r: combined): DistTypes.xyShape =>
|
||||
discreteGet(r) |> jsToDistDiscrete;
|
||||
|
||||
[@bs.module "./GuesstimatorLibrary.js"]
|
||||
external toCombinedFormat: (string, int, int, int) => combined = "run";
|
||||
|
||||
[@bs.module "./GuesstimatorLibrary.js"]
|
||||
external stringToSamples: (string, int) => array(float) = "stringToSamples";
|
||||
|
||||
[@bs.module "./GuesstimatorLibrary.js"]
|
||||
external samplesToContinuousPdf:
|
||||
(array(float), int, int) => CdfLibrary.JS.distJs =
|
||||
"samplesToContinuousPdf";
|
||||
[@bs.deriving abstract]
|
||||
type discrete = {
|
||||
xs: array(float),
|
||||
ys: array(float),
|
||||
};
|
||||
|
||||
module KDE = {
|
||||
/* Runs the JS estimator over every sample and converts the result into an
   xyShape. `outputXYPoints` and `kernelWidth` are forwarded unchanged. */
let normalSampling = (samples, outputXYPoints, kernelWidth) =>
  CdfLibrary.JS.jsToDist(
    Internals.samplesToContinuousPdf(samples, outputXYPoints, kernelWidth),
  );
|
||||
|
||||
/* Estimates the density of the leading `cuttoff` fraction of `samples`,
   using the same fraction of `outputXYPoints`.

   The earlier body also ran a second estimate over the remaining tail
   (`part2`) and computed `opp = 1.0 -. cuttoff`, but the code combining them
   was commented out, so both were thrown away and the first part alone was
   returned. That dead work is removed; the returned value is unchanged.
   TODO: blend the tail estimate back in (weighted by 1 - cuttoff) once
   combining the two shapes works. */
let inGroups = (samples, outputXYPoints, kernelWidth, ~cuttoff=0.9, ()) => {
  let scaleByCutoff = n => int_of_float(float_of_int(n) *. cuttoff);
  let partitionAt = samples |> E.A.length |> scaleByCutoff;
  let part1XYPoints = scaleByCutoff(outputXYPoints);
  samples
  |> Belt.Array.slice(_, ~offset=0, ~len=partitionAt)
  |> Internals.samplesToContinuousPdf(_, part1XYPoints, kernelWidth)
  |> CdfLibrary.JS.jsToDist;
};
|
||||
let jsToDistDiscrete = (d: discrete): DistTypes.discreteShape => {
|
||||
xs: xsGet(d),
|
||||
ys: ysGet(d),
|
||||
};
|
||||
|
||||
/* Mutable Belt map whose keys and values are both floats. */
module FloatFloatMap = {
  /* Key identity: floats ordered with `Pervasives.compare`. */
  module Id =
    Belt.Id.MakeComparable({
      type t = float;
      let cmp: (float, float) => int = Pervasives.compare;
    });

  type t = Belt.MutableMap.t(Id.t, float, Id.identity);

  /* Constructors and conversions. */
  let empty = () => Belt.MutableMap.make(~id=(module Id));
  let fromArray = (ar: array((float, float))) =>
    Belt.MutableMap.fromArray(ar, ~id=(module Id));
  let toArray = (t: t) => t |> Belt.MutableMap.toArray;

  /* Adds 1.0 to the value stored under `el`; a missing key starts at 1.0. */
  let increment = (el, t: t) => {
    let bump = current =>
      switch (current) {
      | None => Some(1.0)
      | Some(n) => Some(n +. 1.0)
      };
    Belt.MutableMap.update(t, el, bump);
  };

  let get = (el, t: t) => t |> Belt.MutableMap.get(_, el);
  /* Applies `fn` to every stored value through `Belt.MutableMap.map`. */
  let fmap = (fn, t: t) => t |> Belt.MutableMap.map(_, fn);
};
|
||||
|
||||
// todo: Figure out some way of doing this without creating a new array.
|
||||
/* Partitions an ascending-sorted array of floats.

   A value equal to one of its immediate neighbours is tallied in the
   `discrete` map (value => number of occurrences); every other value is
   appended to `continuous`. Returns `(continuous, discrete)`.

   Neighbours are read with `Belt.Array.get`, which returns `None` outside the
   array bounds. The previous version indexed `sortedArray[index + 1]`
   unconditionally for index 0, which raised on a single-element array. */
let split = (sortedArray: array(float)) => {
  let continuous = [||];
  let discrete = FloatFloatMap.empty();
  let equalsAt = (i, element) =>
    switch (Belt.Array.get(sortedArray, i)) {
    | Some(neighbour) => neighbour == element
    | None => false
    };
  Belt.Array.forEachWithIndex(sortedArray, (index, element) =>
    if (equalsAt(index - 1, element) || equalsAt(index + 1, element)) {
      FloatFloatMap.increment(element, discrete);
    } else {
      let _ = Js.Array.push(element, continuous);
      ();
    }
  );

  (continuous, discrete);
};
|
||||
|
||||
/* Samples a guesstimator string and builds a mixed shape from the samples:
   repeated values form the discrete part (weighted by count / sample count)
   and, when more than 20 values are unique, a kernel density estimate over
   all samples forms the continuous part. Elapsed milliseconds are logged
   after each stage. `~cuttoff` is accepted but not used by this body. */
let toMixed =
    (
      ~string,
      ~sampleCount=3000,
      ~outputXYPoints=3000,
      ~kernelWidth=10,
      ~cuttoff=0.995,
      (),
    ) => {
  let startedAt = Js.Date.now();
  let timeMessage = message => Js.log2(message, Js.Date.now() -. startedAt);

  timeMessage("Starting");
  let samples = Internals.stringToSamples(string, sampleCount);
  timeMessage("Finished sampling");

  let lengthFloat = samples |> E.A.length |> float_of_int;
  /* Sorts `samples` in place; `split` expects ascending input. */
  Array.fast_sort(compare, samples);
  let (continuousPart, disc) = split(samples);
  let discrete: DistTypes.xyShape =
    XYShape.T.fromZippedArray(
      FloatFloatMap.toArray(FloatFloatMap.fmap(r => r /. lengthFloat, disc)),
    );
  let pdf: DistTypes.xyShape =
    E.A.length(continuousPart) > 20
      ? KDE.normalSampling(samples, outputXYPoints, kernelWidth)
      : {xs: [||], ys: [||]};
  timeMessage("Finished pdf");

  let continuous = Distributions.Continuous.fromShape(pdf);
  let shape = MixedShapeBuilder.buildSimple(~continuous, ~discrete);
  timeMessage("Finished shape");
  shape;
};
|
||||
[@bs.module "./GuesstimatorLibrary.js"]
|
||||
external stringToSamples: (string, int) => array(float) = "stringToSamples";
|
|
@ -1,89 +1,5 @@
|
|||
const {
|
||||
Samples,
|
||||
} = require("@foretold/cdf/lib/samples");
|
||||
const _ = require("lodash");
|
||||
const { Guesstimator } = require('@foretold/guesstimator/src');
|
||||
const pdfast = require('pdfast');
|
||||
|
||||
/**
 * Splits samples into a discrete part (values occurring more than once,
 * each with its share of all samples) and a continuous part (a PDF of the
 * remaining values, built only when more than 20 of them exist).
 *
 * Occurrences are counted in one pass. The previous version re-filtered the
 * whole `values` array once per duplicated value and compared numbers to
 * string keys with `==`.
 *
 * @param values finite sample values
 * @param outputResolutionCount forwarded to `Samples#toPdf` as `size`
 * @param width forwarded to `Samples#toPdf`
 * @param min forwarded to `Samples#toPdf`
 * @param max forwarded to `Samples#toPdf`
 * @returns {{discrete: {ys: *, xs: *}, continuous: {ys: [], xs: []}}}
 */
const toPdf = (values, outputResolutionCount, width, min, max) => {
  const totalLength = values.length;

  // A plain object keyed by the stringified value keeps the key order the
  // lodash groupBy-based implementation produced.
  const counts = Object.create(null);
  for (const value of values) {
    counts[value] = (counts[value] || 0) + 1;
  }

  const frequencies = Object.keys(counts)
    .filter((key) => counts[key] > 1)
    .map((key) => ({
      value: parseFloat(key),
      percentage: counts[key] / totalLength,
    }));

  const discreteValues = new Set(frequencies.map((f) => f.value));
  const continuousSamples = values.filter((v) => !discreteValues.has(v));

  const discrete = {
    xs: frequencies.map((f) => f.value),
    ys: frequencies.map((f) => f.percentage),
  };

  let continuous = { ys: [], xs: [] };
  if (continuousSamples.length > 20) {
    const samples = new Samples(continuousSamples);
    continuous = samples.toPdf({ size: outputResolutionCount, width, min, max });
  }

  return { continuous, discrete };
};
|
||||
|
||||
/**
 * Parses a guesstimator expression, samples it and converts the finite
 * samples with `toPdf`. Fewer than two finite samples yield empty shapes.
 *
 * @param text expression text; "=" is prepended before parsing
 * @param sampleCount
 * @param outputResolutionCount
 * @param width
 * @param inputs
 * @param min
 * @param max
 * @returns {{discrete: {ys: *, xs: *}, continuous: {ys: *[], xs: *[]}}}
 */
const run = (
  text,
  sampleCount,
  outputResolutionCount,
  width,
  inputs = [],
  min = false,
  max = false,
) => {
  // The error slot of the parse result is not inspected.
  const [, item] = Guesstimator.parse({ text: "=" + text });
  const { parsedInput } = item;

  const guesstimator = new Guesstimator({ parsedInput });
  const sampled = guesstimator.sample(sampleCount, inputs);
  const values = _.filter(sampled.values, _.isFinite);

  if (values.length === 0 || values.length === 1) {
    return {
      continuous: { ys: [], xs: [] },
      discrete: { ys: [], xs: [] },
    };
  }
  return toPdf(values, outputResolutionCount, width, min, max);
};
|
||||
|
||||
const stringToSamples = (
|
||||
text,
|
||||
|
@ -104,23 +20,6 @@ const stringToSamples = (
|
|||
}
|
||||
};
|
||||
|
||||
|
||||
/**
 * Builds a PDF from raw samples with `pdfast.create`.
 *
 * Non-finite samples are dropped. When `min` / `max` are finite numbers,
 * only samples strictly above `min` / strictly below `max` are kept; the
 * default `false` disables the respective bound.
 *
 * @param samples
 * @param size forwarded to pdfast
 * @param width forwarded to pdfast
 * @param min
 * @param max
 * @returns {{xs: *, ys: *}}
 */
const samplesToContinuousPdf = (
  samples,
  size,
  width,
  min = false,
  max = false,
) => {
  const finiteSamples = _.filter(samples, _.isFinite);
  const aboveMin = _.isFinite(min)
    ? _.filter(finiteSamples, (r) => r > min)
    : finiteSamples;
  const bounded = _.isFinite(max)
    ? _.filter(aboveMin, (r) => r < max)
    : aboveMin;
  const points = pdfast.create(bounded, { size, width });
  return { xs: points.map(({ x }) => x), ys: points.map(({ y }) => y) };
};
|
||||
|
||||
module.exports = {
|
||||
run,
|
||||
stringToSamples,
|
||||
samplesToContinuousPdf
|
||||
};
|
||||
|
|
21
src/utility/KdeLibrary.js
Normal file
21
src/utility/KdeLibrary.js
Normal file
|
@ -0,0 +1,21 @@
|
|||
const pdfast = require('pdfast');
|
||||
const _ = require("lodash");
|
||||
|
||||
/**
 * Builds a PDF from raw samples with `pdfast.create`.
 *
 * Non-finite samples are dropped. When `min` / `max` are finite numbers,
 * only samples strictly above `min` / strictly below `max` are kept; the
 * default `false` disables the respective bound.
 *
 * @param samples
 * @param size forwarded to pdfast
 * @param width forwarded to pdfast
 * @param min
 * @param max
 * @returns {{xs: *, ys: *}}
 */
const samplesToContinuousPdf = (
  samples,
  size,
  width,
  min = false,
  max = false,
) => {
  const finiteSamples = _.filter(samples, _.isFinite);
  const aboveMin = _.isFinite(min)
    ? _.filter(finiteSamples, (r) => r > min)
    : finiteSamples;
  const bounded = _.isFinite(max)
    ? _.filter(aboveMin, (r) => r < max)
    : aboveMin;
  const points = pdfast.create(bounded, { size, width });
  return { xs: points.map(({ x }) => x), ys: points.map(({ y }) => y) };
};
|
||||
|
||||
|
||||
module.exports = {
|
||||
samplesToContinuousPdf,
|
||||
};
|
Loading…
Reference in New Issue
Block a user