From 8d1ab10e06b444702293fd6e47354099212162a5 Mon Sep 17 00:00:00 2001 From: Ozzie Gooen Date: Fri, 20 Mar 2020 12:08:26 +0000 Subject: [PATCH] Minor cleanup --- __tests__/Guesstimator__Test.re | 21 --- __tests__/Lodash__test.re | 83 ---------- __tests__/Samples__test.re | 51 ++++++ package.json | 3 +- src/Samples.re | 112 +++++++++++++ src/distributions/DistPlusIngredients.re | 52 +++--- src/utility/CdfLibrary.js | 170 -------------------- src/utility/CdfLibrary.re | 56 ------- src/utility/E.re | 59 +++++++ src/utility/Guesstimator.re | 191 ++--------------------- src/utility/GuesstimatorLibrary.js | 101 ------------ src/utility/KdeLibrary.js | 21 +++ 12 files changed, 275 insertions(+), 645 deletions(-) delete mode 100644 __tests__/Guesstimator__Test.re create mode 100644 __tests__/Samples__test.re create mode 100644 src/Samples.re delete mode 100644 src/utility/CdfLibrary.js delete mode 100644 src/utility/CdfLibrary.re create mode 100644 src/utility/KdeLibrary.js diff --git a/__tests__/Guesstimator__Test.re b/__tests__/Guesstimator__Test.re deleted file mode 100644 index 5cea654c..00000000 --- a/__tests__/Guesstimator__Test.re +++ /dev/null @@ -1,21 +0,0 @@ -open Jest; -open Expect; - -let makeTest = (~only=false, str, item1, item2) => - only - ? Only.test(str, () => - expect(item1) |> toEqual(item2) - ) - : test(str, () => - expect(item1) |> toEqual(item2) - ) /* })*/; - -// These fail because of issues with Jest, Babel, and Bucklescript -// describe("XYShapes", () => { -// describe("logScorePoint", () => { -// makeTest( -// "When identical", -// Some(Guesstimator.stringToMixedShape(~string="5 to 20")), -// None, -// ) -// }) \ No newline at end of file diff --git a/__tests__/Lodash__test.re b/__tests__/Lodash__test.re index 661e62eb..8e6ccc21 100644 --- a/__tests__/Lodash__test.re +++ b/__tests__/Lodash__test.re @@ -10,63 +10,6 @@ let makeTest = (~only=false, str, item1, item2) => expect(item1) |> toEqual(item2) ); -module FloatFloatMap = { - module Id = - Belt.Id.MakeComparable({ - type t = float; - let cmp: (float, float) => int = Pervasives.compare; - }); - - type t = Belt.MutableMap.t(Id.t, float, Id.identity); - - let fromArray = (ar: array((float, float))) => - Belt.MutableMap.fromArray(ar, ~id=(module Id)); - let toArray = (t: t) => Belt.MutableMap.toArray(t); - let empty = () => Belt.MutableMap.make(~id=(module Id)); - let increment = (el, t: t) => - Belt.MutableMap.update( - t, - el, - fun - | Some(n) => Some(n +. 1.0) - | None => Some(1.0), - ); - - let get = (el, t: t) => Belt.MutableMap.get(t, el); - let fmap = (fn, t: t) => Belt.MutableMap.map(t, fn); -}; - -let split = (sortedArray: array(float)) => { - let continuous = [||]; - let discrete = FloatFloatMap.empty(); - Belt.Array.forEachWithIndex( - sortedArray, - (index, element) => { - let maxIndex = (sortedArray |> Array.length) - 1; - let possiblySimilarElements = - ( - switch (index) { - | 0 => [|index + 1|] - | n when n == maxIndex => [|index - 1|] - | _ => [|index - 1, index + 1|] - } - ) - |> Belt.Array.map(_, r => sortedArray[r]); - let hasSimilarElement = - Belt.Array.some(possiblySimilarElements, r => r == element); - hasSimilarElement - ? FloatFloatMap.increment(element, discrete) - : { - let _ = Js.Array.push(element, continuous); - (); - }; - (); - }, - ); - - (continuous, discrete); -}; - describe("Lodash", () => { describe("Lodash", () => { makeTest("min", Lodash.min([|1, 3, 4|]), 1); @@ -77,31 +20,5 @@ describe("Lodash", () => { Lodash.countBy([|1, 3, 4, 4|], r => r), Js.Dict.fromArray([|("1", 1), ("3", 1), ("4", 2)|]), ); - makeTest( - "split", - split([|1.432, 1.33455, 2.0|]), - ([|1.432, 1.33455, 2.0|], FloatFloatMap.empty()), - ); - makeTest( - "split", - split([|1.432, 1.33455, 2.0, 2.0, 2.0, 2.0|]) - |> (((c, disc)) => (c, disc |> FloatFloatMap.toArray)), - ([|1.432, 1.33455|], [|(2.0, 4.0)|]), - ); - - let makeDuplicatedArray = count => { - let arr = Belt.Array.range(1, count) |> E.A.fmap(float_of_int); - let sorted = arr |> Belt.SortArray.stableSortBy(_, compare); - E.A.concatMany([|sorted, sorted, sorted, sorted|]) - |> Belt.SortArray.stableSortBy(_, compare); - }; - - let (_, discrete) = split(makeDuplicatedArray(10)); - let toArr = discrete |> FloatFloatMap.toArray; - makeTest("splitMedium", toArr |> Belt.Array.length, 10); - - let (c, discrete) = split(makeDuplicatedArray(500)); - let toArr = discrete |> FloatFloatMap.toArray; - makeTest("splitMedium", toArr |> Belt.Array.length, 500); }) }); \ No newline at end of file diff --git a/__tests__/Samples__test.re b/__tests__/Samples__test.re new file mode 100644 index 00000000..bc61cb99 --- /dev/null +++ b/__tests__/Samples__test.re @@ -0,0 +1,51 @@ +open Jest; +open Expect; + +let makeTest = (~only=false, str, item1, item2) => + only + ? Only.test(str, () => + expect(item1) |> toEqual(item2) + ) + : test(str, () => + expect(item1) |> toEqual(item2) + ); + +describe("Lodash", () => { + describe("Lodash", () => { + makeTest( + "split", + Samples.T.splitContinuousAndDiscrete([|1.432, 1.33455, 2.0|]), + ([|1.432, 1.33455, 2.0|], E.FloatFloatMap.empty()), + ); + makeTest( + "split", + Samples.T.splitContinuousAndDiscrete([| + 1.432, + 1.33455, + 2.0, + 2.0, + 2.0, + 2.0, + |]) + |> (((c, disc)) => (c, disc |> E.FloatFloatMap.toArray)), + ([|1.432, 1.33455|], [|(2.0, 4.0)|]), + ); + + let makeDuplicatedArray = count => { + let arr = Belt.Array.range(1, count) |> E.A.fmap(float_of_int); + let sorted = arr |> Belt.SortArray.stableSortBy(_, compare); + E.A.concatMany([|sorted, sorted, sorted, sorted|]) + |> Belt.SortArray.stableSortBy(_, compare); + }; + + let (_, discrete) = + Samples.T.splitContinuousAndDiscrete(makeDuplicatedArray(10)); + let toArr = discrete |> E.FloatFloatMap.toArray; + makeTest("splitMedium", toArr |> Belt.Array.length, 10); + + let (c, discrete) = + Samples.T.splitContinuousAndDiscrete(makeDuplicatedArray(500)); + let toArr = discrete |> E.FloatFloatMap.toArray; + makeTest("splitMedium", toArr |> Belt.Array.length, 500); + }) +}); \ No newline at end of file diff --git a/package.json b/package.json index d1b25257..b14a95ae 100644 --- a/package.json +++ b/package.json @@ -26,7 +26,6 @@ "author": "", "license": "MIT", "dependencies": { - "@foretold/cdf": "1.0.15", "@foretold/components": "0.0.3", "@foretold/guesstimator": "1.0.10", "@glennsl/bs-jest": "^0.5.0", @@ -66,4 +65,4 @@ "react": "./node_modules/react", "react-dom": "./node_modules/react-dom" } -} +} \ No newline at end of file diff --git a/src/Samples.re b/src/Samples.re new file mode 100644 index 00000000..ebd36722 --- /dev/null +++ b/src/Samples.re @@ -0,0 +1,112 @@ +module JS = { + [@bs.deriving abstract] + type distJs = { + xs: array(float), + ys: array(float), + }; + + let jsToDist = (d: distJs): DistTypes.xyShape => { + xs: xsGet(d), + ys: ysGet(d), + }; + + [@bs.module "./utility/KdeLibrary.js"] + external samplesToContinuousPdf: (array(float), int, int) => distJs = + "samplesToContinuousPdf"; +}; + +module KDE = { + let normalSampling = (samples, outputXYPoints, kernelWidth) => { + samples + |> JS.samplesToContinuousPdf(_, outputXYPoints, kernelWidth) + |> JS.jsToDist; + }; + + let inGroups = (samples, outputXYPoints, kernelWidth, ~cuttoff=0.9, ()) => { + let partitionAt = + samples + |> E.A.length + |> float_of_int + |> (e => e *. cuttoff) + |> int_of_float; + let part1XYPoints = + outputXYPoints |> float_of_int |> (e => e *. cuttoff) |> int_of_float; + let part2XYPoints = outputXYPoints - part1XYPoints |> Js.Math.max_int(30); + let part1Data = + samples |> Belt.Array.slice(_, ~offset=0, ~len=partitionAt); + let part2DataLength = (samples |> E.A.length) - partitionAt; + let part2Data = + samples + |> Belt.Array.slice( + _, + ~offset=(-1) * part2DataLength, + ~len=part2DataLength, + ); + let part1 = + part1Data + |> JS.samplesToContinuousPdf(_, part1XYPoints, kernelWidth) + |> JS.jsToDist; + let part2 = + part2Data + |> JS.samplesToContinuousPdf(_, part2XYPoints, 3) + |> JS.jsToDist; + let opp = 1.0 -. cuttoff; + part1; + }; +}; + +module T = { + type t = array(float); + + let splitContinuousAndDiscrete = (sortedArray: t) => { + let continuous = [||]; + let discrete = E.FloatFloatMap.empty(); + Belt.Array.forEachWithIndex( + sortedArray, + (index, element) => { + let maxIndex = (sortedArray |> Array.length) - 1; + let possiblySimilarElements = + ( + switch (index) { + | 0 => [|index + 1|] + | n when n == maxIndex => [|index - 1|] + | _ => [|index - 1, index + 1|] + } + ) + |> Belt.Array.map(_, r => sortedArray[r]); + let hasSimilarElement = + Belt.Array.some(possiblySimilarElements, r => r == element); + hasSimilarElement + ? E.FloatFloatMap.increment(element, discrete) + : { + let _ = Js.Array.push(element, continuous); + (); + }; + (); + }, + ); + (continuous, discrete); + }; + + // todo: Figure out some way of doing this without having to integrate so many times. + let toShape = (~samples: t, ~outputXYPoints=3000, ~kernelWidth=10, ()) => { + Array.fast_sort(compare, samples); + let (continuousPart, discretePart) = E.A.Floats.split(samples); + let length = samples |> E.A.length; + let lengthFloat = float_of_int(length); + let discrete: DistTypes.xyShape = + discretePart + |> E.FloatFloatMap.fmap(r => r /. lengthFloat) + |> E.FloatFloatMap.toArray + |> XYShape.T.fromZippedArray; + let pdf: DistTypes.xyShape = + continuousPart |> E.A.length > 20 + ? { + samples |> KDE.normalSampling(_, outputXYPoints, kernelWidth); + } + : {xs: [||], ys: [||]}; + let continuous = pdf |> Distributions.Continuous.fromShape; + let shape = MixedShapeBuilder.buildSimple(~continuous, ~discrete); + shape; + }; +}; \ No newline at end of file diff --git a/src/distributions/DistPlusIngredients.re b/src/distributions/DistPlusIngredients.re index 1d58b298..6bba41af 100644 --- a/src/distributions/DistPlusIngredients.re +++ b/src/distributions/DistPlusIngredients.re @@ -8,6 +8,13 @@ let make = unit, }; +let applyTruncation = (truncateTo, distPlus) => + switch (truncateTo, distPlus) { + | (Some(t), Some(d)) => Some(d |> Distributions.DistPlus.T.truncate(t)) + | (None, Some(d)) => Some(d) + | _ => None + }; + let toDistPlus = ( ~sampleCount=2000, @@ -17,34 +24,19 @@ let toDistPlus = t: distPlusIngredients, ) : option(distPlus) => { - let shape = - Guesstimator.toMixed( - ~string=t.guesstimatorString, - ~sampleCount, - ~outputXYPoints, - ~kernelWidth, - (), - ); - // TODO: - // Truncate before resizing. - // Faster sort - let distPlus = - shape - |> E.O.fmap( - Distributions.DistPlus.make( - ~shape=_, - ~domain=t.domain, - ~unit=t.unit, - ~guesstimatorString=Some(t.guesstimatorString), - (), - ), - ) - |> E.O.fmap( - Distributions.DistPlus.T.scaleToIntegralSum(~intendedSum=1.0), - ); - switch (truncateTo, distPlus) { - | (Some(t), Some(d)) => Some(d |> Distributions.DistPlus.T.truncate(t)) - | (None, Some(d)) => Some(d) - | _ => None - }; + let samples = + Guesstimator.stringToSamples(t.guesstimatorString, sampleCount); + let shape = Samples.T.toShape(~samples, ~outputXYPoints, ~kernelWidth, ()); + shape + |> E.O.fmap( + Distributions.DistPlus.make( + ~shape=_, + ~domain=t.domain, + ~unit=t.unit, + ~guesstimatorString=Some(t.guesstimatorString), + (), + ), + ) + |> E.O.fmap(Distributions.DistPlus.T.scaleToIntegralSum(~intendedSum=1.0)) + |> applyTruncation(truncateTo); }; \ No newline at end of file diff --git a/src/utility/CdfLibrary.js b/src/utility/CdfLibrary.js deleted file mode 100644 index 2e23da7e..00000000 --- a/src/utility/CdfLibrary.js +++ /dev/null @@ -1,170 +0,0 @@ -const { - Cdf, - Pdf, - ContinuousDistribution, - ContinuousDistributionCombination, - scoringFunctions, -} = require("@foretold/cdf/lib"); -const _ = require("lodash"); - -/** - * - * @param xs - * @param ys - * @returns {{ys: *, xs: *}} - */ -function cdfToPdf({ xs, ys }) { - let cdf = new Cdf(xs, ys); - let pdf = cdf.toPdf(); - return { xs: pdf.xs, ys: pdf.ys }; -} - -/** - * - * @param xs - * @param ys - * @returns {{ys: *, xs: *}} - */ -function pdfToCdf({ xs, ys }) { - let cdf = new Pdf(xs, ys); - let pdf = cdf.toCdf(); - return { xs: pdf.xs, ys: pdf.ys }; -} - -/** - * - * @param sampleCount - * @param vars - * @returns {{ys: *, xs: *}} - */ -function mean(sampleCount, vars) { - let cdfs = vars.map(r => new Cdf(r.xs, r.ys)); - let comb = new ContinuousDistributionCombination(cdfs); - let newCdf = comb.combineYsWithMean(sampleCount); - - return { xs: newCdf.xs, ys: newCdf.ys }; -} - -/** - * - * @param sampleCount - * @param predictionCdf - * @param resolutionCdf - */ -function scoreNonMarketCdfCdf(sampleCount, predictionCdf, resolutionCdf, resolutionUniformAdditionWeight = 0) { - let toCdf = (r) => (new Cdf(r.xs, r.ys)); - let prediction = toCdf(predictionCdf); - if (_.isFinite(resolutionUniformAdditionWeight)) { - prediction = prediction.combineWithUniformOfCdf( - { - cdf: toCdf(resolutionCdf), - uniformWeight: resolutionUniformAdditionWeight, - sampleCount - } - ); - } - - return scoringFunctions.distributionInputDistributionOutputMarketless({ - predictionCdf: prediction, - resultCdf: toCdf(resolutionCdf), - sampleCount, - }); -} - -/** - * - * @param sampleCount - * @param cdf - */ -function differentialEntropy(sampleCount, cdf) { - let toCdf = (r) => (new Cdf(r.xs, r.ys)); - - return scoringFunctions.differentialEntropy({ - cdf: toCdf(cdf), - sampleCount: sampleCount - }); -} - -/** - * - * @param x - * @param xs - * @param ys - * @returns {number} - */ -function findY(x, { xs, ys }) { - let cdf = new Cdf(xs, ys); - return cdf.findY(x); -} - -/** - * - * @param x - * @param xs - * @param ys - * @returns {number[]} - */ -function convertToNewLength(n, { xs, ys }) { - let dist = new ContinuousDistribution(xs, ys); - return dist.convertToNewLength(n); -} - -/** - * - * @param y - * @param xs - * @param ys - * @returns {number} - */ -function findX(y, { xs, ys }) { - let cdf = new Cdf(xs, ys); - return cdf.findX(y); -} - -/** - * - * @param xs - * @param ys - * @returns {number[]} - */ -function integral({ xs, ys }) { - if (_.includes(ys, NaN)) { - return NaN; - } else if (_.includes(ys, Infinity) && _.includes(ys, -Infinity)) { - return NaN; - } else if (_.includes(ys, Infinity)) { - return Infinity; - } else if (_.includes(ys, -Infinity)) { - return -Infinity; - } - - let integral = 0; - for (let i = 1; i < ys.length; i++) { - let thisY = ys[i]; - let lastY = ys[i - 1]; - let thisX = xs[i]; - let lastX = xs[i - 1]; - - if ( - _.isFinite(thisY) && _.isFinite(lastY) && - _.isFinite(thisX) && _.isFinite(lastX) - ) { - let sectionInterval = ((thisY + lastY) / 2) * (thisX - lastX); - integral = integral + sectionInterval; - } - - } - return integral; -} - -module.exports = { - cdfToPdf, - pdfToCdf, - findY, - findX, - convertToNewLength, - mean, - scoreNonMarketCdfCdf, - differentialEntropy, - integral, -}; diff --git a/src/utility/CdfLibrary.re b/src/utility/CdfLibrary.re deleted file mode 100644 index 329826d9..00000000 --- a/src/utility/CdfLibrary.re +++ /dev/null @@ -1,56 +0,0 @@ -module JS = { - [@bs.deriving abstract] - type distJs = { - xs: array(float), - ys: array(float), - }; - - let distToJs = (d: DistTypes.xyShape) => distJs(~xs=d.xs, ~ys=d.ys); - - let jsToDist = (d: distJs): DistTypes.xyShape => { - xs: xsGet(d), - ys: ysGet(d), - }; - - let doAsDist = (f, d: DistTypes.xyShape) => d |> distToJs |> f |> jsToDist; - - [@bs.module "./CdfLibrary.js"] - external cdfToPdf: distJs => distJs = "cdfToPdf"; - - [@bs.module "./CdfLibrary.js"] - external pdfToCdf: distJs => distJs = "pdfToCdf"; - - [@bs.module "./CdfLibrary.js"] - external findY: (float, distJs) => float = "findY"; - - [@bs.module "./CdfLibrary.js"] - external findX: (float, distJs) => float = "findX"; - - [@bs.module "./CdfLibrary.js"] - external integral: distJs => float = "integral"; - - [@bs.module "./CdfLibrary.js"] - external differentialEntropy: (int, distJs) => distJs = - "differentialEntropy"; - - [@bs.module "./CdfLibrary.js"] - external convertToNewLength: (int, distJs) => distJs = "convertToNewLength"; -}; - -module Distribution = { - let convertToNewLength = (int, {xs, _} as dist: DistTypes.xyShape) => - switch (E.A.length(xs)) { - | 0 - | 1 => dist - | _ => dist |> JS.doAsDist(JS.convertToNewLength(int)) - }; - let toPdf = dist => dist |> JS.doAsDist(JS.cdfToPdf); - let toCdf = dist => dist |> JS.doAsDist(JS.pdfToCdf); - let findX = (y, dist) => dist |> JS.distToJs |> JS.findX(y); - let findY = (x, dist) => dist |> JS.distToJs |> JS.findY(x); - let integral = dist => dist |> JS.distToJs |> JS.integral; - let differentialEntropy = (maxCalculationLength, dist) => - dist - |> JS.doAsDist(JS.differentialEntropy(maxCalculationLength)) - |> integral; -}; \ No newline at end of file diff --git a/src/utility/E.re b/src/utility/E.re index 4d4845bb..f47b00b8 100644 --- a/src/utility/E.re +++ b/src/utility/E.re @@ -1,5 +1,31 @@ open Rationale.Function.Infix; +module FloatFloatMap = { + module Id = + Belt.Id.MakeComparable({ + type t = float; + let cmp: (float, float) => int = Pervasives.compare; + }); + + type t = Belt.MutableMap.t(Id.t, float, Id.identity); + + let fromArray = (ar: array((float, float))) => + Belt.MutableMap.fromArray(ar, ~id=(module Id)); + let toArray = (t: t) => Belt.MutableMap.toArray(t); + let empty = () => Belt.MutableMap.make(~id=(module Id)); + let increment = (el, t: t) => + Belt.MutableMap.update( + t, + el, + fun + | Some(n) => Some(n +. 1.0) + | None => Some(1.0), + ); + + let get = (el, t: t) => Belt.MutableMap.get(t, el); + let fmap = (fn, t: t) => Belt.MutableMap.map(t, fn); +}; + /* Utils */ module U = { let isEqual = (a, b) => a == b; @@ -298,6 +324,39 @@ module A = { }; }; }; + + module Floats = { + let split = (sortedArray: array(float)) => { + let continuous = [||]; + let discrete = FloatFloatMap.empty(); + Belt.Array.forEachWithIndex( + sortedArray, + (index, element) => { + let maxIndex = (sortedArray |> Array.length) - 1; + let possiblySimilarElements = + ( + switch (index) { + | 0 => [|index + 1|] + | n when n == maxIndex => [|index - 1|] + | _ => [|index - 1, index + 1|] + } + ) + |> Belt.Array.map(_, r => sortedArray[r]); + let hasSimilarElement = + Belt.Array.some(possiblySimilarElements, r => r == element); + hasSimilarElement + ? FloatFloatMap.increment(element, discrete) + : { + let _ = Js.Array.push(element, continuous); + (); + }; + (); + }, + ); + + (continuous, discrete); + }; + }; }; module JsArray = { diff --git a/src/utility/Guesstimator.re b/src/utility/Guesstimator.re index 3389327c..e099889f 100644 --- a/src/utility/Guesstimator.re +++ b/src/utility/Guesstimator.re @@ -1,186 +1,13 @@ -module Internals = { - [@bs.deriving abstract] - type discrete = { - xs: array(float), - ys: array(float), - }; - - let jsToDistDiscrete = (d: discrete): DistTypes.discreteShape => { - xs: xsGet(d), - ys: ysGet(d), - }; - - [@bs.deriving abstract] - type combined = { - continuous: CdfLibrary.JS.distJs, - discrete, - }; - - // todo: Force to be fewer samples - let toContinous = (r: combined) => - continuousGet(r) - |> CdfLibrary.JS.jsToDist - |> Distributions.Continuous.fromShape; - - let toDiscrete = (r: combined): DistTypes.xyShape => - discreteGet(r) |> jsToDistDiscrete; - - [@bs.module "./GuesstimatorLibrary.js"] - external toCombinedFormat: (string, int, int, int) => combined = "run"; - - [@bs.module "./GuesstimatorLibrary.js"] - external stringToSamples: (string, int) => array(float) = "stringToSamples"; - - [@bs.module "./GuesstimatorLibrary.js"] - external samplesToContinuousPdf: - (array(float), int, int) => CdfLibrary.JS.distJs = - "samplesToContinuousPdf"; +[@bs.deriving abstract] +type discrete = { + xs: array(float), + ys: array(float), }; -module KDE = { - let normalSampling = (samples, outputXYPoints, kernelWidth) => { - samples - |> Internals.samplesToContinuousPdf(_, outputXYPoints, kernelWidth) - |> CdfLibrary.JS.jsToDist; - }; - - let inGroups = (samples, outputXYPoints, kernelWidth, ~cuttoff=0.9, ()) => { - let partitionAt = - samples - |> E.A.length - |> float_of_int - |> (e => e *. cuttoff) - |> int_of_float; - let part1XYPoints = - outputXYPoints |> float_of_int |> (e => e *. cuttoff) |> int_of_float; - let part2XYPoints = outputXYPoints - part1XYPoints |> Js.Math.max_int(30); - let part1Data = - samples |> Belt.Array.slice(_, ~offset=0, ~len=partitionAt); - let part2DataLength = (samples |> E.A.length) - partitionAt; - let part2Data = - samples - |> Belt.Array.slice( - _, - ~offset=(-1) * part2DataLength, - ~len=part2DataLength, - ); - let part1 = - part1Data - |> Internals.samplesToContinuousPdf(_, part1XYPoints, kernelWidth) - |> CdfLibrary.JS.jsToDist; - let part2 = - part2Data - |> Internals.samplesToContinuousPdf(_, part2XYPoints, 3) - |> CdfLibrary.JS.jsToDist; - let opp = 1.0 -. cuttoff; - // let result = - // XYShape.T.Combine.combineLinear( - // part1, - // part2, - // (a, b) => { - // let aa = a *. cuttoff; - // let bb = b *. opp; - // aa +. bb; - // }, - // ); - // Js.log2("HI", result); - // result; - part1; - }; +let jsToDistDiscrete = (d: discrete): DistTypes.discreteShape => { + xs: xsGet(d), + ys: ysGet(d), }; -module FloatFloatMap = { - module Id = - Belt.Id.MakeComparable({ - type t = float; - let cmp: (float, float) => int = Pervasives.compare; - }); - - type t = Belt.MutableMap.t(Id.t, float, Id.identity); - - let fromArray = (ar: array((float, float))) => - Belt.MutableMap.fromArray(ar, ~id=(module Id)); - let toArray = (t: t) => Belt.MutableMap.toArray(t); - let empty = () => Belt.MutableMap.make(~id=(module Id)); - let increment = (el, t: t) => - Belt.MutableMap.update( - t, - el, - fun - | Some(n) => Some(n +. 1.0) - | None => Some(1.0), - ); - - let get = (el, t: t) => Belt.MutableMap.get(t, el); - let fmap = (fn, t: t) => Belt.MutableMap.map(t, fn); -}; - -// todo: Figure out some way of doing this without creating a new array. -let split = (sortedArray: array(float)) => { - let continuous = [||]; - let discrete = FloatFloatMap.empty(); - Belt.Array.forEachWithIndex( - sortedArray, - (index, element) => { - let maxIndex = (sortedArray |> Array.length) - 1; - let possiblySimilarElements = - ( - switch (index) { - | 0 => [|index + 1|] - | n when n == maxIndex => [|index - 1|] - | _ => [|index - 1, index + 1|] - } - ) - |> Belt.Array.map(_, r => sortedArray[r]); - let hasSimilarElement = - Belt.Array.some(possiblySimilarElements, r => r == element); - hasSimilarElement - ? FloatFloatMap.increment(element, discrete) - : { - let _ = Js.Array.push(element, continuous); - (); - }; - (); - }, - ); - - (continuous, discrete); -}; - -let toMixed = - ( - ~string, - ~sampleCount=3000, - ~outputXYPoints=3000, - ~kernelWidth=10, - ~cuttoff=0.995, - (), - ) => { - let start = Js.Date.now(); - let timeMessage = message => Js.log2(message, Js.Date.now() -. start); - timeMessage("Starting"); - let samples = Internals.stringToSamples(string, sampleCount); - timeMessage("Finished sampling"); - - let length = samples |> E.A.length; - Array.fast_sort(compare, samples); - let (continuousPart, disc) = split(samples); - let lengthFloat = float_of_int(length); - let discrete: DistTypes.xyShape = - disc - |> FloatFloatMap.fmap(r => r /. lengthFloat) - |> FloatFloatMap.toArray - |> XYShape.T.fromZippedArray; - let pdf: DistTypes.xyShape = - continuousPart |> E.A.length > 20 - ? { - // samples |> KDE.inGroups(_, outputXYPoints, kernelWidth, ~cuttoff, ()); - samples |> KDE.normalSampling(_, outputXYPoints, kernelWidth); - } - : {xs: [||], ys: [||]}; - timeMessage("Finished pdf"); - let continuous = pdf |> Distributions.Continuous.fromShape; - let shape = MixedShapeBuilder.buildSimple(~continuous, ~discrete); - timeMessage("Finished shape"); - shape; -}; \ No newline at end of file +[@bs.module "./GuesstimatorLibrary.js"] +external stringToSamples: (string, int) => array(float) = "stringToSamples"; \ No newline at end of file diff --git a/src/utility/GuesstimatorLibrary.js b/src/utility/GuesstimatorLibrary.js index 1a7027de..4e8e7b09 100644 --- a/src/utility/GuesstimatorLibrary.js +++ b/src/utility/GuesstimatorLibrary.js @@ -1,89 +1,5 @@ -const { - Samples, -} = require("@foretold/cdf/lib/samples"); const _ = require("lodash"); const { Guesstimator } = require('@foretold/guesstimator/src'); -const pdfast = require('pdfast'); - -/** - * @param values - * @param outputResolutionCount - * @param min - * @param max - * @returns {{discrete: {ys: *, xs: *}, continuous: {ys: [], xs: []}}} - */ -const toPdf = (values, outputResolutionCount, width, min, max) => { - let duplicateSamples = _(values).groupBy().pickBy(x => x.length > 1).keys().value(); - let totalLength = _.size(values); - let frequencies = duplicateSamples.map(s => ({ - value: parseFloat(s), - percentage: _(values).filter(x => x == s).size() / totalLength - })); - let continuousSamples = _.difference(values, frequencies.map(f => f.value)); - - let discrete = { - xs: frequencies.map(f => f.value), - ys: frequencies.map(f => f.percentage) - }; - let continuous = { ys: [], xs: [] }; - - if (continuousSamples.length > 20) { - // let c = continuousSamples.map( r => (Math.log2(r)) * 1000); - let c = continuousSamples; - const samples = new Samples(c); - - - const pdf = samples.toPdf({ size: outputResolutionCount, width, min, max }); - // continuous = {xs: pdf.xs.map(r => Math.pow(2,r/1000)), ys: pdf.ys}; - continuous = pdf; - } - - return { continuous, discrete }; -}; - -/** - * @param text - * @param sampleCount - * @param outputResolutionCount - * @param inputs - * @param min - * @param max - * @returns {{discrete: {ys: *, xs: *}, continuous: {ys: *[], xs: *[]}}} - */ -const run = ( - text, - sampleCount, - outputResolutionCount, - width, - inputs = [], - min = false, - max = false, -) => { - const [_error, item] = Guesstimator.parse({ text: "=" + text }); - const { parsedInput } = item; - - const guesstimator = new Guesstimator({ parsedInput }); - const value = guesstimator.sample( - sampleCount, - inputs, - ); - - const values = _.filter(value.values, _.isFinite); - - let update; - let blankResponse = { - continuous: { ys: [], xs: [] }, - discrete: { ys: [], xs: [] } - }; - if (values.length === 0) { - update = blankResponse; - } else if (values.length === 1) { - update = blankResponse; - } else { - update = toPdf(values, outputResolutionCount, width, min, max); - } - return update; -}; const stringToSamples = ( text, @@ -104,23 +20,6 @@ const stringToSamples = ( } }; - -const samplesToContinuousPdf = ( - samples, - size, - width, - min = false, - max = false, -) => { - let _samples = _.filter(samples, _.isFinite); - if (_.isFinite(min)) { _samples = _.filter(_samples, r => r > min) }; - if (_.isFinite(max)) { _samples = _.filter(_samples, r => r < max) }; - let pdf = pdfast.create(_samples, { size, width }); - return {xs: pdf.map(r => r.x), ys: pdf.map(r => r.y)}; -}; - module.exports = { - run, stringToSamples, - samplesToContinuousPdf }; diff --git a/src/utility/KdeLibrary.js b/src/utility/KdeLibrary.js new file mode 100644 index 00000000..d95d3829 --- /dev/null +++ b/src/utility/KdeLibrary.js @@ -0,0 +1,21 @@ +const pdfast = require('pdfast'); +const _ = require("lodash"); + +const samplesToContinuousPdf = ( + samples, + size, + width, + min = false, + max = false, +) => { + let _samples = _.filter(samples, _.isFinite); + if (_.isFinite(min)) { _samples = _.filter(_samples, r => r > min) }; + if (_.isFinite(max)) { _samples = _.filter(_samples, r => r < max) }; + let pdf = pdfast.create(_samples, { size, width }); + return {xs: pdf.map(r => r.x), ys: pdf.map(r => r.y)}; +}; + + +module.exports = { + samplesToContinuousPdf, +};