diff --git a/__tests__/Bandwidth__Test.re b/__tests__/Bandwidth__Test.re deleted file mode 100644 index 54cd111d..00000000 --- a/__tests__/Bandwidth__Test.re +++ /dev/null @@ -1,13 +0,0 @@ -open Jest; -open Expect; - -describe("Bandwidth", () => { - test("nrd0()", () => { - let data = [|1., 4., 3., 2.|]; - expect(Bandwidth.nrd0(data)) |> toEqual(0.7625801874014622); - }); - test("nrd()", () => { - let data = [|1., 4., 3., 2.|]; - expect(Bandwidth.nrd(data)) |> toEqual(0.8981499984950554); - }); -}); \ No newline at end of file diff --git a/__tests__/Bandwidth__Test.res b/__tests__/Bandwidth__Test.res new file mode 100644 index 00000000..d6b5aab4 --- /dev/null +++ b/__tests__/Bandwidth__Test.res @@ -0,0 +1,13 @@ +open Jest +open Expect + +describe("Bandwidth", () => { + test("nrd0()", () => { + let data = [1., 4., 3., 2.] + expect(Bandwidth.nrd0(data)) |> toEqual(0.7625801874014622) + }) + test("nrd()", () => { + let data = [1., 4., 3., 2.] + expect(Bandwidth.nrd(data)) |> toEqual(0.8981499984950554) + }) +}) diff --git a/__tests__/DistTypes__Test.re b/__tests__/DistTypes__Test.res similarity index 53% rename from __tests__/DistTypes__Test.re rename to __tests__/DistTypes__Test.res index d5db18dd..6cfeed57 100644 --- a/__tests__/DistTypes__Test.re +++ b/__tests__/DistTypes__Test.res @@ -1,71 +1,56 @@ -open Jest; -open Expect; +open Jest +open Expect let makeTest = (~only=false, str, item1, item2) => only - ? Only.test(str, () => - expect(item1) |> toEqual(item2) - ) - : test(str, () => - expect(item1) |> toEqual(item2) - ); + ? Only.test(str, () => expect(item1) |> toEqual(item2)) + : test(str, () => expect(item1) |> toEqual(item2)) -describe("DistTypes", () => { +describe("DistTypes", () => describe("Domain", () => { let makeComplete = (yPoint, expectation) => makeTest( "With input: " ++ Js.Float.toString(yPoint), DistTypes.Domain.yPointToSubYPoint(Complete, yPoint), expectation, - ); - let makeSingle = - ( - direction: [ | `left | `right], - excludingProbabilityMass, - yPoint, - expectation, - ) => + ) + let makeSingle = (direction: [#left | #right], excludingProbabilityMass, yPoint, expectation) => makeTest( - "Excluding: " - ++ Js.Float.toString(excludingProbabilityMass) - ++ " and yPoint: " - ++ Js.Float.toString(yPoint), + "Excluding: " ++ + (Js.Float.toString(excludingProbabilityMass) ++ + (" and yPoint: " ++ Js.Float.toString(yPoint))), DistTypes.Domain.yPointToSubYPoint( - direction == `left - ? LeftLimited({xPoint: 3.0, excludingProbabilityMass}) - : RightLimited({xPoint: 3.0, excludingProbabilityMass}), + direction == #left + ? LeftLimited({xPoint: 3.0, excludingProbabilityMass: excludingProbabilityMass}) + : RightLimited({xPoint: 3.0, excludingProbabilityMass: excludingProbabilityMass}), yPoint, ), expectation, - ); + ) let makeDouble = (domain, yPoint, expectation) => - makeTest( - "Excluding: limits", - DistTypes.Domain.yPointToSubYPoint(domain, yPoint), - expectation, - ); + makeTest("Excluding: limits", DistTypes.Domain.yPointToSubYPoint(domain, yPoint), expectation) describe("With Complete Domain", () => { - makeComplete(0.0, Some(0.0)); - makeComplete(0.6, Some(0.6)); - makeComplete(1.0, Some(1.0)); - }); + makeComplete(0.0, Some(0.0)) + makeComplete(0.6, Some(0.6)) + makeComplete(1.0, Some(1.0)) + }) describe("With Left Limit", () => { - makeSingle(`left, 0.5, 1.0, Some(1.0)); - makeSingle(`left, 0.5, 0.75, Some(0.5)); - makeSingle(`left, 0.8, 0.9, Some(0.5)); - makeSingle(`left, 0.5, 0.4, None); - makeSingle(`left, 0.5, 0.5, Some(0.0)); - }); + makeSingle(#left, 0.5, 1.0, Some(1.0)) + makeSingle(#left, 0.5, 0.75, Some(0.5)) + makeSingle(#left, 0.8, 0.9, Some(0.5)) + makeSingle(#left, 0.5, 0.4, None) + makeSingle(#left, 0.5, 0.5, Some(0.0)) + }) describe("With Right Limit", () => { - makeSingle(`right, 0.5, 1.0, None); - makeSingle(`right, 0.5, 0.25, Some(0.5)); - makeSingle(`right, 0.8, 0.5, None); - makeSingle(`right, 0.2, 0.2, Some(0.25)); - makeSingle(`right, 0.5, 0.5, Some(1.0)); - makeSingle(`right, 0.5, 0.0, Some(0.0)); - makeSingle(`right, 0.5, 0.5, Some(1.0)); - }); + makeSingle(#right, 0.5, 1.0, None) + makeSingle(#right, 0.5, 0.25, Some(0.5)) + makeSingle(#right, 0.8, 0.5, None) + makeSingle(#right, 0.2, 0.2, Some(0.25)) + makeSingle(#right, 0.5, 0.5, Some(1.0)) + makeSingle(#right, 0.5, 0.0, Some(0.0)) + makeSingle(#right, 0.5, 0.5, Some(1.0)) + }) describe("With Left and Right Limit", () => { makeDouble( LeftAndRightLimited( @@ -74,7 +59,7 @@ describe("DistTypes", () => { ), 0.5, Some(0.5), - ); + ) makeDouble( LeftAndRightLimited( {excludingProbabilityMass: 0.1, xPoint: 3.0}, @@ -82,7 +67,7 @@ describe("DistTypes", () => { ), 0.2, Some(0.125), - ); + ) makeDouble( LeftAndRightLimited( {excludingProbabilityMass: 0.1, xPoint: 3.0}, @@ -90,7 +75,7 @@ describe("DistTypes", () => { ), 0.1, Some(0.0), - ); + ) makeDouble( LeftAndRightLimited( {excludingProbabilityMass: 0.1, xPoint: 3.0}, @@ -98,7 +83,7 @@ describe("DistTypes", () => { ), 0.05, None, - ); - }); + ) + }) }) -}); \ No newline at end of file +) diff --git a/__tests__/Distributions__Test.re b/__tests__/Distributions__Test.res similarity index 99% rename from __tests__/Distributions__Test.re rename to __tests__/Distributions__Test.res index 0e2869d1..7b26962c 100644 --- a/__tests__/Distributions__Test.re +++ b/__tests__/Distributions__Test.res @@ -1,7 +1,7 @@ -open Jest; -open Expect; +open Jest +open Expect -let shape: DistTypes.xyShape = {xs: [|1., 4., 8.|], ys: [|8., 9., 2.|]}; +let shape: DistTypes.xyShape = {xs: [1., 4., 8.], ys: [8., 9., 2.]} // let makeTest = (~only=false, str, item1, item2) => // only diff --git a/__tests__/Hardcoded__Test.re b/__tests__/Hardcoded__Test.re index fd915b31..1e44df7a 100644 --- a/__tests__/Hardcoded__Test.re +++ b/__tests__/Hardcoded__Test.re @@ -1,6 +1,6 @@ open Jest; open Expect; - +/* let makeTest = (~only=false, str, item1, item2) => only ? Only.test(str, () => @@ -54,4 +54,4 @@ describe("XYShapes", () => { Error("Sad"), ) }) -}); +}); */ diff --git a/__tests__/Samples__test.re b/__tests__/Samples__test.re deleted file mode 100644 index 9afbaf86..00000000 --- a/__tests__/Samples__test.re +++ /dev/null @@ -1,51 +0,0 @@ -open Jest; -open Expect; - -let makeTest = (~only=false, str, item1, item2) => - only - ? Only.test(str, () => - expect(item1) |> toEqual(item2) - ) - : test(str, () => - expect(item1) |> toEqual(item2) - ); - -describe("Lodash", () => { - describe("Lodash", () => { - makeTest( - "split", - SamplesToShape.Internals.T.splitContinuousAndDiscrete([|1.432, 1.33455, 2.0|]), - ([|1.432, 1.33455, 2.0|], E.FloatFloatMap.empty()), - ); - makeTest( - "split", - SamplesToShape.Internals.T.splitContinuousAndDiscrete([| - 1.432, - 1.33455, - 2.0, - 2.0, - 2.0, - 2.0, - |]) - |> (((c, disc)) => (c, disc |> E.FloatFloatMap.toArray)), - ([|1.432, 1.33455|], [|(2.0, 4.0)|]), - ); - - let makeDuplicatedArray = count => { - let arr = Belt.Array.range(1, count) |> E.A.fmap(float_of_int); - let sorted = arr |> Belt.SortArray.stableSortBy(_, compare); - E.A.concatMany([|sorted, sorted, sorted, sorted|]) - |> Belt.SortArray.stableSortBy(_, compare); - }; - - let (_, discrete) = - SamplesToShape.Internals.T.splitContinuousAndDiscrete(makeDuplicatedArray(10)); - let toArr = discrete |> E.FloatFloatMap.toArray; - makeTest("splitMedium", toArr |> Belt.Array.length, 10); - - let (c, discrete) = - SamplesToShape.Internals.T.splitContinuousAndDiscrete(makeDuplicatedArray(500)); - let toArr = discrete |> E.FloatFloatMap.toArray; - makeTest("splitMedium", toArr |> Belt.Array.length, 500); - }) -}); \ No newline at end of file diff --git a/__tests__/Samples__test.res b/__tests__/Samples__test.res new file mode 100644 index 00000000..6989b53c --- /dev/null +++ b/__tests__/Samples__test.res @@ -0,0 +1,47 @@ +open Jest +open Expect + +let makeTest = (~only=false, str, item1, item2) => + only + ? Only.test(str, () => expect(item1) |> toEqual(item2)) + : test(str, () => expect(item1) |> toEqual(item2)) + +describe("Lodash", () => + describe("Lodash", () => { + makeTest( + "split", + SamplesToShape.Internals.T.splitContinuousAndDiscrete([1.432, 1.33455, 2.0]), + ([1.432, 1.33455, 2.0], E.FloatFloatMap.empty()), + ) + makeTest( + "split", + SamplesToShape.Internals.T.splitContinuousAndDiscrete([ + 1.432, + 1.33455, + 2.0, + 2.0, + 2.0, + 2.0, + ]) |> (((c, disc)) => (c, disc |> E.FloatFloatMap.toArray)), + ([1.432, 1.33455], [(2.0, 4.0)]), + ) + + let makeDuplicatedArray = count => { + let arr = Belt.Array.range(1, count) |> E.A.fmap(float_of_int) + let sorted = arr |> Belt.SortArray.stableSortBy(_, compare) + E.A.concatMany([sorted, sorted, sorted, sorted]) |> Belt.SortArray.stableSortBy(_, compare) + } + + let (_, discrete) = SamplesToShape.Internals.T.splitContinuousAndDiscrete( + makeDuplicatedArray(10), + ) + let toArr = discrete |> E.FloatFloatMap.toArray + makeTest("splitMedium", toArr |> Belt.Array.length, 10) + + let (c, discrete) = SamplesToShape.Internals.T.splitContinuousAndDiscrete( + makeDuplicatedArray(500), + ) + let toArr = discrete |> E.FloatFloatMap.toArray + makeTest("splitMedium", toArr |> Belt.Array.length, 500) + }) +) diff --git a/__tests__/XYShape__Test.re b/__tests__/XYShape__Test.re deleted file mode 100644 index 67eee90b..00000000 --- a/__tests__/XYShape__Test.re +++ /dev/null @@ -1,63 +0,0 @@ -open Jest; -open Expect; - -let makeTest = (~only=false, str, item1, item2) => - only - ? Only.test(str, () => - expect(item1) |> toEqual(item2) - ) - : test(str, () => - expect(item1) |> toEqual(item2) - ); - -let shape1: DistTypes.xyShape = {xs: [|1., 4., 8.|], ys: [|0.2, 0.4, 0.8|]}; - -let shape2: DistTypes.xyShape = { - xs: [|1., 5., 10.|], - ys: [|0.2, 0.5, 0.8|], -}; - -let shape3: DistTypes.xyShape = { - xs: [|1., 20., 50.|], - ys: [|0.2, 0.5, 0.8|], -}; - -describe("XYShapes", () => { - describe("logScorePoint", () => { - makeTest( - "When identical", - XYShape.logScorePoint(30, shape1, shape1), - Some(0.0), - ); - makeTest( - "When similar", - XYShape.logScorePoint(30, shape1, shape2), - Some(1.658971191043856), - ); - makeTest( - "When very different", - XYShape.logScorePoint(30, shape1, shape3), - Some(210.3721280423322), - ); - }); - // describe("transverse", () => { - // makeTest( - // "When very different", - // XYShape.Transversal._transverse( - // (aCurrent, aLast) => aCurrent +. aLast, - // [|1.0, 2.0, 3.0, 4.0|], - // ), - // [|1.0, 3.0, 6.0, 10.0|], - // ) - // }); - describe("integrateWithTriangles", () => { - makeTest( - "integrates correctly", - XYShape.Range.integrateWithTriangles(shape1), - Some({ - xs: [|1., 4., 8.|], - ys: [|0.0, 0.9000000000000001, 3.3000000000000007|], - }), - ) - }); -}); \ No newline at end of file diff --git a/__tests__/XYShape__Test.res b/__tests__/XYShape__Test.res new file mode 100644 index 00000000..54e0934c --- /dev/null +++ b/__tests__/XYShape__Test.res @@ -0,0 +1,51 @@ +open Jest +open Expect + +let makeTest = (~only=false, str, item1, item2) => + only + ? Only.test(str, () => expect(item1) |> toEqual(item2)) + : test(str, () => expect(item1) |> toEqual(item2)) + +let shape1: DistTypes.xyShape = {xs: [1., 4., 8.], ys: [0.2, 0.4, 0.8]} + +let shape2: DistTypes.xyShape = { + xs: [1., 5., 10.], + ys: [0.2, 0.5, 0.8], +} + +let shape3: DistTypes.xyShape = { + xs: [1., 20., 50.], + ys: [0.2, 0.5, 0.8], +} + +describe("XYShapes", () => { + describe("logScorePoint", () => { + makeTest("When identical", XYShape.logScorePoint(30, shape1, shape1), Some(0.0)) + makeTest("When similar", XYShape.logScorePoint(30, shape1, shape2), Some(1.658971191043856)) + makeTest( + "When very different", + XYShape.logScorePoint(30, shape1, shape3), + Some(210.3721280423322), + ) + }) + // describe("transverse", () => { + // makeTest( + // "When very different", + // XYShape.Transversal._transverse( + // (aCurrent, aLast) => aCurrent +. aLast, + // [|1.0, 2.0, 3.0, 4.0|], + // ), + // [|1.0, 3.0, 6.0, 10.0|], + // ) + // }); + describe("integrateWithTriangles", () => + makeTest( + "integrates correctly", + XYShape.Range.integrateWithTriangles(shape1), + Some({ + xs: [1., 4., 8.], + ys: [0.0, 0.9000000000000001, 3.3000000000000007], + }), + ) + ) +}) diff --git a/bsconfig.json b/bsconfig.json index 3e798659..9b46c6cd 100644 --- a/bsconfig.json +++ b/bsconfig.json @@ -1,7 +1,6 @@ { "name": "probExample", - "reason": { - }, + "reason": {}, "sources": [ { "dir": "src", @@ -32,10 +31,17 @@ "rationale", "bs-moment" ], + "gentypeconfig": { + "language": "typescript", + "shims": {}, + "debug": { + "all": false, + "basic": false + } + }, "refmt": 3, "warnings": { "number": "+A-42-48-9-30-4-102" }, - "ppx-flags": [ - ] -} + "ppx-flags": [] +} \ No newline at end of file diff --git a/package.json b/package.json index cb3b851e..d3a0b183 100644 --- a/package.json +++ b/package.json @@ -47,6 +47,7 @@ "@glennsl/bs-jest": "^0.5.1", "bs-platform": "9.0.2", "docsify": "^4.12.2", + "gentype": "^4.3.0", "parcel-bundler": "1.12.4", "parcel-plugin-bundle-visualiser": "^1.2.0", "parcel-plugin-less-js-enabled": "1.0.2" diff --git a/src/distPlus/ProgramEvaluator.gen.tsx b/src/distPlus/ProgramEvaluator.gen.tsx new file mode 100644 index 00000000..51554a2f --- /dev/null +++ b/src/distPlus/ProgramEvaluator.gen.tsx @@ -0,0 +1,30 @@ +/* TypeScript file generated from ProgramEvaluator.res by genType. */ +/* eslint-disable import/first */ + + +// @ts-ignore: Implicit any on import +import * as ProgramEvaluatorBS__Es6Import from './ProgramEvaluator.bs'; +const ProgramEvaluatorBS: any = ProgramEvaluatorBS__Es6Import; + +import type {ExpressionTree_environment as ExpressionTypes_ExpressionTree_environment} from '../../src/distPlus/expressionTree/ExpressionTypes.gen'; + +import type {ExpressionTree_node as ExpressionTypes_ExpressionTree_node} from '../../src/distPlus/expressionTree/ExpressionTypes.gen'; + +import type {list} from './ReasonPervasives.gen'; + +import type {t as DistPlus_t} from '../../src/distPlus/distribution/DistPlus.gen'; + +// tslint:disable-next-line:interface-over-type-literal +export type export = + { NAME: "DistPlus"; VAL: DistPlus_t } + | { NAME: "Float"; VAL: number } + | { NAME: "Function"; VAL: [[string[], ExpressionTypes_ExpressionTree_node], ExpressionTypes_ExpressionTree_environment] }; + +export const runAll: (squiggleString:string) => + { tag: "Ok"; value: list } + | { tag: "Error"; value: string } = function (Arg1: any) { + const result = ProgramEvaluatorBS.runAll(Arg1); + return result.TAG===0 + ? {tag:"Ok", value:result._0} + : {tag:"Error", value:result._0} +}; diff --git a/src/distPlus/ProgramEvaluator.res b/src/distPlus/ProgramEvaluator.res index 7ab92f4c..9f08eb7a 100644 --- a/src/distPlus/ProgramEvaluator.res +++ b/src/distPlus/ProgramEvaluator.res @@ -91,8 +91,7 @@ module Internals = { } let inputsToLeaf = (inputs: Inputs.inputs) => - MathJsParser.fromString(inputs.squiggleString) - |> E.R.bind(_, g => runProgram(inputs, g)) + MathJsParser.fromString(inputs.squiggleString) |> E.R.bind(_, g => runProgram(inputs, g)) let outputToDistPlus = (inputs: Inputs.inputs, shape: DistTypes.shape) => DistPlus.make(~shape, ~squiggleString=Some(inputs.squiggleString), ()) @@ -117,6 +116,7 @@ let renderIfNeeded = (inputs: Inputs.inputs, node: ExpressionTypes.ExpressionTre | _ => Error("Didn't render, but intended to") } ) + | n => Ok(n) } ) @@ -141,20 +141,22 @@ let coersionToExportedTypes = ( let rec mapM = (f, xs) => switch xs { - | list{} => Ok(list{}) - | list{x, ...rest} => - switch f(x) { - | Error(err) => Error(err) - | Ok(val) => - switch mapM(f, rest) { - | Error(err) => Error(err) - | Ok(restList) => Ok(list{val, ...restList}) - } - } - } + | list{} => Ok(list{}) + | list{x, ...rest} => + switch f(x) { + | Error(err) => Error(err) + | Ok(val) => + switch mapM(f, rest) { + | Error(err) => Error(err) + | Ok(restList) => Ok(list{val, ...restList}) + } + } + } let evaluateProgram = (inputs: Inputs.inputs) => - inputs |> Internals.inputsToLeaf |> E.R.bind(_, xs => mapM(((a, b)) => coersionToExportedTypes(inputs, a, b), (Array.to_list(xs)))) + inputs + |> Internals.inputsToLeaf + |> E.R.bind(_, xs => mapM(((a, b)) => coersionToExportedTypes(inputs, a, b), Array.to_list(xs))) let evaluateFunction = ( inputs: Inputs.inputs, @@ -169,3 +171,20 @@ let evaluateFunction = ( ) output |> E.R.bind(_, coersionToExportedTypes(inputs, inputs.environment)) } + +@genType +let runAll = (squiggleString: string) => { + let inputs = Inputs.make( + ~samplingInputs={ + sampleCount: Some(10000), + outputXYPoints: Some(10000), + kernelWidth: None, + shapeLength: Some(1000), + }, + ~squiggleString, + ~environment=[]->Belt.Map.String.fromArray, + (), + ) + let response1 = evaluateProgram(inputs); + response1; +} diff --git a/src/distPlus/distribution/AlgebraicShapeCombination.re b/src/distPlus/distribution/AlgebraicShapeCombination.re deleted file mode 100644 index 631fb3a5..00000000 --- a/src/distPlus/distribution/AlgebraicShapeCombination.re +++ /dev/null @@ -1,298 +0,0 @@ -type pointMassesWithMoments = { - n: int, - masses: array(float), - means: array(float), - variances: array(float), -}; - -/* This function takes a continuous distribution and efficiently approximates it as - point masses that have variances associated with them. - We estimate the means and variances from overlapping triangular distributions which we imagine are making up the - XYShape. - We can then use the algebra of random variables to "convolve" the point masses and their variances, - and finally reconstruct a new distribution from them, e.g. using a Fast Gauss Transform or Raykar et al. (2007). */ -let toDiscretePointMassesFromTriangulars = - (~inverse=false, s: XYShape.T.t): pointMassesWithMoments => { - // TODO: what if there is only one point in the distribution? - let n = s |> XYShape.T.length; - // first, double up the leftmost and rightmost points: - let {xs, ys}: XYShape.T.t = s; - Js.Array.unshift(xs[0], xs) |> ignore; - Js.Array.unshift(ys[0], ys) |> ignore; - Js.Array.push(xs[n - 1], xs) |> ignore; - Js.Array.push(ys[n - 1], ys) |> ignore; - let n = E.A.length(xs); - // squares and neighbourly products of the xs - let xsSq: array(float) = Belt.Array.makeUninitializedUnsafe(n); - let xsProdN1: array(float) = Belt.Array.makeUninitializedUnsafe(n - 1); - let xsProdN2: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2); - for (i in 0 to n - 1) { - Belt.Array.set(xsSq, i, xs[i] *. xs[i]) |> ignore; - (); - }; - for (i in 0 to n - 2) { - Belt.Array.set(xsProdN1, i, xs[i] *. xs[i + 1]) |> ignore; - (); - }; - for (i in 0 to n - 3) { - Belt.Array.set(xsProdN2, i, xs[i] *. xs[i + 2]) |> ignore; - (); - }; - // means and variances - let masses: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2); // doesn't include the fake first and last points - let means: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2); - let variances: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2); - - if (inverse) { - for (i in 1 to n - 2) { - Belt.Array.set(masses, i - 1, (xs[i + 1] -. xs[i - 1]) *. ys[i] /. 2.) - |> ignore; - - // this only works when the whole triange is either on the left or on the right of zero - let a = xs[i - 1]; - let c = xs[i]; - let b = xs[i + 1]; - - // These are the moments of the reciprocal of a triangular distribution, as symbolically integrated by Mathematica. - // They're probably pretty close to invMean ~ 1/mean = 3/(a+b+c) and invVar. But I haven't worked out - // the worst case error, so for now let's use these monster equations - let inverseMean = - 2. - *. (a *. log(a /. c) /. (a -. c) +. b *. log(c /. b) /. (b -. c)) - /. (a -. b); - let inverseVar = - 2. - *. (log(c /. a) /. (a -. c) +. b *. log(b /. c) /. (b -. c)) - /. (a -. b) - -. inverseMean - ** 2.; - - Belt.Array.set(means, i - 1, inverseMean) |> ignore; - - Belt.Array.set(variances, i - 1, inverseVar) |> ignore; - (); - }; - - {n: n - 2, masses, means, variances}; - } else { - for (i in 1 to n - 2) { - // area of triangle = width * height / 2 - Belt.Array.set(masses, i - 1, (xs[i + 1] -. xs[i - 1]) *. ys[i] /. 2.) - |> ignore; - - // means of triangle = (a + b + c) / 3 - Belt.Array.set(means, i - 1, (xs[i - 1] +. xs[i] +. xs[i + 1]) /. 3.) - |> ignore; - - // variance of triangle = (a^2 + b^2 + c^2 - ab - ac - bc) / 18 - Belt.Array.set( - variances, - i - 1, - ( - xsSq[i - 1] - +. xsSq[i] - +. xsSq[i + 1] - -. xsProdN1[i - 1] - -. xsProdN1[i] - -. xsProdN2[i - 1] - ) - /. 18., - ) - |> ignore; - (); - }; - {n: n - 2, masses, means, variances}; - }; -}; - -let combineShapesContinuousContinuous = - ( - op: ExpressionTypes.algebraicOperation, - s1: DistTypes.xyShape, - s2: DistTypes.xyShape, - ) - : DistTypes.xyShape => { - let t1n = s1 |> XYShape.T.length; - let t2n = s2 |> XYShape.T.length; - - // if we add the two distributions, we should probably use normal filters. - // if we multiply the two distributions, we should probably use lognormal filters. - let t1m = toDiscretePointMassesFromTriangulars(s1); - let t2m = - switch (op) { - | `Divide => toDiscretePointMassesFromTriangulars(~inverse=true, s2) - | _ => toDiscretePointMassesFromTriangulars(~inverse=false, s2) - }; - - let combineMeansFn = - switch (op) { - | `Add => ((m1, m2) => m1 +. m2) - | `Subtract => ((m1, m2) => m1 -. m2) - | `Multiply => ((m1, m2) => m1 *. m2) - | `Divide => ((m1, mInv2) => m1 *. mInv2) - | `Exponentiate => ((m1, mInv2) => m1 ** mInv2) - }; // note: here, mInv2 = mean(1 / t2) ~= 1 / mean(t2) - - // TODO: I don't know what the variances are for exponentatiation - // converts the variances and means of the two inputs into the variance of the output - let combineVariancesFn = - switch (op) { - | `Add => ((v1, v2, _, _) => v1 +. v2) - | `Subtract => ((v1, v2, _, _) => v1 +. v2) - | `Multiply => ( - (v1, v2, m1, m2) => v1 *. v2 +. v1 *. m2 ** 2. +. v2 *. m1 ** 2. - ) - | `Exponentiate => - ((v1, v2, m1, m2) => v1 *. v2 +. v1 *. m2 ** 2. +. v2 *. m1 ** 2.); - | `Divide => ( - (v1, vInv2, m1, mInv2) => - v1 *. vInv2 +. v1 *. mInv2 ** 2. +. vInv2 *. m1 ** 2. - ) - }; - - // TODO: If operating on two positive-domain distributions, we should take that into account - let outputMinX: ref(float) = ref(infinity); - let outputMaxX: ref(float) = ref(neg_infinity); - let masses: array(float) = - Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n); - let means: array(float) = - Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n); - let variances: array(float) = - Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n); - // then convolve the two sets of pointMassesWithMoments - for (i in 0 to t1m.n - 1) { - for (j in 0 to t2m.n - 1) { - let k = i * t2m.n + j; - Belt.Array.set(masses, k, t1m.masses[i] *. t2m.masses[j]) |> ignore; - - let mean = combineMeansFn(t1m.means[i], t2m.means[j]); - let variance = - combineVariancesFn( - t1m.variances[i], - t2m.variances[j], - t1m.means[i], - t2m.means[j], - ); - Belt.Array.set(means, k, mean) |> ignore; - Belt.Array.set(variances, k, variance) |> ignore; - // update bounds - let minX = mean -. 2. *. sqrt(variance) *. 1.644854; - let maxX = mean +. 2. *. sqrt(variance) *. 1.644854; - if (minX < outputMinX^) { - outputMinX := minX; - }; - if (maxX > outputMaxX^) { - outputMaxX := maxX; - }; - }; - }; - - // we now want to create a set of target points. For now, let's just evenly distribute 200 points between - // between the outputMinX and outputMaxX - let nOut = 300; - let outputXs: array(float) = - E.A.Floats.range(outputMinX^, outputMaxX^, nOut); - let outputYs: array(float) = Belt.Array.make(nOut, 0.0); - // now, for each of the outputYs, accumulate from a Gaussian kernel over each input point. - for (j in 0 to E.A.length(masses) - 1) { - // go through all of the result points - if (variances[j] > 0. && masses[j] > 0.) { - for (i in 0 to E.A.length(outputXs) - 1) { - // go through all of the target points - let dx = outputXs[i] -. means[j]; - let contribution = - masses[j] - *. exp(-. (dx ** 2.) /. (2. *. variances[j])) - /. sqrt(2. *. 3.14159276 *. variances[j]); - Belt.Array.set(outputYs, i, outputYs[i] +. contribution) |> ignore; - }; - }; - }; - - {xs: outputXs, ys: outputYs}; -}; - -let toDiscretePointMassesFromDiscrete = - (s: DistTypes.xyShape): pointMassesWithMoments => { - let {xs, ys}: XYShape.T.t = s; - let n = E.A.length(xs); - - let masses: array(float) = Belt.Array.makeBy(n, i => ys[i]); - let means: array(float) = Belt.Array.makeBy(n, i => xs[i]); - let variances: array(float) = Belt.Array.makeBy(n, i => 0.0); - - {n, masses, means, variances}; -}; - -let combineShapesContinuousDiscrete = - ( - op: ExpressionTypes.algebraicOperation, - continuousShape: DistTypes.xyShape, - discreteShape: DistTypes.xyShape, - ) - : DistTypes.xyShape => { - let t1n = continuousShape |> XYShape.T.length; - let t2n = discreteShape |> XYShape.T.length; - - // each x pair is added/subtracted - let fn = Operation.Algebraic.toFn(op); - - let outXYShapes: array(array((float, float))) = - Belt.Array.makeUninitializedUnsafe(t2n); - - switch (op) { - | `Add - | `Subtract => - for (j in 0 to t2n - 1) { - // creates a new continuous shape for each one of the discrete points, and collects them in outXYShapes. - let dxyShape: array((float, float)) = - Belt.Array.makeUninitializedUnsafe(t1n); - for (i in 0 to t1n - 1) { - Belt.Array.set( - dxyShape, - i, - ( - fn(continuousShape.xs[i], discreteShape.xs[j]), - continuousShape.ys[i] *. discreteShape.ys[j], - ), - ) - |> ignore; - (); - }; - Belt.Array.set(outXYShapes, j, dxyShape) |> ignore; - (); - } - | `Multiply - | `Exponentiate - | `Divide => - for (j in 0 to t2n - 1) { - // creates a new continuous shape for each one of the discrete points, and collects them in outXYShapes. - let dxyShape: array((float, float)) = - Belt.Array.makeUninitializedUnsafe(t1n); - for (i in 0 to t1n - 1) { - Belt.Array.set( - dxyShape, - i, - ( - fn(continuousShape.xs[i], discreteShape.xs[j]), - {continuousShape.ys[i] *. discreteShape.ys[j] /. discreteShape.xs[j]} - ), - ) - |> ignore; - (); - }; - Belt.Array.set(outXYShapes, j, dxyShape) |> ignore; - (); - } - }; - - outXYShapes - |> E.A.fmap(XYShape.T.fromZippedArray) - |> E.A.fold_left( - XYShape.PointwiseCombination.combine( - (+.), - XYShape.XtoY.continuousInterpolator(`Linear, `UseZero), - ), - XYShape.T.empty, - ); -}; diff --git a/src/distPlus/distribution/AlgebraicShapeCombination.res b/src/distPlus/distribution/AlgebraicShapeCombination.res new file mode 100644 index 00000000..56e8309d --- /dev/null +++ b/src/distPlus/distribution/AlgebraicShapeCombination.res @@ -0,0 +1,266 @@ +type pointMassesWithMoments = { + n: int, + masses: array, + means: array, + variances: array, +} + +/* This function takes a continuous distribution and efficiently approximates it as + point masses that have variances associated with them. + We estimate the means and variances from overlapping triangular distributions which we imagine are making up the + XYShape. + We can then use the algebra of random variables to "convolve" the point masses and their variances, + and finally reconstruct a new distribution from them, e.g. using a Fast Gauss Transform or Raykar et al. (2007). */ +let toDiscretePointMassesFromTriangulars = ( + ~inverse=false, + s: XYShape.T.t, +): pointMassesWithMoments => { + // TODO: what if there is only one point in the distribution? + let n = s |> XYShape.T.length + // first, double up the leftmost and rightmost points: + let {xs, ys}: XYShape.T.t = s + Js.Array.unshift(xs[0], xs) |> ignore + Js.Array.unshift(ys[0], ys) |> ignore + Js.Array.push(xs[n - 1], xs) |> ignore + Js.Array.push(ys[n - 1], ys) |> ignore + let n = E.A.length(xs) + // squares and neighbourly products of the xs + let xsSq: array = Belt.Array.makeUninitializedUnsafe(n) + let xsProdN1: array = Belt.Array.makeUninitializedUnsafe(n - 1) + let xsProdN2: array = Belt.Array.makeUninitializedUnsafe(n - 2) + for i in 0 to n - 1 { + Belt.Array.set(xsSq, i, xs[i] *. xs[i]) |> ignore + () + } + for i in 0 to n - 2 { + Belt.Array.set(xsProdN1, i, xs[i] *. xs[i + 1]) |> ignore + () + } + for i in 0 to n - 3 { + Belt.Array.set(xsProdN2, i, xs[i] *. xs[i + 2]) |> ignore + () + } + // means and variances + let masses: array = Belt.Array.makeUninitializedUnsafe(n - 2) // doesn't include the fake first and last points + let means: array = Belt.Array.makeUninitializedUnsafe(n - 2) + let variances: array = Belt.Array.makeUninitializedUnsafe(n - 2) + + if inverse { + for i in 1 to n - 2 { + Belt.Array.set(masses, i - 1, (xs[i + 1] -. xs[i - 1]) *. ys[i] /. 2.) |> ignore + + // this only works when the whole triange is either on the left or on the right of zero + let a = xs[i - 1] + let c = xs[i] + let b = xs[i + 1] + + // These are the moments of the reciprocal of a triangular distribution, as symbolically integrated by Mathematica. + // They're probably pretty close to invMean ~ 1/mean = 3/(a+b+c) and invVar. But I haven't worked out + // the worst case error, so for now let's use these monster equations + let inverseMean = + 2. *. (a *. log(a /. c) /. (a -. c) +. b *. log(c /. b) /. (b -. c)) /. (a -. b) + let inverseVar = + 2. *. (log(c /. a) /. (a -. c) +. b *. log(b /. c) /. (b -. c)) /. (a -. b) -. + inverseMean ** 2. + + Belt.Array.set(means, i - 1, inverseMean) |> ignore + + Belt.Array.set(variances, i - 1, inverseVar) |> ignore + () + } + + {n: n - 2, masses: masses, means: means, variances: variances} + } else { + for i in 1 to n - 2 { + // area of triangle = width * height / 2 + Belt.Array.set(masses, i - 1, (xs[i + 1] -. xs[i - 1]) *. ys[i] /. 2.) |> ignore + + // means of triangle = (a + b + c) / 3 + Belt.Array.set(means, i - 1, (xs[i - 1] +. xs[i] +. xs[i + 1]) /. 3.) |> ignore + + // variance of triangle = (a^2 + b^2 + c^2 - ab - ac - bc) / 18 + Belt.Array.set( + variances, + i - 1, + (xsSq[i - 1] +. + xsSq[i] +. + xsSq[i + 1] -. + xsProdN1[i - 1] -. + xsProdN1[i] -. + xsProdN2[i - 1]) /. 18., + ) |> ignore + () + } + {n: n - 2, masses: masses, means: means, variances: variances} + } +} + +let combineShapesContinuousContinuous = ( + op: ExpressionTypes.algebraicOperation, + s1: DistTypes.xyShape, + s2: DistTypes.xyShape, +): DistTypes.xyShape => { + let t1n = s1 |> XYShape.T.length + let t2n = s2 |> XYShape.T.length + + // if we add the two distributions, we should probably use normal filters. + // if we multiply the two distributions, we should probably use lognormal filters. + let t1m = toDiscretePointMassesFromTriangulars(s1) + let t2m = switch op { + | #Divide => toDiscretePointMassesFromTriangulars(~inverse=true, s2) + | _ => toDiscretePointMassesFromTriangulars(~inverse=false, s2) + } + + let combineMeansFn = switch op { + | #Add => (m1, m2) => m1 +. m2 + | #Subtract => (m1, m2) => m1 -. m2 + | #Multiply => (m1, m2) => m1 *. m2 + | #Divide => (m1, mInv2) => m1 *. mInv2 + | #Exponentiate => (m1, mInv2) => m1 ** mInv2 + } // note: here, mInv2 = mean(1 / t2) ~= 1 / mean(t2) + + // TODO: I don't know what the variances are for exponentatiation + // converts the variances and means of the two inputs into the variance of the output + let combineVariancesFn = switch op { + | #Add => (v1, v2, _, _) => v1 +. v2 + | #Subtract => (v1, v2, _, _) => v1 +. v2 + | #Multiply => (v1, v2, m1, m2) => v1 *. v2 +. v1 *. m2 ** 2. +. v2 *. m1 ** 2. + | #Exponentiate => (v1, v2, m1, m2) => v1 *. v2 +. v1 *. m2 ** 2. +. v2 *. m1 ** 2. + | #Divide => (v1, vInv2, m1, mInv2) => v1 *. vInv2 +. v1 *. mInv2 ** 2. +. vInv2 *. m1 ** 2. + } + + // TODO: If operating on two positive-domain distributions, we should take that into account + let outputMinX: ref = ref(infinity) + let outputMaxX: ref = ref(neg_infinity) + let masses: array = Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n) + let means: array = Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n) + let variances: array = Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n) + // then convolve the two sets of pointMassesWithMoments + for i in 0 to t1m.n - 1 { + for j in 0 to t2m.n - 1 { + let k = i * t2m.n + j + Belt.Array.set(masses, k, t1m.masses[i] *. t2m.masses[j]) |> ignore + + let mean = combineMeansFn(t1m.means[i], t2m.means[j]) + let variance = combineVariancesFn( + t1m.variances[i], + t2m.variances[j], + t1m.means[i], + t2m.means[j], + ) + Belt.Array.set(means, k, mean) |> ignore + Belt.Array.set(variances, k, variance) |> ignore + // update bounds + let minX = mean -. 2. *. sqrt(variance) *. 1.644854 + let maxX = mean +. 2. *. sqrt(variance) *. 1.644854 + if minX < outputMinX.contents { + outputMinX := minX + } + if maxX > outputMaxX.contents { + outputMaxX := maxX + } + } + } + + // we now want to create a set of target points. For now, let's just evenly distribute 200 points between + // between the outputMinX and outputMaxX + let nOut = 300 + let outputXs: array = E.A.Floats.range(outputMinX.contents, outputMaxX.contents, nOut) + let outputYs: array = Belt.Array.make(nOut, 0.0) + // now, for each of the outputYs, accumulate from a Gaussian kernel over each input point. + for j in 0 to E.A.length(masses) - 1 { + if ( + // go through all of the result points + variances[j] > 0. && masses[j] > 0. + ) { + for i in 0 to E.A.length(outputXs) - 1 { + // go through all of the target points + let dx = outputXs[i] -. means[j] + let contribution = + masses[j] *. + exp(-.(dx ** 2.) /. (2. *. variances[j])) /. + sqrt(2. *. 3.14159276 *. variances[j]) + Belt.Array.set(outputYs, i, outputYs[i] +. contribution) |> ignore + } + } + } + + {xs: outputXs, ys: outputYs} +} + +let toDiscretePointMassesFromDiscrete = (s: DistTypes.xyShape): pointMassesWithMoments => { + let {xs, ys}: XYShape.T.t = s + let n = E.A.length(xs) + + let masses: array = Belt.Array.makeBy(n, i => ys[i]) + let means: array = Belt.Array.makeBy(n, i => xs[i]) + let variances: array = Belt.Array.makeBy(n, i => 0.0) + + {n: n, masses: masses, means: means, variances: variances} +} + +let combineShapesContinuousDiscrete = ( + op: ExpressionTypes.algebraicOperation, + continuousShape: DistTypes.xyShape, + discreteShape: DistTypes.xyShape, +): DistTypes.xyShape => { + let t1n = continuousShape |> XYShape.T.length + let t2n = discreteShape |> XYShape.T.length + + // each x pair is added/subtracted + let fn = Operation.Algebraic.toFn(op) + + let outXYShapes: array> = Belt.Array.makeUninitializedUnsafe(t2n) + + switch op { + | #Add + | #Subtract => + for j in 0 to t2n - 1 { + // creates a new continuous shape for each one of the discrete points, and collects them in outXYShapes. + let dxyShape: array<(float, float)> = Belt.Array.makeUninitializedUnsafe(t1n) + for i in 0 to t1n - 1 { + Belt.Array.set( + dxyShape, + i, + ( + fn(continuousShape.xs[i], discreteShape.xs[j]), + continuousShape.ys[i] *. discreteShape.ys[j], + ), + ) |> ignore + () + } + Belt.Array.set(outXYShapes, j, dxyShape) |> ignore + () + } + | #Multiply + | #Exponentiate + | #Divide => + for j in 0 to t2n - 1 { + // creates a new continuous shape for each one of the discrete points, and collects them in outXYShapes. + let dxyShape: array<(float, float)> = Belt.Array.makeUninitializedUnsafe(t1n) + for i in 0 to t1n - 1 { + Belt.Array.set( + dxyShape, + i, + ( + fn(continuousShape.xs[i], discreteShape.xs[j]), + continuousShape.ys[i] *. discreteShape.ys[j] /. discreteShape.xs[j], + ), + ) |> ignore + () + } + Belt.Array.set(outXYShapes, j, dxyShape) |> ignore + () + } + } + + outXYShapes + |> E.A.fmap(XYShape.T.fromZippedArray) + |> E.A.fold_left( + XYShape.PointwiseCombination.combine( + \"+.", + XYShape.XtoY.continuousInterpolator(#Linear, #UseZero), + ), + XYShape.T.empty, + ) +} diff --git a/src/distPlus/distribution/Continuous.re b/src/distPlus/distribution/Continuous.re deleted file mode 100644 index 4f08abbd..00000000 --- a/src/distPlus/distribution/Continuous.re +++ /dev/null @@ -1,332 +0,0 @@ -open Distributions; - -type t = DistTypes.continuousShape; -let getShape = (t: t) => t.xyShape; -let interpolation = (t: t) => t.interpolation; -let make = - ( - ~interpolation=`Linear, - ~integralSumCache=None, - ~integralCache=None, - xyShape, - ) - : t => { - xyShape, - interpolation, - integralSumCache, - integralCache, -}; -let shapeMap = - (fn, {xyShape, interpolation, integralSumCache, integralCache}: t): t => { - xyShape: fn(xyShape), - interpolation, - integralSumCache, - integralCache, -}; -let lastY = (t: t) => t |> getShape |> XYShape.T.lastY; -let oShapeMap = - (fn, {xyShape, interpolation, integralSumCache, integralCache}: t) - : option(DistTypes.continuousShape) => - fn(xyShape) - |> E.O.fmap(make(~interpolation, ~integralSumCache, ~integralCache)); - -let emptyIntegral: DistTypes.continuousShape = { - xyShape: { - xs: [|neg_infinity|], - ys: [|0.0|], - }, - interpolation: `Linear, - integralSumCache: Some(0.0), - integralCache: None, -}; -let empty: DistTypes.continuousShape = { - xyShape: XYShape.T.empty, - interpolation: `Linear, - integralSumCache: Some(0.0), - integralCache: Some(emptyIntegral), -}; - -let stepwiseToLinear = (t: t): t => - make( - ~integralSumCache=t.integralSumCache, - ~integralCache=t.integralCache, - XYShape.Range.stepwiseToLinear(t.xyShape), - ); - -// Note: This results in a distribution with as many points as the sum of those in t1 and t2. -let combinePointwise = - ( - ~integralSumCachesFn=(_, _) => None, - ~integralCachesFn: (t, t) => option(t)=(_, _) => None, - ~distributionType: DistTypes.distributionType=`PDF, - fn: (float, float) => float, - t1: DistTypes.continuousShape, - t2: DistTypes.continuousShape, - ) - : DistTypes.continuousShape => { - // If we're adding the distributions, and we know the total of each, then we - // can just sum them up. Otherwise, all bets are off. - let combinedIntegralSum = - Common.combineIntegralSums( - integralSumCachesFn, - t1.integralSumCache, - t2.integralSumCache, - ); - - // TODO: does it ever make sense to pointwise combine the integrals here? - // It could be done for pointwise additions, but is that ever needed? - - // If combining stepwise and linear, we must convert the stepwise to linear first, - // i.e. add a point at the bottom of each step - let (t1, t2) = - switch (t1.interpolation, t2.interpolation) { - | (`Linear, `Linear) => (t1, t2) - | (`Stepwise, `Stepwise) => (t1, t2) - | (`Linear, `Stepwise) => (t1, stepwiseToLinear(t2)) - | (`Stepwise, `Linear) => (stepwiseToLinear(t1), t2) - }; - - let extrapolation = - switch (distributionType) { - | `PDF => `UseZero - | `CDF => `UseOutermostPoints - }; - - let interpolator = - XYShape.XtoY.continuousInterpolator(t1.interpolation, extrapolation); - - make( - ~integralSumCache=combinedIntegralSum, - XYShape.PointwiseCombination.combine( - fn, - interpolator, - t1.xyShape, - t2.xyShape, - ), - ); -}; - -let toLinear = (t: t): option(t) => { - switch (t) { - | {interpolation: `Stepwise, xyShape, integralSumCache, integralCache} => - xyShape - |> XYShape.Range.stepsToContinuous - |> E.O.fmap(make(~integralSumCache, ~integralCache)) - | {interpolation: `Linear} => Some(t) - }; -}; -let shapeFn = (fn, t: t) => t |> getShape |> fn; - -let updateIntegralSumCache = (integralSumCache, t: t): t => { - ...t, - integralSumCache, -}; - -let updateIntegralCache = (integralCache, t: t): t => {...t, integralCache}; - -let reduce = - ( - ~integralSumCachesFn: (float, float) => option(float)=(_, _) => None, - ~integralCachesFn: (t, t) => option(t)=(_, _) => None, - fn, - continuousShapes, - ) => - continuousShapes - |> E.A.fold_left( - combinePointwise(~integralSumCachesFn, ~integralCachesFn, fn), - empty, - ); - -let mapY = - (~integralSumCacheFn=_ => None, ~integralCacheFn=_ => None, ~fn, t: t) => { - make( - ~interpolation=t.interpolation, - ~integralSumCache=t.integralSumCache |> E.O.bind(_, integralSumCacheFn), - ~integralCache=t.integralCache |> E.O.bind(_, integralCacheFn), - t |> getShape |> XYShape.T.mapY(fn), - ); -}; - -let rec scaleBy = (~scale=1.0, t: t): t => { - let scaledIntegralSumCache = - E.O.bind(t.integralSumCache, v => Some(scale *. v)); - let scaledIntegralCache = - E.O.bind(t.integralCache, v => Some(scaleBy(~scale, v))); - - t - |> mapY(~fn=(r: float) => r *. scale) - |> updateIntegralSumCache(scaledIntegralSumCache) - |> updateIntegralCache(scaledIntegralCache); -}; - -module T = - Dist({ - type t = DistTypes.continuousShape; - type integral = DistTypes.continuousShape; - let minX = shapeFn(XYShape.T.minX); - let maxX = shapeFn(XYShape.T.maxX); - let mapY = mapY; - let updateIntegralCache = updateIntegralCache; - let toDiscreteProbabilityMassFraction = _ => 0.0; - let toShape = (t: t): DistTypes.shape => Continuous(t); - let xToY = (f, {interpolation, xyShape}: t) => { - ( - switch (interpolation) { - | `Stepwise => - xyShape |> XYShape.XtoY.stepwiseIncremental(f) |> E.O.default(0.0) - | `Linear => xyShape |> XYShape.XtoY.linear(f) - } - ) - |> DistTypes.MixedPoint.makeContinuous; - }; - - let truncate = - (leftCutoff: option(float), rightCutoff: option(float), t: t) => { - let lc = E.O.default(neg_infinity, leftCutoff); - let rc = E.O.default(infinity, rightCutoff); - let truncatedZippedPairs = - t - |> getShape - |> XYShape.T.zip - |> XYShape.Zipped.filterByX(x => x >= lc && x <= rc); - - let leftNewPoint = - leftCutoff - |> E.O.dimap(lc => [|(lc -. epsilon_float, 0.)|], _ => [||]); - let rightNewPoint = - rightCutoff - |> E.O.dimap(rc => [|(rc +. epsilon_float, 0.)|], _ => [||]); - - let truncatedZippedPairsWithNewPoints = - E.A.concatMany([|leftNewPoint, truncatedZippedPairs, rightNewPoint|]); - let truncatedShape = - XYShape.T.fromZippedArray(truncatedZippedPairsWithNewPoints); - - make(truncatedShape); - }; - - // TODO: This should work with stepwise plots. - let integral = t => - switch (getShape(t) |> XYShape.T.isEmpty, t.integralCache) { - | (true, _) => emptyIntegral - | (false, Some(cache)) => cache - | (false, None) => - t - |> getShape - |> XYShape.Range.integrateWithTriangles - |> E.O.toExt("This should not have happened") - |> make - }; - - let downsample = (length, t): t => - t - |> shapeMap( - XYShape.XsConversion.proportionByProbabilityMass( - length, - integral(t).xyShape, - ), - ); - let integralEndY = (t: t) => - t.integralSumCache |> E.O.default(t |> integral |> lastY); - let integralXtoY = (f, t: t) => - t |> integral |> shapeFn(XYShape.XtoY.linear(f)); - let integralYtoX = (f, t: t) => - t |> integral |> shapeFn(XYShape.YtoX.linear(f)); - let toContinuous = t => Some(t); - let toDiscrete = _ => None; - - let normalize = (t: t): t => { - t - |> updateIntegralCache(Some(integral(t))) - |> scaleBy(~scale=1. /. integralEndY(t)) - |> updateIntegralSumCache(Some(1.0)); - }; - - let mean = (t: t) => { - let indefiniteIntegralStepwise = (p, h1) => h1 *. p ** 2.0 /. 2.0; - let indefiniteIntegralLinear = (p, a, b) => - a *. p ** 2.0 /. 2.0 +. b *. p ** 3.0 /. 3.0; - - XYShape.Analysis.integrateContinuousShape( - ~indefiniteIntegralStepwise, - ~indefiniteIntegralLinear, - t, - ); - }; - let variance = (t: t): float => - XYShape.Analysis.getVarianceDangerously( - t, - mean, - XYShape.Analysis.getMeanOfSquaresContinuousShape, - ); - }); - -/* This simply creates multiple copies of the continuous distribution, scaled and shifted according to - each discrete data point, and then adds them all together. */ -let combineAlgebraicallyWithDiscrete = - ( - op: ExpressionTypes.algebraicOperation, - t1: t, - t2: DistTypes.discreteShape, - ) => { - let t1s = t1 |> getShape; - let t2s = t2.xyShape; // TODO would like to use Discrete.getShape here, but current file structure doesn't allow for that - - if (XYShape.T.isEmpty(t1s) || XYShape.T.isEmpty(t2s)) { - empty; - } else { - let continuousAsLinear = - switch (t1.interpolation) { - | `Linear => t1 - | `Stepwise => stepwiseToLinear(t1) - }; - - let combinedShape = - AlgebraicShapeCombination.combineShapesContinuousDiscrete( - op, - continuousAsLinear |> getShape, - t2s, - ); - - let combinedIntegralSum = - switch (op) { - | `Multiply - | `Divide => - Common.combineIntegralSums( - (a, b) => Some(a *. b), - t1.integralSumCache, - t2.integralSumCache, - ) - | _ => None - }; - - // TODO: It could make sense to automatically transform the integrals here (shift or scale) - make( - ~interpolation=t1.interpolation, - ~integralSumCache=combinedIntegralSum, - combinedShape, - ); - }; -}; - -let combineAlgebraically = - (op: ExpressionTypes.algebraicOperation, t1: t, t2: t) => { - let s1 = t1 |> getShape; - let s2 = t2 |> getShape; - let t1n = s1 |> XYShape.T.length; - let t2n = s2 |> XYShape.T.length; - if (t1n == 0 || t2n == 0) { - empty; - } else { - let combinedShape = - AlgebraicShapeCombination.combineShapesContinuousContinuous(op, s1, s2); - let combinedIntegralSum = - Common.combineIntegralSums( - (a, b) => Some(a *. b), - t1.integralSumCache, - t2.integralSumCache, - ); - // return a new Continuous distribution - make(~integralSumCache=combinedIntegralSum, combinedShape); - }; -}; diff --git a/src/distPlus/distribution/Continuous.res b/src/distPlus/distribution/Continuous.res new file mode 100644 index 00000000..45c3c60f --- /dev/null +++ b/src/distPlus/distribution/Continuous.res @@ -0,0 +1,264 @@ +open Distributions + +type t = DistTypes.continuousShape +let getShape = (t: t) => t.xyShape +let interpolation = (t: t) => t.interpolation +let make = (~interpolation=#Linear, ~integralSumCache=None, ~integralCache=None, xyShape): t => { + xyShape: xyShape, + interpolation: interpolation, + integralSumCache: integralSumCache, + integralCache: integralCache, +} +let shapeMap = (fn, {xyShape, interpolation, integralSumCache, integralCache}: t): t => { + xyShape: fn(xyShape), + interpolation: interpolation, + integralSumCache: integralSumCache, + integralCache: integralCache, +} +let lastY = (t: t) => t |> getShape |> XYShape.T.lastY +let oShapeMap = (fn, {xyShape, interpolation, integralSumCache, integralCache}: t): option< + DistTypes.continuousShape, +> => fn(xyShape) |> E.O.fmap(make(~interpolation, ~integralSumCache, ~integralCache)) + +let emptyIntegral: DistTypes.continuousShape = { + xyShape: { + xs: [neg_infinity], + ys: [0.0], + }, + interpolation: #Linear, + integralSumCache: Some(0.0), + integralCache: None, +} +let empty: DistTypes.continuousShape = { + xyShape: XYShape.T.empty, + interpolation: #Linear, + integralSumCache: Some(0.0), + integralCache: Some(emptyIntegral), +} + +let stepwiseToLinear = (t: t): t => + make( + ~integralSumCache=t.integralSumCache, + ~integralCache=t.integralCache, + XYShape.Range.stepwiseToLinear(t.xyShape), + ) + +// Note: This results in a distribution with as many points as the sum of those in t1 and t2. +let combinePointwise = ( + ~integralSumCachesFn=(_, _) => None, + ~integralCachesFn: (t, t) => option=(_, _) => None, + ~distributionType: DistTypes.distributionType=#PDF, + fn: (float, float) => float, + t1: DistTypes.continuousShape, + t2: DistTypes.continuousShape, +): DistTypes.continuousShape => { + // If we're adding the distributions, and we know the total of each, then we + // can just sum them up. Otherwise, all bets are off. + let combinedIntegralSum = Common.combineIntegralSums( + integralSumCachesFn, + t1.integralSumCache, + t2.integralSumCache, + ) + + // TODO: does it ever make sense to pointwise combine the integrals here? + // It could be done for pointwise additions, but is that ever needed? + + // If combining stepwise and linear, we must convert the stepwise to linear first, + // i.e. add a point at the bottom of each step + let (t1, t2) = switch (t1.interpolation, t2.interpolation) { + | (#Linear, #Linear) => (t1, t2) + | (#Stepwise, #Stepwise) => (t1, t2) + | (#Linear, #Stepwise) => (t1, stepwiseToLinear(t2)) + | (#Stepwise, #Linear) => (stepwiseToLinear(t1), t2) + } + + let extrapolation = switch distributionType { + | #PDF => #UseZero + | #CDF => #UseOutermostPoints + } + + let interpolator = XYShape.XtoY.continuousInterpolator(t1.interpolation, extrapolation) + + make( + ~integralSumCache=combinedIntegralSum, + XYShape.PointwiseCombination.combine(fn, interpolator, t1.xyShape, t2.xyShape), + ) +} + +let toLinear = (t: t): option => + switch t { + | {interpolation: #Stepwise, xyShape, integralSumCache, integralCache} => + xyShape |> XYShape.Range.stepsToContinuous |> E.O.fmap(make(~integralSumCache, ~integralCache)) + | {interpolation: #Linear} => Some(t) + } +let shapeFn = (fn, t: t) => t |> getShape |> fn + +let updateIntegralSumCache = (integralSumCache, t: t): t => { + ...t, + integralSumCache: integralSumCache, +} + +let updateIntegralCache = (integralCache, t: t): t => {...t, integralCache: integralCache} + +let reduce = ( + ~integralSumCachesFn: (float, float) => option=(_, _) => None, + ~integralCachesFn: (t, t) => option=(_, _) => None, + fn, + continuousShapes, +) => + continuousShapes |> E.A.fold_left( + combinePointwise(~integralSumCachesFn, ~integralCachesFn, fn), + empty, + ) + +let mapY = (~integralSumCacheFn=_ => None, ~integralCacheFn=_ => None, ~fn, t: t) => + make( + ~interpolation=t.interpolation, + ~integralSumCache=t.integralSumCache |> E.O.bind(_, integralSumCacheFn), + ~integralCache=t.integralCache |> E.O.bind(_, integralCacheFn), + t |> getShape |> XYShape.T.mapY(fn), + ) + +let rec scaleBy = (~scale=1.0, t: t): t => { + let scaledIntegralSumCache = E.O.bind(t.integralSumCache, v => Some(scale *. v)) + let scaledIntegralCache = E.O.bind(t.integralCache, v => Some(scaleBy(~scale, v))) + + t + |> mapY(~fn=(r: float) => r *. scale) + |> updateIntegralSumCache(scaledIntegralSumCache) + |> updateIntegralCache(scaledIntegralCache) +} + +module T = Dist({ + type t = DistTypes.continuousShape + type integral = DistTypes.continuousShape + let minX = shapeFn(XYShape.T.minX) + let maxX = shapeFn(XYShape.T.maxX) + let mapY = mapY + let updateIntegralCache = updateIntegralCache + let toDiscreteProbabilityMassFraction = _ => 0.0 + let toShape = (t: t): DistTypes.shape => Continuous(t) + let xToY = (f, {interpolation, xyShape}: t) => + switch interpolation { + | #Stepwise => xyShape |> XYShape.XtoY.stepwiseIncremental(f) |> E.O.default(0.0) + | #Linear => xyShape |> XYShape.XtoY.linear(f) + } |> DistTypes.MixedPoint.makeContinuous + + let truncate = (leftCutoff: option, rightCutoff: option, t: t) => { + let lc = E.O.default(neg_infinity, leftCutoff) + let rc = E.O.default(infinity, rightCutoff) + let truncatedZippedPairs = + t |> getShape |> XYShape.T.zip |> XYShape.Zipped.filterByX(x => x >= lc && x <= rc) + + let leftNewPoint = leftCutoff |> E.O.dimap(lc => [(lc -. epsilon_float, 0.)], _ => []) + let rightNewPoint = rightCutoff |> E.O.dimap(rc => [(rc +. epsilon_float, 0.)], _ => []) + + let truncatedZippedPairsWithNewPoints = E.A.concatMany([ + leftNewPoint, + truncatedZippedPairs, + rightNewPoint, + ]) + let truncatedShape = XYShape.T.fromZippedArray(truncatedZippedPairsWithNewPoints) + + make(truncatedShape) + } + + // TODO: This should work with stepwise plots. + let integral = t => + switch (getShape(t) |> XYShape.T.isEmpty, t.integralCache) { + | (true, _) => emptyIntegral + | (false, Some(cache)) => cache + | (false, None) => + t + |> getShape + |> XYShape.Range.integrateWithTriangles + |> E.O.toExt("This should not have happened") + |> make + } + + let downsample = (length, t): t => + t |> shapeMap(XYShape.XsConversion.proportionByProbabilityMass(length, integral(t).xyShape)) + let integralEndY = (t: t) => t.integralSumCache |> E.O.default(t |> integral |> lastY) + let integralXtoY = (f, t: t) => t |> integral |> shapeFn(XYShape.XtoY.linear(f)) + let integralYtoX = (f, t: t) => t |> integral |> shapeFn(XYShape.YtoX.linear(f)) + let toContinuous = t => Some(t) + let toDiscrete = _ => None + + let normalize = (t: t): t => + t + |> updateIntegralCache(Some(integral(t))) + |> scaleBy(~scale=1. /. integralEndY(t)) + |> updateIntegralSumCache(Some(1.0)) + + let mean = (t: t) => { + let indefiniteIntegralStepwise = (p, h1) => h1 *. p ** 2.0 /. 2.0 + let indefiniteIntegralLinear = (p, a, b) => a *. p ** 2.0 /. 2.0 +. b *. p ** 3.0 /. 3.0 + + XYShape.Analysis.integrateContinuousShape( + ~indefiniteIntegralStepwise, + ~indefiniteIntegralLinear, + t, + ) + } + let variance = (t: t): float => + XYShape.Analysis.getVarianceDangerously( + t, + mean, + XYShape.Analysis.getMeanOfSquaresContinuousShape, + ) +}) + +/* This simply creates multiple copies of the continuous distribution, scaled and shifted according to + each discrete data point, and then adds them all together. */ +let combineAlgebraicallyWithDiscrete = ( + op: ExpressionTypes.algebraicOperation, + t1: t, + t2: DistTypes.discreteShape, +) => { + let t1s = t1 |> getShape + let t2s = t2.xyShape // TODO would like to use Discrete.getShape here, but current file structure doesn't allow for that + + if XYShape.T.isEmpty(t1s) || XYShape.T.isEmpty(t2s) { + empty + } else { + let continuousAsLinear = switch t1.interpolation { + | #Linear => t1 + | #Stepwise => stepwiseToLinear(t1) + } + + let combinedShape = AlgebraicShapeCombination.combineShapesContinuousDiscrete( + op, + continuousAsLinear |> getShape, + t2s, + ) + + let combinedIntegralSum = switch op { + | #Multiply + | #Divide => + Common.combineIntegralSums((a, b) => Some(a *. b), t1.integralSumCache, t2.integralSumCache) + | _ => None + } + + // TODO: It could make sense to automatically transform the integrals here (shift or scale) + make(~interpolation=t1.interpolation, ~integralSumCache=combinedIntegralSum, combinedShape) + } +} + +let combineAlgebraically = (op: ExpressionTypes.algebraicOperation, t1: t, t2: t) => { + let s1 = t1 |> getShape + let s2 = t2 |> getShape + let t1n = s1 |> XYShape.T.length + let t2n = s2 |> XYShape.T.length + if t1n == 0 || t2n == 0 { + empty + } else { + let combinedShape = AlgebraicShapeCombination.combineShapesContinuousContinuous(op, s1, s2) + let combinedIntegralSum = Common.combineIntegralSums( + (a, b) => Some(a *. b), + t1.integralSumCache, + t2.integralSumCache, + ) + // return a new Continuous distribution + make(~integralSumCache=combinedIntegralSum, combinedShape) + } +} diff --git a/src/distPlus/distribution/Discrete.re b/src/distPlus/distribution/Discrete.re deleted file mode 100644 index 8a64a454..00000000 --- a/src/distPlus/distribution/Discrete.re +++ /dev/null @@ -1,232 +0,0 @@ -open Distributions; - -type t = DistTypes.discreteShape; - -let make = (~integralSumCache=None, ~integralCache=None, xyShape): t => {xyShape, integralSumCache, integralCache}; -let shapeMap = (fn, {xyShape, integralSumCache, integralCache}: t): t => { - xyShape: fn(xyShape), - integralSumCache, - integralCache -}; -let getShape = (t: t) => t.xyShape; -let oShapeMap = (fn, {xyShape, integralSumCache, integralCache}: t): option(t) => - fn(xyShape) |> E.O.fmap(make(~integralSumCache, ~integralCache)); - -let emptyIntegral: DistTypes.continuousShape = { - xyShape: {xs: [|neg_infinity|], ys: [|0.0|]}, - interpolation: `Stepwise, - integralSumCache: Some(0.0), - integralCache: None, -}; -let empty: DistTypes.discreteShape = { - xyShape: XYShape.T.empty, - integralSumCache: Some(0.0), - integralCache: Some(emptyIntegral), -}; - - -let shapeFn = (fn, t: t) => t |> getShape |> fn; - -let lastY = (t: t) => t |> getShape |> XYShape.T.lastY; - -let combinePointwise = - ( - ~integralSumCachesFn = (_, _) => None, - ~integralCachesFn: (DistTypes.continuousShape, DistTypes.continuousShape) => option(DistTypes.continuousShape) = (_, _) => None, - fn, - t1: DistTypes.discreteShape, - t2: DistTypes.discreteShape, - ) - : DistTypes.discreteShape => { - let combinedIntegralSum = - Common.combineIntegralSums( - integralSumCachesFn, - t1.integralSumCache, - t2.integralSumCache, - ); - - // TODO: does it ever make sense to pointwise combine the integrals here? - // It could be done for pointwise additions, but is that ever needed? - - make( - ~integralSumCache=combinedIntegralSum, - XYShape.PointwiseCombination.combine( - (+.), - XYShape.XtoY.discreteInterpolator, - t1.xyShape, - t2.xyShape, - ), - ); -}; - -let reduce = - (~integralSumCachesFn=(_, _) => None, - ~integralCachesFn=(_, _) => None, - fn, discreteShapes) - : DistTypes.discreteShape => - discreteShapes - |> E.A.fold_left(combinePointwise(~integralSumCachesFn, ~integralCachesFn, fn), empty); - -let updateIntegralSumCache = (integralSumCache, t: t): t => { - ...t, - integralSumCache, -}; - -let updateIntegralCache = (integralCache, t: t): t => { - ...t, - integralCache, -}; - -/* This multiples all of the data points together and creates a new discrete distribution from the results. - Data points at the same xs get added together. It may be a good idea to downsample t1 and t2 before and/or the result after. */ -let combineAlgebraically = - (op: ExpressionTypes.algebraicOperation, t1: t, t2: t): t => { - let t1s = t1 |> getShape; - let t2s = t2 |> getShape; - let t1n = t1s |> XYShape.T.length; - let t2n = t2s |> XYShape.T.length; - - let combinedIntegralSum = - Common.combineIntegralSums( - (s1, s2) => Some(s1 *. s2), - t1.integralSumCache, - t2.integralSumCache, - ); - - let fn = Operation.Algebraic.toFn(op); - let xToYMap = E.FloatFloatMap.empty(); - - for (i in 0 to t1n - 1) { - for (j in 0 to t2n - 1) { - let x = fn(t1s.xs[i], t2s.xs[j]); - let cv = xToYMap |> E.FloatFloatMap.get(x) |> E.O.default(0.); - let my = t1s.ys[i] *. t2s.ys[j]; - let _ = Belt.MutableMap.set(xToYMap, x, cv +. my); - (); - }; - }; - - let rxys = xToYMap |> E.FloatFloatMap.toArray |> XYShape.Zipped.sortByX; - - let combinedShape = XYShape.T.fromZippedArray(rxys); - - make(~integralSumCache=combinedIntegralSum, combinedShape); -}; - -let mapY = (~integralSumCacheFn=_ => None, - ~integralCacheFn=_ => None, - ~fn, t: t) => { - make( - ~integralSumCache=t.integralSumCache |> E.O.bind(_, integralSumCacheFn), - ~integralCache=t.integralCache |> E.O.bind(_, integralCacheFn), - t |> getShape |> XYShape.T.mapY(fn), - ); -}; - - -let scaleBy = (~scale=1.0, t: t): t => { - let scaledIntegralSumCache = t.integralSumCache |> E.O.fmap((*.)(scale)); - let scaledIntegralCache = t.integralCache |> E.O.fmap(Continuous.scaleBy(~scale)); - - t - |> mapY(~fn=(r: float) => r *. scale) - |> updateIntegralSumCache(scaledIntegralSumCache) - |> updateIntegralCache(scaledIntegralCache) -}; - -module T = - Dist({ - type t = DistTypes.discreteShape; - type integral = DistTypes.continuousShape; - let integral = (t) => - switch (getShape(t) |> XYShape.T.isEmpty, t.integralCache) { - | (true, _) => emptyIntegral - | (false, Some(c)) => c - | (false, None) => { - let ts = getShape(t); - // The first xy of this integral should always be the zero, to ensure nice plotting - let firstX = ts |> XYShape.T.minX; - let prependedZeroPoint: XYShape.T.t = {xs: [|firstX -. epsilon_float|], ys: [|0.|]}; - let integralShape = - ts - |> XYShape.T.concat(prependedZeroPoint) - |> XYShape.T.accumulateYs((+.)); - - Continuous.make(~interpolation=`Stepwise, integralShape); - } - }; - - let integralEndY = (t: t) => - t.integralSumCache - |> E.O.default(t |> integral |> Continuous.lastY); - let minX = shapeFn(XYShape.T.minX); - let maxX = shapeFn(XYShape.T.maxX); - let toDiscreteProbabilityMassFraction = _ => 1.0; - let mapY = mapY; - let updateIntegralCache = updateIntegralCache; - let toShape = (t: t): DistTypes.shape => Discrete(t); - let toContinuous = _ => None; - let toDiscrete = t => Some(t); - - let normalize = (t: t): t => { - t - |> scaleBy(~scale=1. /. integralEndY(t)) - |> updateIntegralSumCache(Some(1.0)); - }; - - let downsample = (i, t: t): t => { - // It's not clear how to downsample a set of discrete points in a meaningful way. - // The best we can do is to clip off the smallest values. - let currentLength = t |> getShape |> XYShape.T.length; - - if (i < currentLength && i >= 1 && currentLength > 1) { - t - |> getShape - |> XYShape.T.zip - |> XYShape.Zipped.sortByY - |> Belt.Array.reverse - |> Belt.Array.slice(_, ~offset=0, ~len=i) - |> XYShape.Zipped.sortByX - |> XYShape.T.fromZippedArray - |> make; - } else { - t; - }; - }; - - let truncate = - (leftCutoff: option(float), rightCutoff: option(float), t: t): t => { - t - |> getShape - |> XYShape.T.zip - |> XYShape.Zipped.filterByX(x => - x >= E.O.default(neg_infinity, leftCutoff) - && x <= E.O.default(infinity, rightCutoff) - ) - |> XYShape.T.fromZippedArray - |> make; - }; - - let xToY = (f, t) => - t - |> getShape - |> XYShape.XtoY.stepwiseIfAtX(f) - |> E.O.default(0.0) - |> DistTypes.MixedPoint.makeDiscrete; - - let integralXtoY = (f, t) => - t |> integral |> Continuous.getShape |> XYShape.XtoY.linear(f); - - let integralYtoX = (f, t) => - t |> integral |> Continuous.getShape |> XYShape.YtoX.linear(f); - - let mean = (t: t): float => { - let s = getShape(t); - E.A.reducei(s.xs, 0.0, (acc, x, i) => acc +. x *. s.ys[i]); - }; - let variance = (t: t): float => { - let getMeanOfSquares = t => - t |> shapeMap(XYShape.Analysis.squareXYShape) |> mean; - XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares); - }; - }); diff --git a/src/distPlus/distribution/Discrete.res b/src/distPlus/distribution/Discrete.res new file mode 100644 index 00000000..223301bd --- /dev/null +++ b/src/distPlus/distribution/Discrete.res @@ -0,0 +1,216 @@ +open Distributions + +type t = DistTypes.discreteShape + +let make = (~integralSumCache=None, ~integralCache=None, xyShape): t => { + xyShape: xyShape, + integralSumCache: integralSumCache, + integralCache: integralCache, +} +let shapeMap = (fn, {xyShape, integralSumCache, integralCache}: t): t => { + xyShape: fn(xyShape), + integralSumCache: integralSumCache, + integralCache: integralCache, +} +let getShape = (t: t) => t.xyShape +let oShapeMap = (fn, {xyShape, integralSumCache, integralCache}: t): option => + fn(xyShape) |> E.O.fmap(make(~integralSumCache, ~integralCache)) + +let emptyIntegral: DistTypes.continuousShape = { + xyShape: {xs: [neg_infinity], ys: [0.0]}, + interpolation: #Stepwise, + integralSumCache: Some(0.0), + integralCache: None, +} +let empty: DistTypes.discreteShape = { + xyShape: XYShape.T.empty, + integralSumCache: Some(0.0), + integralCache: Some(emptyIntegral), +} + +let shapeFn = (fn, t: t) => t |> getShape |> fn + +let lastY = (t: t) => t |> getShape |> XYShape.T.lastY + +let combinePointwise = ( + ~integralSumCachesFn=(_, _) => None, + ~integralCachesFn: ( + DistTypes.continuousShape, + DistTypes.continuousShape, + ) => option=(_, _) => None, + fn, + t1: DistTypes.discreteShape, + t2: DistTypes.discreteShape, +): DistTypes.discreteShape => { + let combinedIntegralSum = Common.combineIntegralSums( + integralSumCachesFn, + t1.integralSumCache, + t2.integralSumCache, + ) + + // TODO: does it ever make sense to pointwise combine the integrals here? + // It could be done for pointwise additions, but is that ever needed? + + make( + ~integralSumCache=combinedIntegralSum, + XYShape.PointwiseCombination.combine( + \"+.", + XYShape.XtoY.discreteInterpolator, + t1.xyShape, + t2.xyShape, + ), + ) +} + +let reduce = ( + ~integralSumCachesFn=(_, _) => None, + ~integralCachesFn=(_, _) => None, + fn, + discreteShapes, +): DistTypes.discreteShape => + discreteShapes |> E.A.fold_left( + combinePointwise(~integralSumCachesFn, ~integralCachesFn, fn), + empty, + ) + +let updateIntegralSumCache = (integralSumCache, t: t): t => { + ...t, + integralSumCache: integralSumCache, +} + +let updateIntegralCache = (integralCache, t: t): t => { + ...t, + integralCache: integralCache, +} + +/* This multiples all of the data points together and creates a new discrete distribution from the results. + Data points at the same xs get added together. It may be a good idea to downsample t1 and t2 before and/or the result after. */ +let combineAlgebraically = (op: ExpressionTypes.algebraicOperation, t1: t, t2: t): t => { + let t1s = t1 |> getShape + let t2s = t2 |> getShape + let t1n = t1s |> XYShape.T.length + let t2n = t2s |> XYShape.T.length + + let combinedIntegralSum = Common.combineIntegralSums( + (s1, s2) => Some(s1 *. s2), + t1.integralSumCache, + t2.integralSumCache, + ) + + let fn = Operation.Algebraic.toFn(op) + let xToYMap = E.FloatFloatMap.empty() + + for i in 0 to t1n - 1 { + for j in 0 to t2n - 1 { + let x = fn(t1s.xs[i], t2s.xs[j]) + let cv = xToYMap |> E.FloatFloatMap.get(x) |> E.O.default(0.) + let my = t1s.ys[i] *. t2s.ys[j] + let _ = Belt.MutableMap.set(xToYMap, x, cv +. my) + } + } + + let rxys = xToYMap |> E.FloatFloatMap.toArray |> XYShape.Zipped.sortByX + + let combinedShape = XYShape.T.fromZippedArray(rxys) + + make(~integralSumCache=combinedIntegralSum, combinedShape) +} + +let mapY = (~integralSumCacheFn=_ => None, ~integralCacheFn=_ => None, ~fn, t: t) => + make( + ~integralSumCache=t.integralSumCache |> E.O.bind(_, integralSumCacheFn), + ~integralCache=t.integralCache |> E.O.bind(_, integralCacheFn), + t |> getShape |> XYShape.T.mapY(fn), + ) + +let scaleBy = (~scale=1.0, t: t): t => { + let scaledIntegralSumCache = t.integralSumCache |> E.O.fmap(\"*."(scale)) + let scaledIntegralCache = t.integralCache |> E.O.fmap(Continuous.scaleBy(~scale)) + + t + |> mapY(~fn=(r: float) => r *. scale) + |> updateIntegralSumCache(scaledIntegralSumCache) + |> updateIntegralCache(scaledIntegralCache) +} + +module T = Dist({ + type t = DistTypes.discreteShape + type integral = DistTypes.continuousShape + let integral = t => + switch (getShape(t) |> XYShape.T.isEmpty, t.integralCache) { + | (true, _) => emptyIntegral + | (false, Some(c)) => c + | (false, None) => + let ts = getShape(t) + // The first xy of this integral should always be the zero, to ensure nice plotting + let firstX = ts |> XYShape.T.minX + let prependedZeroPoint: XYShape.T.t = {xs: [firstX -. epsilon_float], ys: [0.]} + let integralShape = + ts |> XYShape.T.concat(prependedZeroPoint) |> XYShape.T.accumulateYs(\"+.") + + Continuous.make(~interpolation=#Stepwise, integralShape) + } + + let integralEndY = (t: t) => t.integralSumCache |> E.O.default(t |> integral |> Continuous.lastY) + let minX = shapeFn(XYShape.T.minX) + let maxX = shapeFn(XYShape.T.maxX) + let toDiscreteProbabilityMassFraction = _ => 1.0 + let mapY = mapY + let updateIntegralCache = updateIntegralCache + let toShape = (t: t): DistTypes.shape => Discrete(t) + let toContinuous = _ => None + let toDiscrete = t => Some(t) + + let normalize = (t: t): t => + t |> scaleBy(~scale=1. /. integralEndY(t)) |> updateIntegralSumCache(Some(1.0)) + + let downsample = (i, t: t): t => { + // It's not clear how to downsample a set of discrete points in a meaningful way. + // The best we can do is to clip off the smallest values. + let currentLength = t |> getShape |> XYShape.T.length + + if i < currentLength && (i >= 1 && currentLength > 1) { + t + |> getShape + |> XYShape.T.zip + |> XYShape.Zipped.sortByY + |> Belt.Array.reverse + |> Belt.Array.slice(_, ~offset=0, ~len=i) + |> XYShape.Zipped.sortByX + |> XYShape.T.fromZippedArray + |> make + } else { + t + } + } + + let truncate = (leftCutoff: option, rightCutoff: option, t: t): t => + t + |> getShape + |> XYShape.T.zip + |> XYShape.Zipped.filterByX(x => + x >= E.O.default(neg_infinity, leftCutoff) && x <= E.O.default(infinity, rightCutoff) + ) + |> XYShape.T.fromZippedArray + |> make + + let xToY = (f, t) => + t + |> getShape + |> XYShape.XtoY.stepwiseIfAtX(f) + |> E.O.default(0.0) + |> DistTypes.MixedPoint.makeDiscrete + + let integralXtoY = (f, t) => t |> integral |> Continuous.getShape |> XYShape.XtoY.linear(f) + + let integralYtoX = (f, t) => t |> integral |> Continuous.getShape |> XYShape.YtoX.linear(f) + + let mean = (t: t): float => { + let s = getShape(t) + E.A.reducei(s.xs, 0.0, (acc, x, i) => acc +. x *. s.ys[i]) + } + let variance = (t: t): float => { + let getMeanOfSquares = t => t |> shapeMap(XYShape.Analysis.squareXYShape) |> mean + XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares) + } +}) diff --git a/src/distPlus/distribution/DistPlus.re b/src/distPlus/distribution/DistPlus.res similarity index 97% rename from src/distPlus/distribution/DistPlus.re rename to src/distPlus/distribution/DistPlus.res index 15722bb7..6a954b42 100644 --- a/src/distPlus/distribution/DistPlus.re +++ b/src/distPlus/distribution/DistPlus.res @@ -84,7 +84,7 @@ module T = let integral = (t: t) => updateShape(Continuous(t.integralCache), t); - let updateIntegralCache = (integralCache: option(DistTypes.continuousShape), t) => + let updateIntegralCache = (integralCache: option, t) => update(~integralCache=E.O.default(t.integralCache, integralCache), t); let downsample = (i, t): t => diff --git a/src/distPlus/distribution/DistPlusTime.re b/src/distPlus/distribution/DistPlusTime.re deleted file mode 100644 index d5b36f8e..00000000 --- a/src/distPlus/distribution/DistPlusTime.re +++ /dev/null @@ -1,28 +0,0 @@ - open DistTypes; - - type t = DistTypes.distPlus; - - let unitToJson = ({unit}: t) => unit |> DistTypes.DistributionUnit.toJson; - - let timeVector = ({unit}: t) => - switch (unit) { - | TimeDistribution(timeVector) => Some(timeVector) - | UnspecifiedDistribution => None - }; - - let timeInVectorToX = (f: TimeTypes.timeInVector, t: t) => { - let timeVector = t |> timeVector; - timeVector |> E.O.fmap(TimeTypes.RelativeTimePoint.toXValue(_, f)); - }; - - let xToY = (f: TimeTypes.timeInVector, t: t) => { - timeInVectorToX(f, t) |> E.O.fmap(DistPlus.T.xToY(_, t)); - }; - - module Integral = { - include DistPlus.T.Integral; - let xToY = (f: TimeTypes.timeInVector, t: t) => { - timeInVectorToX(f, t) - |> E.O.fmap(x => DistPlus.T.Integral.xToY(x, t)); - }; - }; diff --git a/src/distPlus/distribution/DistPlusTime.res b/src/distPlus/distribution/DistPlusTime.res new file mode 100644 index 00000000..b0df8183 --- /dev/null +++ b/src/distPlus/distribution/DistPlusTime.res @@ -0,0 +1,25 @@ +open DistTypes + +type t = DistTypes.distPlus + +let unitToJson = ({unit}: t) => unit |> DistTypes.DistributionUnit.toJson + +let timeVector = ({unit}: t) => + switch unit { + | TimeDistribution(timeVector) => Some(timeVector) + | UnspecifiedDistribution => None + } + +let timeInVectorToX = (f: TimeTypes.timeInVector, t: t) => { + let timeVector = t |> timeVector + timeVector |> E.O.fmap(TimeTypes.RelativeTimePoint.toXValue(_, f)) +} + +let xToY = (f: TimeTypes.timeInVector, t: t) => + timeInVectorToX(f, t) |> E.O.fmap(DistPlus.T.xToY(_, t)) + +module Integral = { + include DistPlus.T.Integral + let xToY = (f: TimeTypes.timeInVector, t: t) => + timeInVectorToX(f, t) |> E.O.fmap(x => DistPlus.T.Integral.xToY(x, t)) +} diff --git a/src/distPlus/distribution/DistTypes.re b/src/distPlus/distribution/DistTypes.re deleted file mode 100644 index 6f687cba..00000000 --- a/src/distPlus/distribution/DistTypes.re +++ /dev/null @@ -1,179 +0,0 @@ -type domainLimit = { - xPoint: float, - excludingProbabilityMass: float, -}; - -type domain = - | Complete - | LeftLimited(domainLimit) - | RightLimited(domainLimit) - | LeftAndRightLimited(domainLimit, domainLimit); - -type distributionType = [ - | `PDF - | `CDF -]; - -type xyShape = { - xs: array(float), - ys: array(float), -}; - -type interpolationStrategy = [ - | `Stepwise - | `Linear -]; -type extrapolationStrategy = [ - | `UseZero - | `UseOutermostPoints -]; - -type interpolator = (xyShape, int, float) => float; - -type continuousShape = { - xyShape, - interpolation: interpolationStrategy, - integralSumCache: option(float), - integralCache: option(continuousShape), -}; - -type discreteShape = { - xyShape, - integralSumCache: option(float), - integralCache: option(continuousShape), -}; - -type mixedShape = { - continuous: continuousShape, - discrete: discreteShape, - integralSumCache: option(float), - integralCache: option(continuousShape), -}; - -type shapeMonad('a, 'b, 'c) = - | Mixed('a) - | Discrete('b) - | Continuous('c); - -type shape = shapeMonad(mixedShape, discreteShape, continuousShape); - -module ShapeMonad = { - let fmap = - (t: shapeMonad('a, 'b, 'c), (fn1, fn2, fn3)): shapeMonad('d, 'e, 'f) => - switch (t) { - | Mixed(m) => Mixed(fn1(m)) - | Discrete(m) => Discrete(fn2(m)) - | Continuous(m) => Continuous(fn3(m)) - }; -}; - -type generationSource = - | SquiggleString(string) - | Shape(shape); - -type distributionUnit = - | UnspecifiedDistribution - | TimeDistribution(TimeTypes.timeVector); - -type distPlus = { - shape, - domain, - integralCache: continuousShape, - unit: distributionUnit, - squiggleString: option(string), -}; - -module DistributionUnit = { - let toJson = (distributionUnit: distributionUnit) => - switch (distributionUnit) { - | TimeDistribution({zero, unit}) => - Js.Null.fromOption( - Some({"zero": zero, "unit": unit |> TimeTypes.TimeUnit.toString}), - ) - | _ => Js.Null.fromOption(None) - }; -}; - -module Domain = { - let excludedProbabilityMass = (t: domain) => { - switch (t) { - | Complete => 0.0 - | LeftLimited({excludingProbabilityMass}) => excludingProbabilityMass - | RightLimited({excludingProbabilityMass}) => excludingProbabilityMass - | LeftAndRightLimited( - {excludingProbabilityMass: l}, - {excludingProbabilityMass: r}, - ) => - l +. r - }; - }; - - let includedProbabilityMass = (t: domain) => - 1.0 -. excludedProbabilityMass(t); - - let initialProbabilityMass = (t: domain) => { - switch (t) { - | Complete - | RightLimited(_) => 0.0 - | LeftLimited({excludingProbabilityMass}) => excludingProbabilityMass - | LeftAndRightLimited({excludingProbabilityMass}, _) => excludingProbabilityMass - }; - }; - - let normalizeProbabilityMass = (t: domain) => { - 1. /. excludedProbabilityMass(t); - }; - - let yPointToSubYPoint = (t: domain, yPoint) => { - switch (t) { - | Complete => Some(yPoint) - | LeftLimited({excludingProbabilityMass}) - when yPoint < excludingProbabilityMass => - None - | LeftLimited({excludingProbabilityMass}) - when yPoint >= excludingProbabilityMass => - Some( - (yPoint -. excludingProbabilityMass) /. includedProbabilityMass(t), - ) - | RightLimited({excludingProbabilityMass}) - when yPoint > 1. -. excludingProbabilityMass => - None - | RightLimited({excludingProbabilityMass}) - when yPoint <= 1. -. excludingProbabilityMass => - Some(yPoint /. includedProbabilityMass(t)) - | LeftAndRightLimited({excludingProbabilityMass: l}, _) when yPoint < l => - None - | LeftAndRightLimited(_, {excludingProbabilityMass: r}) - when yPoint > 1.0 -. r => - None - | LeftAndRightLimited({excludingProbabilityMass: l}, _) => - Some((yPoint -. l) /. includedProbabilityMass(t)) - | _ => None - }; - }; -}; - -type mixedPoint = { - continuous: float, - discrete: float, -}; - -module MixedPoint = { - type t = mixedPoint; - let toContinuousValue = (t: t) => t.continuous; - let toDiscreteValue = (t: t) => t.discrete; - let makeContinuous = (continuous: float): t => {continuous, discrete: 0.0}; - let makeDiscrete = (discrete: float): t => {continuous: 0.0, discrete}; - - let fmap = (fn: float => float, t: t) => { - continuous: fn(t.continuous), - discrete: fn(t.discrete), - }; - - let combine2 = (fn, c: t, d: t): t => { - continuous: fn(c.continuous, d.continuous), - discrete: fn(c.discrete, d.discrete), - }; - - let add = combine2((a, b) => a +. b); -}; diff --git a/src/distPlus/distribution/DistTypes.res b/src/distPlus/distribution/DistTypes.res new file mode 100644 index 00000000..7c6e36a6 --- /dev/null +++ b/src/distPlus/distribution/DistTypes.res @@ -0,0 +1,155 @@ +type domainLimit = { + xPoint: float, + excludingProbabilityMass: float, +} + +type domain = + | Complete + | LeftLimited(domainLimit) + | RightLimited(domainLimit) + | LeftAndRightLimited(domainLimit, domainLimit) + +type distributionType = [ + | #PDF + | #CDF +] + +type xyShape = { + xs: array, + ys: array, +} + +type interpolationStrategy = [ + | #Stepwise + | #Linear +] +type extrapolationStrategy = [ + | #UseZero + | #UseOutermostPoints +] + +type interpolator = (xyShape, int, float) => float + +type rec continuousShape = { + xyShape: xyShape, + interpolation: interpolationStrategy, + integralSumCache: option, + integralCache: option, +} + +type discreteShape = { + xyShape: xyShape, + integralSumCache: option, + integralCache: option, +} + +type mixedShape = { + continuous: continuousShape, + discrete: discreteShape, + integralSumCache: option, + integralCache: option, +} + +type shapeMonad<'a, 'b, 'c> = + | Mixed('a) + | Discrete('b) + | Continuous('c) + +type shape = shapeMonad + +module ShapeMonad = { + let fmap = (t: shapeMonad<'a, 'b, 'c>, (fn1, fn2, fn3)): shapeMonad<'d, 'e, 'f> => + switch t { + | Mixed(m) => Mixed(fn1(m)) + | Discrete(m) => Discrete(fn2(m)) + | Continuous(m) => Continuous(fn3(m)) + } +} + +type generationSource = + | SquiggleString(string) + | Shape(shape) + +type distributionUnit = + | UnspecifiedDistribution + | TimeDistribution(TimeTypes.timeVector) + +type distPlus = { + shape: shape, + domain: domain, + integralCache: continuousShape, + unit: distributionUnit, + squiggleString: option, +} + +module DistributionUnit = { + let toJson = (distributionUnit: distributionUnit) => + switch distributionUnit { + | TimeDistribution({zero, unit}) => + Js.Null.fromOption(Some({"zero": zero, "unit": unit |> TimeTypes.TimeUnit.toString})) + | _ => Js.Null.fromOption(None) + } +} + +module Domain = { + let excludedProbabilityMass = (t: domain) => + switch t { + | Complete => 0.0 + | LeftLimited({excludingProbabilityMass}) => excludingProbabilityMass + | RightLimited({excludingProbabilityMass}) => excludingProbabilityMass + | LeftAndRightLimited({excludingProbabilityMass: l}, {excludingProbabilityMass: r}) => l +. r + } + + let includedProbabilityMass = (t: domain) => 1.0 -. excludedProbabilityMass(t) + + let initialProbabilityMass = (t: domain) => + switch t { + | Complete + | RightLimited(_) => 0.0 + | LeftLimited({excludingProbabilityMass}) => excludingProbabilityMass + | LeftAndRightLimited({excludingProbabilityMass}, _) => excludingProbabilityMass + } + + let normalizeProbabilityMass = (t: domain) => 1. /. excludedProbabilityMass(t) + + let yPointToSubYPoint = (t: domain, yPoint) => + switch t { + | Complete => Some(yPoint) + | LeftLimited({excludingProbabilityMass}) if yPoint < excludingProbabilityMass => None + | LeftLimited({excludingProbabilityMass}) if yPoint >= excludingProbabilityMass => + Some((yPoint -. excludingProbabilityMass) /. includedProbabilityMass(t)) + | RightLimited({excludingProbabilityMass}) if yPoint > 1. -. excludingProbabilityMass => None + | RightLimited({excludingProbabilityMass}) if yPoint <= 1. -. excludingProbabilityMass => + Some(yPoint /. includedProbabilityMass(t)) + | LeftAndRightLimited({excludingProbabilityMass: l}, _) if yPoint < l => None + | LeftAndRightLimited(_, {excludingProbabilityMass: r}) if yPoint > 1.0 -. r => None + | LeftAndRightLimited({excludingProbabilityMass: l}, _) => + Some((yPoint -. l) /. includedProbabilityMass(t)) + | _ => None + } +} + +type mixedPoint = { + continuous: float, + discrete: float, +} + +module MixedPoint = { + type t = mixedPoint + let toContinuousValue = (t: t) => t.continuous + let toDiscreteValue = (t: t) => t.discrete + let makeContinuous = (continuous: float): t => {continuous: continuous, discrete: 0.0} + let makeDiscrete = (discrete: float): t => {continuous: 0.0, discrete: discrete} + + let fmap = (fn: float => float, t: t) => { + continuous: fn(t.continuous), + discrete: fn(t.discrete), + } + + let combine2 = (fn, c: t, d: t): t => { + continuous: fn(c.continuous, d.continuous), + discrete: fn(c.discrete, d.discrete), + } + + let add = combine2((a, b) => a +. b) +} diff --git a/src/distPlus/distribution/Distributions.re b/src/distPlus/distribution/Distributions.re deleted file mode 100644 index 05b57edb..00000000 --- a/src/distPlus/distribution/Distributions.re +++ /dev/null @@ -1,84 +0,0 @@ -module type dist = { - type t; - type integral; - let minX: t => float; - let maxX: t => float; - let mapY: - (~integralSumCacheFn: float => option(float)=?, ~integralCacheFn: DistTypes.continuousShape => option(DistTypes.continuousShape)=?, ~fn: float => float, t) => t; - let xToY: (float, t) => DistTypes.mixedPoint; - let toShape: t => DistTypes.shape; - let toContinuous: t => option(DistTypes.continuousShape); - let toDiscrete: t => option(DistTypes.discreteShape); - let normalize: t => t; - let toDiscreteProbabilityMassFraction: t => float; - let downsample: (int, t) => t; - let truncate: (option(float), option(float), t) => t; - - let updateIntegralCache: (option(DistTypes.continuousShape), t) => t; - - let integral: (t) => integral; - let integralEndY: (t) => float; - let integralXtoY: (float, t) => float; - let integralYtoX: (float, t) => float; - - let mean: t => float; - let variance: t => float; -}; - -module Dist = (T: dist) => { - type t = T.t; - type integral = T.integral; - let minX = T.minX; - let maxX = T.maxX; - let integral = T.integral; - let xTotalRange = (t: t) => maxX(t) -. minX(t); - let mapY = T.mapY; - let xToY = T.xToY; - let downsample = T.downsample; - let toShape = T.toShape; - let toDiscreteProbabilityMassFraction = T.toDiscreteProbabilityMassFraction; - let toContinuous = T.toContinuous; - let toDiscrete = T.toDiscrete; - let normalize = T.normalize; - let truncate = T.truncate; - let mean = T.mean; - let variance = T.variance; - - let updateIntegralCache = T.updateIntegralCache; - - module Integral = { - type t = T.integral; - let get = T.integral; - let xToY = T.integralXtoY; - let yToX = T.integralYtoX; - let sum = T.integralEndY; - }; -}; - -module Common = { - let combineIntegralSums = - ( - combineFn: (float, float) => option(float), - t1IntegralSumCache: option(float), - t2IntegralSumCache: option(float), - ) => { - switch (t1IntegralSumCache, t2IntegralSumCache) { - | (None, _) - | (_, None) => None - | (Some(s1), Some(s2)) => combineFn(s1, s2) - }; - }; - - let combineIntegrals = - ( - combineFn: (DistTypes.continuousShape, DistTypes.continuousShape) => option(DistTypes.continuousShape), - t1IntegralCache: option(DistTypes.continuousShape), - t2IntegralCache: option(DistTypes.continuousShape), - ) => { - switch (t1IntegralCache, t2IntegralCache) { - | (None, _) - | (_, None) => None - | (Some(s1), Some(s2)) => combineFn(s1, s2) - }; - }; -}; diff --git a/src/distPlus/distribution/Distributions.res b/src/distPlus/distribution/Distributions.res new file mode 100644 index 00000000..d01a886b --- /dev/null +++ b/src/distPlus/distribution/Distributions.res @@ -0,0 +1,89 @@ +module type dist = { + type t + type integral + let minX: t => float + let maxX: t => float + let mapY: ( + ~integralSumCacheFn: float => option=?, + ~integralCacheFn: DistTypes.continuousShape => option=?, + ~fn: float => float, + t, + ) => t + let xToY: (float, t) => DistTypes.mixedPoint + let toShape: t => DistTypes.shape + let toContinuous: t => option + let toDiscrete: t => option + let normalize: t => t + let toDiscreteProbabilityMassFraction: t => float + let downsample: (int, t) => t + let truncate: (option, option, t) => t + + let updateIntegralCache: (option, t) => t + + let integral: t => integral + let integralEndY: t => float + let integralXtoY: (float, t) => float + let integralYtoX: (float, t) => float + + let mean: t => float + let variance: t => float +} + +module Dist = (T: dist) => { + type t = T.t + type integral = T.integral + let minX = T.minX + let maxX = T.maxX + let integral = T.integral + let xTotalRange = (t: t) => maxX(t) -. minX(t) + let mapY = T.mapY + let xToY = T.xToY + let downsample = T.downsample + let toShape = T.toShape + let toDiscreteProbabilityMassFraction = T.toDiscreteProbabilityMassFraction + let toContinuous = T.toContinuous + let toDiscrete = T.toDiscrete + let normalize = T.normalize + let truncate = T.truncate + let mean = T.mean + let variance = T.variance + + let updateIntegralCache = T.updateIntegralCache + + module Integral = { + type t = T.integral + let get = T.integral + let xToY = T.integralXtoY + let yToX = T.integralYtoX + let sum = T.integralEndY + } +} + +module Common = { + let combineIntegralSums = ( + combineFn: (float, float) => option, + t1IntegralSumCache: option, + t2IntegralSumCache: option, + ) => + switch (t1IntegralSumCache, t2IntegralSumCache) { + | (None, _) + | (_, None) => + None + | (Some(s1), Some(s2)) => combineFn(s1, s2) + } + + let combineIntegrals = ( + combineFn: ( + DistTypes.continuousShape, + DistTypes.continuousShape, + ) => option, + t1IntegralCache: option, + t2IntegralCache: option, + ) => + switch (t1IntegralCache, t2IntegralCache) { + | (None, _) + | (_, None) => + None + | (Some(s1), Some(s2)) => combineFn(s1, s2) + } +} diff --git a/src/distPlus/distribution/Mixed.re b/src/distPlus/distribution/Mixed.re deleted file mode 100644 index 4c28d08c..00000000 --- a/src/distPlus/distribution/Mixed.re +++ /dev/null @@ -1,332 +0,0 @@ -open Distributions; - -type t = DistTypes.mixedShape; -let make = (~integralSumCache=None, ~integralCache=None, ~continuous, ~discrete): t => {continuous, discrete, integralSumCache, integralCache}; - -let totalLength = (t: t): int => { - let continuousLength = - t.continuous |> Continuous.getShape |> XYShape.T.length; - let discreteLength = t.discrete |> Discrete.getShape |> XYShape.T.length; - - continuousLength + discreteLength; -}; - -let scaleBy = (~scale=1.0, t: t): t => { - let scaledDiscrete = Discrete.scaleBy(~scale, t.discrete); - let scaledContinuous = Continuous.scaleBy(~scale, t.continuous); - let scaledIntegralCache = E.O.bind(t.integralCache, v => Some(Continuous.scaleBy(~scale, v))); - let scaledIntegralSumCache = E.O.bind(t.integralSumCache, s => Some(s *. scale)); - make(~discrete=scaledDiscrete, ~continuous=scaledContinuous, ~integralSumCache=scaledIntegralSumCache, ~integralCache=scaledIntegralCache); -}; - -let toContinuous = ({continuous}: t) => Some(continuous); -let toDiscrete = ({discrete}: t) => Some(discrete); - -let updateIntegralCache = (integralCache, t: t): t => { - ...t, - integralCache, -}; - -module T = - Dist({ - type t = DistTypes.mixedShape; - type integral = DistTypes.continuousShape; - let minX = ({continuous, discrete}: t) => { - min(Continuous.T.minX(continuous), Discrete.T.minX(discrete)); - }; - let maxX = ({continuous, discrete}: t) => - max(Continuous.T.maxX(continuous), Discrete.T.maxX(discrete)); - let toShape = (t: t): DistTypes.shape => Mixed(t); - - let updateIntegralCache = updateIntegralCache; - - let toContinuous = toContinuous; - let toDiscrete = toDiscrete; - - let truncate = - ( - leftCutoff: option(float), - rightCutoff: option(float), - {discrete, continuous}: t, - ) => { - let truncatedContinuous = - Continuous.T.truncate(leftCutoff, rightCutoff, continuous); - let truncatedDiscrete = - Discrete.T.truncate(leftCutoff, rightCutoff, discrete); - - make(~integralSumCache=None, ~integralCache=None, ~discrete=truncatedDiscrete, ~continuous=truncatedContinuous); - }; - - let normalize = (t: t): t => { - let continuousIntegral = Continuous.T.Integral.get(t.continuous); - let discreteIntegral = Discrete.T.Integral.get(t.discrete); - - let continuous = t.continuous |> Continuous.updateIntegralCache(Some(continuousIntegral)); - let discrete = t.discrete |> Discrete.updateIntegralCache(Some(discreteIntegral)); - - let continuousIntegralSum = - Continuous.T.Integral.sum(continuous); - let discreteIntegralSum = - Discrete.T.Integral.sum(discrete); - let totalIntegralSum = continuousIntegralSum +. discreteIntegralSum; - - let newContinuousSum = continuousIntegralSum /. totalIntegralSum; - let newDiscreteSum = discreteIntegralSum /. totalIntegralSum; - - let normalizedContinuous = - continuous - |> Continuous.scaleBy(~scale=newContinuousSum /. continuousIntegralSum) - |> Continuous.updateIntegralSumCache(Some(newContinuousSum)); - let normalizedDiscrete = - discrete - |> Discrete.scaleBy(~scale=newDiscreteSum /. discreteIntegralSum) - |> Discrete.updateIntegralSumCache(Some(newDiscreteSum)); - - make(~integralSumCache=Some(1.0), ~integralCache=None, ~continuous=normalizedContinuous, ~discrete=normalizedDiscrete); - }; - - let xToY = (x, t: t) => { - // This evaluates the mixedShape at x, interpolating if necessary. - // Note that we normalize entire mixedShape first. - let {continuous, discrete}: t = normalize(t); - let c = Continuous.T.xToY(x, continuous); - let d = Discrete.T.xToY(x, discrete); - DistTypes.MixedPoint.add(c, d); // "add" here just combines the two values into a single MixedPoint. - }; - - let toDiscreteProbabilityMassFraction = ({discrete, continuous}: t) => { - let discreteIntegralSum = - Discrete.T.Integral.sum(discrete); - let continuousIntegralSum = - Continuous.T.Integral.sum(continuous); - let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum; - - discreteIntegralSum /. totalIntegralSum; - }; - - let downsample = (count, t: t): t => { - // We will need to distribute the new xs fairly between the discrete and continuous shapes. - // The easiest way to do this is to simply go by the previous probability masses. - - let discreteIntegralSum = - Discrete.T.Integral.sum(t.discrete); - let continuousIntegralSum = - Continuous.T.Integral.sum(t.continuous); - let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum; - - // TODO: figure out what to do when the totalIntegralSum is zero. - - let downsampledDiscrete = - Discrete.T.downsample( - int_of_float( - float_of_int(count) *. (discreteIntegralSum /. totalIntegralSum), - ), - t.discrete, - ); - - let downsampledContinuous = - Continuous.T.downsample( - int_of_float( - float_of_int(count) *. (continuousIntegralSum /. totalIntegralSum), - ), - t.continuous, - ); - - {...t, discrete: downsampledDiscrete, continuous: downsampledContinuous}; - }; - - let integral = (t: t) => { - switch (t.integralCache) { - | Some(cache) => cache - | None => - // note: if the underlying shapes aren't normalized, then these integrals won't be either -- but that's the way it should be. - let continuousIntegral = Continuous.T.Integral.get(t.continuous); - let discreteIntegral = Continuous.stepwiseToLinear(Discrete.T.Integral.get(t.discrete)); - - Continuous.make( - XYShape.PointwiseCombination.combine( - (+.), - XYShape.XtoY.continuousInterpolator(`Linear, `UseOutermostPoints), - Continuous.getShape(continuousIntegral), - Continuous.getShape(discreteIntegral), - ), - ); - }; - }; - - let integralEndY = (t: t) => { - t |> integral |> Continuous.lastY; - }; - - let integralXtoY = (f, t) => { - t |> integral |> Continuous.getShape |> XYShape.XtoY.linear(f); - }; - - let integralYtoX = (f, t) => { - t |> integral |> Continuous.getShape |> XYShape.YtoX.linear(f); - }; - - // This pipes all ys (continuous and discrete) through fn. - // If mapY is a linear operation, we might be able to update the integralSumCaches as well; - // if not, they'll be set to None. - let mapY = - ( - ~integralSumCacheFn=previousIntegralSum => None, - ~integralCacheFn=previousIntegral => None, - ~fn, - t: t, - ) - : t => { - let yMappedDiscrete: DistTypes.discreteShape = - t.discrete - |> Discrete.T.mapY(~fn) - |> Discrete.updateIntegralSumCache(E.O.bind(t.discrete.integralSumCache, integralSumCacheFn)) - |> Discrete.updateIntegralCache(E.O.bind(t.discrete.integralCache, integralCacheFn)); - - let yMappedContinuous: DistTypes.continuousShape = - t.continuous - |> Continuous.T.mapY(~fn) - |> Continuous.updateIntegralSumCache(E.O.bind(t.continuous.integralSumCache, integralSumCacheFn)) - |> Continuous.updateIntegralCache(E.O.bind(t.continuous.integralCache, integralCacheFn)); - - { - discrete: yMappedDiscrete, - continuous: yMappedContinuous, - integralSumCache: E.O.bind(t.integralSumCache, integralSumCacheFn), - integralCache: E.O.bind(t.integralCache, integralCacheFn), - }; - }; - - let mean = ({discrete, continuous}: t): float => { - let discreteMean = Discrete.T.mean(discrete); - let continuousMean = Continuous.T.mean(continuous); - - // the combined mean is the weighted sum of the two: - let discreteIntegralSum = Discrete.T.Integral.sum(discrete); - let continuousIntegralSum = Continuous.T.Integral.sum(continuous); - let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum; - - ( - discreteMean - *. discreteIntegralSum - +. continuousMean - *. continuousIntegralSum - ) - /. totalIntegralSum; - }; - - let variance = ({discrete, continuous} as t: t): float => { - // the combined mean is the weighted sum of the two: - let discreteIntegralSum = Discrete.T.Integral.sum(discrete); - let continuousIntegralSum = Continuous.T.Integral.sum(continuous); - let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum; - - let getMeanOfSquares = ({discrete, continuous}: t) => { - let discreteMean = - discrete - |> Discrete.shapeMap(XYShape.Analysis.squareXYShape) - |> Discrete.T.mean; - let continuousMean = - continuous |> XYShape.Analysis.getMeanOfSquaresContinuousShape; - ( - discreteMean - *. discreteIntegralSum - +. continuousMean - *. continuousIntegralSum - ) - /. totalIntegralSum; - }; - - switch (discreteIntegralSum /. totalIntegralSum) { - | 1.0 => Discrete.T.variance(discrete) - | 0.0 => Continuous.T.variance(continuous) - | _ => - XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares) - }; - }; - }); - -let combineAlgebraically = - (op: ExpressionTypes.algebraicOperation, t1: t, t2: t) - : t => { - // Discrete convolution can cause a huge increase in the number of samples, - // so we'll first downsample. - - // An alternative (to be explored in the future) may be to first perform the full convolution and then to downsample the result; - // to use non-uniform fast Fourier transforms (for addition only), add web workers or gpu.js, etc. ... - - // we have to figure out where to downsample, and how to effectively - //let downsampleIfTooLarge = (t: t) => { - // let sqtl = sqrt(float_of_int(totalLength(t))); - // sqtl > 10 ? T.downsample(int_of_float(sqtl), t) : t; - //}; - - let t1d = t1; - let t2d = t2; - - // continuous (*) continuous => continuous, but also - // discrete (*) continuous => continuous (and vice versa). We have to take care of all combos and then combine them: - let ccConvResult = - Continuous.combineAlgebraically( - op, - t1.continuous, - t2.continuous, - ); - let dcConvResult = - Continuous.combineAlgebraicallyWithDiscrete( - op, - t2.continuous, - t1.discrete, - ); - let cdConvResult = - Continuous.combineAlgebraicallyWithDiscrete( - op, - t1.continuous, - t2.discrete, - ); - let continuousConvResult = - Continuous.reduce((+.), [|ccConvResult, dcConvResult, cdConvResult|]); - - // ... finally, discrete (*) discrete => discrete, obviously: - let discreteConvResult = - Discrete.combineAlgebraically(op, t1.discrete, t2.discrete); - - let combinedIntegralSum = - Common.combineIntegralSums( - (a, b) => Some(a *. b), - t1.integralSumCache, - t2.integralSumCache, - ); - - {discrete: discreteConvResult, continuous: continuousConvResult, integralSumCache: combinedIntegralSum, integralCache: None}; -}; - -let combinePointwise = (~integralSumCachesFn = (_, _) => None, ~integralCachesFn = (_, _) => None, fn, t1: t, t2: t): t => { - let reducedDiscrete = - [|t1, t2|] - |> E.A.fmap(toDiscrete) - |> E.A.O.concatSomes - |> Discrete.reduce(~integralSumCachesFn, ~integralCachesFn, fn); - - let reducedContinuous = - [|t1, t2|] - |> E.A.fmap(toContinuous) - |> E.A.O.concatSomes - |> Continuous.reduce(~integralSumCachesFn, ~integralCachesFn, fn); - - let combinedIntegralSum = - Common.combineIntegralSums( - integralSumCachesFn, - t1.integralSumCache, - t2.integralSumCache, - ); - - let combinedIntegral = - Common.combineIntegrals( - integralCachesFn, - t1.integralCache, - t2.integralCache, - ); - - make(~integralSumCache=combinedIntegralSum, ~integralCache=combinedIntegral, ~discrete=reducedDiscrete, ~continuous=reducedContinuous); -}; diff --git a/src/distPlus/distribution/Mixed.res b/src/distPlus/distribution/Mixed.res new file mode 100644 index 00000000..7e7cc207 --- /dev/null +++ b/src/distPlus/distribution/Mixed.res @@ -0,0 +1,307 @@ +open Distributions + +type t = DistTypes.mixedShape +let make = (~integralSumCache=None, ~integralCache=None, ~continuous, ~discrete): t => { + continuous: continuous, + discrete: discrete, + integralSumCache: integralSumCache, + integralCache: integralCache, +} + +let totalLength = (t: t): int => { + let continuousLength = t.continuous |> Continuous.getShape |> XYShape.T.length + let discreteLength = t.discrete |> Discrete.getShape |> XYShape.T.length + + continuousLength + discreteLength +} + +let scaleBy = (~scale=1.0, t: t): t => { + let scaledDiscrete = Discrete.scaleBy(~scale, t.discrete) + let scaledContinuous = Continuous.scaleBy(~scale, t.continuous) + let scaledIntegralCache = E.O.bind(t.integralCache, v => Some(Continuous.scaleBy(~scale, v))) + let scaledIntegralSumCache = E.O.bind(t.integralSumCache, s => Some(s *. scale)) + make( + ~discrete=scaledDiscrete, + ~continuous=scaledContinuous, + ~integralSumCache=scaledIntegralSumCache, + ~integralCache=scaledIntegralCache, + ) +} + +let toContinuous = ({continuous}: t) => Some(continuous) +let toDiscrete = ({discrete}: t) => Some(discrete) + +let updateIntegralCache = (integralCache, t: t): t => { + ...t, + integralCache: integralCache, +} + +module T = Dist({ + type t = DistTypes.mixedShape + type integral = DistTypes.continuousShape + let minX = ({continuous, discrete}: t) => + min(Continuous.T.minX(continuous), Discrete.T.minX(discrete)) + let maxX = ({continuous, discrete}: t) => + max(Continuous.T.maxX(continuous), Discrete.T.maxX(discrete)) + let toShape = (t: t): DistTypes.shape => Mixed(t) + + let updateIntegralCache = updateIntegralCache + + let toContinuous = toContinuous + let toDiscrete = toDiscrete + + let truncate = ( + leftCutoff: option, + rightCutoff: option, + {discrete, continuous}: t, + ) => { + let truncatedContinuous = Continuous.T.truncate(leftCutoff, rightCutoff, continuous) + let truncatedDiscrete = Discrete.T.truncate(leftCutoff, rightCutoff, discrete) + + make( + ~integralSumCache=None, + ~integralCache=None, + ~discrete=truncatedDiscrete, + ~continuous=truncatedContinuous, + ) + } + + let normalize = (t: t): t => { + let continuousIntegral = Continuous.T.Integral.get(t.continuous) + let discreteIntegral = Discrete.T.Integral.get(t.discrete) + + let continuous = t.continuous |> Continuous.updateIntegralCache(Some(continuousIntegral)) + let discrete = t.discrete |> Discrete.updateIntegralCache(Some(discreteIntegral)) + + let continuousIntegralSum = Continuous.T.Integral.sum(continuous) + let discreteIntegralSum = Discrete.T.Integral.sum(discrete) + let totalIntegralSum = continuousIntegralSum +. discreteIntegralSum + + let newContinuousSum = continuousIntegralSum /. totalIntegralSum + let newDiscreteSum = discreteIntegralSum /. totalIntegralSum + + let normalizedContinuous = + continuous + |> Continuous.scaleBy(~scale=newContinuousSum /. continuousIntegralSum) + |> Continuous.updateIntegralSumCache(Some(newContinuousSum)) + let normalizedDiscrete = + discrete + |> Discrete.scaleBy(~scale=newDiscreteSum /. discreteIntegralSum) + |> Discrete.updateIntegralSumCache(Some(newDiscreteSum)) + + make( + ~integralSumCache=Some(1.0), + ~integralCache=None, + ~continuous=normalizedContinuous, + ~discrete=normalizedDiscrete, + ) + } + + let xToY = (x, t: t) => { + // This evaluates the mixedShape at x, interpolating if necessary. + // Note that we normalize entire mixedShape first. + let {continuous, discrete}: t = normalize(t) + let c = Continuous.T.xToY(x, continuous) + let d = Discrete.T.xToY(x, discrete) + DistTypes.MixedPoint.add(c, d) // "add" here just combines the two values into a single MixedPoint. + } + + let toDiscreteProbabilityMassFraction = ({discrete, continuous}: t) => { + let discreteIntegralSum = Discrete.T.Integral.sum(discrete) + let continuousIntegralSum = Continuous.T.Integral.sum(continuous) + let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum + + discreteIntegralSum /. totalIntegralSum + } + + let downsample = (count, t: t): t => { + // We will need to distribute the new xs fairly between the discrete and continuous shapes. + // The easiest way to do this is to simply go by the previous probability masses. + + let discreteIntegralSum = Discrete.T.Integral.sum(t.discrete) + let continuousIntegralSum = Continuous.T.Integral.sum(t.continuous) + let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum + + // TODO: figure out what to do when the totalIntegralSum is zero. + + let downsampledDiscrete = Discrete.T.downsample( + int_of_float(float_of_int(count) *. (discreteIntegralSum /. totalIntegralSum)), + t.discrete, + ) + + let downsampledContinuous = Continuous.T.downsample( + int_of_float(float_of_int(count) *. (continuousIntegralSum /. totalIntegralSum)), + t.continuous, + ) + + {...t, discrete: downsampledDiscrete, continuous: downsampledContinuous} + } + + let integral = (t: t) => + switch t.integralCache { + | Some(cache) => cache + | None => + // note: if the underlying shapes aren't normalized, then these integrals won't be either -- but that's the way it should be. + let continuousIntegral = Continuous.T.Integral.get(t.continuous) + let discreteIntegral = Continuous.stepwiseToLinear(Discrete.T.Integral.get(t.discrete)) + + Continuous.make( + XYShape.PointwiseCombination.combine( + \"+.", + XYShape.XtoY.continuousInterpolator(#Linear, #UseOutermostPoints), + Continuous.getShape(continuousIntegral), + Continuous.getShape(discreteIntegral), + ), + ) + } + + let integralEndY = (t: t) => t |> integral |> Continuous.lastY + + let integralXtoY = (f, t) => t |> integral |> Continuous.getShape |> XYShape.XtoY.linear(f) + + let integralYtoX = (f, t) => t |> integral |> Continuous.getShape |> XYShape.YtoX.linear(f) + + // This pipes all ys (continuous and discrete) through fn. + // If mapY is a linear operation, we might be able to update the integralSumCaches as well; + // if not, they'll be set to None. + let mapY = ( + ~integralSumCacheFn=previousIntegralSum => None, + ~integralCacheFn=previousIntegral => None, + ~fn, + t: t, + ): t => { + let yMappedDiscrete: DistTypes.discreteShape = + t.discrete + |> Discrete.T.mapY(~fn) + |> Discrete.updateIntegralSumCache(E.O.bind(t.discrete.integralSumCache, integralSumCacheFn)) + |> Discrete.updateIntegralCache(E.O.bind(t.discrete.integralCache, integralCacheFn)) + + let yMappedContinuous: DistTypes.continuousShape = + t.continuous + |> Continuous.T.mapY(~fn) + |> Continuous.updateIntegralSumCache( + E.O.bind(t.continuous.integralSumCache, integralSumCacheFn), + ) + |> Continuous.updateIntegralCache(E.O.bind(t.continuous.integralCache, integralCacheFn)) + + { + discrete: yMappedDiscrete, + continuous: yMappedContinuous, + integralSumCache: E.O.bind(t.integralSumCache, integralSumCacheFn), + integralCache: E.O.bind(t.integralCache, integralCacheFn), + } + } + + let mean = ({discrete, continuous}: t): float => { + let discreteMean = Discrete.T.mean(discrete) + let continuousMean = Continuous.T.mean(continuous) + + // the combined mean is the weighted sum of the two: + let discreteIntegralSum = Discrete.T.Integral.sum(discrete) + let continuousIntegralSum = Continuous.T.Integral.sum(continuous) + let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum + + (discreteMean *. discreteIntegralSum +. continuousMean *. continuousIntegralSum) /. + totalIntegralSum + } + + let variance = ({discrete, continuous} as t: t): float => { + // the combined mean is the weighted sum of the two: + let discreteIntegralSum = Discrete.T.Integral.sum(discrete) + let continuousIntegralSum = Continuous.T.Integral.sum(continuous) + let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum + + let getMeanOfSquares = ({discrete, continuous}: t) => { + let discreteMean = + discrete |> Discrete.shapeMap(XYShape.Analysis.squareXYShape) |> Discrete.T.mean + let continuousMean = continuous |> XYShape.Analysis.getMeanOfSquaresContinuousShape + (discreteMean *. discreteIntegralSum +. continuousMean *. continuousIntegralSum) /. + totalIntegralSum + } + + switch discreteIntegralSum /. totalIntegralSum { + | 1.0 => Discrete.T.variance(discrete) + | 0.0 => Continuous.T.variance(continuous) + | _ => XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares) + } + } +}) + +let combineAlgebraically = (op: ExpressionTypes.algebraicOperation, t1: t, t2: t): t => { + // Discrete convolution can cause a huge increase in the number of samples, + // so we'll first downsample. + + // An alternative (to be explored in the future) may be to first perform the full convolution and then to downsample the result; + // to use non-uniform fast Fourier transforms (for addition only), add web workers or gpu.js, etc. ... + + // we have to figure out where to downsample, and how to effectively + //let downsampleIfTooLarge = (t: t) => { + // let sqtl = sqrt(float_of_int(totalLength(t))); + // sqtl > 10 ? T.downsample(int_of_float(sqtl), t) : t; + //}; + + let t1d = t1 + let t2d = t2 + + // continuous (*) continuous => continuous, but also + // discrete (*) continuous => continuous (and vice versa). We have to take care of all combos and then combine them: + let ccConvResult = Continuous.combineAlgebraically(op, t1.continuous, t2.continuous) + let dcConvResult = Continuous.combineAlgebraicallyWithDiscrete(op, t2.continuous, t1.discrete) + let cdConvResult = Continuous.combineAlgebraicallyWithDiscrete(op, t1.continuous, t2.discrete) + let continuousConvResult = Continuous.reduce(\"+.", [ccConvResult, dcConvResult, cdConvResult]) + + // ... finally, discrete (*) discrete => discrete, obviously: + let discreteConvResult = Discrete.combineAlgebraically(op, t1.discrete, t2.discrete) + + let combinedIntegralSum = Common.combineIntegralSums( + (a, b) => Some(a *. b), + t1.integralSumCache, + t2.integralSumCache, + ) + + { + discrete: discreteConvResult, + continuous: continuousConvResult, + integralSumCache: combinedIntegralSum, + integralCache: None, + } +} + +let combinePointwise = ( + ~integralSumCachesFn=(_, _) => None, + ~integralCachesFn=(_, _) => None, + fn, + t1: t, + t2: t, +): t => { + let reducedDiscrete = + [t1, t2] + |> E.A.fmap(toDiscrete) + |> E.A.O.concatSomes + |> Discrete.reduce(~integralSumCachesFn, ~integralCachesFn, fn) + + let reducedContinuous = + [t1, t2] + |> E.A.fmap(toContinuous) + |> E.A.O.concatSomes + |> Continuous.reduce(~integralSumCachesFn, ~integralCachesFn, fn) + + let combinedIntegralSum = Common.combineIntegralSums( + integralSumCachesFn, + t1.integralSumCache, + t2.integralSumCache, + ) + + let combinedIntegral = Common.combineIntegrals( + integralCachesFn, + t1.integralCache, + t2.integralCache, + ) + + make( + ~integralSumCache=combinedIntegralSum, + ~integralCache=combinedIntegral, + ~discrete=reducedDiscrete, + ~continuous=reducedContinuous, + ) +} diff --git a/src/distPlus/distribution/MixedShapeBuilder.re b/src/distPlus/distribution/MixedShapeBuilder.re deleted file mode 100644 index 647fefdc..00000000 --- a/src/distPlus/distribution/MixedShapeBuilder.re +++ /dev/null @@ -1,34 +0,0 @@ -type assumption = - | ADDS_TO_1 - | ADDS_TO_CORRECT_PROBABILITY; - -type assumptions = { - continuous: assumption, - discrete: assumption, - discreteProbabilityMass: option(float), -}; - -let buildSimple = (~continuous: option(DistTypes.continuousShape), ~discrete: option(DistTypes.discreteShape)): option(DistTypes.shape) => { - let continuous = continuous |> E.O.default(Continuous.make(~integralSumCache=Some(0.0), {xs: [||], ys: [||]})); - let discrete = discrete |> E.O.default(Discrete.make(~integralSumCache=Some(0.0), {xs: [||], ys: [||]})); - let cLength = - continuous - |> Continuous.getShape - |> XYShape.T.xs - |> E.A.length; - let dLength = discrete |> Discrete.getShape |> XYShape.T.xs |> E.A.length; - switch (cLength, dLength) { - | (0 | 1, 0) => None - | (0 | 1, _) => Some(Discrete(discrete)) - | (_, 0) => Some(Continuous(continuous)) - | (_, _) => - let mixedDist = - Mixed.make( - ~integralSumCache=None, - ~integralCache=None, - ~continuous, - ~discrete, - ); - Some(Mixed(mixedDist)); - }; -}; \ No newline at end of file diff --git a/src/distPlus/distribution/MixedShapeBuilder.res b/src/distPlus/distribution/MixedShapeBuilder.res new file mode 100644 index 00000000..40027d7c --- /dev/null +++ b/src/distPlus/distribution/MixedShapeBuilder.res @@ -0,0 +1,29 @@ +type assumption = + | ADDS_TO_1 + | ADDS_TO_CORRECT_PROBABILITY + +type assumptions = { + continuous: assumption, + discrete: assumption, + discreteProbabilityMass: option, +} + +let buildSimple = ( + ~continuous: option, + ~discrete: option, +): option => { + let continuous = + continuous |> E.O.default(Continuous.make(~integralSumCache=Some(0.0), {xs: [], ys: []})) + let discrete = + discrete |> E.O.default(Discrete.make(~integralSumCache=Some(0.0), {xs: [], ys: []})) + let cLength = continuous |> Continuous.getShape |> XYShape.T.xs |> E.A.length + let dLength = discrete |> Discrete.getShape |> XYShape.T.xs |> E.A.length + switch (cLength, dLength) { + | (0 | 1, 0) => None + | (0 | 1, _) => Some(Discrete(discrete)) + | (_, 0) => Some(Continuous(continuous)) + | (_, _) => + let mixedDist = Mixed.make(~integralSumCache=None, ~integralCache=None, ~continuous, ~discrete) + Some(Mixed(mixedDist)) + } +} diff --git a/src/distPlus/distribution/Shape.re b/src/distPlus/distribution/Shape.re deleted file mode 100644 index e249c216..00000000 --- a/src/distPlus/distribution/Shape.re +++ /dev/null @@ -1,240 +0,0 @@ -open Distributions; - -type t = DistTypes.shape; -let mapToAll = ((fn1, fn2, fn3), t: t) => - switch (t) { - | Mixed(m) => fn1(m) - | Discrete(m) => fn2(m) - | Continuous(m) => fn3(m) - }; - -let fmap = ((fn1, fn2, fn3), t: t): t => - switch (t) { - | Mixed(m) => Mixed(fn1(m)) - | Discrete(m) => Discrete(fn2(m)) - | Continuous(m) => Continuous(fn3(m)) - }; - - -let toMixed = - mapToAll(( - m => m, - d => Mixed.make(~integralSumCache=d.integralSumCache, ~integralCache=d.integralCache, ~discrete=d, ~continuous=Continuous.empty), - c => Mixed.make(~integralSumCache=c.integralSumCache, ~integralCache=c.integralCache, ~discrete=Discrete.empty, ~continuous=c), - )); - -let combineAlgebraically = - (op: ExpressionTypes.algebraicOperation, t1: t, t2: t): t => { - - switch (t1, t2) { - | (Continuous(m1), Continuous(m2)) => - Continuous.combineAlgebraically(op, m1, m2) |> Continuous.T.toShape; - | (Continuous(m1), Discrete(m2)) - | (Discrete(m2), Continuous(m1)) => - Continuous.combineAlgebraicallyWithDiscrete(op, m1, m2) |> Continuous.T.toShape - | (Discrete(m1), Discrete(m2)) => - Discrete.combineAlgebraically(op, m1, m2) |> Discrete.T.toShape - | (m1, m2) => - Mixed.combineAlgebraically( - op, - toMixed(m1), - toMixed(m2), - ) - |> Mixed.T.toShape - }; -}; - -let combinePointwise = - (~integralSumCachesFn: (float, float) => option(float) = (_, _) => None, - ~integralCachesFn: (DistTypes.continuousShape, DistTypes.continuousShape) => option(DistTypes.continuousShape) = (_, _) => None, - fn, - t1: t, - t2: t) => - switch (t1, t2) { - | (Continuous(m1), Continuous(m2)) => - DistTypes.Continuous( - Continuous.combinePointwise(~integralSumCachesFn, ~integralCachesFn, fn, m1, m2), - ) - | (Discrete(m1), Discrete(m2)) => - DistTypes.Discrete( - Discrete.combinePointwise(~integralSumCachesFn, ~integralCachesFn, fn, m1, m2), - ) - | (m1, m2) => - DistTypes.Mixed( - Mixed.combinePointwise( - ~integralSumCachesFn, - ~integralCachesFn, - fn, - toMixed(m1), - toMixed(m2), - ), - ) - }; - -module T = - Dist({ - type t = DistTypes.shape; - type integral = DistTypes.continuousShape; - - let xToY = (f: float) => - mapToAll(( - Mixed.T.xToY(f), - Discrete.T.xToY(f), - Continuous.T.xToY(f), - )); - - let toShape = (t: t) => t; - - let toContinuous = t => None; - let toDiscrete = t => None; - - let downsample = (i, t) => - fmap( - ( - Mixed.T.downsample(i), - Discrete.T.downsample(i), - Continuous.T.downsample(i), - ), - t, - ); - - let truncate = (leftCutoff, rightCutoff, t): t => - fmap( - ( - Mixed.T.truncate(leftCutoff, rightCutoff), - Discrete.T.truncate(leftCutoff, rightCutoff), - Continuous.T.truncate(leftCutoff, rightCutoff), - ), - t, - ); - - let toDiscreteProbabilityMassFraction = t => 0.0; - - let normalize = - fmap(( - Mixed.T.normalize, - Discrete.T.normalize, - Continuous.T.normalize - )); - - let updateIntegralCache = (integralCache, t: t): t => - fmap(( - Mixed.T.updateIntegralCache(integralCache), - Discrete.T.updateIntegralCache(integralCache), - Continuous.T.updateIntegralCache(integralCache), - ), t); - - let toContinuous = - mapToAll(( - Mixed.T.toContinuous, - Discrete.T.toContinuous, - Continuous.T.toContinuous, - )); - let toDiscrete = - mapToAll(( - Mixed.T.toDiscrete, - Discrete.T.toDiscrete, - Continuous.T.toDiscrete, - )); - - let toDiscreteProbabilityMassFraction = - mapToAll(( - Mixed.T.toDiscreteProbabilityMassFraction, - Discrete.T.toDiscreteProbabilityMassFraction, - Continuous.T.toDiscreteProbabilityMassFraction, - )); - - let minX = mapToAll((Mixed.T.minX, Discrete.T.minX, Continuous.T.minX)); - let integral = - mapToAll(( - Mixed.T.Integral.get, - Discrete.T.Integral.get, - Continuous.T.Integral.get, - )); - let integralEndY = - mapToAll(( - Mixed.T.Integral.sum, - Discrete.T.Integral.sum, - Continuous.T.Integral.sum, - )); - let integralXtoY = (f) => { - mapToAll(( - Mixed.T.Integral.xToY(f), - Discrete.T.Integral.xToY(f), - Continuous.T.Integral.xToY(f), - )); - }; - let integralYtoX = (f) => { - mapToAll(( - Mixed.T.Integral.yToX(f), - Discrete.T.Integral.yToX(f), - Continuous.T.Integral.yToX(f), - )); - }; - let maxX = mapToAll((Mixed.T.maxX, Discrete.T.maxX, Continuous.T.maxX)); - let mapY = (~integralSumCacheFn=previousIntegralSum => None, ~integralCacheFn=previousIntegral=>None, ~fn) =>{ - fmap(( - Mixed.T.mapY(~integralSumCacheFn, ~integralCacheFn, ~fn), - Discrete.T.mapY(~integralSumCacheFn, ~integralCacheFn, ~fn), - Continuous.T.mapY(~integralSumCacheFn, ~integralCacheFn, ~fn), - )); - } - - let mean = (t: t): float => - switch (t) { - | Mixed(m) => Mixed.T.mean(m) - | Discrete(m) => Discrete.T.mean(m) - | Continuous(m) => Continuous.T.mean(m) - }; - - let variance = (t: t): float => - switch (t) { - | Mixed(m) => Mixed.T.variance(m) - | Discrete(m) => Discrete.T.variance(m) - | Continuous(m) => Continuous.T.variance(m) - }; - }); - -let pdf = (f: float, t: t) => { - let mixedPoint: DistTypes.mixedPoint = T.xToY(f, t); - mixedPoint.continuous +. mixedPoint.discrete; -}; - -let inv = T.Integral.yToX; -let cdf = T.Integral.xToY; - -let doN = (n, fn) => { - let items = Belt.Array.make(n, 0.0); - for (x in 0 to n - 1) { - let _ = Belt.Array.set(items, x, fn()); - (); - }; - items; -}; - -let sample = (t: t): float => { - let randomItem = Random.float(1.); - let bar = t |> T.Integral.yToX(randomItem); - bar; -}; - -let isFloat = (t:t) => switch(t){ -| Discrete({xyShape: {xs: [|_|], ys: [|1.0|]}}) => true -| _ => false -} - -let sampleNRendered = (n, dist) => { - let integralCache = T.Integral.get(dist); - let distWithUpdatedIntegralCache = T.updateIntegralCache(Some(integralCache), dist); - - doN(n, () => sample(distWithUpdatedIntegralCache)); -}; - -let operate = (distToFloatOp: ExpressionTypes.distToFloatOperation, s): float => - switch (distToFloatOp) { - | `Pdf(f) => pdf(f, s) - | `Cdf(f) => pdf(f, s) - | `Inv(f) => inv(f, s) - | `Sample => sample(s) - | `Mean => T.mean(s) - }; diff --git a/src/distPlus/distribution/Shape.res b/src/distPlus/distribution/Shape.res new file mode 100644 index 00000000..94213e65 --- /dev/null +++ b/src/distPlus/distribution/Shape.res @@ -0,0 +1,207 @@ +open Distributions + +type t = DistTypes.shape +let mapToAll = ((fn1, fn2, fn3), t: t) => + switch t { + | Mixed(m) => fn1(m) + | Discrete(m) => fn2(m) + | Continuous(m) => fn3(m) + } + +let fmap = ((fn1, fn2, fn3), t: t): t => + switch t { + | Mixed(m) => Mixed(fn1(m)) + | Discrete(m) => Discrete(fn2(m)) + | Continuous(m) => Continuous(fn3(m)) + } + +let toMixed = mapToAll(( + m => m, + d => + Mixed.make( + ~integralSumCache=d.integralSumCache, + ~integralCache=d.integralCache, + ~discrete=d, + ~continuous=Continuous.empty, + ), + c => + Mixed.make( + ~integralSumCache=c.integralSumCache, + ~integralCache=c.integralCache, + ~discrete=Discrete.empty, + ~continuous=c, + ), +)) + +let combineAlgebraically = (op: ExpressionTypes.algebraicOperation, t1: t, t2: t): t => + switch (t1, t2) { + | (Continuous(m1), Continuous(m2)) => + Continuous.combineAlgebraically(op, m1, m2) |> Continuous.T.toShape + | (Continuous(m1), Discrete(m2)) + | (Discrete(m2), Continuous(m1)) => + Continuous.combineAlgebraicallyWithDiscrete(op, m1, m2) |> Continuous.T.toShape + | (Discrete(m1), Discrete(m2)) => Discrete.combineAlgebraically(op, m1, m2) |> Discrete.T.toShape + | (m1, m2) => Mixed.combineAlgebraically(op, toMixed(m1), toMixed(m2)) |> Mixed.T.toShape + } + +let combinePointwise = ( + ~integralSumCachesFn: (float, float) => option=(_, _) => None, + ~integralCachesFn: ( + DistTypes.continuousShape, + DistTypes.continuousShape, + ) => option=(_, _) => None, + fn, + t1: t, + t2: t, +) => + switch (t1, t2) { + | (Continuous(m1), Continuous(m2)) => + DistTypes.Continuous( + Continuous.combinePointwise(~integralSumCachesFn, ~integralCachesFn, fn, m1, m2), + ) + | (Discrete(m1), Discrete(m2)) => + DistTypes.Discrete( + Discrete.combinePointwise(~integralSumCachesFn, ~integralCachesFn, fn, m1, m2), + ) + | (m1, m2) => + DistTypes.Mixed( + Mixed.combinePointwise(~integralSumCachesFn, ~integralCachesFn, fn, toMixed(m1), toMixed(m2)), + ) + } + +module T = Dist({ + type t = DistTypes.shape + type integral = DistTypes.continuousShape + + let xToY = (f: float) => mapToAll((Mixed.T.xToY(f), Discrete.T.xToY(f), Continuous.T.xToY(f))) + + let toShape = (t: t) => t + + let toContinuous = t => None + let toDiscrete = t => None + + let downsample = (i, t) => + fmap((Mixed.T.downsample(i), Discrete.T.downsample(i), Continuous.T.downsample(i)), t) + + let truncate = (leftCutoff, rightCutoff, t): t => + fmap( + ( + Mixed.T.truncate(leftCutoff, rightCutoff), + Discrete.T.truncate(leftCutoff, rightCutoff), + Continuous.T.truncate(leftCutoff, rightCutoff), + ), + t, + ) + + let toDiscreteProbabilityMassFraction = t => 0.0 + + let normalize = fmap((Mixed.T.normalize, Discrete.T.normalize, Continuous.T.normalize)) + + let updateIntegralCache = (integralCache, t: t): t => + fmap( + ( + Mixed.T.updateIntegralCache(integralCache), + Discrete.T.updateIntegralCache(integralCache), + Continuous.T.updateIntegralCache(integralCache), + ), + t, + ) + + let toContinuous = mapToAll(( + Mixed.T.toContinuous, + Discrete.T.toContinuous, + Continuous.T.toContinuous, + )) + let toDiscrete = mapToAll((Mixed.T.toDiscrete, Discrete.T.toDiscrete, Continuous.T.toDiscrete)) + + let toDiscreteProbabilityMassFraction = mapToAll(( + Mixed.T.toDiscreteProbabilityMassFraction, + Discrete.T.toDiscreteProbabilityMassFraction, + Continuous.T.toDiscreteProbabilityMassFraction, + )) + + let minX = mapToAll((Mixed.T.minX, Discrete.T.minX, Continuous.T.minX)) + let integral = mapToAll(( + Mixed.T.Integral.get, + Discrete.T.Integral.get, + Continuous.T.Integral.get, + )) + let integralEndY = mapToAll(( + Mixed.T.Integral.sum, + Discrete.T.Integral.sum, + Continuous.T.Integral.sum, + )) + let integralXtoY = f => + mapToAll((Mixed.T.Integral.xToY(f), Discrete.T.Integral.xToY(f), Continuous.T.Integral.xToY(f))) + let integralYtoX = f => + mapToAll((Mixed.T.Integral.yToX(f), Discrete.T.Integral.yToX(f), Continuous.T.Integral.yToX(f))) + let maxX = mapToAll((Mixed.T.maxX, Discrete.T.maxX, Continuous.T.maxX)) + let mapY = ( + ~integralSumCacheFn=previousIntegralSum => None, + ~integralCacheFn=previousIntegral => None, + ~fn, + ) => + fmap(( + Mixed.T.mapY(~integralSumCacheFn, ~integralCacheFn, ~fn), + Discrete.T.mapY(~integralSumCacheFn, ~integralCacheFn, ~fn), + Continuous.T.mapY(~integralSumCacheFn, ~integralCacheFn, ~fn), + )) + + let mean = (t: t): float => + switch t { + | Mixed(m) => Mixed.T.mean(m) + | Discrete(m) => Discrete.T.mean(m) + | Continuous(m) => Continuous.T.mean(m) + } + + let variance = (t: t): float => + switch t { + | Mixed(m) => Mixed.T.variance(m) + | Discrete(m) => Discrete.T.variance(m) + | Continuous(m) => Continuous.T.variance(m) + } +}) + +let pdf = (f: float, t: t) => { + let mixedPoint: DistTypes.mixedPoint = T.xToY(f, t) + mixedPoint.continuous +. mixedPoint.discrete +} + +let inv = T.Integral.yToX +let cdf = T.Integral.xToY + +let doN = (n, fn) => { + let items = Belt.Array.make(n, 0.0) + for x in 0 to n - 1 { + let _ = Belt.Array.set(items, x, fn()) + } + items +} + +let sample = (t: t): float => { + let randomItem = Random.float(1.) + let bar = t |> T.Integral.yToX(randomItem) + bar +} + +let isFloat = (t: t) => + switch t { + | Discrete({xyShape: {xs: [_], ys: [1.0]}}) => true + | _ => false + } + +let sampleNRendered = (n, dist) => { + let integralCache = T.Integral.get(dist) + let distWithUpdatedIntegralCache = T.updateIntegralCache(Some(integralCache), dist) + + doN(n, () => sample(distWithUpdatedIntegralCache)) +} + +let operate = (distToFloatOp: ExpressionTypes.distToFloatOperation, s): float => + switch distToFloatOp { + | #Pdf(f) => pdf(f, s) + | #Cdf(f) => pdf(f, s) + | #Inv(f) => inv(f, s) + | #Sample => sample(s) + | #Mean => T.mean(s) + } diff --git a/src/distPlus/distribution/XYShape.re b/src/distPlus/distribution/XYShape.re deleted file mode 100644 index 92cffe60..00000000 --- a/src/distPlus/distribution/XYShape.re +++ /dev/null @@ -1,504 +0,0 @@ -open DistTypes; - -let interpolate = - (xMin: float, xMax: float, yMin: float, yMax: float, xIntended: float) - : float => { - let minProportion = (xMax -. xIntended) /. (xMax -. xMin); - let maxProportion = (xIntended -. xMin) /. (xMax -. xMin); - yMin *. minProportion +. yMax *. maxProportion; -}; - -// TODO: Make sure that shapes cannot be empty. -let extImp = E.O.toExt("Tried to perform an operation on an empty XYShape."); - -module T = { - type t = xyShape; - let toXyShape = (t: t): xyShape => t; - type ts = array(xyShape); - let xs = (t: t) => t.xs; - let ys = (t: t) => t.ys; - let length = (t: t) => E.A.length(t.xs); - let empty = {xs: [||], ys: [||]}; - let isEmpty = (t: t) => length(t) == 0; - let minX = (t: t) => t |> xs |> E.A.Sorted.min |> extImp; - let maxX = (t: t) => t |> xs |> E.A.Sorted.max |> extImp; - let firstY = (t: t) => t |> ys |> E.A.first |> extImp; - let lastY = (t: t) => t |> ys |> E.A.last |> extImp; - let xTotalRange = (t: t) => maxX(t) -. minX(t); - let mapX = (fn, t: t): t => {xs: E.A.fmap(fn, t.xs), ys: t.ys}; - let mapY = (fn, t: t): t => {xs: t.xs, ys: E.A.fmap(fn, t.ys)}; - let zip = ({xs, ys}: t) => Belt.Array.zip(xs, ys); - let fromArray = ((xs, ys)): t => {xs, ys}; - let fromArrays = (xs, ys): t => {xs, ys}; - let accumulateYs = (fn, p: t) => { - fromArray((p.xs, E.A.accumulate(fn, p.ys))); - }; - let concat = (t1: t, t2: t) => { - let cxs = Array.concat([t1.xs, t2.xs]); - let cys = Array.concat([t1.ys, t2.ys]); - {xs: cxs, ys: cys}; - }; - let fromZippedArray = (pairs: array((float, float))): t => - pairs |> Belt.Array.unzip |> fromArray; - let equallyDividedXs = (t: t, newLength) => { - E.A.Floats.range(minX(t), maxX(t), newLength); - }; - let toJs = (t: t) => { - {"xs": t.xs, "ys": t.ys}; - }; -}; - -module Ts = { - type t = T.ts; - let minX = (t: t) => t |> E.A.fmap(T.minX) |> E.A.min |> extImp; - let maxX = (t: t) => t |> E.A.fmap(T.maxX) |> E.A.max |> extImp; - let equallyDividedXs = (t: t, newLength) => { - E.A.Floats.range(minX(t), maxX(t), newLength); - }; - let allXs = (t: t) => t |> E.A.fmap(T.xs) |> E.A.Sorted.concatMany; -}; - -module Pairs = { - let x = fst; - let y = snd; - let first = (t: T.t) => (T.minX(t), T.firstY(t)); - let last = (t: T.t) => (T.maxX(t), T.lastY(t)); - - let getBy = (t: T.t, fn) => t |> T.zip |> E.A.getBy(_, fn); - - let firstAtOrBeforeXValue = (xValue, t: T.t) => { - let zipped = T.zip(t); - let firstIndex = - zipped |> Belt.Array.getIndexBy(_, ((x, _)) => x > xValue); - let previousIndex = - switch (firstIndex) { - | None => Some(Array.length(zipped) - 1) - | Some(0) => None - | Some(n) => Some(n - 1) - }; - previousIndex |> Belt.Option.flatMap(_, Belt.Array.get(zipped)); - }; -}; - -module YtoX = { - let linear = (y: float, t: T.t): float => { - let firstHigherIndex = - E.A.Sorted.binarySearchFirstElementGreaterIndex(T.ys(t), y); - let foundX = - switch (firstHigherIndex) { - | `overMax => T.maxX(t) - | `underMin => T.minX(t) - | `firstHigher(firstHigherIndex) => - let lowerOrEqualIndex = - firstHigherIndex - 1 < 0 ? 0 : firstHigherIndex - 1; - let (_xs, _ys) = (T.xs(t), T.ys(t)); - let needsInterpolation = _ys[lowerOrEqualIndex] != y; - if (needsInterpolation) { - interpolate( - _ys[lowerOrEqualIndex], - _ys[firstHigherIndex], - _xs[lowerOrEqualIndex], - _xs[firstHigherIndex], - y, - ); - } else { - _xs[lowerOrEqualIndex]; - }; - }; - foundX; - }; -}; - -module XtoY = { - let stepwiseIncremental = (f, t: T.t) => - Pairs.firstAtOrBeforeXValue(f, t) |> E.O.fmap(Pairs.y); - - let stepwiseIfAtX = (f: float, t: T.t) => { - Pairs.getBy(t, ((x: float, _)) => {x == f}) |> E.O.fmap(Pairs.y); - }; - - let linear = (x: float, t: T.t): float => { - let firstHigherIndex = - E.A.Sorted.binarySearchFirstElementGreaterIndex(T.xs(t), x); - let n = - switch (firstHigherIndex) { - | `overMax => T.lastY(t) - | `underMin => T.firstY(t) - | `firstHigher(firstHigherIndex) => - let lowerOrEqualIndex = - firstHigherIndex - 1 < 0 ? 0 : firstHigherIndex - 1; - let (_xs, _ys) = (T.xs(t), T.ys(t)); - let needsInterpolation = _xs[lowerOrEqualIndex] != x; - if (needsInterpolation) { - interpolate( - _xs[lowerOrEqualIndex], - _xs[firstHigherIndex], - _ys[lowerOrEqualIndex], - _ys[firstHigherIndex], - x, - ); - } else { - _ys[lowerOrEqualIndex]; - }; - }; - n; - }; - - /* Returns a between-points-interpolating function that can be used with PointwiseCombination.combine. - Interpolation can either be stepwise (using the value on the left) or linear. Extrapolation can be `UseZero or `UseOutermostPoints. */ - let continuousInterpolator = (interpolation: DistTypes.interpolationStrategy, extrapolation: DistTypes.extrapolationStrategy): interpolator => { - switch (interpolation, extrapolation) { - | (`Linear, `UseZero) => (t: T.t, leftIndex: int, x: float) => { - if (leftIndex < 0) { - 0.0 - } else if (leftIndex >= T.length(t) - 1) { - 0.0 - } else { - let x1 = t.xs[leftIndex]; - let x2 = t.xs[leftIndex + 1]; - let y1 = t.ys[leftIndex]; - let y2 = t.ys[leftIndex + 1]; - let fraction = (x -. x1) /. (x2 -. x1); - y1 *. (1. -. fraction) +. y2 *. fraction; - }; - } - | (`Linear, `UseOutermostPoints) => (t: T.t, leftIndex: int, x: float) => { - if (leftIndex < 0) { - t.ys[0]; - } else if (leftIndex >= T.length(t) - 1) { - t.ys[T.length(t) - 1] - } else { - let x1 = t.xs[leftIndex]; - let x2 = t.xs[leftIndex + 1]; - let y1 = t.ys[leftIndex]; - let y2 = t.ys[leftIndex + 1]; - let fraction = (x -. x1) /. (x2 -. x1); - y1 *. (1. -. fraction) +. y2 *. fraction; - }; - } - | (`Stepwise, `UseZero) => (t: T.t, leftIndex: int, x: float) => { - if (leftIndex < 0) { - 0.0 - } else if (leftIndex >= T.length(t) - 1) { - 0.0 - } else { - t.ys[leftIndex]; - } - } - | (`Stepwise, `UseOutermostPoints) => (t: T.t, leftIndex: int, x: float) => { - if (leftIndex < 0) { - t.ys[0]; - } else if (leftIndex >= T.length(t) - 1) { - t.ys[T.length(t) - 1] - } else { - t.ys[leftIndex]; - } - } - } - }; - - /* Returns a between-points-interpolating function that can be used with PointwiseCombination.combine. - For discrete distributions, the probability density between points is zero, so we just return zero here. */ - let discreteInterpolator: interpolator = (t: T.t, leftIndex: int, x: float) => 0.0; -}; - -module XsConversion = { - let _replaceWithXs = (newXs: array(float), t: T.t): T.t => { - let newYs = Belt.Array.map(newXs, XtoY.linear(_, t)); - {xs: newXs, ys: newYs}; - }; - - let equallyDivideXByMass = (newLength: int, integral: T.t) => - E.A.Floats.range(0.0, 1.0, newLength) - |> E.A.fmap(YtoX.linear(_, integral)); - - let proportionEquallyOverX = (newLength: int, t: T.t): T.t => { - T.equallyDividedXs(t, newLength) |> _replaceWithXs(_, t); - }; - - let proportionByProbabilityMass = - (newLength: int, integral: T.t, t: T.t): T.t => { - integral - |> equallyDivideXByMass(newLength) // creates a new set of xs at evenly spaced percentiles - |> _replaceWithXs(_, t); // linearly interpolates new ys for the new xs - }; -}; - -module Zipped = { - type zipped = array((float, float)); - let compareYs = ((_, y1), (_, y2)) => y1 > y2 ? 1 : 0; - let compareXs = ((x1, _), (x2, _)) => x1 > x2 ? 1 : 0; - let sortByY = (t: zipped) => t |> E.A.stableSortBy(_, compareYs); - let sortByX = (t: zipped) => t |> E.A.stableSortBy(_, compareXs); - let filterByX = (testFn: (float => bool), t: zipped) => t |> E.A.filter(((x, _)) => testFn(x)); -}; - -module PointwiseCombination = { - - // t1Interpolator and t2Interpolator are functions from XYShape.XtoY, e.g. linearBetweenPointsExtrapolateFlat. - let combine = [%raw {| // : (float => float => float, T.t, T.t, bool) => T.t - // This function combines two xyShapes by looping through both of them simultaneously. - // It always moves on to the next smallest x, whether that's in the first or second input's xs, - // and interpolates the value on the other side, thus accumulating xs and ys. - // This is written in raw JS because this can still be a bottleneck, and using refs for the i and j indices is quite painful. - - function(fn, interpolator, t1, t2) { - let t1n = t1.xs.length; - let t2n = t2.xs.length; - let outX = []; - let outY = []; - let i = -1; - let j = -1; - - while (i <= t1n - 1 && j <= t2n - 1) { - let x, ya, yb; - if (j == t2n - 1 && i < t1n - 1 || - t1.xs[i+1] < t2.xs[j+1]) { // if a has to catch up to b, or if b is already done - i++; - - x = t1.xs[i]; - ya = t1.ys[i]; - - yb = interpolator(t2, j, x); - } else if (i == t1n - 1 && j < t2n - 1 || - t1.xs[i+1] > t2.xs[j+1]) { // if b has to catch up to a, or if a is already done - j++; - - x = t2.xs[j]; - yb = t2.ys[j]; - - ya = interpolator(t1, i, x); - } else if (i < t1n - 1 && j < t2n && t1.xs[i+1] === t2.xs[j+1]) { // if they happen to be equal, move both ahead - i++; - j++; - x = t1.xs[i]; - ya = t1.ys[i]; - yb = t2.ys[j]; - } else if (i === t1n - 1 && j === t2n - 1) { - // finished! - i = t1n; - j = t2n; - continue; - } else { - console.log("Error!", i, j); - } - - outX.push(x); - outY.push(fn(ya, yb)); - } - - return {xs: outX, ys: outY}; - } - |}]; - - let combineEvenXs = - ( - ~fn, - ~xToYSelection, - sampleCount, - t1: T.t, - t2: T.t, - ) => { - - switch ((E.A.length(t1.xs), E.A.length(t2.xs))) { - | (0, 0) => T.empty - | (0, _) => t2 - | (_, 0) => t1 - | (_, _) => { - let allXs = Ts.equallyDividedXs([|t1, t2|], sampleCount); - - let allYs = allXs |> E.A.fmap(x => fn(xToYSelection(x, t1), xToYSelection(x, t2))); - - T.fromArrays(allXs, allYs); - } - } - }; - - // TODO: I'd bet this is pretty slow. Maybe it would be faster to intersperse Xs and Ys separately. - let intersperse = (t1: T.t, t2: T.t) => { - E.A.intersperse(T.zip(t1), T.zip(t2)) |> T.fromZippedArray; - }; -}; - -// I'm really not sure this part is actually what we want at this point. -module Range = { - // ((lastX, lastY), (nextX, nextY)) - type zippedRange = ((float, float), (float, float)); - - let toT = T.fromZippedArray; - let nextX = ((_, (nextX, _)): zippedRange) => nextX; - - let rangePointAssumingSteps = (((_, lastY), (nextX, _)): zippedRange) => ( - nextX, - lastY, - ); - - let rangeAreaAssumingTriangles = - (((lastX, lastY), (nextX, nextY)): zippedRange) => - (nextX -. lastX) *. (lastY +. nextY) /. 2.; - - //Todo: figure out how to without making new array. - let rangeAreaAssumingTrapezoids = - (((lastX, lastY), (nextX, nextY)): zippedRange) => - (nextX -. lastX) - *. (Js.Math.min_float(lastY, nextY) +. (lastY +. nextY) /. 2.); - - let delta_y_over_delta_x = - (((lastX, lastY), (nextX, nextY)): zippedRange) => - (nextY -. lastY) /. (nextX -. lastX); - - let mapYsBasedOnRanges = (fn, t) => - Belt.Array.zip(t.xs, t.ys) - |> E.A.toRanges - |> E.R.toOption - |> E.O.fmap(r => r |> Belt.Array.map(_, r => (nextX(r), fn(r)))); - - // This code is messy, in part because I'm trying to make things easy on garbage collection here. - // It's using triangles instead of trapezoids right now. - let integrateWithTriangles = ({xs, ys}) => { - let length = E.A.length(xs); - let cumulativeY = Belt.Array.make(length, 0.0); - for (x in 0 to E.A.length(xs) - 2) { - let _ = - Belt.Array.set( - cumulativeY, - x + 1, - (xs[x + 1] -. xs[x]) // dx - *. ((ys[x] +. ys[x + 1]) /. 2.) // (1/2) * (avgY) - +. cumulativeY[x], - ); - (); - }; - Some({xs, ys: cumulativeY}); - }; - - let derivative = mapYsBasedOnRanges(delta_y_over_delta_x); - - let stepwiseToLinear = ({xs, ys}: T.t): T.t => { - // adds points at the bottom of each step. - let length = E.A.length(xs); - let newXs: array(float) = Belt.Array.makeUninitializedUnsafe(2 * length); - let newYs: array(float) = Belt.Array.makeUninitializedUnsafe(2 * length); - - Belt.Array.set(newXs, 0, xs[0] -. epsilon_float) |> ignore; - Belt.Array.set(newYs, 0, 0.) |> ignore; - Belt.Array.set(newXs, 1, xs[0]) |> ignore; - Belt.Array.set(newYs, 1, ys[0]) |> ignore; - - for (i in 1 to E.A.length(xs) - 1) { - Belt.Array.set(newXs, i * 2, xs[i] -. epsilon_float) |> ignore; - Belt.Array.set(newYs, i * 2, ys[i-1]) |> ignore; - Belt.Array.set(newXs, i * 2 + 1, xs[i]) |> ignore; - Belt.Array.set(newYs, i * 2 + 1, ys[i]) |> ignore; - (); - }; - - {xs: newXs, ys: newYs}; - }; - - // TODO: I think this isn't needed by any functions anymore. - let stepsToContinuous = t => { - // TODO: It would be nicer if this the diff didn't change the first element, and also maybe if there were a more elegant way of doing this. - let diff = T.xTotalRange(t) |> (r => r *. 0.00001); - let items = - switch (E.A.toRanges(Belt.Array.zip(t.xs, t.ys))) { - | Ok(items) => - Some( - items - |> Belt.Array.map(_, rangePointAssumingSteps) - |> T.fromZippedArray - |> PointwiseCombination.intersperse(t |> T.mapX(e => e +. diff)), - ) - | _ => Some(t) - }; - let first = items |> E.O.fmap(T.zip) |> E.O.bind(_, E.A.get(_, 0)); - switch (items, first) { - | (Some(items), Some((0.0, _))) => Some(items) - | (Some(items), Some((firstX, _))) => - let all = E.A.append([|(firstX, 0.0)|], items |> T.zip); - all |> T.fromZippedArray |> E.O.some; - | _ => None - }; - }; -}; - -let pointLogScore = (prediction, answer) => - switch (answer) { - | 0. => 0.0 - | answer => answer *. Js.Math.log2(Js.Math.abs_float(prediction /. answer)) - }; - -let logScorePoint = (sampleCount, t1, t2) => - PointwiseCombination.combineEvenXs( - ~fn=pointLogScore, - ~xToYSelection=XtoY.linear, - sampleCount, - t1, - t2, - ) - |> Range.integrateWithTriangles - |> E.O.fmap(T.accumulateYs((+.))) - |> E.O.fmap(Pairs.last) - |> E.O.fmap(Pairs.y); - -module Analysis = { - let integrateContinuousShape = - ( - ~indefiniteIntegralStepwise=(p, h1) => h1 *. p, - ~indefiniteIntegralLinear=(p, a, b) => a *. p +. b *. p ** 2.0 /. 2.0, - t: DistTypes.continuousShape, - ) - : float => { - let xs = t.xyShape.xs; - let ys = t.xyShape.ys; - - E.A.reducei( - xs, - 0.0, - (acc, _x, i) => { - let areaUnderIntegral = - // TODO Take this switch statement out of the loop body - switch (t.interpolation, i) { - | (_, 0) => 0.0 - | (`Stepwise, _) => - indefiniteIntegralStepwise(xs[i], ys[i - 1]) - -. indefiniteIntegralStepwise(xs[i - 1], ys[i - 1]) - | (`Linear, _) => - let x1 = xs[i - 1]; - let x2 = xs[i]; - if (x1 == x2) { - 0.0 - } else { - let h1 = ys[i - 1]; - let h2 = ys[i]; - let b = (h1 -. h2) /. (x1 -. x2); - let a = h1 -. b *. x1; - indefiniteIntegralLinear(x2, a, b) - -. indefiniteIntegralLinear(x1, a, b); - }; - }; - acc +. areaUnderIntegral; - }, - ); - }; - - let getMeanOfSquaresContinuousShape = (t: DistTypes.continuousShape) => { - let indefiniteIntegralLinear = (p, a, b) => - a *. p ** 3.0 /. 3.0 +. b *. p ** 4.0 /. 4.0; - let indefiniteIntegralStepwise = (p, h1) => h1 *. p ** 3.0 /. 3.0; - integrateContinuousShape( - ~indefiniteIntegralStepwise, - ~indefiniteIntegralLinear, - t, - ); - }; - - let getVarianceDangerously = - (t: 't, mean: 't => float, getMeanOfSquares: 't => float): float => { - let meanSquared = mean(t) ** 2.0; - let meanOfSquares = getMeanOfSquares(t); - meanOfSquares -. meanSquared; - }; - - let squareXYShape = T.mapX(x => x ** 2.0) -}; diff --git a/src/distPlus/distribution/XYShape.res b/src/distPlus/distribution/XYShape.res new file mode 100644 index 00000000..c48d5557 --- /dev/null +++ b/src/distPlus/distribution/XYShape.res @@ -0,0 +1,440 @@ +open DistTypes + +let interpolate = (xMin: float, xMax: float, yMin: float, yMax: float, xIntended: float): float => { + let minProportion = (xMax -. xIntended) /. (xMax -. xMin) + let maxProportion = (xIntended -. xMin) /. (xMax -. xMin) + yMin *. minProportion +. yMax *. maxProportion +} + +// TODO: Make sure that shapes cannot be empty. +let extImp = E.O.toExt("Tried to perform an operation on an empty XYShape.") + +module T = { + type t = xyShape + let toXyShape = (t: t): xyShape => t + type ts = array + let xs = (t: t) => t.xs + let ys = (t: t) => t.ys + let length = (t: t) => E.A.length(t.xs) + let empty = {xs: [], ys: []} + let isEmpty = (t: t) => length(t) == 0 + let minX = (t: t) => t |> xs |> E.A.Sorted.min |> extImp + let maxX = (t: t) => t |> xs |> E.A.Sorted.max |> extImp + let firstY = (t: t) => t |> ys |> E.A.first |> extImp + let lastY = (t: t) => t |> ys |> E.A.last |> extImp + let xTotalRange = (t: t) => maxX(t) -. minX(t) + let mapX = (fn, t: t): t => {xs: E.A.fmap(fn, t.xs), ys: t.ys} + let mapY = (fn, t: t): t => {xs: t.xs, ys: E.A.fmap(fn, t.ys)} + let zip = ({xs, ys}: t) => Belt.Array.zip(xs, ys) + let fromArray = ((xs, ys)): t => {xs: xs, ys: ys} + let fromArrays = (xs, ys): t => {xs: xs, ys: ys} + let accumulateYs = (fn, p: t) => fromArray((p.xs, E.A.accumulate(fn, p.ys))) + let concat = (t1: t, t2: t) => { + let cxs = Array.concat(list{t1.xs, t2.xs}) + let cys = Array.concat(list{t1.ys, t2.ys}) + {xs: cxs, ys: cys} + } + let fromZippedArray = (pairs: array<(float, float)>): t => pairs |> Belt.Array.unzip |> fromArray + let equallyDividedXs = (t: t, newLength) => E.A.Floats.range(minX(t), maxX(t), newLength) + let toJs = (t: t) => {"xs": t.xs, "ys": t.ys} +} + +module Ts = { + type t = T.ts + let minX = (t: t) => t |> E.A.fmap(T.minX) |> E.A.min |> extImp + let maxX = (t: t) => t |> E.A.fmap(T.maxX) |> E.A.max |> extImp + let equallyDividedXs = (t: t, newLength) => E.A.Floats.range(minX(t), maxX(t), newLength) + let allXs = (t: t) => t |> E.A.fmap(T.xs) |> E.A.Sorted.concatMany +} + +module Pairs = { + let x = fst + let y = snd + let first = (t: T.t) => (T.minX(t), T.firstY(t)) + let last = (t: T.t) => (T.maxX(t), T.lastY(t)) + + let getBy = (t: T.t, fn) => t |> T.zip |> E.A.getBy(_, fn) + + let firstAtOrBeforeXValue = (xValue, t: T.t) => { + let zipped = T.zip(t) + let firstIndex = zipped |> Belt.Array.getIndexBy(_, ((x, _)) => x > xValue) + let previousIndex = switch firstIndex { + | None => Some(Array.length(zipped) - 1) + | Some(0) => None + | Some(n) => Some(n - 1) + } + previousIndex |> Belt.Option.flatMap(_, Belt.Array.get(zipped)) + } +} + +module YtoX = { + let linear = (y: float, t: T.t): float => { + let firstHigherIndex = E.A.Sorted.binarySearchFirstElementGreaterIndex(T.ys(t), y) + let foundX = switch firstHigherIndex { + | #overMax => T.maxX(t) + | #underMin => T.minX(t) + | #firstHigher(firstHigherIndex) => + let lowerOrEqualIndex = firstHigherIndex - 1 < 0 ? 0 : firstHigherIndex - 1 + let (_xs, _ys) = (T.xs(t), T.ys(t)) + let needsInterpolation = _ys[lowerOrEqualIndex] != y + if needsInterpolation { + interpolate( + _ys[lowerOrEqualIndex], + _ys[firstHigherIndex], + _xs[lowerOrEqualIndex], + _xs[firstHigherIndex], + y, + ) + } else { + _xs[lowerOrEqualIndex] + } + } + foundX + } +} + +module XtoY = { + let stepwiseIncremental = (f, t: T.t) => Pairs.firstAtOrBeforeXValue(f, t) |> E.O.fmap(Pairs.y) + + let stepwiseIfAtX = (f: float, t: T.t) => + Pairs.getBy(t, ((x: float, _)) => x == f) |> E.O.fmap(Pairs.y) + + let linear = (x: float, t: T.t): float => { + let firstHigherIndex = E.A.Sorted.binarySearchFirstElementGreaterIndex(T.xs(t), x) + let n = switch firstHigherIndex { + | #overMax => T.lastY(t) + | #underMin => T.firstY(t) + | #firstHigher(firstHigherIndex) => + let lowerOrEqualIndex = firstHigherIndex - 1 < 0 ? 0 : firstHigherIndex - 1 + let (_xs, _ys) = (T.xs(t), T.ys(t)) + let needsInterpolation = _xs[lowerOrEqualIndex] != x + if needsInterpolation { + interpolate( + _xs[lowerOrEqualIndex], + _xs[firstHigherIndex], + _ys[lowerOrEqualIndex], + _ys[firstHigherIndex], + x, + ) + } else { + _ys[lowerOrEqualIndex] + } + } + n + } + + /* Returns a between-points-interpolating function that can be used with PointwiseCombination.combine. + Interpolation can either be stepwise (using the value on the left) or linear. Extrapolation can be `UseZero or `UseOutermostPoints. */ + let continuousInterpolator = ( + interpolation: DistTypes.interpolationStrategy, + extrapolation: DistTypes.extrapolationStrategy, + ): interpolator => + switch (interpolation, extrapolation) { + | (#Linear, #UseZero) => + (t: T.t, leftIndex: int, x: float) => + if leftIndex < 0 { + 0.0 + } else if leftIndex >= T.length(t) - 1 { + 0.0 + } else { + let x1 = t.xs[leftIndex] + let x2 = t.xs[leftIndex + 1] + let y1 = t.ys[leftIndex] + let y2 = t.ys[leftIndex + 1] + let fraction = (x -. x1) /. (x2 -. x1) + y1 *. (1. -. fraction) +. y2 *. fraction + } + | (#Linear, #UseOutermostPoints) => + (t: T.t, leftIndex: int, x: float) => + if leftIndex < 0 { + t.ys[0] + } else if leftIndex >= T.length(t) - 1 { + t.ys[T.length(t) - 1] + } else { + let x1 = t.xs[leftIndex] + let x2 = t.xs[leftIndex + 1] + let y1 = t.ys[leftIndex] + let y2 = t.ys[leftIndex + 1] + let fraction = (x -. x1) /. (x2 -. x1) + y1 *. (1. -. fraction) +. y2 *. fraction + } + | (#Stepwise, #UseZero) => + (t: T.t, leftIndex: int, x: float) => + if leftIndex < 0 { + 0.0 + } else if leftIndex >= T.length(t) - 1 { + 0.0 + } else { + t.ys[leftIndex] + } + | (#Stepwise, #UseOutermostPoints) => + (t: T.t, leftIndex: int, x: float) => + if leftIndex < 0 { + t.ys[0] + } else if leftIndex >= T.length(t) - 1 { + t.ys[T.length(t) - 1] + } else { + t.ys[leftIndex] + } + } + + /* Returns a between-points-interpolating function that can be used with PointwiseCombination.combine. + For discrete distributions, the probability density between points is zero, so we just return zero here. */ + let discreteInterpolator: interpolator = (t: T.t, leftIndex: int, x: float) => 0.0 +} + +module XsConversion = { + let _replaceWithXs = (newXs: array, t: T.t): T.t => { + let newYs = Belt.Array.map(newXs, XtoY.linear(_, t)) + {xs: newXs, ys: newYs} + } + + let equallyDivideXByMass = (newLength: int, integral: T.t) => + E.A.Floats.range(0.0, 1.0, newLength) |> E.A.fmap(YtoX.linear(_, integral)) + + let proportionEquallyOverX = (newLength: int, t: T.t): T.t => + T.equallyDividedXs(t, newLength) |> _replaceWithXs(_, t) + + let proportionByProbabilityMass = (newLength: int, integral: T.t, t: T.t): T.t => + integral |> equallyDivideXByMass(newLength) |> _replaceWithXs(_, t) // creates a new set of xs at evenly spaced percentiles // linearly interpolates new ys for the new xs +} + +module Zipped = { + type zipped = array<(float, float)> + let compareYs = ((_, y1), (_, y2)) => y1 > y2 ? 1 : 0 + let compareXs = ((x1, _), (x2, _)) => x1 > x2 ? 1 : 0 + let sortByY = (t: zipped) => t |> E.A.stableSortBy(_, compareYs) + let sortByX = (t: zipped) => t |> E.A.stableSortBy(_, compareXs) + let filterByX = (testFn: float => bool, t: zipped) => t |> E.A.filter(((x, _)) => testFn(x)) +} + +module PointwiseCombination = { + // t1Interpolator and t2Interpolator are functions from XYShape.XtoY, e.g. linearBetweenPointsExtrapolateFlat. + let combine = %raw(` // : (float => float => float, T.t, T.t, bool) => T.t + // This function combines two xyShapes by looping through both of them simultaneously. + // It always moves on to the next smallest x, whether that's in the first or second input's xs, + // and interpolates the value on the other side, thus accumulating xs and ys. + // This is written in raw JS because this can still be a bottleneck, and using refs for the i and j indices is quite painful. + + function(fn, interpolator, t1, t2) { + let t1n = t1.xs.length; + let t2n = t2.xs.length; + let outX = []; + let outY = []; + let i = -1; + let j = -1; + + while (i <= t1n - 1 && j <= t2n - 1) { + let x, ya, yb; + if (j == t2n - 1 && i < t1n - 1 || + t1.xs[i+1] < t2.xs[j+1]) { // if a has to catch up to b, or if b is already done + i++; + + x = t1.xs[i]; + ya = t1.ys[i]; + + yb = interpolator(t2, j, x); + } else if (i == t1n - 1 && j < t2n - 1 || + t1.xs[i+1] > t2.xs[j+1]) { // if b has to catch up to a, or if a is already done + j++; + + x = t2.xs[j]; + yb = t2.ys[j]; + + ya = interpolator(t1, i, x); + } else if (i < t1n - 1 && j < t2n && t1.xs[i+1] === t2.xs[j+1]) { // if they happen to be equal, move both ahead + i++; + j++; + x = t1.xs[i]; + ya = t1.ys[i]; + yb = t2.ys[j]; + } else if (i === t1n - 1 && j === t2n - 1) { + // finished! + i = t1n; + j = t2n; + continue; + } else { + console.log("Error!", i, j); + } + + outX.push(x); + outY.push(fn(ya, yb)); + } + + return {xs: outX, ys: outY}; + } + `) + + let combineEvenXs = (~fn, ~xToYSelection, sampleCount, t1: T.t, t2: T.t) => + switch (E.A.length(t1.xs), E.A.length(t2.xs)) { + | (0, 0) => T.empty + | (0, _) => t2 + | (_, 0) => t1 + | (_, _) => + let allXs = Ts.equallyDividedXs([t1, t2], sampleCount) + + let allYs = allXs |> E.A.fmap(x => fn(xToYSelection(x, t1), xToYSelection(x, t2))) + + T.fromArrays(allXs, allYs) + } + + // TODO: I'd bet this is pretty slow. Maybe it would be faster to intersperse Xs and Ys separately. + let intersperse = (t1: T.t, t2: T.t) => E.A.intersperse(T.zip(t1), T.zip(t2)) |> T.fromZippedArray +} + +// I'm really not sure this part is actually what we want at this point. +module Range = { + // ((lastX, lastY), (nextX, nextY)) + type zippedRange = ((float, float), (float, float)) + + let toT = T.fromZippedArray + let nextX = ((_, (nextX, _)): zippedRange) => nextX + + let rangePointAssumingSteps = (((_, lastY), (nextX, _)): zippedRange) => (nextX, lastY) + + let rangeAreaAssumingTriangles = (((lastX, lastY), (nextX, nextY)): zippedRange) => + (nextX -. lastX) *. (lastY +. nextY) /. 2. + + //Todo: figure out how to without making new array. + let rangeAreaAssumingTrapezoids = (((lastX, lastY), (nextX, nextY)): zippedRange) => + (nextX -. lastX) *. (Js.Math.min_float(lastY, nextY) +. (lastY +. nextY) /. 2.) + + let delta_y_over_delta_x = (((lastX, lastY), (nextX, nextY)): zippedRange) => + (nextY -. lastY) /. (nextX -. lastX) + + let mapYsBasedOnRanges = (fn, t) => + Belt.Array.zip(t.xs, t.ys) + |> E.A.toRanges + |> E.R.toOption + |> E.O.fmap(r => r |> Belt.Array.map(_, r => (nextX(r), fn(r)))) + + // This code is messy, in part because I'm trying to make things easy on garbage collection here. + // It's using triangles instead of trapezoids right now. + let integrateWithTriangles = ({xs, ys}) => { + let length = E.A.length(xs) + let cumulativeY = Belt.Array.make(length, 0.0) + for x in 0 to E.A.length(xs) - 2 { + let _ = Belt.Array.set( + cumulativeY, + x + 1, + (xs[x + 1] -. xs[x]) *. ((ys[x] +. ys[x + 1]) /. 2.) +. cumulativeY[x], // dx // (1/2) * (avgY) + ) + } + Some({xs: xs, ys: cumulativeY}) + } + + let derivative = mapYsBasedOnRanges(delta_y_over_delta_x) + + let stepwiseToLinear = ({xs, ys}: T.t): T.t => { + // adds points at the bottom of each step. + let length = E.A.length(xs) + let newXs: array = Belt.Array.makeUninitializedUnsafe(2 * length) + let newYs: array = Belt.Array.makeUninitializedUnsafe(2 * length) + + Belt.Array.set(newXs, 0, xs[0] -. epsilon_float) |> ignore + Belt.Array.set(newYs, 0, 0.) |> ignore + Belt.Array.set(newXs, 1, xs[0]) |> ignore + Belt.Array.set(newYs, 1, ys[0]) |> ignore + + for i in 1 to E.A.length(xs) - 1 { + Belt.Array.set(newXs, i * 2, xs[i] -. epsilon_float) |> ignore + Belt.Array.set(newYs, i * 2, ys[i - 1]) |> ignore + Belt.Array.set(newXs, i * 2 + 1, xs[i]) |> ignore + Belt.Array.set(newYs, i * 2 + 1, ys[i]) |> ignore + () + } + + {xs: newXs, ys: newYs} + } + + // TODO: I think this isn't needed by any functions anymore. + let stepsToContinuous = t => { + // TODO: It would be nicer if this the diff didn't change the first element, and also maybe if there were a more elegant way of doing this. + let diff = T.xTotalRange(t) |> (r => r *. 0.00001) + let items = switch E.A.toRanges(Belt.Array.zip(t.xs, t.ys)) { + | Ok(items) => + Some( + items + |> Belt.Array.map(_, rangePointAssumingSteps) + |> T.fromZippedArray + |> PointwiseCombination.intersperse(t |> T.mapX(e => e +. diff)), + ) + | _ => Some(t) + } + let first = items |> E.O.fmap(T.zip) |> E.O.bind(_, E.A.get(_, 0)) + switch (items, first) { + | (Some(items), Some((0.0, _))) => Some(items) + | (Some(items), Some((firstX, _))) => + let all = E.A.append([(firstX, 0.0)], items |> T.zip) + all |> T.fromZippedArray |> E.O.some + | _ => None + } + } +} + +let pointLogScore = (prediction, answer) => + switch answer { + | 0. => 0.0 + | answer => answer *. Js.Math.log2(Js.Math.abs_float(prediction /. answer)) + } + +let logScorePoint = (sampleCount, t1, t2) => + PointwiseCombination.combineEvenXs( + ~fn=pointLogScore, + ~xToYSelection=XtoY.linear, + sampleCount, + t1, + t2, + ) + |> Range.integrateWithTriangles + |> E.O.fmap(T.accumulateYs(\"+.")) + |> E.O.fmap(Pairs.last) + |> E.O.fmap(Pairs.y) + +module Analysis = { + let integrateContinuousShape = ( + ~indefiniteIntegralStepwise=(p, h1) => h1 *. p, + ~indefiniteIntegralLinear=(p, a, b) => a *. p +. b *. p ** 2.0 /. 2.0, + t: DistTypes.continuousShape, + ): float => { + let xs = t.xyShape.xs + let ys = t.xyShape.ys + + E.A.reducei(xs, 0.0, (acc, _x, i) => { + let areaUnderIntegral = // TODO Take this switch statement out of the loop body + switch (t.interpolation, i) { + | (_, 0) => 0.0 + | (#Stepwise, _) => + indefiniteIntegralStepwise(xs[i], ys[i - 1]) -. + indefiniteIntegralStepwise(xs[i - 1], ys[i - 1]) + | (#Linear, _) => + let x1 = xs[i - 1] + let x2 = xs[i] + if x1 == x2 { + 0.0 + } else { + let h1 = ys[i - 1] + let h2 = ys[i] + let b = (h1 -. h2) /. (x1 -. x2) + let a = h1 -. b *. x1 + indefiniteIntegralLinear(x2, a, b) -. indefiniteIntegralLinear(x1, a, b) + } + } + acc +. areaUnderIntegral + }) + } + + let getMeanOfSquaresContinuousShape = (t: DistTypes.continuousShape) => { + let indefiniteIntegralLinear = (p, a, b) => a *. p ** 3.0 /. 3.0 +. b *. p ** 4.0 /. 4.0 + let indefiniteIntegralStepwise = (p, h1) => h1 *. p ** 3.0 /. 3.0 + integrateContinuousShape(~indefiniteIntegralStepwise, ~indefiniteIntegralLinear, t) + } + + let getVarianceDangerously = (t: 't, mean: 't => float, getMeanOfSquares: 't => float): float => { + let meanSquared = mean(t) ** 2.0 + let meanOfSquares = getMeanOfSquares(t) + meanOfSquares -. meanSquared + } + + let squareXYShape = T.mapX(x => x ** 2.0) +} diff --git a/yarn.lock b/yarn.lock index cf1bd55a..22963a62 100644 --- a/yarn.lock +++ b/yarn.lock @@ -3132,6 +3132,11 @@ gensync@^1.0.0-beta.1: resolved "https://registry.yarnpkg.com/gensync/-/gensync-1.0.0-beta.1.tgz#58f4361ff987e5ff6e1e7a210827aa371eaac269" integrity sha512-r8EC6NO1sngH/zdD9fiRDLdcgnbayXah+mLgManTaIZJqEC1MZstmnox8KpnI2/fxQwrp5OpCOYWLp4rBl4Jcg== +gentype@^4.3.0: + version "4.3.0" + resolved "https://registry.yarnpkg.com/gentype/-/gentype-4.3.0.tgz#ebac3abcdde2ce2a8fc85611b11568a4cb349c8d" + integrity sha512-lqkc1ZS/Iog4uslRD4De47OV54Hu61vEBsirMKxRlgHIRvm8u6RqsdKxJ7JdJdrzmtKgPNvq1He69SozzW+6dQ== + get-caller-file@^1.0.1: version "1.0.3" resolved "https://registry.yarnpkg.com/get-caller-file/-/get-caller-file-1.0.3.tgz#f978fa4c90d1dfe7ff2d6beda2a515e713bdcf4a"