squiggle/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/XYShape.res

441 lines
15 KiB
Plaintext
Raw Normal View History

2022-02-15 20:47:33 +00:00
open PointSetTypes
2022-01-29 22:43:08 +00:00
let interpolate = (xMin: float, xMax: float, yMin: float, yMax: float, xIntended: float): float => {
let minProportion = (xMax -. xIntended) /. (xMax -. xMin)
let maxProportion = (xIntended -. xMin) /. (xMax -. xMin)
yMin *. minProportion +. yMax *. maxProportion
}
// TODO: Make sure that shapes cannot be empty.
let extImp = E.O.toExt("Tried to perform an operation on an empty XYShape.")
module T = {
type t = xyShape
let toXyShape = (t: t): xyShape => t
type ts = array<xyShape>
let xs = (t: t) => t.xs
let ys = (t: t) => t.ys
let length = (t: t) => E.A.length(t.xs)
let empty = {xs: [], ys: []}
let isEmpty = (t: t) => length(t) == 0
let minX = (t: t) => t |> xs |> E.A.Sorted.min |> extImp
let maxX = (t: t) => t |> xs |> E.A.Sorted.max |> extImp
let firstY = (t: t) => t |> ys |> E.A.first |> extImp
let lastY = (t: t) => t |> ys |> E.A.last |> extImp
let xTotalRange = (t: t) => maxX(t) -. minX(t)
let mapX = (fn, t: t): t => {xs: E.A.fmap(fn, t.xs), ys: t.ys}
let mapY = (fn, t: t): t => {xs: t.xs, ys: E.A.fmap(fn, t.ys)}
let zip = ({xs, ys}: t) => Belt.Array.zip(xs, ys)
let fromArray = ((xs, ys)): t => {xs: xs, ys: ys}
let fromArrays = (xs, ys): t => {xs: xs, ys: ys}
let accumulateYs = (fn, p: t) => fromArray((p.xs, E.A.accumulate(fn, p.ys)))
let concat = (t1: t, t2: t) => {
let cxs = Array.concat(list{t1.xs, t2.xs})
let cys = Array.concat(list{t1.ys, t2.ys})
{xs: cxs, ys: cys}
}
let fromZippedArray = (pairs: array<(float, float)>): t => pairs |> Belt.Array.unzip |> fromArray
let equallyDividedXs = (t: t, newLength) => E.A.Floats.range(minX(t), maxX(t), newLength)
let toJs = (t: t) => {"xs": t.xs, "ys": t.ys}
}
module Ts = {
type t = T.ts
let minX = (t: t) => t |> E.A.fmap(T.minX) |> E.A.min |> extImp
let maxX = (t: t) => t |> E.A.fmap(T.maxX) |> E.A.max |> extImp
let equallyDividedXs = (t: t, newLength) => E.A.Floats.range(minX(t), maxX(t), newLength)
let allXs = (t: t) => t |> E.A.fmap(T.xs) |> E.A.Sorted.concatMany
}
module Pairs = {
let x = fst
let y = snd
let first = (t: T.t) => (T.minX(t), T.firstY(t))
let last = (t: T.t) => (T.maxX(t), T.lastY(t))
let getBy = (t: T.t, fn) => t |> T.zip |> E.A.getBy(_, fn)
let firstAtOrBeforeXValue = (xValue, t: T.t) => {
let zipped = T.zip(t)
let firstIndex = zipped |> Belt.Array.getIndexBy(_, ((x, _)) => x > xValue)
let previousIndex = switch firstIndex {
| None => Some(Array.length(zipped) - 1)
| Some(0) => None
| Some(n) => Some(n - 1)
}
previousIndex |> Belt.Option.flatMap(_, Belt.Array.get(zipped))
}
}
module YtoX = {
let linear = (y: float, t: T.t): float => {
let firstHigherIndex = E.A.Sorted.binarySearchFirstElementGreaterIndex(T.ys(t), y)
let foundX = switch firstHigherIndex {
| #overMax => T.maxX(t)
| #underMin => T.minX(t)
| #firstHigher(firstHigherIndex) =>
let lowerOrEqualIndex = firstHigherIndex - 1 < 0 ? 0 : firstHigherIndex - 1
let (_xs, _ys) = (T.xs(t), T.ys(t))
let needsInterpolation = _ys[lowerOrEqualIndex] != y
if needsInterpolation {
interpolate(
_ys[lowerOrEqualIndex],
_ys[firstHigherIndex],
_xs[lowerOrEqualIndex],
_xs[firstHigherIndex],
y,
)
} else {
_xs[lowerOrEqualIndex]
}
}
foundX
}
}
module XtoY = {
let stepwiseIncremental = (f, t: T.t) => Pairs.firstAtOrBeforeXValue(f, t) |> E.O.fmap(Pairs.y)
let stepwiseIfAtX = (f: float, t: T.t) =>
Pairs.getBy(t, ((x: float, _)) => x == f) |> E.O.fmap(Pairs.y)
let linear = (x: float, t: T.t): float => {
let firstHigherIndex = E.A.Sorted.binarySearchFirstElementGreaterIndex(T.xs(t), x)
let n = switch firstHigherIndex {
| #overMax => T.lastY(t)
| #underMin => T.firstY(t)
| #firstHigher(firstHigherIndex) =>
let lowerOrEqualIndex = firstHigherIndex - 1 < 0 ? 0 : firstHigherIndex - 1
let (_xs, _ys) = (T.xs(t), T.ys(t))
let needsInterpolation = _xs[lowerOrEqualIndex] != x
if needsInterpolation {
interpolate(
_xs[lowerOrEqualIndex],
_xs[firstHigherIndex],
_ys[lowerOrEqualIndex],
_ys[firstHigherIndex],
x,
)
} else {
_ys[lowerOrEqualIndex]
}
}
n
}
/* Returns a between-points-interpolating function that can be used with PointwiseCombination.combine.
Interpolation can either be stepwise (using the value on the left) or linear. Extrapolation can be `UseZero or `UseOutermostPoints. */
let continuousInterpolator = (
2022-02-15 20:47:33 +00:00
interpolation: PointSetTypes.interpolationStrategy,
extrapolation: PointSetTypes.extrapolationStrategy,
2022-01-29 22:43:08 +00:00
): interpolator =>
switch (interpolation, extrapolation) {
| (#Linear, #UseZero) =>
(t: T.t, leftIndex: int, x: float) =>
if leftIndex < 0 {
0.0
} else if leftIndex >= T.length(t) - 1 {
0.0
} else {
let x1 = t.xs[leftIndex]
let x2 = t.xs[leftIndex + 1]
let y1 = t.ys[leftIndex]
let y2 = t.ys[leftIndex + 1]
let fraction = (x -. x1) /. (x2 -. x1)
y1 *. (1. -. fraction) +. y2 *. fraction
}
| (#Linear, #UseOutermostPoints) =>
(t: T.t, leftIndex: int, x: float) =>
if leftIndex < 0 {
t.ys[0]
} else if leftIndex >= T.length(t) - 1 {
t.ys[T.length(t) - 1]
} else {
let x1 = t.xs[leftIndex]
let x2 = t.xs[leftIndex + 1]
let y1 = t.ys[leftIndex]
let y2 = t.ys[leftIndex + 1]
let fraction = (x -. x1) /. (x2 -. x1)
y1 *. (1. -. fraction) +. y2 *. fraction
}
| (#Stepwise, #UseZero) =>
2022-02-17 14:50:43 +00:00
(t: T.t, leftIndex: int, _x: float) =>
2022-01-29 22:43:08 +00:00
if leftIndex < 0 {
0.0
} else if leftIndex >= T.length(t) - 1 {
0.0
} else {
t.ys[leftIndex]
}
| (#Stepwise, #UseOutermostPoints) =>
2022-02-17 14:50:43 +00:00
(t: T.t, leftIndex: int, _x: float) =>
2022-01-29 22:43:08 +00:00
if leftIndex < 0 {
t.ys[0]
} else if leftIndex >= T.length(t) - 1 {
t.ys[T.length(t) - 1]
} else {
t.ys[leftIndex]
}
}
/* Returns a between-points-interpolating function that can be used with PointwiseCombination.combine.
For discrete distributions, the probability density between points is zero, so we just return zero here. */
let discreteInterpolator: interpolator = (t: T.t, leftIndex: int, x: float) => 0.0
}
module XsConversion = {
let _replaceWithXs = (newXs: array<float>, t: T.t): T.t => {
let newYs = Belt.Array.map(newXs, XtoY.linear(_, t))
{xs: newXs, ys: newYs}
}
let equallyDivideXByMass = (newLength: int, integral: T.t) =>
E.A.Floats.range(0.0, 1.0, newLength) |> E.A.fmap(YtoX.linear(_, integral))
let proportionEquallyOverX = (newLength: int, t: T.t): T.t =>
T.equallyDividedXs(t, newLength) |> _replaceWithXs(_, t)
let proportionByProbabilityMass = (newLength: int, integral: T.t, t: T.t): T.t =>
integral |> equallyDivideXByMass(newLength) |> _replaceWithXs(_, t) // creates a new set of xs at evenly spaced percentiles // linearly interpolates new ys for the new xs
}
module Zipped = {
type zipped = array<(float, float)>
let compareYs = ((_, y1), (_, y2)) => y1 > y2 ? 1 : 0
let compareXs = ((x1, _), (x2, _)) => x1 > x2 ? 1 : 0
let sortByY = (t: zipped) => t |> E.A.stableSortBy(_, compareYs)
let sortByX = (t: zipped) => t |> E.A.stableSortBy(_, compareXs)
let filterByX = (testFn: float => bool, t: zipped) => t |> E.A.filter(((x, _)) => testFn(x))
}
module PointwiseCombination = {
// t1Interpolator and t2Interpolator are functions from XYShape.XtoY, e.g. linearBetweenPointsExtrapolateFlat.
let combine = %raw(` // : (float => float => float, T.t, T.t, bool) => T.t
// This function combines two xyShapes by looping through both of them simultaneously.
// It always moves on to the next smallest x, whether that's in the first or second input's xs,
// and interpolates the value on the other side, thus accumulating xs and ys.
// This is written in raw JS because this can still be a bottleneck, and using refs for the i and j indices is quite painful.
function(fn, interpolator, t1, t2) {
let t1n = t1.xs.length;
let t2n = t2.xs.length;
let outX = [];
let outY = [];
let i = -1;
let j = -1;
while (i <= t1n - 1 && j <= t2n - 1) {
let x, ya, yb;
if (j == t2n - 1 && i < t1n - 1 ||
t1.xs[i+1] < t2.xs[j+1]) { // if a has to catch up to b, or if b is already done
i++;
x = t1.xs[i];
ya = t1.ys[i];
yb = interpolator(t2, j, x);
} else if (i == t1n - 1 && j < t2n - 1 ||
t1.xs[i+1] > t2.xs[j+1]) { // if b has to catch up to a, or if a is already done
j++;
x = t2.xs[j];
yb = t2.ys[j];
ya = interpolator(t1, i, x);
} else if (i < t1n - 1 && j < t2n && t1.xs[i+1] === t2.xs[j+1]) { // if they happen to be equal, move both ahead
i++;
j++;
x = t1.xs[i];
ya = t1.ys[i];
yb = t2.ys[j];
} else if (i === t1n - 1 && j === t2n - 1) {
// finished!
i = t1n;
j = t2n;
continue;
} else {
2022-03-28 19:14:39 +00:00
console.log("PointwiseCombination Error", i, j);
2022-01-29 22:43:08 +00:00
}
outX.push(x);
outY.push(fn(ya, yb));
}
return {xs: outX, ys: outY};
}
`)
let combineEvenXs = (~fn, ~xToYSelection, sampleCount, t1: T.t, t2: T.t) =>
switch (E.A.length(t1.xs), E.A.length(t2.xs)) {
| (0, 0) => T.empty
| (0, _) => t2
| (_, 0) => t1
| (_, _) =>
let allXs = Ts.equallyDividedXs([t1, t2], sampleCount)
let allYs = allXs |> E.A.fmap(x => fn(xToYSelection(x, t1), xToYSelection(x, t2)))
T.fromArrays(allXs, allYs)
}
// TODO: I'd bet this is pretty slow. Maybe it would be faster to intersperse Xs and Ys separately.
let intersperse = (t1: T.t, t2: T.t) => E.A.intersperse(T.zip(t1), T.zip(t2)) |> T.fromZippedArray
}
// I'm really not sure this part is actually what we want at this point.
module Range = {
// ((lastX, lastY), (nextX, nextY))
type zippedRange = ((float, float), (float, float))
let toT = T.fromZippedArray
let nextX = ((_, (nextX, _)): zippedRange) => nextX
let rangePointAssumingSteps = (((_, lastY), (nextX, _)): zippedRange) => (nextX, lastY)
let rangeAreaAssumingTriangles = (((lastX, lastY), (nextX, nextY)): zippedRange) =>
(nextX -. lastX) *. (lastY +. nextY) /. 2.
//Todo: figure out how to without making new array.
let rangeAreaAssumingTrapezoids = (((lastX, lastY), (nextX, nextY)): zippedRange) =>
(nextX -. lastX) *. (Js.Math.min_float(lastY, nextY) +. (lastY +. nextY) /. 2.)
let delta_y_over_delta_x = (((lastX, lastY), (nextX, nextY)): zippedRange) =>
(nextY -. lastY) /. (nextX -. lastX)
let mapYsBasedOnRanges = (fn, t) =>
Belt.Array.zip(t.xs, t.ys)
|> E.A.toRanges
|> E.R.toOption
|> E.O.fmap(r => r |> Belt.Array.map(_, r => (nextX(r), fn(r))))
// This code is messy, in part because I'm trying to make things easy on garbage collection here.
// It's using triangles instead of trapezoids right now.
let integrateWithTriangles = ({xs, ys}) => {
let length = E.A.length(xs)
let cumulativeY = Belt.Array.make(length, 0.0)
for x in 0 to E.A.length(xs) - 2 {
let _ = Belt.Array.set(
cumulativeY,
x + 1,
(xs[x + 1] -. xs[x]) *. ((ys[x] +. ys[x + 1]) /. 2.) +. cumulativeY[x], // dx // (1/2) * (avgY)
)
}
Some({xs: xs, ys: cumulativeY})
}
let derivative = mapYsBasedOnRanges(delta_y_over_delta_x)
let stepwiseToLinear = ({xs, ys}: T.t): T.t => {
// adds points at the bottom of each step.
let length = E.A.length(xs)
let newXs: array<float> = Belt.Array.makeUninitializedUnsafe(2 * length)
let newYs: array<float> = Belt.Array.makeUninitializedUnsafe(2 * length)
Belt.Array.set(newXs, 0, xs[0] -. epsilon_float) |> ignore
Belt.Array.set(newYs, 0, 0.) |> ignore
Belt.Array.set(newXs, 1, xs[0]) |> ignore
Belt.Array.set(newYs, 1, ys[0]) |> ignore
for i in 1 to E.A.length(xs) - 1 {
Belt.Array.set(newXs, i * 2, xs[i] -. epsilon_float) |> ignore
Belt.Array.set(newYs, i * 2, ys[i - 1]) |> ignore
Belt.Array.set(newXs, i * 2 + 1, xs[i]) |> ignore
Belt.Array.set(newYs, i * 2 + 1, ys[i]) |> ignore
()
}
{xs: newXs, ys: newYs}
}
// TODO: I think this isn't needed by any functions anymore.
let stepsToContinuous = t => {
// TODO: It would be nicer if this the diff didn't change the first element, and also maybe if there were a more elegant way of doing this.
let diff = T.xTotalRange(t) |> (r => r *. 0.00001)
let items = switch E.A.toRanges(Belt.Array.zip(t.xs, t.ys)) {
| Ok(items) =>
Some(
items
|> Belt.Array.map(_, rangePointAssumingSteps)
|> T.fromZippedArray
|> PointwiseCombination.intersperse(t |> T.mapX(e => e +. diff)),
)
| _ => Some(t)
}
let first = items |> E.O.fmap(T.zip) |> E.O.bind(_, E.A.get(_, 0))
switch (items, first) {
| (Some(items), Some((0.0, _))) => Some(items)
| (Some(items), Some((firstX, _))) =>
let all = E.A.append([(firstX, 0.0)], items |> T.zip)
all |> T.fromZippedArray |> E.O.some
| _ => None
}
}
}
let pointLogScore = (prediction, answer) =>
switch answer {
| 0. => 0.0
| answer => answer *. Js.Math.log2(Js.Math.abs_float(prediction /. answer))
}
let logScorePoint = (sampleCount, t1, t2) =>
PointwiseCombination.combineEvenXs(
~fn=pointLogScore,
~xToYSelection=XtoY.linear,
sampleCount,
t1,
t2,
)
|> Range.integrateWithTriangles
|> E.O.fmap(T.accumulateYs(\"+."))
|> E.O.fmap(Pairs.last)
|> E.O.fmap(Pairs.y)
module Analysis = {
let integrateContinuousShape = (
~indefiniteIntegralStepwise=(p, h1) => h1 *. p,
~indefiniteIntegralLinear=(p, a, b) => a *. p +. b *. p ** 2.0 /. 2.0,
2022-02-15 20:47:33 +00:00
t: PointSetTypes.continuousShape,
2022-01-29 22:43:08 +00:00
): float => {
let xs = t.xyShape.xs
let ys = t.xyShape.ys
E.A.reducei(xs, 0.0, (acc, _x, i) => {
let areaUnderIntegral = // TODO Take this switch statement out of the loop body
switch (t.interpolation, i) {
| (_, 0) => 0.0
| (#Stepwise, _) =>
indefiniteIntegralStepwise(xs[i], ys[i - 1]) -.
indefiniteIntegralStepwise(xs[i - 1], ys[i - 1])
| (#Linear, _) =>
let x1 = xs[i - 1]
let x2 = xs[i]
if x1 == x2 {
0.0
} else {
let h1 = ys[i - 1]
let h2 = ys[i]
let b = (h1 -. h2) /. (x1 -. x2)
let a = h1 -. b *. x1
indefiniteIntegralLinear(x2, a, b) -. indefiniteIntegralLinear(x1, a, b)
}
}
acc +. areaUnderIntegral
})
}
2022-02-15 20:47:33 +00:00
let getMeanOfSquaresContinuousShape = (t: PointSetTypes.continuousShape) => {
2022-01-29 22:43:08 +00:00
let indefiniteIntegralLinear = (p, a, b) => a *. p ** 3.0 /. 3.0 +. b *. p ** 4.0 /. 4.0
let indefiniteIntegralStepwise = (p, h1) => h1 *. p ** 3.0 /. 3.0
integrateContinuousShape(~indefiniteIntegralStepwise, ~indefiniteIntegralLinear, t)
}
let getVarianceDangerously = (t: 't, mean: 't => float, getMeanOfSquares: 't => float): float => {
let meanSquared = mean(t) ** 2.0
let meanOfSquares = getMeanOfSquares(t)
meanOfSquares -. meanSquared
}
let squareXYShape = T.mapX(x => x ** 2.0)
}