squiggle/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Discrete.res

240 lines
7.3 KiB
Plaintext
Raw Normal View History

2022-01-29 22:43:08 +00:00
open Distributions
2022-02-15 20:47:33 +00:00
type t = PointSetTypes.discreteShape
2022-01-29 22:43:08 +00:00
let make = (~integralSumCache=None, ~integralCache=None, xyShape): t => {
xyShape: xyShape,
integralSumCache: integralSumCache,
integralCache: integralCache,
}
let shapeMap = (fn, {xyShape, integralSumCache, integralCache}: t): t => {
xyShape: fn(xyShape),
integralSumCache: integralSumCache,
integralCache: integralCache,
}
let getShape = (t: t) => t.xyShape
let oShapeMap = (fn, {xyShape, integralSumCache, integralCache}: t): option<t> =>
fn(xyShape) |> E.O.fmap(make(~integralSumCache, ~integralCache))
2022-02-15 20:47:33 +00:00
let emptyIntegral: PointSetTypes.continuousShape = {
2022-01-29 22:43:08 +00:00
xyShape: {xs: [neg_infinity], ys: [0.0]},
interpolation: #Stepwise,
integralSumCache: Some(0.0),
integralCache: None,
}
2022-02-15 20:47:33 +00:00
let empty: PointSetTypes.discreteShape = {
2022-01-29 22:43:08 +00:00
xyShape: XYShape.T.empty,
integralSumCache: Some(0.0),
integralCache: Some(emptyIntegral),
}
let shapeFn = (fn, t: t) => t |> getShape |> fn
let lastY = (t: t) => t |> getShape |> XYShape.T.lastY
let combinePointwise = (
~integralSumCachesFn=(_, _) => None,
2022-04-29 00:24:13 +00:00
~fn=(a, b) => Ok(a +. b),
2022-02-15 20:47:33 +00:00
t1: PointSetTypes.discreteShape,
t2: PointSetTypes.discreteShape,
2022-04-29 01:14:03 +00:00
): result<PointSetTypes.discreteShape, 'e> => {
// let combinedIntegralSum = Common.combineIntegralSums(
// integralSumCachesFn,
// t1.integralSumCache,
// t2.integralSumCache,
// )
2022-01-29 22:43:08 +00:00
// TODO: does it ever make sense to pointwise combine the integrals here?
// It could be done for pointwise additions, but is that ever needed?
make(
XYShape.PointwiseCombination.combine(
2022-04-29 00:24:13 +00:00
fn,
2022-01-29 22:43:08 +00:00
XYShape.XtoY.discreteInterpolator,
t1.xyShape,
t2.xyShape,
2022-04-23 18:09:06 +00:00
)->E.R.toExn("Addition operation should never fail", _),
2022-04-29 01:14:03 +00:00
)->Ok
2022-01-29 22:43:08 +00:00
}
2022-04-29 01:14:03 +00:00
let reduce = (
~integralSumCachesFn=(_, _) => None,
fn: (float, float) => result<float, 'e>,
discreteShapes: array<PointSetTypes.discreteShape>,
): result<t, 'e> => {
let merge = combinePointwise(~integralSumCachesFn, ~fn)
2022-04-29 01:14:03 +00:00
discreteShapes |> E.A.R.foldM(merge, empty)
}
2022-01-29 22:43:08 +00:00
let updateIntegralSumCache = (integralSumCache, t: t): t => {
...t,
integralSumCache: integralSumCache,
}
let updateIntegralCache = (integralCache, t: t): t => {
...t,
integralCache: integralCache,
}
/* This multiples all of the data points together and creates a new discrete distribution from the results.
Data points at the same xs get added together. It may be a good idea to downsample t1 and t2 before and/or the result after. */
let combineAlgebraically = (op: Operation.convolutionOperation, t1: t, t2: t): t => {
2022-01-29 22:43:08 +00:00
let t1s = t1 |> getShape
let t2s = t2 |> getShape
let t1n = t1s |> XYShape.T.length
let t2n = t2s |> XYShape.T.length
let combinedIntegralSum = Common.combineIntegralSums(
(s1, s2) => Some(s1 *. s2),
t1.integralSumCache,
t2.integralSumCache,
)
let fn = Operation.Convolution.toFn(op)
2022-01-29 22:43:08 +00:00
let xToYMap = E.FloatFloatMap.empty()
for i in 0 to t1n - 1 {
for j in 0 to t2n - 1 {
let x = fn(t1s.xs[i], t2s.xs[j])
let cv = xToYMap |> E.FloatFloatMap.get(x) |> E.O.default(0.)
let my = t1s.ys[i] *. t2s.ys[j]
let _ = Belt.MutableMap.set(xToYMap, x, cv +. my)
}
}
let rxys = xToYMap |> E.FloatFloatMap.toArray |> XYShape.Zipped.sortByX
let combinedShape = XYShape.T.fromZippedArray(rxys)
make(~integralSumCache=combinedIntegralSum, combinedShape)
}
let mapYResult = (
~integralSumCacheFn=_ => None,
~integralCacheFn=_ => None,
~fn: float => result<float, 'e>,
t: t,
): result<t, 'e> =>
XYShape.T.mapYResult(fn, getShape(t))->E.R2.fmap(x =>
make(
~integralSumCache=t.integralSumCache |> E.O.bind(_, integralSumCacheFn),
~integralCache=t.integralCache |> E.O.bind(_, integralCacheFn),
x,
)
)
let mapY = (
~integralSumCacheFn=_ => None,
~integralCacheFn=_ => None,
~fn: float => float,
t: t,
): t =>
2022-01-29 22:43:08 +00:00
make(
~integralSumCache=t.integralSumCache |> E.O.bind(_, integralSumCacheFn),
~integralCache=t.integralCache |> E.O.bind(_, integralCacheFn),
t |> getShape |> XYShape.T.mapY(fn),
)
let scaleBy = (~scale=1.0, t: t): t => {
let scaledIntegralSumCache = t.integralSumCache |> E.O.fmap(\"*."(scale))
let scaledIntegralCache = t.integralCache |> E.O.fmap(Continuous.scaleBy(~scale))
t
|> mapY(~fn=(r: float) => r *. scale)
|> updateIntegralSumCache(scaledIntegralSumCache)
|> updateIntegralCache(scaledIntegralCache)
}
module T = Dist({
2022-02-15 20:47:33 +00:00
type t = PointSetTypes.discreteShape
type integral = PointSetTypes.continuousShape
2022-01-29 22:43:08 +00:00
let integral = t =>
switch (getShape(t) |> XYShape.T.isEmpty, t.integralCache) {
| (true, _) => emptyIntegral
| (false, Some(c)) => c
| (false, None) =>
let ts = getShape(t)
// The first xy of this integral should always be the zero, to ensure nice plotting
let firstX = ts |> XYShape.T.minX
let prependedZeroPoint: XYShape.T.t = {xs: [firstX -. epsilon_float], ys: [0.]}
let integralShape =
ts |> XYShape.T.concat(prependedZeroPoint) |> XYShape.T.accumulateYs(\"+.")
Continuous.make(~interpolation=#Stepwise, integralShape)
}
let integralEndY = (t: t) => t.integralSumCache |> E.O.default(t |> integral |> Continuous.lastY)
2022-05-02 15:55:42 +00:00
let integralEndYResult = (t: t) => t->integralEndY->Ok
2022-01-29 22:43:08 +00:00
let minX = shapeFn(XYShape.T.minX)
let maxX = shapeFn(XYShape.T.maxX)
let toDiscreteProbabilityMassFraction = _ => 1.0
let mapY = mapY
let mapYResult = mapYResult
2022-01-29 22:43:08 +00:00
let updateIntegralCache = updateIntegralCache
let toPointSetDist = (t: t): PointSetTypes.pointSetDist => Discrete(t)
2022-01-29 22:43:08 +00:00
let toContinuous = _ => None
let toDiscrete = t => Some(t)
let normalize = (t: t): t =>
t |> scaleBy(~scale=1. /. integralEndY(t)) |> updateIntegralSumCache(Some(1.0))
let downsample = (i, t: t): t => {
// It's not clear how to downsample a set of discrete points in a meaningful way.
// The best we can do is to clip off the smallest values.
let currentLength = t |> getShape |> XYShape.T.length
if i < currentLength && (i >= 1 && currentLength > 1) {
t
|> getShape
|> XYShape.T.zip
|> XYShape.Zipped.sortByY
|> Belt.Array.reverse
|> Belt.Array.slice(_, ~offset=0, ~len=i)
|> XYShape.Zipped.sortByX
|> XYShape.T.fromZippedArray
|> make
} else {
t
}
}
let truncate = (leftCutoff: option<float>, rightCutoff: option<float>, t: t): t =>
t
|> getShape
|> XYShape.T.zip
|> XYShape.Zipped.filterByX(x =>
x >= E.O.default(neg_infinity, leftCutoff) && x <= E.O.default(infinity, rightCutoff)
)
|> XYShape.T.fromZippedArray
|> make
let xToY = (f, t) =>
t
|> getShape
|> XYShape.XtoY.stepwiseIfAtX(f)
|> E.O.default(0.0)
2022-02-15 20:47:33 +00:00
|> PointSetTypes.MixedPoint.makeDiscrete
2022-01-29 22:43:08 +00:00
let integralXtoY = (f, t) => t |> integral |> Continuous.getShape |> XYShape.XtoY.linear(f)
let integralYtoX = (f, t) => t |> integral |> Continuous.getShape |> XYShape.YtoX.linear(f)
let mean = (t: t): float => {
let s = getShape(t)
E.A.reducei(s.xs, 0.0, (acc, x, i) => acc +. x *. s.ys[i])
}
2022-04-04 17:41:22 +00:00
2022-01-29 22:43:08 +00:00
let variance = (t: t): float => {
2022-04-04 17:41:22 +00:00
let getMeanOfSquares = t => t |> shapeMap(XYShape.T.square) |> mean
2022-01-29 22:43:08 +00:00
XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares)
}
2022-04-29 00:24:13 +00:00
let klDivergence = (prediction: t, answer: t) => {
combinePointwise(
~fn=PointSetDist_Scoring.KLDivergence.integrand,
prediction,
answer,
) |> E.R2.bind(integralEndYResult)
2022-04-29 00:24:13 +00:00
}
2022-04-11 06:16:29 +00:00
})