squiggle/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Continuous.res
Quinn Dougherty 28cb6b9c88 Final cleanup, but typescript is still borked
Value: [1e-8 to 1e-6]
2022-05-06 15:09:00 -04:00

354 lines
12 KiB
Plaintext

open Distributions
type t = PointSetTypes.continuousShape
module Analysis = {
let integrate = (
~indefiniteIntegralStepwise=(p, h1) => h1 *. p,
~indefiniteIntegralLinear=(p, a, b) => a *. p +. b *. p ** 2.0 /. 2.0,
t: t,
): float => {
let xs = t.xyShape.xs
let ys = t.xyShape.ys
E.A.reducei(xs, 0.0, (acc, _x, i) => {
let areaUnderIntegral = // TODO Take this switch statement out of the loop body
switch (t.interpolation, i) {
| (_, 0) => 0.0
| (#Stepwise, _) =>
indefiniteIntegralStepwise(xs[i], ys[i - 1]) -.
indefiniteIntegralStepwise(xs[i - 1], ys[i - 1])
| (#Linear, _) =>
let x1 = xs[i - 1]
let x2 = xs[i]
if x1 == x2 {
0.0
} else {
let h1 = ys[i - 1]
let h2 = ys[i]
let b = (h1 -. h2) /. (x1 -. x2)
let a = h1 -. b *. x1
indefiniteIntegralLinear(x2, a, b) -. indefiniteIntegralLinear(x1, a, b)
}
}
acc +. areaUnderIntegral
})
}
let getMeanOfSquares = (t: t) => {
let indefiniteIntegralLinear = (p, a, b) => a *. p ** 3.0 /. 3.0 +. b *. p ** 4.0 /. 4.0
let indefiniteIntegralStepwise = (p, h1) => h1 *. p ** 3.0 /. 3.0
integrate(~indefiniteIntegralStepwise, ~indefiniteIntegralLinear, t)
}
}
let getShape = (t: t) => t.xyShape
let interpolation = (t: t) => t.interpolation
let make = (~interpolation=#Linear, ~integralSumCache=None, ~integralCache=None, xyShape): t => {
xyShape: xyShape,
interpolation: interpolation,
integralSumCache: integralSumCache,
integralCache: integralCache,
}
let shapeMap = (fn, {xyShape, interpolation, integralSumCache, integralCache}: t): t => {
xyShape: fn(xyShape),
interpolation: interpolation,
integralSumCache: integralSumCache,
integralCache: integralCache,
}
let lastY = (t: t) => t |> getShape |> XYShape.T.lastY
let oShapeMap = (fn, {xyShape, interpolation, integralSumCache, integralCache}: t): option<
PointSetTypes.continuousShape,
> => fn(xyShape) |> E.O.fmap(make(~interpolation, ~integralSumCache, ~integralCache))
let emptyIntegral: PointSetTypes.continuousShape = {
xyShape: {
xs: [neg_infinity],
ys: [0.0],
},
interpolation: #Linear,
integralSumCache: Some(0.0),
integralCache: None,
}
let empty: PointSetTypes.continuousShape = {
xyShape: XYShape.T.empty,
interpolation: #Linear,
integralSumCache: Some(0.0),
integralCache: Some(emptyIntegral),
}
let stepwiseToLinear = (t: t): t =>
make(
~integralSumCache=t.integralSumCache,
~integralCache=t.integralCache,
XYShape.Range.stepwiseToLinear(t.xyShape),
)
// Note: This results in a distribution with as many points as the sum of those in t1 and t2.
let combinePointwise = (
~combiner=XYShape.PointwiseCombination.combine,
~integralSumCachesFn=(_, _) => None,
~distributionType: PointSetTypes.distributionType=#PDF,
fn: (float, float) => result<float, Operation.Error.t>,
t1: PointSetTypes.continuousShape,
t2: PointSetTypes.continuousShape,
): result<PointSetTypes.continuousShape, 'e> => {
// If we're adding the distributions, and we know the total of each, then we
// can just sum them up. Otherwise, all bets are off.
let combinedIntegralSum = Common.combineIntegralSums(
integralSumCachesFn,
t1.integralSumCache,
t2.integralSumCache,
)
// TODO: does it ever make sense to pointwise combine the integrals here?
// It could be done for pointwise additions, but is that ever needed?
// If combining stepwise and linear, we must convert the stepwise to linear first,
// i.e. add a point at the bottom of each step
let (t1, t2) = switch (t1.interpolation, t2.interpolation) {
| (#Linear, #Linear) => (t1, t2)
| (#Stepwise, #Stepwise) => (t1, t2)
| (#Linear, #Stepwise) => (t1, stepwiseToLinear(t2))
| (#Stepwise, #Linear) => (stepwiseToLinear(t1), t2)
}
let extrapolation = switch distributionType {
| #PDF => #UseZero
| #CDF => #UseOutermostPoints
}
let interpolator = XYShape.XtoY.continuousInterpolator(t1.interpolation, extrapolation)
combiner(fn, interpolator, t1.xyShape, t2.xyShape)->E.R2.fmap(x =>
make(~integralSumCache=combinedIntegralSum, x)
)
}
let toLinear = (t: t): option<t> =>
switch t {
| {interpolation: #Stepwise, xyShape, integralSumCache, integralCache} =>
xyShape |> XYShape.Range.stepsToContinuous |> E.O.fmap(make(~integralSumCache, ~integralCache))
| {interpolation: #Linear} => Some(t)
}
let shapeFn = (fn, t: t) => t |> getShape |> fn
let updateIntegralSumCache = (integralSumCache, t: t): t => {
...t,
integralSumCache: integralSumCache,
}
let updateIntegralCache = (integralCache, t: t): t => {...t, integralCache: integralCache}
let sum = (
~integralSumCachesFn: (float, float) => option<float>=(_, _) => None,
continuousShapes,
): t =>
continuousShapes |> E.A.fold_left(
(x, y) =>
combinePointwise(~integralSumCachesFn, (a, b) => Ok(a +. b), x, y)->E.R.toExn(
"Addition should never fail",
_,
),
empty,
)
let reduce = (
~integralSumCachesFn: (float, float) => option<float>=(_, _) => None,
fn: (float, float) => result<float, 'e>,
continuousShapes,
): result<t, 'e> => {
let merge = combinePointwise(~integralSumCachesFn, fn)
continuousShapes |> E.A.R.foldM(merge, empty)
}
let mapYResult = (
~integralSumCacheFn=_ => None,
~integralCacheFn=_ => None,
~fn: float => result<float, 'e>,
t: t,
): result<t, 'e> =>
XYShape.T.mapYResult(fn, getShape(t))->E.R2.fmap(x =>
make(
~interpolation=t.interpolation,
~integralSumCache=t.integralSumCache |> E.O.bind(_, integralSumCacheFn),
~integralCache=t.integralCache |> E.O.bind(_, integralCacheFn),
x,
)
)
let mapY = (
~integralSumCacheFn=_ => None,
~integralCacheFn=_ => None,
~fn: float => float,
t: t,
): t =>
make(
~interpolation=t.interpolation,
~integralSumCache=t.integralSumCache |> E.O.bind(_, integralSumCacheFn),
~integralCache=t.integralCache |> E.O.bind(_, integralCacheFn),
t |> getShape |> XYShape.T.mapY(fn),
)
let rec scaleBy = (~scale=1.0, t: t): t => {
let scaledIntegralSumCache = E.O.bind(t.integralSumCache, v => Some(scale *. v))
let scaledIntegralCache = E.O.bind(t.integralCache, v => Some(scaleBy(~scale, v)))
t
|> mapY(~fn=(r: float) => r *. scale)
|> updateIntegralSumCache(scaledIntegralSumCache)
|> updateIntegralCache(scaledIntegralCache)
}
module T = Dist({
type t = PointSetTypes.continuousShape
type integral = PointSetTypes.continuousShape
let minX = shapeFn(XYShape.T.minX)
let maxX = shapeFn(XYShape.T.maxX)
let mapY = mapY
let mapYResult = mapYResult
let updateIntegralCache = updateIntegralCache
let toDiscreteProbabilityMassFraction = _ => 0.0
let toPointSetDist = (t: t): PointSetTypes.pointSetDist => Continuous(t)
let xToY = (f, {interpolation, xyShape}: t) =>
switch interpolation {
| #Stepwise => xyShape |> XYShape.XtoY.stepwiseIncremental(f) |> E.O.default(0.0)
| #Linear => xyShape |> XYShape.XtoY.linear(f)
} |> PointSetTypes.MixedPoint.makeContinuous
let truncate = (leftCutoff: option<float>, rightCutoff: option<float>, t: t) => {
let lc = E.O.default(neg_infinity, leftCutoff)
let rc = E.O.default(infinity, rightCutoff)
let truncatedZippedPairs =
t |> getShape |> XYShape.T.zip |> XYShape.Zipped.filterByX(x => x >= lc && x <= rc)
let leftNewPoint = leftCutoff |> E.O.dimap(lc => [(lc -. epsilon_float, 0.)], _ => [])
let rightNewPoint = rightCutoff |> E.O.dimap(rc => [(rc +. epsilon_float, 0.)], _ => [])
let truncatedZippedPairsWithNewPoints = E.A.concatMany([
leftNewPoint,
truncatedZippedPairs,
rightNewPoint,
])
let truncatedShape = XYShape.T.fromZippedArray(truncatedZippedPairsWithNewPoints)
make(truncatedShape)
}
// TODO: This should work with stepwise plots.
let integral = t =>
switch (getShape(t) |> XYShape.T.isEmpty, t.integralCache) {
| (true, _) => emptyIntegral
| (false, Some(cache)) => cache
| (false, None) =>
t
|> getShape
|> XYShape.Range.integrateWithTriangles
|> E.O.toExt("This should not have happened")
|> make
}
let downsample = (length, t): t =>
t |> shapeMap(XYShape.XsConversion.proportionByProbabilityMass(length, integral(t).xyShape))
let integralEndY = (t: t) => t.integralSumCache |> E.O.default(t |> integral |> lastY)
let integralXtoY = (f, t: t) => t |> integral |> shapeFn(XYShape.XtoY.linear(f))
let integralYtoX = (f, t: t) => t |> integral |> shapeFn(XYShape.YtoX.linear(f))
let toContinuous = t => Some(t)
let toDiscrete = _ => None
let normalize = (t: t): t =>
t
|> updateIntegralCache(Some(integral(t)))
|> scaleBy(~scale=1. /. integralEndY(t))
|> updateIntegralSumCache(Some(1.0))
let mean = (t: t) => {
let indefiniteIntegralStepwise = (p, h1) => h1 *. p ** 2.0 /. 2.0
let indefiniteIntegralLinear = (p, a, b) => a *. p ** 2.0 /. 2.0 +. b *. p ** 3.0 /. 3.0
Analysis.integrate(~indefiniteIntegralStepwise, ~indefiniteIntegralLinear, t)
}
let variance = (t: t): float =>
XYShape.Analysis.getVarianceDangerously(t, mean, Analysis.getMeanOfSquares)
let klDivergence = (prediction: t, answer: t) => {
let newShape = XYShape.PointwiseCombination.combineAlongSupportOfSecondArgument(
PointSetDist_Scoring.KLDivergence.integrand,
prediction.xyShape,
answer.xyShape,
)
let xyShapeToContinuous: XYShape.xyShape => t = xyShape => {
xyShape: xyShape,
interpolation: #Linear,
integralSumCache: None,
integralCache: None,
}
newShape->E.R2.fmap(x => x->xyShapeToContinuous->integralEndY)
}
})
let isNormalized = (t: t): bool => {
let areaUnderIntegral = t |> updateIntegralCache(Some(T.integral(t))) |> T.integralEndY
areaUnderIntegral < 1. +. MagicNumbers.Epsilon.seven &&
areaUnderIntegral > 1. -. MagicNumbers.Epsilon.seven
}
let downsampleEquallyOverX = (length, t): t =>
t |> shapeMap(XYShape.XsConversion.proportionEquallyOverX(length))
/* This simply creates multiple copies of the continuous distribution, scaled and shifted according to
each discrete data point, and then adds them all together. */
let combineAlgebraicallyWithDiscrete = (
op: Operation.convolutionOperation,
t1: t,
t2: PointSetTypes.discreteShape,
~discretePosition: AlgebraicShapeCombination.argumentPosition,
) => {
let t1s = t1 |> getShape
let t2s = t2.xyShape // TODO would like to use Discrete.getShape here, but current file structure doesn't allow for that
if XYShape.T.isEmpty(t1s) || XYShape.T.isEmpty(t2s) {
empty
} else {
let continuousAsLinear = switch t1.interpolation {
| #Linear => t1
| #Stepwise => stepwiseToLinear(t1)
}
let combinedShape = AlgebraicShapeCombination.combineShapesContinuousDiscrete(
op,
continuousAsLinear |> getShape,
t2s,
~discretePosition,
)
let combinedIntegralSum = switch op {
| #Multiply =>
Common.combineIntegralSums((a, b) => Some(a *. b), t1.integralSumCache, t2.integralSumCache)
| _ => None
}
// TODO: It could make sense to automatically transform the integrals here (shift or scale)
make(~interpolation=t1.interpolation, ~integralSumCache=combinedIntegralSum, combinedShape)
}
}
let combineAlgebraically = (op: Operation.convolutionOperation, t1: t, t2: t) => {
let s1 = t1 |> getShape
let s2 = t2 |> getShape
let t1n = s1 |> XYShape.T.length
let t2n = s2 |> XYShape.T.length
if t1n == 0 || t2n == 0 {
empty
} else {
let combinedShape = AlgebraicShapeCombination.combineShapesContinuousContinuous(op, s1, s2)
let combinedIntegralSum = Common.combineIntegralSums(
(a, b) => Some(a *. b),
t1.integralSumCache,
t2.integralSumCache,
)
// return a new Continuous distribution
make(~integralSumCache=combinedIntegralSum, combinedShape)
}
}