337 lines
12 KiB
Plaintext
337 lines
12 KiB
Plaintext
open Distributions
|
|
|
|
type t = PointSetTypes.mixedShape
|
|
let make = (~integralSumCache=None, ~integralCache=None, ~continuous, ~discrete): t => {
|
|
continuous: continuous,
|
|
discrete: discrete,
|
|
integralSumCache: integralSumCache,
|
|
integralCache: integralCache,
|
|
}
|
|
|
|
let totalLength = (t: t): int => {
|
|
let continuousLength = t.continuous |> Continuous.getShape |> XYShape.T.length
|
|
let discreteLength = t.discrete |> Discrete.getShape |> XYShape.T.length
|
|
|
|
continuousLength + discreteLength
|
|
}
|
|
|
|
let scaleBy = (~scale=1.0, t: t): t => {
|
|
let scaledDiscrete = Discrete.scaleBy(~scale, t.discrete)
|
|
let scaledContinuous = Continuous.scaleBy(~scale, t.continuous)
|
|
let scaledIntegralCache = E.O.bind(t.integralCache, v => Some(Continuous.scaleBy(~scale, v)))
|
|
let scaledIntegralSumCache = E.O.bind(t.integralSumCache, s => Some(s *. scale))
|
|
make(
|
|
~discrete=scaledDiscrete,
|
|
~continuous=scaledContinuous,
|
|
~integralSumCache=scaledIntegralSumCache,
|
|
~integralCache=scaledIntegralCache,
|
|
)
|
|
}
|
|
|
|
let toContinuous = ({continuous}: t) => Some(continuous)
|
|
let toDiscrete = ({discrete}: t) => Some(discrete)
|
|
|
|
let updateIntegralCache = (integralCache, t: t): t => {
|
|
...t,
|
|
integralCache: integralCache,
|
|
}
|
|
|
|
module T = Dist({
|
|
type t = PointSetTypes.mixedShape
|
|
type integral = PointSetTypes.continuousShape
|
|
let minX = ({continuous, discrete}: t) =>
|
|
min(Continuous.T.minX(continuous), Discrete.T.minX(discrete))
|
|
let maxX = ({continuous, discrete}: t) =>
|
|
max(Continuous.T.maxX(continuous), Discrete.T.maxX(discrete))
|
|
let toPointSetDist = (t: t): PointSetTypes.pointSetDist => Mixed(t)
|
|
|
|
let updateIntegralCache = updateIntegralCache
|
|
|
|
let toContinuous = toContinuous
|
|
let toDiscrete = toDiscrete
|
|
|
|
let truncate = (
|
|
leftCutoff: option<float>,
|
|
rightCutoff: option<float>,
|
|
{discrete, continuous}: t,
|
|
) => {
|
|
let truncatedContinuous = Continuous.T.truncate(leftCutoff, rightCutoff, continuous)
|
|
let truncatedDiscrete = Discrete.T.truncate(leftCutoff, rightCutoff, discrete)
|
|
|
|
make(
|
|
~integralSumCache=None,
|
|
~integralCache=None,
|
|
~discrete=truncatedDiscrete,
|
|
~continuous=truncatedContinuous,
|
|
)
|
|
}
|
|
|
|
let normalize = (t: t): t => {
|
|
let continuousIntegral = Continuous.T.Integral.get(t.continuous)
|
|
let discreteIntegral = Discrete.T.Integral.get(t.discrete)
|
|
|
|
let continuous = t.continuous |> Continuous.updateIntegralCache(Some(continuousIntegral))
|
|
let discrete = t.discrete |> Discrete.updateIntegralCache(Some(discreteIntegral))
|
|
|
|
let continuousIntegralSum = Continuous.T.Integral.sum(continuous)
|
|
let discreteIntegralSum = Discrete.T.Integral.sum(discrete)
|
|
let totalIntegralSum = continuousIntegralSum +. discreteIntegralSum
|
|
|
|
let newContinuousSum = continuousIntegralSum /. totalIntegralSum
|
|
let newDiscreteSum = discreteIntegralSum /. totalIntegralSum
|
|
|
|
let normalizedContinuous =
|
|
continuous
|
|
|> Continuous.scaleBy(~scale=newContinuousSum /. continuousIntegralSum)
|
|
|> Continuous.updateIntegralSumCache(Some(newContinuousSum))
|
|
let normalizedDiscrete =
|
|
discrete
|
|
|> Discrete.scaleBy(~scale=newDiscreteSum /. discreteIntegralSum)
|
|
|> Discrete.updateIntegralSumCache(Some(newDiscreteSum))
|
|
|
|
make(
|
|
~integralSumCache=Some(1.0),
|
|
~integralCache=None,
|
|
~continuous=normalizedContinuous,
|
|
~discrete=normalizedDiscrete,
|
|
)
|
|
}
|
|
|
|
let xToY = (x, t: t) => {
|
|
// This evaluates the mixedShape at x, interpolating if necessary.
|
|
// Note that we normalize entire mixedShape first.
|
|
let {continuous, discrete}: t = normalize(t)
|
|
let c = Continuous.T.xToY(x, continuous)
|
|
let d = Discrete.T.xToY(x, discrete)
|
|
PointSetTypes.MixedPoint.add(c, d) // "add" here just combines the two values into a single MixedPoint.
|
|
}
|
|
|
|
let toDiscreteProbabilityMassFraction = ({discrete, continuous}: t) => {
|
|
let discreteIntegralSum = Discrete.T.Integral.sum(discrete)
|
|
let continuousIntegralSum = Continuous.T.Integral.sum(continuous)
|
|
let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum
|
|
|
|
discreteIntegralSum /. totalIntegralSum
|
|
}
|
|
|
|
let downsample = (count, t: t): t => {
|
|
// We will need to distribute the new xs fairly between the discrete and continuous shapes.
|
|
// The easiest way to do this is to simply go by the previous probability masses.
|
|
|
|
let discreteIntegralSum = Discrete.T.Integral.sum(t.discrete)
|
|
let continuousIntegralSum = Continuous.T.Integral.sum(t.continuous)
|
|
let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum
|
|
|
|
// TODO: figure out what to do when the totalIntegralSum is zero.
|
|
|
|
let downsampledDiscrete = Discrete.T.downsample(
|
|
int_of_float(float_of_int(count) *. (discreteIntegralSum /. totalIntegralSum)),
|
|
t.discrete,
|
|
)
|
|
|
|
let downsampledContinuous = Continuous.T.downsample(
|
|
int_of_float(float_of_int(count) *. (continuousIntegralSum /. totalIntegralSum)),
|
|
t.continuous,
|
|
)
|
|
|
|
{...t, discrete: downsampledDiscrete, continuous: downsampledContinuous}
|
|
}
|
|
|
|
let integral = (t: t) =>
|
|
switch t.integralCache {
|
|
| Some(cache) => cache
|
|
| None =>
|
|
// note: if the underlying shapes aren't normalized, then these integrals won't be either -- but that's the way it should be.
|
|
let continuousIntegral = Continuous.T.Integral.get(t.continuous)
|
|
let discreteIntegral = Continuous.stepwiseToLinear(Discrete.T.Integral.get(t.discrete))
|
|
|
|
Continuous.make(
|
|
XYShape.PointwiseCombination.addCombine(
|
|
XYShape.XtoY.continuousInterpolator(#Linear, #UseOutermostPoints),
|
|
Continuous.getShape(continuousIntegral),
|
|
Continuous.getShape(discreteIntegral),
|
|
),
|
|
)
|
|
}
|
|
|
|
let integralEndY = (t: t) => t |> integral |> Continuous.lastY
|
|
|
|
let integralXtoY = (f, t) => t |> integral |> Continuous.getShape |> XYShape.XtoY.linear(f)
|
|
|
|
let integralYtoX = (f, t) => t |> integral |> Continuous.getShape |> XYShape.YtoX.linear(f)
|
|
|
|
let createMixedFromContinuousDiscrete = (
|
|
~integralSumCacheFn=_ => None,
|
|
~integralCacheFn=_ => None,
|
|
t: t,
|
|
discrete: PointSetTypes.discreteShape,
|
|
continuous: PointSetTypes.continuousShape,
|
|
): t => {
|
|
let yMappedDiscrete: PointSetTypes.discreteShape =
|
|
discrete
|
|
|> Discrete.updateIntegralSumCache(E.O.bind(t.discrete.integralSumCache, integralSumCacheFn))
|
|
|> Discrete.updateIntegralCache(E.O.bind(t.discrete.integralCache, integralCacheFn))
|
|
|
|
let yMappedContinuous: PointSetTypes.continuousShape =
|
|
continuous
|
|
|> Continuous.updateIntegralSumCache(
|
|
E.O.bind(t.continuous.integralSumCache, integralSumCacheFn),
|
|
)
|
|
|> Continuous.updateIntegralCache(E.O.bind(t.continuous.integralCache, integralCacheFn))
|
|
|
|
{
|
|
discrete: yMappedDiscrete,
|
|
continuous: yMappedContinuous,
|
|
integralSumCache: E.O.bind(t.integralSumCache, integralSumCacheFn),
|
|
integralCache: E.O.bind(t.integralCache, integralCacheFn),
|
|
}
|
|
}
|
|
|
|
// This pipes all ys (continuous and discrete) through fn.
|
|
// If mapY is a linear operation, we might be able to update the integralSumCaches as well;
|
|
// if not, they'll be set to None.
|
|
let mapY = (
|
|
~integralSumCacheFn=_ => None,
|
|
~integralCacheFn=_ => None,
|
|
~fn: float => float,
|
|
t: t,
|
|
): t => {
|
|
let discrete = t.discrete |> Discrete.T.mapY(~fn)
|
|
let continuous = t.continuous |> Continuous.T.mapY(~fn)
|
|
createMixedFromContinuousDiscrete(
|
|
~integralCacheFn,
|
|
~integralSumCacheFn,
|
|
t,
|
|
discrete,
|
|
continuous,
|
|
)
|
|
}
|
|
|
|
let mapYResult = (
|
|
~integralSumCacheFn=_ => None,
|
|
~integralCacheFn=_ => None,
|
|
~fn: float => result<float, 'e>,
|
|
t: t,
|
|
): result<t, 'e> => {
|
|
E.R.merge(
|
|
Discrete.T.mapYResult(~fn, t.discrete),
|
|
Continuous.T.mapYResult(~fn, t.continuous),
|
|
)->E.R2.fmap(((discreteMapped, continuousMapped)) => {
|
|
createMixedFromContinuousDiscrete(
|
|
~integralCacheFn,
|
|
~integralSumCacheFn,
|
|
t,
|
|
discreteMapped,
|
|
continuousMapped,
|
|
)
|
|
})
|
|
}
|
|
|
|
let mean = ({discrete, continuous}: t): float => {
|
|
let discreteMean = Discrete.T.mean(discrete)
|
|
let continuousMean = Continuous.T.mean(continuous)
|
|
|
|
// the combined mean is the weighted sum of the two:
|
|
let discreteIntegralSum = Discrete.T.Integral.sum(discrete)
|
|
let continuousIntegralSum = Continuous.T.Integral.sum(continuous)
|
|
let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum
|
|
|
|
(discreteMean *. discreteIntegralSum +. continuousMean *. continuousIntegralSum) /.
|
|
totalIntegralSum
|
|
}
|
|
|
|
let variance = ({discrete, continuous} as t: t): float => {
|
|
// the combined mean is the weighted sum of the two:
|
|
let discreteIntegralSum = Discrete.T.Integral.sum(discrete)
|
|
let continuousIntegralSum = Continuous.T.Integral.sum(continuous)
|
|
let totalIntegralSum = discreteIntegralSum +. continuousIntegralSum
|
|
|
|
let getMeanOfSquares = ({discrete, continuous}: t) => {
|
|
let discreteMean = discrete |> Discrete.shapeMap(XYShape.T.square) |> Discrete.T.mean
|
|
let continuousMean = continuous |> Continuous.Analysis.getMeanOfSquares
|
|
(discreteMean *. discreteIntegralSum +. continuousMean *. continuousIntegralSum) /.
|
|
totalIntegralSum
|
|
}
|
|
|
|
switch discreteIntegralSum /. totalIntegralSum {
|
|
| 1.0 => Discrete.T.variance(discrete)
|
|
| 0.0 => Continuous.T.variance(continuous)
|
|
| _ => XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares)
|
|
}
|
|
}
|
|
})
|
|
|
|
let combineAlgebraically = (op: Operation.convolutionOperation, t1: t, t2: t): t => {
|
|
// Discrete convolution can cause a huge increase in the number of samples,
|
|
// so we'll first downsample.
|
|
|
|
// An alternative (to be explored in the future) may be to first perform the full convolution and then to downsample the result;
|
|
// to use non-uniform fast Fourier transforms (for addition only), add web workers or gpu.js, etc. ...
|
|
|
|
// we have to figure out where to downsample, and how to effectively
|
|
//let downsampleIfTooLarge = (t: t) => {
|
|
// let sqtl = sqrt(float_of_int(totalLength(t)));
|
|
// sqtl > 10 ? T.downsample(int_of_float(sqtl), t) : t;
|
|
//};
|
|
|
|
// continuous (*) continuous => continuous, but also
|
|
// discrete (*) continuous => continuous (and vice versa). We have to take care of all combos and then combine them:
|
|
let ccConvResult = Continuous.combineAlgebraically(op, t1.continuous, t2.continuous)
|
|
let dcConvResult = Continuous.combineAlgebraicallyWithDiscrete(op, t2.continuous, t1.discrete)
|
|
let cdConvResult = Continuous.combineAlgebraicallyWithDiscrete(op, t1.continuous, t2.discrete)
|
|
let continuousConvResult = Continuous.sum([ccConvResult, dcConvResult, cdConvResult])
|
|
|
|
// ... finally, discrete (*) discrete => discrete, obviously:
|
|
let discreteConvResult = Discrete.combineAlgebraically(op, t1.discrete, t2.discrete)
|
|
|
|
let combinedIntegralSum = Common.combineIntegralSums(
|
|
(a, b) => Some(a *. b),
|
|
t1.integralSumCache,
|
|
t2.integralSumCache,
|
|
)
|
|
|
|
{
|
|
discrete: discreteConvResult,
|
|
continuous: continuousConvResult,
|
|
integralSumCache: combinedIntegralSum,
|
|
integralCache: None,
|
|
}
|
|
}
|
|
|
|
let combinePointwise = (
|
|
~integralSumCachesFn=(_, _) => None,
|
|
~integralCachesFn=(_, _) => None,
|
|
fn: (float, float) => result<float, 'e>,
|
|
t1: t,
|
|
t2: t,
|
|
): result<t, 'e> => {
|
|
let reducedDiscrete =
|
|
[t1, t2] |> E.A.fmap(toDiscrete) |> E.A.O.concatSomes |> Discrete.reduce(~integralSumCachesFn)
|
|
|
|
let reducedContinuous =
|
|
[t1, t2]
|
|
|> E.A.fmap(toContinuous)
|
|
|> E.A.O.concatSomes
|
|
|> Continuous.reduce(~integralSumCachesFn, fn)
|
|
|
|
let combinedIntegralSum = Common.combineIntegralSums(
|
|
integralSumCachesFn,
|
|
t1.integralSumCache,
|
|
t2.integralSumCache,
|
|
)
|
|
|
|
let combinedIntegral = Common.combineIntegrals(
|
|
integralCachesFn,
|
|
t1.integralCache,
|
|
t2.integralCache,
|
|
)
|
|
reducedContinuous->E.R2.fmap(continuous =>
|
|
make(
|
|
~integralSumCache=combinedIntegralSum,
|
|
~integralCache=combinedIntegral,
|
|
~discrete=reducedDiscrete,
|
|
~continuous,
|
|
)
|
|
)
|
|
}
|