squiggle/src/distPlus/distribution/AlgebraicShapeCombination.re

type pointMassesWithMoments = {
  n: int,
  masses: array(float),
  means: array(float),
  variances: array(float),
};

/* This function takes a continuous distribution and efficiently approximates it as
   point masses that have variances associated with them.
   We estimate the means and variances from overlapping triangular distributions which we imagine are making up the
   XYShape.
   We can then use the algebra of random variables to "convolve" the point masses and their variances,
   and finally reconstruct a new distribution from them, e.g. using a Fast Gauss Transform or Raykar et al. (2007). */
let toDiscretePointMassesFromTriangulars =
    (~inverse=false, s: XYShape.T.t): pointMassesWithMoments => {
  // TODO: what if there is only one point in the distribution?
  let n = s |> XYShape.T.length;
  // first, double up the leftmost and rightmost points:
  let {xs, ys}: XYShape.T.t = s;
  let _ = Js.Array.unshift(xs[0], xs);
  let _ = Js.Array.unshift(ys[0], ys);
  let _ = Js.Array.push(xs[n - 1], xs);
  let _ = Js.Array.push(ys[n - 1], ys);
  let n = E.A.length(xs);
  // squares and neighbourly products of the xs
  let xsSq: array(float) = Belt.Array.makeUninitializedUnsafe(n);
  let xsProdN1: array(float) = Belt.Array.makeUninitializedUnsafe(n - 1);
  let xsProdN2: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2);
  for (i in 0 to n - 1) {
    let _ = Belt.Array.set(xsSq, i, xs[i] *. xs[i]);
    ();
  };
  for (i in 0 to n - 2) {
    let _ = Belt.Array.set(xsProdN1, i, xs[i] *. xs[i + 1]);
    ();
  };
  for (i in 0 to n - 3) {
    let _ = Belt.Array.set(xsProdN2, i, xs[i] *. xs[i + 2]);
    ();
  };
  // means and variances
  let masses: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2); // doesn't include the fake first and last points
  let means: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2);
  let variances: array(float) = Belt.Array.makeUninitializedUnsafe(n - 2);

  if (inverse) {
    for (i in 1 to n - 2) {
      let _ =
        Belt.Array.set(
          masses,
          i - 1,
          (xs[i + 1] -. xs[i - 1]) *. ys[i] /. 2.,
        );

      // this only works when the whole triange is either on the left or on the right of zero
      let a = xs[i - 1];
      let c = xs[i];
      let b = xs[i + 1];

      // These are the moments of the reciprocal of a triangular distribution, as symbolically integrated by Mathematica.
      // They're probably pretty close to invMean ~ 1/mean = 3/(a+b+c) and invVar. But I haven't worked out
      // the worst case error, so for now let's use these monster equations
      let inverseMean =
        2.
        *. (a *. log(a /. c) /. (a -. c) +. b *. log(c /. b) /. (b -. c))
        /. (a -. b);
      let inverseVar =
        2.
        *. (log(c /. a) /. (a -. c) +. b *. log(b /. c) /. (b -. c))
        /. (a -. b)
        -. inverseMean
        ** 2.;

      let _ = Belt.Array.set(means, i - 1, inverseMean);

      let _ = Belt.Array.set(variances, i - 1, inverseVar);
      ();
    };

    {n: n - 2, masses, means, variances};
  } else {
    for (i in 1 to n - 2) {

      // area of triangle = width * height / 2
      let _ =
        Belt.Array.set(
          masses,
          i - 1,
          (xs[i + 1] -. xs[i - 1]) *. ys[i] /. 2.,
        );

      // means of triangle = (a + b + c) / 3
      let _ =
        Belt.Array.set(means, i - 1, (xs[i - 1] +. xs[i] +. xs[i + 1]) /. 3.);

      // variance of triangle = (a^2 + b^2 + c^2 - ab - ac - bc) / 18
      let _ =
        Belt.Array.set(
          variances,
          i - 1,
          (
            xsSq[i - 1]
            +. xsSq[i]
            +. xsSq[i + 1]
            -. xsProdN1[i - 1]
            -. xsProdN1[i]
            -. xsProdN2[i - 1]
          )
          /. 18.,
        );
      ();
    };
    {n: n - 2, masses, means, variances};
  };
};

let combineShapesContinuousContinuous =
    (op: ExpressionTypes.algebraicOperation, s1: DistTypes.xyShape, s2: DistTypes.xyShape)
    : DistTypes.xyShape => {
  let t1n = s1 |> XYShape.T.length;
  let t2n = s2 |> XYShape.T.length;

  // if we add the two distributions, we should probably use normal filters.
  // if we multiply the two distributions, we should probably use lognormal filters.
  let t1m = toDiscretePointMassesFromTriangulars(s1);
  let t2m = switch (op) {
    | `Divide => toDiscretePointMassesFromTriangulars(~inverse=true, s2)
    | _ => toDiscretePointMassesFromTriangulars(~inverse=false, s2)
  };

  let combineMeansFn =
    switch (op) {
    | `Add => ((m1, m2) => m1 +. m2)
    | `Subtract => ((m1, m2) => m1 -. m2)
    | `Multiply => ((m1, m2) => m1 *. m2)
    | `Divide => ((m1, mInv2) => m1 *. mInv2)
    }; // note: here, mInv2 = mean(1 / t2) ~= 1 / mean(t2)

  // converts the variances and means of the two inputs into the variance of the output
  let combineVariancesFn =
    switch (op) {
    | `Add => ((v1, v2, _, _) => v1 +. v2)
    | `Subtract => ((v1, v2, _, _) => v1 +. v2)
    | `Multiply => (
        (v1, v2, m1, m2) => v1 *. v2 +. v1 *. m2 ** 2. +. v2 *. m1 ** 2.
      )
    | `Divide => (
        (v1, vInv2, m1, mInv2) =>
          v1 *. vInv2 +. v1 *. mInv2 ** 2. +. vInv2 *. m1 ** 2.
      )
    };

  // TODO: If operating on two positive-domain distributions, we should take that into account
  let outputMinX: ref(float) = ref(infinity);
  let outputMaxX: ref(float) = ref(neg_infinity);
  let masses: array(float) =
    Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n);
  let means: array(float) =
    Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n);
  let variances: array(float) =
    Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n);
  // then convolve the two sets of pointMassesWithMoments
  for (i in 0 to t1m.n - 1) {
    for (j in 0 to t2m.n - 1) {
      let k = i * t2m.n + j;
      let _ = Belt.Array.set(masses, k, t1m.masses[i] *. t2m.masses[j]);

      let mean = combineMeansFn(t1m.means[i], t2m.means[j]);
      let variance =
        combineVariancesFn(
          t1m.variances[i],
          t2m.variances[j],
          t1m.means[i],
          t2m.means[j],
        );
      let _ = Belt.Array.set(means, k, mean);
      let _ = Belt.Array.set(variances, k, variance);
      // update bounds
      let minX = mean -. 2. *. sqrt(variance) *. 1.644854;
      let maxX = mean +. 2. *. sqrt(variance) *. 1.644854;
      if (minX < outputMinX^) {
        outputMinX := minX;
      };
      if (maxX > outputMaxX^) {
        outputMaxX := maxX;
      };
    };
  };

  // we now want to create a set of target points. For now, let's just evenly distribute 200 points between
  // between the outputMinX and outputMaxX
  let nOut = 300;
  let outputXs: array(float) = E.A.Floats.range(outputMinX^, outputMaxX^, nOut);
  let outputYs: array(float) = Belt.Array.make(nOut, 0.0);
  // now, for each of the outputYs, accumulate from a Gaussian kernel over each input point.
  for (j in 0 to E.A.length(masses) - 1) { // go through all of the result points
    let _ = if (variances[j] > 0. && masses[j] > 0.) {
      for (i in 0 to E.A.length(outputXs) - 1) { // go through all of the target points
        let dx = outputXs[i] -. means[j];
        let contribution = masses[j] *. exp(-. (dx ** 2.) /. (2. *. variances[j])) /. (sqrt(2. *. 3.14159276 *. variances[j]));
        let _ = Belt.Array.set(outputYs, i, outputYs[i] +. contribution);
        ();
      };
      ();
    };
    ();
  };

  {xs: outputXs, ys: outputYs};
};

let toDiscretePointMassesFromDiscrete = (s: DistTypes.xyShape): pointMassesWithMoments => {
  let n = s |> XYShape.T.length;
  let {xs, ys}: XYShape.T.t = s;
  let n = E.A.length(xs);

  let masses: array(float) = Belt.Array.makeUninitializedUnsafe(n); // doesn't include the fake first and last points
  let means: array(float) = Belt.Array.makeUninitializedUnsafe(n);
  let variances: array(float) = Belt.Array.makeUninitializedUnsafe(n);

  for (i in 0 to n - 1) {
    let _ =
      Belt.Array.set(
        masses,
        i,
        ys[i]
      );

    let _ =
      Belt.Array.set(
        means,
        i,
        xs[i]
      );

    let _ =
      Belt.Array.set(
        variances,
        i,
        0.0
      );
    ();
  };

  {n, masses, means, variances};
};

let combineShapesContinuousDiscreteAdd =
    (op: ExpressionTypes.algebraicOperation, s1: DistTypes.xyShape, s2: DistTypes.xyShape)
  : DistTypes.xyShape => {
  let t1n = s1 |> XYShape.T.length;
  let t2n = s2 |> XYShape.T.length;

  // each x pair is added/subtracted
  let fn = Operation.Algebraic.toFn(op);

  let outXYShapes: array(array((float, float))) =
    Belt.Array.makeUninitializedUnsafe(t2n);

  for (j in 0 to t2n - 1) {
    // for each one of the discrete points
    // create a new distribution, as long as the original continuous one

    let dxyShape: array((float, float)) =
      Belt.Array.makeUninitializedUnsafe(t1n);
    for (i in 0 to t1n - 1) {
      let _ =
        Belt.Array.set(
          dxyShape,
          i,
          (fn(s1.xs[i], s2.xs[j]), s1.ys[i] *. s2.ys[j]),
        );
      ();
    };

    let _ = Belt.Array.set(outXYShapes, j, dxyShape);
    ();
  };

  outXYShapes
  |> E.A.fold_left(XYShape.PointwiseCombination.combineLinear((+.)), XYShape.T.empty);
};

let combineShapesContinuousDiscreteMultiply =
    (op: ExpressionTypes.algebraicOperation, s1: DistTypes.xyShape, s2: DistTypes.xyShape)
  : DistTypes.xyShape => {
  let t1n = s1 |> XYShape.T.length;
  let t2n = s2 |> XYShape.T.length;

  let t1m = toDiscretePointMassesFromTriangulars(s1);
  let t2m = toDiscretePointMassesFromDiscrete(s2);

  let combineMeansFn =
    switch (op) {
    | `Add => ((m1, m2) => m1 +. m2)
    | `Subtract => ((m1, m2) => m1 -. m2)
    | `Multiply => ((m1, m2) => m1 *. m2)
    | `Divide => ((m1, m2) => m1 /. m2)
    };

  let combineVariancesFn =
    switch (op) {
    | `Add
    | `Subtract => ((v1, v2, _, _) => v1 +. v2)
    | `Multiply
    | `Divide => (
        (v1, v2, m1, m2) => v1 *. m2 ** 2.
      )
    };

  let outputMinX: ref(float) = ref(infinity);
  let outputMaxX: ref(float) = ref(neg_infinity);
  let masses: array(float) =
    Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n);
  let means: array(float) =
    Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n);
  let variances: array(float) =
    Belt.Array.makeUninitializedUnsafe(t1m.n * t2m.n);
  // then convolve the two sets of pointMassesWithMoments
  for (i in 0 to t1m.n - 1) {
    for (j in 0 to t2m.n - 1) {
      let k = i * t2m.n + j;
      let _ = Belt.Array.set(masses, k, t1m.masses[i] *. t2m.masses[j]);

      let mean = combineMeansFn(t1m.means[i], t2m.means[j]);
      let variance =
        combineVariancesFn(
          t1m.variances[i],
          t2m.variances[j],
          t1m.means[i],
          t2m.means[j],
        );
      let _ = Belt.Array.set(means, k, mean);
      let _ = Belt.Array.set(variances, k, variance);

      // update bounds
      let minX = mean -. 2. *. sqrt(variance) *. 1.644854;
      let maxX = mean +. 2. *. sqrt(variance) *. 1.644854;
      if (minX < outputMinX^) {
        outputMinX := minX;
      };
      if (maxX > outputMaxX^) {
        outputMaxX := maxX;
      };
    };
  };


  // we now want to create a set of target points. For now, let's just evenly distribute 200 points between
  // between the outputMinX and outputMaxX
  let nOut = 300;
  let outputXs: array(float) = E.A.Floats.range(outputMinX^, outputMaxX^, nOut);
  let outputYs: array(float) = Belt.Array.make(nOut, 0.0);
  // now, for each of the outputYs, accumulate from a Gaussian kernel over each input point.
  for (j in 0 to E.A.length(masses) - 1) { // go through all of the result points
    let _ = if (variances[j] > 0. && masses[j] > 0.) {
      for (i in 0 to E.A.length(outputXs) - 1) { // go through all of the target points
        let dx = outputXs[i] -. means[j];
        let contribution = masses[j] *. exp(-. (dx ** 2.) /. (2. *. variances[j])) /. (sqrt(2. *. 3.14159276 *. variances[j]));
        let _ = Belt.Array.set(outputYs, i, outputYs[i] +. contribution);
        ();
      };
      ();
    };
    ();
  };

  {xs: outputXs, ys: outputYs};
};

let combineShapesContinuousDiscrete =
    (op: ExpressionTypes.algebraicOperation, s1: DistTypes.xyShape, s2: DistTypes.xyShape)
    : DistTypes.xyShape => {

    switch (op) {
    | `Add
    | `Subtract => combineShapesContinuousDiscreteAdd(op, s1, s2);
    | `Multiply
    | `Divide => combineShapesContinuousDiscreteMultiply(op, s1, s2);
    };

};