squiggle/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/Discrete.res

open Distributions

type t = PointSetTypes.discreteShape

let make = (~integralSumCache=None, ~integralCache=None, xyShape): t => {
  xyShape: xyShape,
  integralSumCache: integralSumCache,
  integralCache: integralCache,
}
let shapeMap = (fn, {xyShape, integralSumCache, integralCache}: t): t => {
  xyShape: fn(xyShape),
  integralSumCache: integralSumCache,
  integralCache: integralCache,
}
let getShape = (t: t) => t.xyShape
let oShapeMap = (fn, {xyShape, integralSumCache, integralCache}: t): option<t> =>
  fn(xyShape) |> E.O.fmap(make(~integralSumCache, ~integralCache))

let emptyIntegral: PointSetTypes.continuousShape = {
  xyShape: {xs: [neg_infinity], ys: [0.0]},
  interpolation: #Stepwise,
  integralSumCache: Some(0.0),
  integralCache: None,
}
let empty: PointSetTypes.discreteShape = {
  xyShape: XYShape.T.empty,
  integralSumCache: Some(0.0),
  integralCache: Some(emptyIntegral),
}

let shapeFn = (fn, t: t) => t |> getShape |> fn

let lastY = (t: t) => t |> getShape |> XYShape.T.lastY

let combinePointwise = (
  ~integralSumCachesFn=(_, _) => None,
  ~fn=(a, b) => Ok(a +. b),
  t1: PointSetTypes.discreteShape,
  t2: PointSetTypes.discreteShape,
): result<PointSetTypes.discreteShape, 'e> => {
  //  let combinedIntegralSum = Common.combineIntegralSums(
  //    integralSumCachesFn,
  //    t1.integralSumCache,
  //    t2.integralSumCache,
  //  )

  // TODO: does it ever make sense to pointwise combine the integrals here?
  // It could be done for pointwise additions, but is that ever needed?

  make(
    XYShape.PointwiseCombination.combine(
      fn,
      XYShape.XtoY.discreteInterpolator,
      t1.xyShape,
      t2.xyShape,
    )->E.R.toExn("Addition operation should never fail", _),
  )->Ok
}

let reduce = (
  ~integralSumCachesFn=(_, _) => None,
  fn: (float, float) => result<float, 'e>,
  discreteShapes: array<PointSetTypes.discreteShape>,
): result<t, 'e> => {
  let merge = combinePointwise(~integralSumCachesFn, ~fn)
  discreteShapes |> E.A.R.foldM(merge, empty)
}

let updateIntegralSumCache = (integralSumCache, t: t): t => {
  ...t,
  integralSumCache: integralSumCache,
}

let updateIntegralCache = (integralCache, t: t): t => {
  ...t,
  integralCache: integralCache,
}

/* This multiples all of the data points together and creates a new discrete distribution from the results.
 Data points at the same xs get added together. It may be a good idea to downsample t1 and t2 before and/or the result after. */
let combineAlgebraically = (op: Operation.convolutionOperation, t1: t, t2: t): t => {
  let t1s = t1 |> getShape
  let t2s = t2 |> getShape
  let t1n = t1s |> XYShape.T.length
  let t2n = t2s |> XYShape.T.length

  let combinedIntegralSum = Common.combineIntegralSums(
    (s1, s2) => Some(s1 *. s2),
    t1.integralSumCache,
    t2.integralSumCache,
  )

  let fn = Operation.Convolution.toFn(op)
  let xToYMap = E.FloatFloatMap.empty()

  for i in 0 to t1n - 1 {
    for j in 0 to t2n - 1 {
      let x = fn(t1s.xs[i], t2s.xs[j])
      let cv = xToYMap |> E.FloatFloatMap.get(x) |> E.O.default(0.)
      let my = t1s.ys[i] *. t2s.ys[j]
      let _ = Belt.MutableMap.set(xToYMap, x, cv +. my)
    }
  }

  let rxys = xToYMap |> E.FloatFloatMap.toArray |> XYShape.Zipped.sortByX

  let combinedShape = XYShape.T.fromZippedArray(rxys)

  make(~integralSumCache=combinedIntegralSum, combinedShape)
}

let mapYResult = (
  ~integralSumCacheFn=_ => None,
  ~integralCacheFn=_ => None,
  ~fn: float => result<float, 'e>,
  t: t,
): result<t, 'e> =>
  XYShape.T.mapYResult(fn, getShape(t))->E.R2.fmap(x =>
    make(
      ~integralSumCache=t.integralSumCache |> E.O.bind(_, integralSumCacheFn),
      ~integralCache=t.integralCache |> E.O.bind(_, integralCacheFn),
      x,
    )
  )

let mapY = (
  ~integralSumCacheFn=_ => None,
  ~integralCacheFn=_ => None,
  ~fn: float => float,
  t: t,
): t =>
  make(
    ~integralSumCache=t.integralSumCache |> E.O.bind(_, integralSumCacheFn),
    ~integralCache=t.integralCache |> E.O.bind(_, integralCacheFn),
    t |> getShape |> XYShape.T.mapY(fn),
  )

let scaleBy = (~scale=1.0, t: t): t => {
  let scaledIntegralSumCache = t.integralSumCache |> E.O.fmap(\"*."(scale))
  let scaledIntegralCache = t.integralCache |> E.O.fmap(Continuous.scaleBy(~scale))

  t
  |> mapY(~fn=(r: float) => r *. scale)
  |> updateIntegralSumCache(scaledIntegralSumCache)
  |> updateIntegralCache(scaledIntegralCache)
}

module T = Dist({
  type t = PointSetTypes.discreteShape
  type integral = PointSetTypes.continuousShape
  let integral = t =>
    switch (getShape(t) |> XYShape.T.isEmpty, t.integralCache) {
    | (true, _) => emptyIntegral
    | (false, Some(c)) => c
    | (false, None) =>
      let ts = getShape(t)
      // The first xy of this integral should always be the zero, to ensure nice plotting
      let firstX = ts |> XYShape.T.minX
      let prependedZeroPoint: XYShape.T.t = {xs: [firstX -. epsilon_float], ys: [0.]}
      let integralShape =
        ts |> XYShape.T.concat(prependedZeroPoint) |> XYShape.T.accumulateYs(\"+.")

      Continuous.make(~interpolation=#Stepwise, integralShape)
    }

  let integralEndY = (t: t) => t.integralSumCache |> E.O.default(t |> integral |> Continuous.lastY)
  let integralEndYResult = (t: t) => t->integralEndY->Ok
  let minX = shapeFn(XYShape.T.minX)
  let maxX = shapeFn(XYShape.T.maxX)
  let toDiscreteProbabilityMassFraction = _ => 1.0
  let mapY = mapY
  let mapYResult = mapYResult
  let updateIntegralCache = updateIntegralCache
  let toPointSetDist = (t: t): PointSetTypes.pointSetDist => Discrete(t)
  let toContinuous = _ => None
  let toDiscrete = t => Some(t)

  let normalize = (t: t): t =>
    t |> scaleBy(~scale=1. /. integralEndY(t)) |> updateIntegralSumCache(Some(1.0))

  let downsample = (i, t: t): t => {
    // It's not clear how to downsample a set of discrete points in a meaningful way.
    // The best we can do is to clip off the smallest values.
    let currentLength = t |> getShape |> XYShape.T.length

    if i < currentLength && (i >= 1 && currentLength > 1) {
      t
      |> getShape
      |> XYShape.T.zip
      |> XYShape.Zipped.sortByY
      |> Belt.Array.reverse
      |> Belt.Array.slice(_, ~offset=0, ~len=i)
      |> XYShape.Zipped.sortByX
      |> XYShape.T.fromZippedArray
      |> make
    } else {
      t
    }
  }

  let truncate = (leftCutoff: option<float>, rightCutoff: option<float>, t: t): t =>
    t
    |> getShape
    |> XYShape.T.zip
    |> XYShape.Zipped.filterByX(x =>
      x >= E.O.default(neg_infinity, leftCutoff) && x <= E.O.default(infinity, rightCutoff)
    )
    |> XYShape.T.fromZippedArray
    |> make

  let xToY = (f, t) =>
    t
    |> getShape
    |> XYShape.XtoY.stepwiseIfAtX(f)
    |> E.O.default(0.0)
    |> PointSetTypes.MixedPoint.makeDiscrete

  let integralXtoY = (f, t) => t |> integral |> Continuous.getShape |> XYShape.XtoY.linear(f)

  let integralYtoX = (f, t) => t |> integral |> Continuous.getShape |> XYShape.YtoX.linear(f)

  let mean = (t: t): float => {
    let s = getShape(t)
    E.A.reducei(s.xs, 0.0, (acc, x, i) => acc +. x *. s.ys[i])
  }

  let variance = (t: t): float => {
    let getMeanOfSquares = t => t |> shapeMap(XYShape.T.square) |> mean
    XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares)
  }

  let klDivergence = (prediction: t, answer: t) => {
    combinePointwise(
      ~fn=PointSetDist_Scoring.KLDivergence.integrand,
      prediction,
      answer,
    ) |> E.R2.bind(integralEndYResult)
  }
})
Basic convertion to gentype 2022-01-29 22:43:08 +00:00			`open Distributions`

First pass at file renaming 2022-02-15 20:47:33 +00:00			`type t = PointSetTypes.discreteShape`
Basic convertion to gentype 2022-01-29 22:43:08 +00:00
			`let make = (~integralSumCache=None, ~integralCache=None, xyShape): t => {`
			`xyShape: xyShape,`
			`integralSumCache: integralSumCache,`
			`integralCache: integralCache,`
			`}`
			`let shapeMap = (fn, {xyShape, integralSumCache, integralCache}: t): t => {`
			`xyShape: fn(xyShape),`
			`integralSumCache: integralSumCache,`
			`integralCache: integralCache,`
			`}`
			`let getShape = (t: t) => t.xyShape`
			`let oShapeMap = (fn, {xyShape, integralSumCache, integralCache}: t): option<t> =>`
			`fn(xyShape) \|> E.O.fmap(make(~integralSumCache, ~integralCache))`

First pass at file renaming 2022-02-15 20:47:33 +00:00			`let emptyIntegral: PointSetTypes.continuousShape = {`
Basic convertion to gentype 2022-01-29 22:43:08 +00:00			`xyShape: {xs: [neg_infinity], ys: [0.0]},`
			`interpolation: #Stepwise,`
			`integralSumCache: Some(0.0),`
			`integralCache: None,`
			`}`
First pass at file renaming 2022-02-15 20:47:33 +00:00			`let empty: PointSetTypes.discreteShape = {`
Basic convertion to gentype 2022-01-29 22:43:08 +00:00			`xyShape: XYShape.T.empty,`
			`integralSumCache: Some(0.0),`
			`integralCache: Some(emptyIntegral),`
			`}`

			`let shapeFn = (fn, t: t) => t \|> getShape \|> fn`

			`let lastY = (t: t) => t \|> getShape \|> XYShape.T.lastY`

			`let combinePointwise = (`
			`~integralSumCachesFn=(_, _) => None,`
First attempt at LogScore 2022-04-29 00:24:13 +00:00			`~fn=(a, b) => Ok(a +. b),`
First pass at file renaming 2022-02-15 20:47:33 +00:00			`t1: PointSetTypes.discreteShape,`
			`t2: PointSetTypes.discreteShape,`
Added scalePower and scaleLog 2022-04-29 01:14:03 +00:00			`): result<PointSetTypes.discreteShape, 'e> => {`
Added infinity error; fixed infinities in logs Value: [1e-3 to 3e-2] 2022-05-02 21:15:23 +00:00			`// let combinedIntegralSum = Common.combineIntegralSums(`
			`// integralSumCachesFn,`
			`// t1.integralSumCache,`
			`// t2.integralSumCache,`
			`// )`
Basic convertion to gentype 2022-01-29 22:43:08 +00:00
			`// TODO: does it ever make sense to pointwise combine the integrals here?`
			`// It could be done for pointwise additions, but is that ever needed?`

			`make(`
			`XYShape.PointwiseCombination.combine(`
First attempt at LogScore 2022-04-29 00:24:13 +00:00			`fn,`
Basic convertion to gentype 2022-01-29 22:43:08 +00:00			`XYShape.XtoY.discreteInterpolator,`
			`t1.xyShape,`
			`t2.xyShape,`
Remove NaN from pointwise operations 2022-04-23 18:09:06 +00:00			`)->E.R.toExn("Addition operation should never fail", _),`
Added scalePower and scaleLog 2022-04-29 01:14:03 +00:00			`)->Ok`
Basic convertion to gentype 2022-01-29 22:43:08 +00:00			`}`

Added scalePower and scaleLog 2022-04-29 01:14:03 +00:00			`let reduce = (`
			`~integralSumCachesFn=(_, _) => None,`
			`fn: (float, float) => result<float, 'e>,`
			`discreteShapes: array<PointSetTypes.discreteShape>,`
			`): result<t, 'e> => {`
Make typechecker happy Value: [1e-7 to 1e-3] 2022-05-02 15:55:13 +00:00			`let merge = combinePointwise(~integralSumCachesFn, ~fn)`
Added scalePower and scaleLog 2022-04-29 01:14:03 +00:00			`discreteShapes \|> E.A.R.foldM(merge, empty)`
			`}`
Basic convertion to gentype 2022-01-29 22:43:08 +00:00
			`let updateIntegralSumCache = (integralSumCache, t: t): t => {`
			`...t,`
			`integralSumCache: integralSumCache,`
			`}`

			`let updateIntegralCache = (integralCache, t: t): t => {`
			`...t,`
			`integralCache: integralCache,`
			`}`

			`/* This multiples all of the data points together and creates a new discrete distribution from the results.`
			`Data points at the same xs get added together. It may be a good idea to downsample t1 and t2 before and/or the result after. */`
Use a more conservative convolution policy 2022-04-22 16:43:18 +00:00			`let combineAlgebraically = (op: Operation.convolutionOperation, t1: t, t2: t): t => {`
Basic convertion to gentype 2022-01-29 22:43:08 +00:00			`let t1s = t1 \|> getShape`
			`let t2s = t2 \|> getShape`
			`let t1n = t1s \|> XYShape.T.length`
			`let t2n = t2s \|> XYShape.T.length`

			`let combinedIntegralSum = Common.combineIntegralSums(`
			`(s1, s2) => Some(s1 *. s2),`
			`t1.integralSumCache,`
			`t2.integralSumCache,`
			`)`

Use a more conservative convolution policy 2022-04-22 16:43:18 +00:00			`let fn = Operation.Convolution.toFn(op)`
Basic convertion to gentype 2022-01-29 22:43:08 +00:00			`let xToYMap = E.FloatFloatMap.empty()`

			`for i in 0 to t1n - 1 {`
			`for j in 0 to t2n - 1 {`
			`let x = fn(t1s.xs[i], t2s.xs[j])`
			`let cv = xToYMap \|> E.FloatFloatMap.get(x) \|> E.O.default(0.)`
			`let my = t1s.ys[i] *. t2s.ys[j]`
			`let _ = Belt.MutableMap.set(xToYMap, x, cv +. my)`
			`}`
			`}`

			`let rxys = xToYMap \|> E.FloatFloatMap.toArray \|> XYShape.Zipped.sortByX`

			`let combinedShape = XYShape.T.fromZippedArray(rxys)`

			`make(~integralSumCache=combinedIntegralSum, combinedShape)`
			`}`

Change NaN operations to results - Also deleting old GenericDist_Types - Also removing some string errors 2022-04-22 20:27:17 +00:00			`let mapYResult = (`
			`~integralSumCacheFn=_ => None,`
			`~integralCacheFn=_ => None,`
			`~fn: float => result<float, 'e>,`
			`t: t,`
			`): result<t, 'e> =>`
			`XYShape.T.mapYResult(fn, getShape(t))->E.R2.fmap(x =>`
			`make(`
			`~integralSumCache=t.integralSumCache \|> E.O.bind(_, integralSumCacheFn),`
			`~integralCache=t.integralCache \|> E.O.bind(_, integralCacheFn),`
			`x,`
			`)`
			`)`

			`let mapY = (`
			`~integralSumCacheFn=_ => None,`
			`~integralCacheFn=_ => None,`
			`~fn: float => float,`
			`t: t,`
			`): t =>`
Basic convertion to gentype 2022-01-29 22:43:08 +00:00			`make(`
			`~integralSumCache=t.integralSumCache \|> E.O.bind(_, integralSumCacheFn),`
			`~integralCache=t.integralCache \|> E.O.bind(_, integralCacheFn),`
			`t \|> getShape \|> XYShape.T.mapY(fn),`
			`)`

			`let scaleBy = (~scale=1.0, t: t): t => {`
			`let scaledIntegralSumCache = t.integralSumCache \|> E.O.fmap(\"*."(scale))`
			`let scaledIntegralCache = t.integralCache \|> E.O.fmap(Continuous.scaleBy(~scale))`

			`t`
			`\|> mapY(~fn=(r: float) => r *. scale)`
			`\|> updateIntegralSumCache(scaledIntegralSumCache)`
			`\|> updateIntegralCache(scaledIntegralCache)`
			`}`

			`module T = Dist({`
First pass at file renaming 2022-02-15 20:47:33 +00:00			`type t = PointSetTypes.discreteShape`
			`type integral = PointSetTypes.continuousShape`
Basic convertion to gentype 2022-01-29 22:43:08 +00:00			`let integral = t =>`
			`switch (getShape(t) \|> XYShape.T.isEmpty, t.integralCache) {`
			`\| (true, _) => emptyIntegral`
			`\| (false, Some(c)) => c`
			`\| (false, None) =>`
			`let ts = getShape(t)`
			`// The first xy of this integral should always be the zero, to ensure nice plotting`
			`let firstX = ts \|> XYShape.T.minX`
			`let prependedZeroPoint: XYShape.T.t = {xs: [firstX -. epsilon_float], ys: [0.]}`
			`let integralShape =`
			`ts \|> XYShape.T.concat(prependedZeroPoint) \|> XYShape.T.accumulateYs(\"+.")`

			`Continuous.make(~interpolation=#Stepwise, integralShape)`
			`}`

			`let integralEndY = (t: t) => t.integralSumCache \|> E.O.default(t \|> integral \|> Continuous.lastY)`
make lint happy Value: [1e-9 to 1e-5] 2022-05-02 15:55:42 +00:00			`let integralEndYResult = (t: t) => t->integralEndY->Ok`
Basic convertion to gentype 2022-01-29 22:43:08 +00:00			`let minX = shapeFn(XYShape.T.minX)`
			`let maxX = shapeFn(XYShape.T.maxX)`
			`let toDiscreteProbabilityMassFraction = _ => 1.0`
			`let mapY = mapY`
Change NaN operations to results - Also deleting old GenericDist_Types - Also removing some string errors 2022-04-22 20:27:17 +00:00			`let mapYResult = mapYResult`
Basic convertion to gentype 2022-01-29 22:43:08 +00:00			`let updateIntegralCache = updateIntegralCache`
Finish converting shape -> pointSetDist 2022-02-15 22:43:31 +00:00			`let toPointSetDist = (t: t): PointSetTypes.pointSetDist => Discrete(t)`
Basic convertion to gentype 2022-01-29 22:43:08 +00:00			`let toContinuous = _ => None`
			`let toDiscrete = t => Some(t)`

			`let normalize = (t: t): t =>`
			`t \|> scaleBy(~scale=1. /. integralEndY(t)) \|> updateIntegralSumCache(Some(1.0))`

			`let downsample = (i, t: t): t => {`
			`// It's not clear how to downsample a set of discrete points in a meaningful way.`
			`// The best we can do is to clip off the smallest values.`
			`let currentLength = t \|> getShape \|> XYShape.T.length`

			`if i < currentLength && (i >= 1 && currentLength > 1) {`
			`t`
			`\|> getShape`
			`\|> XYShape.T.zip`
			`\|> XYShape.Zipped.sortByY`
			`\|> Belt.Array.reverse`
			`\|> Belt.Array.slice(_, ~offset=0, ~len=i)`
			`\|> XYShape.Zipped.sortByX`
			`\|> XYShape.T.fromZippedArray`
			`\|> make`
			`} else {`
			`t`
			`}`
			`}`

			`let truncate = (leftCutoff: option<float>, rightCutoff: option<float>, t: t): t =>`
			`t`
			`\|> getShape`
			`\|> XYShape.T.zip`
			`\|> XYShape.Zipped.filterByX(x =>`
			`x >= E.O.default(neg_infinity, leftCutoff) && x <= E.O.default(infinity, rightCutoff)`
			`)`
			`\|> XYShape.T.fromZippedArray`
			`\|> make`

			`let xToY = (f, t) =>`
			`t`
			`\|> getShape`
			`\|> XYShape.XtoY.stepwiseIfAtX(f)`
			`\|> E.O.default(0.0)`
First pass at file renaming 2022-02-15 20:47:33 +00:00			`\|> PointSetTypes.MixedPoint.makeDiscrete`
Basic convertion to gentype 2022-01-29 22:43:08 +00:00
			`let integralXtoY = (f, t) => t \|> integral \|> Continuous.getShape \|> XYShape.XtoY.linear(f)`

			`let integralYtoX = (f, t) => t \|> integral \|> Continuous.getShape \|> XYShape.YtoX.linear(f)`

			`let mean = (t: t): float => {`
			`let s = getShape(t)`
			`E.A.reducei(s.xs, 0.0, (acc, x, i) => acc +. x *. s.ys[i])`
			`}`
Pulled out XYShape to be more separate 2022-04-04 17:41:22 +00:00
Basic convertion to gentype 2022-01-29 22:43:08 +00:00			`let variance = (t: t): float => {`
Pulled out XYShape to be more separate 2022-04-04 17:41:22 +00:00			`let getMeanOfSquares = t => t \|> shapeMap(XYShape.T.square) \|> mean`
Basic convertion to gentype 2022-01-29 22:43:08 +00:00			`XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares)`
			`}`
First attempt at LogScore 2022-04-29 00:24:13 +00:00
progress on klDivergence (still working) Value: [1e-5 to 1e-2] 2022-05-05 19:37:28 +00:00			`let klDivergence = (prediction: t, answer: t) => {`
			`combinePointwise(`
			`~fn=PointSetDist_Scoring.KLDivergence.integrand,`
			`prediction,`
			`answer,`
			`) \|> E.R2.bind(integralEndYResult)`
First attempt at LogScore 2022-04-29 00:24:13 +00:00			`}`
A playground based on reducer 2022-04-11 06:16:29 +00:00			`})`