First attempt at LogScore

Ozzie Gooen 2022-04-28 20:24:13 -04:00
parent f7afbf9c39
commit ced3af5813
13 changed files with 105 additions and 40 deletions

View File

@@ -139,6 +139,10 @@ let rec run = (~env, functionCallInfo: functionCallInfo): outputType => {
Dist(dist)
}
| ToDist(Normalize) => dist->GenericDist.normalize->Dist
| ToScore(LogScore(t2)) =>
GenericDist.logScore(dist, t2, ~toPointSetFn)
->E.R2.fmap(r => Float(r))
->OutputLocal.fromResult
| ToBool(IsNormalized) => dist->GenericDist.isNormalized->Bool
| ToDist(Truncate(leftCutoff, rightCutoff)) =>
GenericDist.truncate(~toPointSetFn, ~leftCutoff, ~rightCutoff, dist, ())
@@ -227,6 +231,7 @@ module Constructors = {
let pdf = (~env, dist, f) => C.pdf(dist, f)->run(~env)->toFloatR
let normalize = (~env, dist) => C.normalize(dist)->run(~env)->toDistR
let isNormalized = (~env, dist) => C.isNormalized(dist)->run(~env)->toBoolR
let logScore = (~env, dist1, dist2) => C.logScore(dist1, dist2)->run(~env)->toFloatR
let toPointSet = (~env, dist) => C.toPointSet(dist)->run(~env)->toDistR
let toSampleSet = (~env, dist, n) => C.toSampleSet(dist, n)->run(~env)->toDistR
let truncate = (~env, dist, leftCutoff, rightCutoff) =>

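The constructor added here mirrors the existing ones (normalize, isNormalized, and so on): build the operation, run it against env, unwrap the float. A minimal calling sketch, assuming the surrounding Constructors module is in scope and that env, prediction, and answer are values already at hand (those names are illustrative, not from this diff):

let score = Constructors.logScore(~env, prediction, answer)
// score: result<float, error>, per the .resi signature in the next file
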
View File

@@ -57,6 +57,8 @@ module Constructors: {
@genType
let isNormalized: (~env: env, genericDist) => result<bool, error>
@genType
let logScore: (~env: env, genericDist, genericDist) => result<float, error>
@genType
let toPointSet: (~env: env, genericDist) => result<genericDist, error>
@genType
let toSampleSet: (~env: env, genericDist, int) => result<genericDist, error>

View File

@@ -85,9 +85,12 @@ module DistributionOperation = {
| ToString
| ToSparkline(int)
type toScore = LogScore(genericDist)
type fromDist =
| ToFloat(toFloat)
| ToDist(toDist)
| ToScore(toScore)
| ToDistCombination(direction, Operation.Algebraic.t, [#Dist(genericDist) | #Float(float)])
| ToString(toString)
| ToBool(toBool)
@@ -108,6 +111,7 @@ module DistributionOperation = {
| ToFloat(#Mean) => `mean`
| ToFloat(#Pdf(r)) => `pdf(${E.Float.toFixed(r)})`
| ToFloat(#Sample) => `sample`
| ToScore(LogScore(_)) => `logScore`
| ToDist(Normalize) => `normalize`
| ToDist(ToPointSet) => `toPointSet`
| ToDist(ToSampleSet(r)) => `toSampleSet(${E.I.toString(r)})`
@@ -142,6 +146,7 @@ module Constructors = {
let toSampleSet = (dist, r): t => FromDist(ToDist(ToSampleSet(r)), dist)
let truncate = (dist, left, right): t => FromDist(ToDist(Truncate(left, right)), dist)
let inspect = (dist): t => FromDist(ToDist(Inspect), dist)
let logScore = (dist1, dist2): t => FromDist(ToScore(LogScore(dist2)), dist1)
let toString = (dist): t => FromDist(ToString(ToString), dist)
let toSparkline = (dist, n): t => FromDist(ToString(ToSparkline(n)), dist)
let algebraicAdd = (dist1, dist2: genericDist): t => FromDist(

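Worth spelling out the encoding in this file: the helper pairs the distribution being scored with the one it is scored against, so (with illustrative names, prediction standing in for dist1 and answer for dist2) the operation value it produces is

FromDist(ToScore(LogScore(answer)), prediction)

and the stringifier case added in the same file renders any such operation as "logScore".
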
View File

@@ -59,6 +59,13 @@ let integralEndY = (t: t): float =>
let isNormalized = (t: t): bool => Js.Math.abs_float(integralEndY(t) -. 1.0) < 1e-7
let logScore = (t1, t2, ~toPointSetFn: toPointSetFn): result<float, error> => {
let pointSets = E.R.merge(toPointSetFn(t1), toPointSetFn(t2))
pointSets |> E.R2.bind(((a, b)) =>
PointSetDist.T.logScore(a, b)->E.R2.errMap(x => DistributionTypes.OperationError(x))
)
}
let toFloatOperation = (
t,
~toPointSetFn: toPointSetFn,

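The implementation above leans on E.R.merge to pair the two toPointSetFn results before scoring. E.R.merge itself is not part of this diff, but the way it is used here assumes the usual semantics (a sketch, not a quote of its definition):

// E.R.merge(Ok(a), Ok(b))  -> Ok((a, b))
// E.R.merge(Error(e), _)   -> Error(e), and likewise for the second argument

So the E.R2.bind body only runs once both distributions have point-set representations, and any Operation.Error.t coming out of PointSetDist.T.logScore is re-wrapped as DistributionTypes.OperationError.
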
View File

@@ -23,6 +23,8 @@ let toFloatOperation: (
~distToFloatOperation: Operation.distToFloatOperation,
) => result<float, error>
let logScore: (t, t, ~toPointSetFn: toPointSetFn) => result<float, error>
@genType
let toPointSet: (
t,

View File

@@ -267,6 +267,12 @@ module T = Dist({
}
let variance = (t: t): float =>
XYShape.Analysis.getVarianceDangerously(t, mean, Analysis.getMeanOfSquares)
let logScore = (base: t, reference: t) => {
combinePointwise(PointSetDist_Scoring.LogScoring.logScore, base, reference)
|> E.R.fmap(shapeMap(XYShape.T.filterYValues(Js.Float.isFinite)))
|> E.R.fmap(integralEndY)
}
})
let isNormalized = (t: t): bool => {

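One subtlety in the continuous case above: after the pointwise log ratio is taken, any non-finite y values are dropped (via the new XYShape.T.filterYValues helper added at the end of this commit) before the shape is integrated. That covers points where the reference density is zero, e.g. (my arithmetic, not from the diff):

// 0.4 /. 0.0 is infinity in JS float arithmetic, and log2(infinity) is infinity,
// so Js.Float.isFinite(...) == false and that point is filtered out of the combined shape.
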
View File

@@ -34,6 +34,7 @@ let lastY = (t: t) => t |> getShape |> XYShape.T.lastY
let combinePointwise = (
~integralSumCachesFn=(_, _) => None,
~fn=(a, b) => Ok(a +. b),
t1: PointSetTypes.discreteShape,
t2: PointSetTypes.discreteShape,
): PointSetTypes.discreteShape => {
@@ -47,9 +48,8 @@ let combinePointwise = (
// It could be done for pointwise additions, but is that ever needed?
make(
~integralSumCache=combinedIntegralSum,
XYShape.PointwiseCombination.combine(
(a, b) => Ok(a +. b),
fn,
XYShape.XtoY.discreteInterpolator,
t1.xyShape,
t2.xyShape,
@@ -221,4 +221,10 @@ module T = Dist({
let getMeanOfSquares = t => t |> shapeMap(XYShape.T.square) |> mean
XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares)
}
let logScore = (base: t, reference: t) => {
combinePointwise(~fn=PointSetDist_Scoring.LogScoring.logScore, base, reference)
|> integralEndY
|> (r => Ok(r))
}
})

View File

@@ -33,6 +33,7 @@ module type dist = {
let mean: t => float
let variance: t => float
let logScore: (t, t) => result<float, Operation.Error.t>
}
module Dist = (T: dist) => {
@@ -55,6 +56,7 @@ module Dist = (T: dist) => {
let mean = T.mean
let variance = T.variance
let integralEndY = T.integralEndY
let logScore = T.logScore
let updateIntegralCache = T.updateIntegralCache

View File

@@ -36,6 +36,43 @@ let updateIntegralCache = (integralCache, t: t): t => {
integralCache: integralCache,
}
let combinePointwise = (
~integralSumCachesFn=(_, _) => None,
~integralCachesFn=(_, _) => None,
fn: (float, float) => result<float, 'e>,
t1: t,
t2: t,
): result<t, 'e> => {
let reducedDiscrete =
[t1, t2] |> E.A.fmap(toDiscrete) |> E.A.O.concatSomes |> Discrete.reduce(~integralSumCachesFn)
let reducedContinuous =
[t1, t2]
|> E.A.fmap(toContinuous)
|> E.A.O.concatSomes
|> Continuous.reduce(~integralSumCachesFn, fn)
let combinedIntegralSum = Common.combineIntegralSums(
integralSumCachesFn,
t1.integralSumCache,
t2.integralSumCache,
)
let combinedIntegral = Common.combineIntegrals(
integralCachesFn,
t1.integralCache,
t2.integralCache,
)
reducedContinuous->E.R2.fmap(continuous =>
make(
~integralSumCache=combinedIntegralSum,
~integralCache=combinedIntegral,
~discrete=reducedDiscrete,
~continuous,
)
)
}
module T = Dist({
type t = PointSetTypes.mixedShape
type integral = PointSetTypes.continuousShape
@@ -259,6 +296,12 @@ module T = Dist({
| _ => XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares)
}
}
let logScore = (base: t, reference: t) => {
combinePointwise(PointSetDist_Scoring.LogScoring.logScore, base, reference) |> E.R.fmap(
integralEndY,
)
}
})
let combineAlgebraically = (op: Operation.convolutionOperation, t1: t, t2: t): t => {
@@ -307,40 +350,3 @@ let combineAlgebraically = (op: Operation.convolutionOperation, t1: t, t2: t): t
integralCache: None,
}
}
let combinePointwise = (
~integralSumCachesFn=(_, _) => None,
~integralCachesFn=(_, _) => None,
fn: (float, float) => result<float, 'e>,
t1: t,
t2: t,
): result<t, 'e> => {
let reducedDiscrete =
[t1, t2] |> E.A.fmap(toDiscrete) |> E.A.O.concatSomes |> Discrete.reduce(~integralSumCachesFn)
let reducedContinuous =
[t1, t2]
|> E.A.fmap(toContinuous)
|> E.A.O.concatSomes
|> Continuous.reduce(~integralSumCachesFn, fn)
let combinedIntegralSum = Common.combineIntegralSums(
integralSumCachesFn,
t1.integralSumCache,
t2.integralSumCache,
)
let combinedIntegral = Common.combineIntegrals(
integralCachesFn,
t1.integralCache,
t2.integralCache,
)
reducedContinuous->E.R2.fmap(continuous =>
make(
~integralSumCache=combinedIntegralSum,
~integralCache=combinedIntegral,
~discrete=reducedDiscrete,
~continuous,
)
)
}

View File

@@ -190,6 +190,18 @@ module T = Dist({
| Discrete(m) => Discrete.T.variance(m)
| Continuous(m) => Continuous.T.variance(m)
}
let logScore = (t1: t, t2: t) =>
switch (t1, t2) {
| (Continuous(t1), Continuous(t2)) => Continuous.T.logScore(t1, t2)
| (Discrete(t1), Discrete(t2)) => Discrete.T.logScore(t1, t2)
| (Mixed(t1), Mixed(t2)) => Mixed.T.logScore(t1, t2)
| _ => {
let t1 = toMixed(t1)
let t2 = toMixed(t2)
Mixed.T.logScore(t1, t2)
}
}
})
let pdf = (f: float, t: t) => {
@@ -239,4 +251,4 @@ let toSparkline = (t: t, bucketCount): result<string, PointSetTypes.sparklineErr
T.toContinuous(t)
->E.O2.fmap(Continuous.downsampleEquallyOverX(bucketCount))
->E.O2.toResult(PointSetTypes.CannotSparklineDiscrete)
->E.R2.fmap(r => Continuous.getShape(r).ys->Sparklines.create())
->E.R2.fmap(r => Continuous.getShape(r).ys->Sparklines.create())
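
Dispatch summary for the logScore case above: matching shape kinds go straight to the corresponding module, and any mismatch falls into the catch-all branch, where both sides are promoted with toMixed and scored by Mixed.T.logScore:

// Continuous vs Continuous -> Continuous.T.logScore
// Discrete vs Discrete     -> Discrete.T.logScore
// anything else            -> toMixed both sides, then Mixed.T.logScore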

View File

@@ -0,0 +1,7 @@
module LogScoring = {
let logFn = Js.Math.log2
let subtraction = (a, b) => Ok(a -. b)
let logScore = (a: float, b: float): result<float, Operation.Error.t> => Ok(
Js.Math.log2(Js.Math.abs_float(a /. b)),
)
}
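
This new module is the numeric core of the change: the pointwise score of a predicted density value a against a reference value b is log2(|a / b|), and the distribution-level number returned upstream is integralEndY applied to the pointwise-scored shape. A quick sanity check of the pointwise function as written (my arithmetic, not from the diff):

// 0.5 /. 0.25 == 2.0 and log2(2.0) == 1.0
PointSetDist_Scoring.LogScoring.logScore(0.5, 0.25) // Ok(1.0)
// equal densities score zero: log2(1.0) == 0.0
PointSetDist_Scoring.LogScoring.logScore(0.3, 0.3) // Ok(0.0)

Note that logFn and subtraction are defined but not yet referenced by logScore in this first pass.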

View File

@@ -80,6 +80,7 @@ module Helpers = {
dist1,
)->runGenericOperation
}
let parseNumber = (args: expressionValue): Belt.Result.t<float, string> =>
switch args {
| EvNumber(x) => Ok(x)
@@ -209,6 +210,9 @@ let dispatchToGenericOutput = (call: ExpressionValue.functionCall): option<
a,
)->Some
| ("normalize", [EvDistribution(dist)]) => Helpers.toDistFn(Normalize, dist)
| ("logScore", [EvDistribution(a), EvDistribution(b)]) => Some(
runGenericOperation(FromDist(ToScore(LogScore(b)), a)),
)
| ("isNormalized", [EvDistribution(dist)]) => Helpers.toBoolFn(IsNormalized, dist)
| ("toPointSet", [EvDistribution(dist)]) => Helpers.toDistFn(ToPointSet, dist)
| ("cdf", [EvDistribution(dist), EvNumber(float)]) => Helpers.toFloatFn(#Cdf(float), dist)

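With the branch above in place, a two-argument call whose arguments the interpreter has already evaluated to distributions arrives as a functionCall of the shape (illustrative values)

("logScore", [EvDistribution(a), EvDistribution(b)])

and is translated into FromDist(ToScore(LogScore(b)), a) for runGenericOperation, so a is the distribution being scored and b is the reference it is scored against, the same ordering as the Constructors.logScore helper.
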
View File

@@ -96,6 +96,7 @@ module T = {
let fromZippedArray = (pairs: array<(float, float)>): t => pairs |> Belt.Array.unzip |> fromArray
let equallyDividedXs = (t: t, newLength) => E.A.Floats.range(minX(t), maxX(t), newLength)
let toJs = (t: t) => {"xs": t.xs, "ys": t.ys}
let filterYValues = (fn, t: t): t => t |> zip |> E.A.filter(((_, y)) => fn(y)) |> fromZippedArray
module Validator = {
let fnName = "XYShape validate"