Merge pull request #521 from quantified-uncertainty/score-dist-on-scalar-resolution

score a dist against a scalar resolution
Ozzie Gooen 2022-05-16 18:25:14 -04:00 committed by GitHub
commit 833175bce2
15 changed files with 192 additions and 25 deletions

View File

@@ -146,7 +146,16 @@ let rec run = (~env, functionCallInfo: functionCallInfo): outputType => {
     }
   | ToDist(Normalize) => dist->GenericDist.normalize->Dist
   | ToScore(KLDivergence(t2)) =>
-    GenericDist.klDivergence(dist, t2, ~toPointSetFn)
+    GenericDist.Score.klDivergence(dist, t2, ~toPointSetFn)
+    ->E.R2.fmap(r => Float(r))
+    ->OutputLocal.fromResult
+  | ToScore(LogScore(answer, prior)) =>
+    GenericDist.Score.logScoreWithPointResolution(
+      ~prediction=dist,
+      ~answer,
+      ~prior,
+      ~toPointSetFn,
+    )
     ->E.R2.fmap(r => Float(r))
     ->OutputLocal.fromResult
   | ToBool(IsNormalized) => dist->GenericDist.isNormalized->Bool
@@ -263,6 +272,12 @@ module Constructors = {
   let normalize = (~env, dist) => C.normalize(dist)->run(~env)->toDistR
   let isNormalized = (~env, dist) => C.isNormalized(dist)->run(~env)->toBoolR
   let klDivergence = (~env, dist1, dist2) => C.klDivergence(dist1, dist2)->run(~env)->toFloatR
+  let logScoreWithPointResolution = (
+    ~env,
+    ~prediction: DistributionTypes.genericDist,
+    ~answer: float,
+    ~prior: option<DistributionTypes.genericDist>,
+  ) => C.logScoreWithPointResolution(~prediction, ~answer, ~prior)->run(~env)->toFloatR
   let toPointSet = (~env, dist) => C.toPointSet(dist)->run(~env)->toDistR
   let toSampleSet = (~env, dist, n) => C.toSampleSet(dist, n)->run(~env)->toDistR
   let fromSamples = (~env, xs) => C.fromSamples(xs)->run(~env)->toDistR
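A minimal usage sketch of the new constructor, in ReScript (the `env` and `prediction` bindings are assumed to exist; the call itself mirrors the signature added above):

```rescript
// Score `prediction` against the scalar resolution 1.5.
// With ~prior=None the result is -log(pdf(answer)); passing Some(prior)
// rescales by the prior's density at the answer.
let score = DistributionOperation.Constructors.logScoreWithPointResolution(
  ~env, // assumed: a DistributionOperation.env
  ~prediction, // assumed: a DistributionTypes.genericDist
  ~answer=1.5,
  ~prior=None,
)
```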

View File

@@ -63,6 +63,13 @@ module Constructors: {
   @genType
   let klDivergence: (~env: env, genericDist, genericDist) => result<float, error>
   @genType
+  let logScoreWithPointResolution: (
+    ~env: env,
+    ~prediction: genericDist,
+    ~answer: float,
+    ~prior: option<genericDist>,
+  ) => result<float, error>
+  @genType
   let toPointSet: (~env: env, genericDist) => result<genericDist, error>
   @genType
   let toSampleSet: (~env: env, genericDist, int) => result<genericDist, error>

View File

@@ -91,7 +91,7 @@ module DistributionOperation = {
   | ToString
   | ToSparkline(int)

-  type toScore = KLDivergence(genericDist)
+  type toScore = KLDivergence(genericDist) | LogScore(float, option<genericDist>)

   type fromDist =
   | ToFloat(toFloat)
@@ -120,6 +120,7 @@ module DistributionOperation = {
   | ToFloat(#Sample) => `sample`
   | ToFloat(#IntegralSum) => `integralSum`
   | ToScore(KLDivergence(_)) => `klDivergence`
+  | ToScore(LogScore(x, _)) => `logScore against ${E.Float.toFixed(x)}`
   | ToDist(Normalize) => `normalize`
   | ToDist(ToPointSet) => `toPointSet`
   | ToDist(ToSampleSet(r)) => `toSampleSet(${E.I.toString(r)})`
@@ -161,6 +162,10 @@ module Constructors = {
   let truncate = (dist, left, right): t => FromDist(ToDist(Truncate(left, right)), dist)
   let inspect = (dist): t => FromDist(ToDist(Inspect), dist)
   let klDivergence = (dist1, dist2): t => FromDist(ToScore(KLDivergence(dist2)), dist1)
+  let logScoreWithPointResolution = (~prediction, ~answer, ~prior): t => FromDist(
+    ToScore(LogScore(answer, prior)),
+    prediction,
+  )
   let scalePower = (dist, n): t => FromDist(ToDist(Scale(#Power, n)), dist)
   let scaleLogarithm = (dist, n): t => FromDist(ToDist(Scale(#Logarithm, n)), dist)
   let scaleLogarithmWithThreshold = (dist, n, eps): t => FromDist(

View File

@@ -59,13 +59,46 @@ let integralEndY = (t: t): float =>
 let isNormalized = (t: t): bool => Js.Math.abs_float(integralEndY(t) -. 1.0) < 1e-7

-let klDivergence = (t1, t2, ~toPointSetFn: toPointSetFn): result<float, error> => {
-  let pointSets = E.R.merge(toPointSetFn(t1), toPointSetFn(t2))
-  pointSets |> E.R2.bind(((a, b)) =>
-    PointSetDist.T.klDivergence(a, b)->E.R2.errMap(x => DistributionTypes.OperationError(x))
-  )
-}
+module Score = {
+  let klDivergence = (prediction, answer, ~toPointSetFn: toPointSetFn): result<float, error> => {
+    let pointSets = E.R.merge(toPointSetFn(prediction), toPointSetFn(answer))
+    pointSets |> E.R2.bind(((predi, ans)) =>
+      PointSetDist.T.klDivergence(predi, ans)->E.R2.errMap(x => DistributionTypes.OperationError(x))
+    )
+  }
+  let logScoreWithPointResolution = (
+    ~prediction: DistributionTypes.genericDist,
+    ~answer: float,
+    ~prior: option<DistributionTypes.genericDist>,
+    ~toPointSetFn: toPointSetFn,
+  ): result<float, error> => {
+    switch prior {
+    | Some(prior') =>
+      E.R.merge(toPointSetFn(prior'), toPointSetFn(prediction))->E.R.bind(((
+        prior'',
+        prediction'',
+      )) =>
+        PointSetDist.T.logScoreWithPointResolution(
+          ~prediction=prediction'',
+          ~answer,
+          ~prior=prior''->Some,
+        )->E.R2.errMap(x => DistributionTypes.OperationError(x))
+      )
+    | None =>
+      prediction
+      ->toPointSetFn
+      ->E.R.bind(x =>
+        PointSetDist.T.logScoreWithPointResolution(
+          ~prediction=x,
+          ~answer,
+          ~prior=None,
+        )->E.R2.errMap(x => DistributionTypes.OperationError(x))
+      )
+    }
+  }
+}

 let toFloatOperation = (
   t,
   ~toPointSetFn: toPointSetFn,
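Read end to end, this dispatch converts the prediction (and the prior, when given) to point sets and defers to `PointSetDist.T.logScoreWithPointResolution`. With $p$ the prediction's pdf, $q$ the prior's pdf, and $a$ the scalar answer, the score it ultimately computes (see `PointSetDist_Scoring` below) is

$$S(p, a) = -\log p(a) \quad\text{(no prior)}, \qquad S(p, a; q) = -\log \frac{p(a)}{q(a)} \quad\text{(with prior)}.$$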

View File

@@ -23,7 +23,15 @@ let toFloatOperation: (
   ~distToFloatOperation: DistributionTypes.DistributionOperation.toFloat,
 ) => result<float, error>

-let klDivergence: (t, t, ~toPointSetFn: toPointSetFn) => result<float, error>
+module Score: {
+  let klDivergence: (t, t, ~toPointSetFn: toPointSetFn) => result<float, error>
+  let logScoreWithPointResolution: (
+    ~prediction: t,
+    ~answer: float,
+    ~prior: option<t>,
+    ~toPointSetFn: toPointSetFn,
+  ) => result<float, error>
+}

 @genType
 let toPointSet: (

View File

@@ -277,13 +277,12 @@ module T = Dist({
       prediction.xyShape,
       answer.xyShape,
     )
-    let xyShapeToContinuous: XYShape.xyShape => t = xyShape => {
-      xyShape: xyShape,
-      interpolation: #Linear,
-      integralSumCache: None,
-      integralCache: None,
-    }
-    newShape->E.R2.fmap(x => x->xyShapeToContinuous->integralEndY)
+    newShape->E.R2.fmap(x => x->make->integralEndY)
   }
+  let logScoreWithPointResolution = (~prediction: t, ~answer: float, ~prior: option<t>) => {
+    let priorPdf = prior->E.O2.fmap((shape, x) => XYShape.XtoY.linear(x, shape.xyShape))
+    let predictionPdf = x => XYShape.XtoY.linear(x, prediction.xyShape)
+    PointSetDist_Scoring.LogScoreWithPointResolution.score(~priorPdf, ~predictionPdf, ~answer)
+  }
 })
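The pdf lookups here go through `XYShape.XtoY.linear`; assuming that helper does standard linear interpolation between the two shape points that bracket the query, the density at an answer $a$ with $x_i \le a \le x_{i+1}$ is

$$p(a) = y_i + (a - x_i)\,\frac{y_{i+1} - y_i}{x_{i+1} - x_i},$$

and presumably 0 outside the shape's support.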

View File

@@ -229,4 +229,7 @@ module T = Dist({
       answer,
     )->E.R2.fmap(integralEndY)
   }
+  let logScoreWithPointResolution = (~prediction: t, ~answer: float, ~prior: option<t>) => {
+    Error(Operation.NotYetImplemented)
+  }
 })

View File

@@ -34,6 +34,11 @@ module type dist = {
   let mean: t => float
   let variance: t => float
   let klDivergence: (t, t) => result<float, Operation.Error.t>
+  let logScoreWithPointResolution: (
+    ~prediction: t,
+    ~answer: float,
+    ~prior: option<t>,
+  ) => result<float, Operation.Error.t>
 }

 module Dist = (T: dist) => {
@@ -57,6 +62,7 @@ module Dist = (T: dist) => {
   let variance = T.variance
   let integralEndY = T.integralEndY
   let klDivergence = T.klDivergence
+  let logScoreWithPointResolution = T.logScoreWithPointResolution
   let updateIntegralCache = T.updateIntegralCache

View File

@@ -306,6 +306,9 @@ module T = Dist({
     let klContinuousPart = Continuous.T.klDivergence(prediction.continuous, answer.continuous)
     E.R.merge(klDiscretePart, klContinuousPart)->E.R2.fmap(t => fst(t) +. snd(t))
   }
+  let logScoreWithPointResolution = (~prediction: t, ~answer: float, ~prior: option<t>) => {
+    Error(Operation.NotYetImplemented)
+  }
 })

 let combineAlgebraically = (op: Operation.convolutionOperation, t1: t, t2: t): t => {

View File

@@ -196,12 +196,21 @@ module T = Dist({
   | Continuous(m) => Continuous.T.variance(m)
   }

-  let klDivergence = (t1: t, t2: t) =>
-    switch (t1, t2) {
+  let klDivergence = (prediction: t, answer: t) =>
+    switch (prediction, answer) {
     | (Continuous(t1), Continuous(t2)) => Continuous.T.klDivergence(t1, t2)
     | (Discrete(t1), Discrete(t2)) => Discrete.T.klDivergence(t1, t2)
-    | (Mixed(t1), Mixed(t2)) => Mixed.T.klDivergence(t1, t2)
-    | _ => Error(NotYetImplemented)
+    | (m1, m2) => Mixed.T.klDivergence(m1->toMixed, m2->toMixed)
     }
+  let logScoreWithPointResolution = (~prediction: t, ~answer: float, ~prior: option<t>) => {
+    switch (prior, prediction) {
+    | (Some(Continuous(t1)), Continuous(t2)) =>
+      Continuous.T.logScoreWithPointResolution(~prediction=t2, ~answer, ~prior=t1->Some)
+    | (None, Continuous(t2)) =>
+      Continuous.T.logScoreWithPointResolution(~prediction=t2, ~answer, ~prior=None)
+    | _ => Error(Operation.NotYetImplemented)
+    }
+  }
 })

View File

@@ -14,3 +14,33 @@ module KLDivergence = {
     quot < 0.0 ? Error(Operation.ComplexNumberError) : Ok(-.answerElement *. logFn(quot))
   }
 }
+module LogScoreWithPointResolution = {
+  let logFn = Js.Math.log
+  let score = (
+    ~priorPdf: option<float => float>,
+    ~predictionPdf: float => float,
+    ~answer: float,
+  ): result<float, Operation.Error.t> => {
+    let numerator = answer->predictionPdf
+    if numerator < 0.0 {
+      Operation.PdfInvalidError->Error
+    } else if numerator == 0.0 {
+      infinity->Ok
+    } else {
+      -.(
+        switch priorPdf {
+        | None => numerator->logFn
+        | Some(f) => {
+            let priorDensityOfAnswer = f(answer)
+            if priorDensityOfAnswer == 0.0 {
+              neg_infinity
+            } else {
+              (numerator /. priorDensityOfAnswer)->logFn
+            }
+          }
+        }
+      )->Ok
+    }
+  }
+}
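A quick numeric check of the branches above: with prediction density $p(a) = 0.2$ at the answer and prior density $q(a) = 0.5$,

$$-\log\frac{p(a)}{q(a)} = -\log\frac{0.2}{0.5} = \log 2.5 \approx 0.916,$$

a positive (bad) score, since the prediction puts less density on the answer than the prior did. A zero prediction density yields `infinity->Ok`, and a negative density is rejected as `PdfInvalidError`.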

View File

@@ -12,6 +12,7 @@ module Epsilon = {
 module Environment = {
   let defaultXYPointLength = 1000
   let defaultSampleCount = 10000
+  let sparklineLength = 20
 }

 module OpCost = {
View File

@@ -1,5 +1,5 @@
 module ExpressionValue = ReducerInterface_ExpressionValue
-type expressionValue = ReducerInterface_ExpressionValue.expressionValue
+type expressionValue = ExpressionValue.expressionValue

 module Helpers = {
   let arithmeticMap = r =>
@@ -162,6 +162,20 @@ module Helpers = {
       }
     }
   }
+  let klDivergenceWithPrior = (
+    prediction: DistributionTypes.genericDist,
+    answer: DistributionTypes.genericDist,
+    prior: DistributionTypes.genericDist,
+    env: DistributionOperation.env,
+  ) => {
+    let term1 = DistributionOperation.Constructors.klDivergence(~env, prediction, answer)
+    let term2 = DistributionOperation.Constructors.klDivergence(~env, prior, answer)
+    switch E.R.merge(term1, term2)->E.R2.fmap(((a, b)) => a -. b) {
+    | Ok(x) => x->DistributionOperation.Float->Some
+    | Error(_) => None
+    }
+  }
 }

 module SymbolicConstructors = {
@@ -236,7 +250,8 @@ let dispatchToGenericOutput = (
   | ("mean", [EvDistribution(dist)]) => Helpers.toFloatFn(#Mean, dist, ~env)
   | ("integralSum", [EvDistribution(dist)]) => Helpers.toFloatFn(#IntegralSum, dist, ~env)
   | ("toString", [EvDistribution(dist)]) => Helpers.toStringFn(ToString, dist, ~env)
-  | ("toSparkline", [EvDistribution(dist)]) => Helpers.toStringFn(ToSparkline(20), dist, ~env)
+  | ("toSparkline", [EvDistribution(dist)]) =>
+    Helpers.toStringFn(ToSparkline(MagicNumbers.Environment.sparklineLength), dist, ~env)
   | ("toSparkline", [EvDistribution(dist), EvNumber(n)]) =>
     Helpers.toStringFn(ToSparkline(Belt.Float.toInt(n)), dist, ~env)
   | ("exp", [EvDistribution(a)]) =>
@@ -249,8 +264,28 @@ let dispatchToGenericOutput = (
     ~env,
   )->Some
   | ("normalize", [EvDistribution(dist)]) => Helpers.toDistFn(Normalize, dist, ~env)
-  | ("klDivergence", [EvDistribution(a), EvDistribution(b)]) =>
-    Some(DistributionOperation.run(FromDist(ToScore(KLDivergence(b)), a), ~env))
+  | ("klDivergence", [EvDistribution(prediction), EvDistribution(answer)]) =>
+    Some(DistributionOperation.run(FromDist(ToScore(KLDivergence(answer)), prediction), ~env))
+  | ("klDivergence", [EvDistribution(prediction), EvDistribution(answer), EvDistribution(prior)]) =>
+    Helpers.klDivergenceWithPrior(prediction, answer, prior, env)
+  | (
+      "logScoreWithPointAnswer",
+      [EvDistribution(prediction), EvNumber(answer), EvDistribution(prior)],
+    )
+  | (
+      "logScoreWithPointAnswer",
+      [EvDistribution(prediction), EvDistribution(Symbolic(#Float(answer))), EvDistribution(prior)],
+    ) =>
+    DistributionOperation.run(
+      FromDist(ToScore(LogScore(answer, prior->Some)), prediction),
+      ~env,
+    )->Some
+  | ("logScoreWithPointAnswer", [EvDistribution(prediction), EvNumber(answer)])
+  | (
+      "logScoreWithPointAnswer",
+      [EvDistribution(prediction), EvDistribution(Symbolic(#Float(answer)))],
+    ) =>
+    DistributionOperation.run(FromDist(ToScore(LogScore(answer, None)), prediction), ~env)->Some
   | ("isNormalized", [EvDistribution(dist)]) => Helpers.toBoolFn(IsNormalized, dist, ~env)
   | ("toPointSet", [EvDistribution(dist)]) => Helpers.toDistFn(ToPointSet, dist, ~env)
   | ("scaleLog", [EvDistribution(dist)]) =>

View File

@@ -620,6 +620,19 @@ module A = {
     | Some(o) => o
     | None => []
     }
+  // Returns `None` if any element is `None`; otherwise unwraps every element
+  let rec arrSomeToSomeArr = (optionals: array<option<'a>>): option<array<'a>> => {
+    let optionals' = optionals->Belt.List.fromArray
+    switch optionals' {
+    | list{} => []->Some
+    | list{x, ...xs} =>
+      switch x {
+      | Some(x') => xs->Belt.List.toArray->arrSomeToSomeArr->Belt.Option.map(rest => Belt.Array.concat([x'], rest))
+      | None => None
+      }
+    }
+  }
+  let firstSome = x => Belt.Array.getBy(x, O.isSome)
 }

 module R = {
View File

@@ -55,7 +55,7 @@ type operationError =
   | ComplexNumberError
   | InfinityError
   | NegativeInfinityError
-  | LogicallyInconsistentPathwayError
+  | PdfInvalidError
   | NotYetImplemented // should be removed when `klDivergence` for mixed and discrete is implemented.

 @genType
@@ -69,7 +69,7 @@ module Error = {
     | ComplexNumberError => "Operation returned complex result"
     | InfinityError => "Operation returned positive infinity"
     | NegativeInfinityError => "Operation returned negative infinity"
-    | LogicallyInconsistentPathwayError => "This pathway should have been logically unreachable"
+    | PdfInvalidError => "This Pdf is invalid"
     | NotYetImplemented => "This pathway is not yet implemented"
     }
 }