Compare commits

...

3 Commits

Author SHA1 Message Date
Sam Nolan
ad73ff98cc Merge branch 'develop' into metalog 2022-08-13 13:39:10 +01:00
Sam Nolan
594e6504ca Add comments about metalog distribution math 2022-07-19 17:19:30 +10:00
Sam Nolan
2adaba7d91 Add metalog 2022-07-19 17:05:15 +10:00
6 changed files with 139 additions and 25 deletions

View File

@ -31,6 +31,7 @@ describe("eval on distribution functions", () => {
testEval("mean(normal(5,2))", "Ok(5)")
testEval("mean(lognormal(1,2))", "Ok(20.085536923187668)")
testEval("mean(gamma(5,5))", "Ok(25)")
testEval("mean(metalog([1, 2]))", "Ok(1.0000000000000024)")
testEval("mean(bernoulli(0.2))", "Ok(0.2)")
testEval("mean(bernoulli(0.8))", "Ok(0.8)")
testEval("mean(logistic(5,1))", "Ok(5)")

View File

@ -254,6 +254,97 @@ module Logistic = {
let toString = ({location, scale}: t) => j`Logistic($location,$scale)`
}
module Metalog = {
type t = metalog
let make = terms =>
Js.Array.length(terms) > 1
? Ok(#Metalog({terms: terms}))
: Error("Metalog must have 2 or more terms")
// The flagship of the Metalog distribution. This is taken from:
// http://www.metalogdistributions.com/equations/unboundedmetalog.html
// keep in mind that in JS, arrays are 0 indexed, so i = k - 1
let inv = (p, t: t) => {
let logy = log(p /. (1. -. p))
E.A.Floats.sum(Js.Array.mapi((term, i) =>
if i == 0 {
term
} else if i == 1 {
term *. logy
} else if i == 2 {
term *. (p -. 0.5) *. logy
} else if i == 3 {
term *. (p -. 0.5)
} else if mod(i, 2) == 0 {
term *. (p -. 0.5) ** (Belt.Int.toFloat(i) /. 2.)
} else {
term *. (p -. 0.5) ** ((Belt.Int.toFloat(i) -. 1.) /. 2.) *. logy
}
, t.terms))
}
// The inverse quantile density function, the derivative of above. Taken from:
// http://www.metalogdistributions.com/equations/unboundedmetalog.html
// Keep in mind here that people often find the reciprocal of this value. I don't
// because it's easier to code this way and then just find the reciprocal at
// the very end
let invderiv = (y: float, t: t): float => E.A.Floats.sum(Js.Array.mapi((term, i) => {
let k = Belt.Int.toFloat(i) +. 1.
if i == 0 {
0.
} else if i == 1 {
term /. ((1. -. y) *. y)
} else if i == 2 {
term /. (1. -. y) ** 2.
} else if i == 3 {
term
} else if mod(i, 2) == 0 {
term *. ((k -. 1.) /. 2.) *. (y -. 0.5) ** ((k -. 3.) /. 2.)
} else {
term *.
((y -. 0.5) ** (k /. 2. -. 1.) /. (y *. (1. -. y)) +.
(k /. 2. -. 1.) *. (y -. 0.5) ** (k /. 2. -. 2.) *. log(y /. (1. -. y)))
}
}, t.terms))
// Simply approximates cdf values by bisection on the inv function over 50 rounds, starting at
// a guess of 0.5. This use to be newton's method to approximate the CDF. However,
// I found that this is as easy if not faster and easier to understand.
let cdf = (x: float, t: t): float => {
let guess = ref(0.5)
let bisection_rounds = 50
for i in 0 to bisection_rounds - 1 {
let correction = 0.5 ** Belt.Int.toFloat(i + 2)
if inv(guess.contents, t) < x {
guess := guess.contents +. correction
} else {
guess := guess.contents -. correction
}
}
guess.contents
}
// The pdf first gets the cdf at x, then finds the density of that percentile
// See the notes at the bottom of this:
// http://www.metalogdistributions.com/equations/unboundedmetalog.html
// to see the logic behind this
let pdf = (x: float, t: t): float => 1. /. invderiv(cdf(x, t), t)
let sample = (t: t) => inv(Jstat.Uniform.sample(0., 1.), t)
let toString = ({terms}: t) =>
j`Metalog([${Js.Array.joinWith(", ", Js.Array.map(Belt.Float.toString, terms))}])`
// The mean of a metalog is simply the total integral of the inverse function.
// Discussed here: http://www.metalogdistributions.com/moments.html
let mean = (t: t) => {
let meanSteps = 1000
let stepCountFloat = Belt.Int.toFloat(meanSteps)
let range = E.A.Floats.range(0. +. 1. /. stepCountFloat, 1. -. 1. /. stepCountFloat, meanSteps)
Ok(E.A.Floats.sum(E.A.fmap(x => inv(x, t) /. stepCountFloat, range)))
}
}
module Bernoulli = {
type t = bernoulli
let make = p =>
@ -347,6 +438,7 @@ module T = {
| #Beta(n) => Beta.pdf(x, n)
| #Float(n) => Float.pdf(x, n)
| #Bernoulli(n) => Bernoulli.pdf(x, n)
| #Metalog(n) => Metalog.pdf(x, n)
}
let cdf = (x, dist) =>
@ -362,6 +454,7 @@ module T = {
| #Beta(n) => Beta.cdf(x, n)
| #Float(n) => Float.cdf(x, n)
| #Bernoulli(n) => Bernoulli.cdf(x, n)
| #Metalog(n) => Metalog.cdf(x, n)
}
let inv = (x, dist) =>
@ -377,6 +470,7 @@ module T = {
| #Beta(n) => Beta.inv(x, n)
| #Float(n) => Float.inv(x, n)
| #Bernoulli(n) => Bernoulli.inv(x, n)
| #Metalog(n) => Metalog.inv(x, n)
}
let sample: symbolicDist => float = x =>
@ -392,6 +486,7 @@ module T = {
| #Beta(n) => Beta.sample(n)
| #Float(n) => Float.sample(n)
| #Bernoulli(n) => Bernoulli.sample(n)
| #Metalog(n) => Metalog.sample(n)
}
let doN = (n, fn) => {
@ -417,6 +512,7 @@ module T = {
| #Beta(n) => Beta.toString(n)
| #Float(n) => Float.toString(n)
| #Bernoulli(n) => Bernoulli.toString(n)
| #Metalog(n) => Metalog.toString(n)
}
let min: symbolicDist => float = x =>
@ -431,6 +527,7 @@ module T = {
| #Uniform({low}) => low
| #Bernoulli(n) => Bernoulli.min(n)
| #Beta(n) => Beta.inv(minCdfValue, n)
| #Metalog(n) => Metalog.inv(minCdfValue, n)
| #Float(n) => n
}
@ -445,6 +542,7 @@ module T = {
| #Logistic(n) => Logistic.inv(maxCdfValue, n)
| #Beta(n) => Beta.inv(maxCdfValue, n)
| #Bernoulli(n) => Bernoulli.max(n)
| #Metalog(n) => Metalog.inv(maxCdfValue, n)
| #Uniform({high}) => high
| #Float(n) => n
}
@ -461,6 +559,7 @@ module T = {
| #Uniform(n) => Uniform.mean(n)
| #Gamma(n) => Gamma.mean(n)
| #Bernoulli(n) => Bernoulli.mean(n)
| #Metalog(n) => Metalog.mean(n)
| #Float(n) => Float.mean(n)
}

View File

@ -41,6 +41,8 @@ type logistic = {
scale: float,
}
type metalog = {terms: array<float>}
type bernoulli = {p: float}
@genType
@ -56,6 +58,7 @@ type symbolicDist = [
| #Float(float)
| #Bernoulli(bernoulli)
| #Logistic(logistic)
| #Metalog(metalog)
]
type analyticalSimplificationResult = [

View File

@ -215,3 +215,28 @@ module Process = {
twoValues(~fn=Helpers.wrapSymbolic(fn), ~values)
}
}
module ArrayNumberDist = {
let make = (name, fn) => {
FnDefinition.make(
~name,
~inputs=[FRTypeArray(FRTypeNumber)],
~run=(_, inputs, _, _) =>
Prepare.ToTypedArray.numbers(inputs)
->E.R.bind(r => E.A.length(r) === 0 ? Error("List is empty") : Ok(r))
->E.R.bind(fn),
(),
)
}
let make2 = (name, fn) => {
FnDefinition.make(
~name,
~inputs=[FRTypeArray(FRTypeAny)],
~run=(_, inputs, _, _) =>
Prepare.ToTypedArray.numbers(inputs)
->E.R.bind(r => E.A.length(r) === 0 ? Error("List is empty") : Ok(r))
->E.R.bind(fn),
(),
)
}
}

View File

@ -122,6 +122,17 @@ module DistributionCreation = {
~examples=[`logistic(5, 1)`],
~definitions=[TwoArgDist.make("logistic", twoArgs(SymbolicDist.Logistic.make))],
),
fnMake(
~name="metalog",
~examples=[`metalog([1, 2, 3])`],
~definitions=[
ArrayNumberDist.make("metalog", x =>
SymbolicDist.Metalog.make(x)->E.R2.fmap(x =>
Wrappers.symbolic(x)->Wrappers.evDistribution
)
),
],
),
fnMake(
~name="to (distribution)",
~examples=[`5 to 10`, `to(5,10)`, `-5 to 5`],

View File

@ -20,31 +20,6 @@ module NumberToNumber = {
)
}
module ArrayNumberDist = {
let make = (name, fn) => {
FnDefinition.make(
~name,
~inputs=[FRTypeArray(FRTypeNumber)],
~run=(_, inputs, _, _) =>
Prepare.ToTypedArray.numbers(inputs)
->E.R.bind(r => E.A.length(r) === 0 ? Error("List is empty") : Ok(r))
->E.R.bind(fn),
(),
)
}
let make2 = (name, fn) => {
FnDefinition.make(
~name,
~inputs=[FRTypeArray(FRTypeAny)],
~run=(_, inputs, _, _) =>
Prepare.ToTypedArray.numbers(inputs)
->E.R.bind(r => E.A.length(r) === 0 ? Error("List is empty") : Ok(r))
->E.R.bind(fn),
(),
)
}
}
let library = [
Function.make(
~name="floor",