From ec9c67f090a746862a91d46a139cf6c5894450de Mon Sep 17 00:00:00 2001 From: NunoSempere Date: Thu, 14 Apr 2022 16:03:54 -0400 Subject: [PATCH 1/7] feat: Audit SymbolicDist.res - Fix buggy lognormal multiplication code - Add precision to 90% confidence intervals code - Simplified lognormal code - Added sources for many of the manipulations --- .../SymbolicDist/SymbolicDist.res | 26 +++++++++++-------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res b/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res index 69abc726..d1d99534 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res +++ b/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res @@ -11,7 +11,7 @@ module Normal = { let from90PercentCI = (low, high) => { let mean = E.A.Floats.mean([low, high]) - let stdev = (high -. low) /. (2. *. 1.644854) + let stdev = (high -. low) /. (2. *. 1.6448536269514722) #Normal({mean: mean, stdev: stdev}) } let inv = (p, t: t) => Jstat.Normal.inv(p, t.mean, t.stdev) @@ -19,14 +19,14 @@ module Normal = { let mean = (t: t) => Ok(Jstat.Normal.mean(t.mean, t.stdev)) let toString = ({mean, stdev}: t) => j`Normal($mean,$stdev)` - let add = (n1: t, n2: t) => { + let add = (n1: t, n2: t) => { let mean = n1.mean +. n2.mean - let stdev = sqrt(n1.stdev ** 2. +. n2.stdev ** 2.) + let stdev = Js.Math.sqrt(n1.stdev ** 2. +. n2.stdev ** 2.) #Normal({mean: mean, stdev: stdev}) } let subtract = (n1: t, n2: t) => { let mean = n1.mean -. n2.mean - let stdev = sqrt(n1.stdev ** 2. +. n2.stdev ** 2.) + let stdev = Js.Math.sqrt(n1.stdev ** 2. +. n2.stdev ** 2.) #Normal({mean: mean, stdev: stdev}) } @@ -115,19 +115,22 @@ module Lognormal = { let mean = (t: t) => Ok(Jstat.Lognormal.mean(t.mu, t.sigma)) let sample = (t: t) => Jstat.Lognormal.sample(t.mu, t.sigma) let toString = ({mu, sigma}: t) => j`Lognormal($mu,$sigma)` + let from90PercentCI = (low, high) => { let logLow = Js.Math.log(low) let logHigh = Js.Math.log(high) let mu = E.A.Floats.mean([logLow, logHigh]) - let sigma = (logHigh -. logLow) /. (2.0 *. 1.645) + let sigma = (logHigh -. logLow) /. (2.0 *. 1.6448536269514722) #Lognormal({mu: mu, sigma: sigma}) } let fromMeanAndStdev = (mean, stdev) => { + // https://math.stackexchange.com/questions/2501783/parameters-of-a-lognormal-distribution + // https://wikiless.org/wiki/Log-normal_distribution?lang=en#Generation_and_parameters if stdev > 0.0 { - let variance = Js.Math.pow_float(~base=stdev, ~exp=2.0) - let meanSquared = Js.Math.pow_float(~base=mean, ~exp=2.0) - let mu = Js.Math.log(mean) -. 0.5 *. Js.Math.log(variance /. meanSquared +. 1.0) - let sigma = Js.Math.pow_float(~base=Js.Math.log(variance /. meanSquared +. 1.0), ~exp=0.5) + let variance = stdev ** 2. + let meanSquared = mean ** 2. + let mu = 2*Js.Math.log(mean) -. 0.5 *. Js.Math.log(variance +. meanSquared) + let sigma = Js.Math.sqrt(Js.Math.log((variance /. meanSquared) +. 1) ) Ok(#Lognormal({mu: mu, sigma: sigma})) } else { Error("Lognormal standard deviation must be larger than 0") @@ -135,10 +138,11 @@ module Lognormal = { } let multiply = (l1, l2) => { + // https://wikiless.org/wiki/Log-normal_distribution?lang=en#Multiplication_and_division_of_independent,_log-normal_random_variables let mu = l1.mu +. l2.mu - let sigma = l1.sigma +. l2.sigma + let sigma = Math.sqrt(l1.sigma ** 2. +. l2.sigma ** 2.) #Lognormal({mu: mu, sigma: sigma}) - } + } let divide = (l1, l2) => { let mu = l1.mu -. l2.mu // We believe the ratiands will have covariance zero. From 69148bb3502fe85dfee8c836c3dbfb1e2aa7aa4b Mon Sep 17 00:00:00 2001 From: NunoSempere Date: Thu, 14 Apr 2022 16:17:59 -0400 Subject: [PATCH 2/7] fix: Rescript bugs --- .../rescript/Distributions/SymbolicDist/SymbolicDist.res | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res b/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res index d1d99534..e3388d00 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res +++ b/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res @@ -129,8 +129,8 @@ module Lognormal = { if stdev > 0.0 { let variance = stdev ** 2. let meanSquared = mean ** 2. - let mu = 2*Js.Math.log(mean) -. 0.5 *. Js.Math.log(variance +. meanSquared) - let sigma = Js.Math.sqrt(Js.Math.log((variance /. meanSquared) +. 1) ) + let mu = 2. *. Js.Math.log(mean) -. 0.5 *. Js.Math.log(variance +. meanSquared) + let sigma = Js.Math.sqrt(Js.Math.log((variance /. meanSquared) +. 1.) ) Ok(#Lognormal({mu: mu, sigma: sigma})) } else { Error("Lognormal standard deviation must be larger than 0") @@ -140,7 +140,7 @@ module Lognormal = { let multiply = (l1, l2) => { // https://wikiless.org/wiki/Log-normal_distribution?lang=en#Multiplication_and_division_of_independent,_log-normal_random_variables let mu = l1.mu +. l2.mu - let sigma = Math.sqrt(l1.sigma ** 2. +. l2.sigma ** 2.) + let sigma = Js.Math.sqrt(l1.sigma ** 2. +. l2.sigma ** 2.) // m #Lognormal({mu: mu, sigma: sigma}) } let divide = (l1, l2) => { From e5655dc2d12736d9d529252006e7ac81f629bc60 Mon Sep 17 00:00:00 2001 From: NunoSempere Date: Thu, 14 Apr 2022 16:20:23 -0400 Subject: [PATCH 3/7] fix: formatting --- .../rescript/Distributions/SymbolicDist/SymbolicDist.res | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res b/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res index e3388d00..28119e57 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res +++ b/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res @@ -19,7 +19,7 @@ module Normal = { let mean = (t: t) => Ok(Jstat.Normal.mean(t.mean, t.stdev)) let toString = ({mean, stdev}: t) => j`Normal($mean,$stdev)` - let add = (n1: t, n2: t) => { + let add = (n1: t, n2: t) => { let mean = n1.mean +. n2.mean let stdev = Js.Math.sqrt(n1.stdev ** 2. +. n2.stdev ** 2.) #Normal({mean: mean, stdev: stdev}) @@ -115,7 +115,7 @@ module Lognormal = { let mean = (t: t) => Ok(Jstat.Lognormal.mean(t.mu, t.sigma)) let sample = (t: t) => Jstat.Lognormal.sample(t.mu, t.sigma) let toString = ({mu, sigma}: t) => j`Lognormal($mu,$sigma)` - + let from90PercentCI = (low, high) => { let logLow = Js.Math.log(low) let logHigh = Js.Math.log(high) @@ -130,7 +130,7 @@ module Lognormal = { let variance = stdev ** 2. let meanSquared = mean ** 2. let mu = 2. *. Js.Math.log(mean) -. 0.5 *. Js.Math.log(variance +. meanSquared) - let sigma = Js.Math.sqrt(Js.Math.log((variance /. meanSquared) +. 1.) ) + let sigma = Js.Math.sqrt(Js.Math.log(variance /. meanSquared +. 1.)) Ok(#Lognormal({mu: mu, sigma: sigma})) } else { Error("Lognormal standard deviation must be larger than 0") @@ -142,7 +142,7 @@ module Lognormal = { let mu = l1.mu +. l2.mu let sigma = Js.Math.sqrt(l1.sigma ** 2. +. l2.sigma ** 2.) // m #Lognormal({mu: mu, sigma: sigma}) - } + } let divide = (l1, l2) => { let mu = l1.mu -. l2.mu // We believe the ratiands will have covariance zero. From bd3f2c99d1165ae509d07d859ae25fa795f1e735 Mon Sep 17 00:00:00 2001 From: NunoSempere Date: Thu, 14 Apr 2022 16:22:21 -0400 Subject: [PATCH 4/7] tweak: add explanation for magic number --- .../SymbolicDist/SymbolicDist.res | 7 +++-- .../Internal/ProcessingConfidenceIntervals.md | 31 +++++++++++++++++++ 2 files changed, 36 insertions(+), 2 deletions(-) create mode 100644 packages/website/docs/Internal/ProcessingConfidenceIntervals.md diff --git a/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res b/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res index 28119e57..eb6ea136 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res +++ b/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res @@ -1,5 +1,8 @@ open SymbolicDistTypes +let normal95confidencePoint = 1.6448536269514722 +// explained in website/docs/internal/ProcessingConfidenceIntervals + module Normal = { type t = normal let make = (mean: float, stdev: float): result => @@ -11,7 +14,7 @@ module Normal = { let from90PercentCI = (low, high) => { let mean = E.A.Floats.mean([low, high]) - let stdev = (high -. low) /. (2. *. 1.6448536269514722) + let stdev = (high -. low) /. (2. *. normal95confidencePoint) #Normal({mean: mean, stdev: stdev}) } let inv = (p, t: t) => Jstat.Normal.inv(p, t.mean, t.stdev) @@ -120,7 +123,7 @@ module Lognormal = { let logLow = Js.Math.log(low) let logHigh = Js.Math.log(high) let mu = E.A.Floats.mean([logLow, logHigh]) - let sigma = (logHigh -. logLow) /. (2.0 *. 1.6448536269514722) + let sigma = (logHigh -. logLow) /. (2.0 *. normal95confidencePoint) #Lognormal({mu: mu, sigma: sigma}) } let fromMeanAndStdev = (mean, stdev) => { diff --git a/packages/website/docs/Internal/ProcessingConfidenceIntervals.md b/packages/website/docs/Internal/ProcessingConfidenceIntervals.md new file mode 100644 index 00000000..202c291c --- /dev/null +++ b/packages/website/docs/Internal/ProcessingConfidenceIntervals.md @@ -0,0 +1,31 @@ +# Processing confidence intervals + +This page explains what we are doing when we take a 95% confidence interval, and we get a mean and a standard deviation from it + +## For normals + +```js +module Normal = { + //... + let from90PercentCI = (low, high) => { + let mean = E.A.Floats.mean([low, high]) + let stdev = (high -. low) /. (2. *. 1.6448536269514722) + #Normal({mean: mean, stdev: stdev}) + } + //... +} +``` + +We know that for a normal with mean $\mu$ and standard deviation $\sigma$, + +$$ + +a \cdot Normal(\mu, \sigma) = Normal(a\cdot \mu, |a|\cdot \sigma) + +$$ + +We can now look at the inverse cdf of a $Normal(0,1)$. We find that the 95% point is reached at $1.6448536269514722$. ([source](https://stackoverflow.com/questions/20626994/how-to-calculate-the-inverse-of-the-normal-cumulative-distribution-function-in-p)) This means that the 90% confidence interval is $[-1.6448536269514722, 1.6448536269514722]$, which has a width of $2 \cdot 1.6448536269514722$. + +So then, if we take a $Normal(0,1)$ and we multiply it by $\frac{(high -. low)}{(2. *. 1.6448536269514722)}$, it's 90% confidence interval will be multiplied by the same amount. Then we just have to shift it by the mean to get our target normal. + +## For lognormals \ No newline at end of file From aad6a9c603592c638aee3ec52f11e671bff3a7c4 Mon Sep 17 00:00:00 2001 From: NunoSempere Date: Thu, 14 Apr 2022 16:35:24 -0400 Subject: [PATCH 5/7] fix: Bad tests These tests are failing because I increased the precision of a magic constant. This might be a signal that these are bad tests --- .../ReducerInterface/ReducerInterface_Distribution_test.res | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/squiggle-lang/__tests__/ReducerInterface/ReducerInterface_Distribution_test.res b/packages/squiggle-lang/__tests__/ReducerInterface/ReducerInterface_Distribution_test.res index 0a131d93..22fb437f 100644 --- a/packages/squiggle-lang/__tests__/ReducerInterface/ReducerInterface_Distribution_test.res +++ b/packages/squiggle-lang/__tests__/ReducerInterface/ReducerInterface_Distribution_test.res @@ -23,8 +23,8 @@ describe("eval on distribution functions", () => { }) describe("to", () => { testEval("5 to 2", "Error(TODO: Low value must be less than high value.)") - testEval("to(2,5)", "Ok(Lognormal(1.1512925464970227,0.278507821238345))") - testEval("to(-2,2)", "Ok(Normal(0,1.215913388057542))") + testEval("to(2,5)", "Ok(Lognormal(1.1512925464970227,0.27853260523016377))") + testEval("to(-2,2)", "Ok(Normal(0,1.2159136638235384))") }) describe("mean", () => { testEval("mean(normal(5,2))", "Ok(5)") From 8cb689e661a76d4d70f03d453eaf6648694fdcd7 Mon Sep 17 00:00:00 2001 From: NunoSempere Date: Thu, 14 Apr 2022 16:38:12 -0400 Subject: [PATCH 6/7] fix: style --- .../website/docs/Internal/ProcessingConfidenceIntervals.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/website/docs/Internal/ProcessingConfidenceIntervals.md b/packages/website/docs/Internal/ProcessingConfidenceIntervals.md index 202c291c..99e72e5a 100644 --- a/packages/website/docs/Internal/ProcessingConfidenceIntervals.md +++ b/packages/website/docs/Internal/ProcessingConfidenceIntervals.md @@ -18,14 +18,15 @@ module Normal = { We know that for a normal with mean $\mu$ and standard deviation $\sigma$, -$$ +$$ a \cdot Normal(\mu, \sigma) = Normal(a\cdot \mu, |a|\cdot \sigma) + $$ -We can now look at the inverse cdf of a $Normal(0,1)$. We find that the 95% point is reached at $1.6448536269514722$. ([source](https://stackoverflow.com/questions/20626994/how-to-calculate-the-inverse-of-the-normal-cumulative-distribution-function-in-p)) This means that the 90% confidence interval is $[-1.6448536269514722, 1.6448536269514722]$, which has a width of $2 \cdot 1.6448536269514722$. +We can now look at the inverse cdf of a $Normal(0,1)$. We find that the 95% point is reached at $1.6448536269514722$. ([source](https://stackoverflow.com/questions/20626994/how-to-calculate-the-inverse-of-the-normal-cumulative-distribution-function-in-p)) This means that the 90% confidence interval is $[-1.6448536269514722, 1.6448536269514722]$, which has a width of $2 \cdot 1.6448536269514722$. So then, if we take a $Normal(0,1)$ and we multiply it by $\frac{(high -. low)}{(2. *. 1.6448536269514722)}$, it's 90% confidence interval will be multiplied by the same amount. Then we just have to shift it by the mean to get our target normal. -## For lognormals \ No newline at end of file +## For lognormals From 9910afd74b6c2178702eae44252feac09bdc4956 Mon Sep 17 00:00:00 2001 From: NunoSempere Date: Thu, 14 Apr 2022 16:52:36 -0400 Subject: [PATCH 7/7] fix: Failing tests The tests were wrong, not the code. Feels like it should be the other way around --- .../ReducerInterface/ReducerInterface_Distribution_test.res | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/squiggle-lang/__tests__/ReducerInterface/ReducerInterface_Distribution_test.res b/packages/squiggle-lang/__tests__/ReducerInterface/ReducerInterface_Distribution_test.res index 3fe4c426..9e25f7e2 100644 --- a/packages/squiggle-lang/__tests__/ReducerInterface/ReducerInterface_Distribution_test.res +++ b/packages/squiggle-lang/__tests__/ReducerInterface/ReducerInterface_Distribution_test.res @@ -57,8 +57,8 @@ describe("eval on distribution functions", () => { describe("multiply", () => { testEval("normal(10, 2) * 2", "Ok(Normal(20,4))") testEval("2 * normal(10, 2)", "Ok(Normal(20,4))") - testEval("lognormal(5,2) * lognormal(10,2)", "Ok(Lognormal(15,4))") - testEval("lognormal(10, 2) * lognormal(5, 2)", "Ok(Lognormal(15,4))") + testEval("lognormal(5,2) * lognormal(10,2)", "Ok(Lognormal(15,2.8284271247461903))") + testEval("lognormal(10, 2) * lognormal(5, 2)", "Ok(Lognormal(15,2.8284271247461903))") testEval("2 * lognormal(5, 2)", "Ok(Lognormal(5.693147180559945,2))") testEval("lognormal(5, 2) * 2", "Ok(Lognormal(5.693147180559945,2))") })