diff --git a/packages/squiggle-lang/__tests__/ReducerInterface/ReducerInterface_Distribution_test.res b/packages/squiggle-lang/__tests__/ReducerInterface/ReducerInterface_Distribution_test.res index 6045d6e0..9e25f7e2 100644 --- a/packages/squiggle-lang/__tests__/ReducerInterface/ReducerInterface_Distribution_test.res +++ b/packages/squiggle-lang/__tests__/ReducerInterface/ReducerInterface_Distribution_test.res @@ -23,8 +23,8 @@ describe("eval on distribution functions", () => { }) describe("to", () => { testEval("5 to 2", "Error(TODO: Low value must be less than high value.)") - testEval("to(2,5)", "Ok(Lognormal(1.1512925464970227,0.278507821238345))") - testEval("to(-2,2)", "Ok(Normal(0,1.215913388057542))") + testEval("to(2,5)", "Ok(Lognormal(1.1512925464970227,0.27853260523016377))") + testEval("to(-2,2)", "Ok(Normal(0,1.2159136638235384))") }) describe("mean", () => { testEval("mean(normal(5,2))", "Ok(5)") @@ -57,8 +57,8 @@ describe("eval on distribution functions", () => { describe("multiply", () => { testEval("normal(10, 2) * 2", "Ok(Normal(20,4))") testEval("2 * normal(10, 2)", "Ok(Normal(20,4))") - testEval("lognormal(5,2) * lognormal(10,2)", "Ok(Lognormal(15,4))") - testEval("lognormal(10, 2) * lognormal(5, 2)", "Ok(Lognormal(15,4))") + testEval("lognormal(5,2) * lognormal(10,2)", "Ok(Lognormal(15,2.8284271247461903))") + testEval("lognormal(10, 2) * lognormal(5, 2)", "Ok(Lognormal(15,2.8284271247461903))") testEval("2 * lognormal(5, 2)", "Ok(Lognormal(5.693147180559945,2))") testEval("lognormal(5, 2) * 2", "Ok(Lognormal(5.693147180559945,2))") }) diff --git a/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res b/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res index 1652c799..afd35800 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res +++ b/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res @@ -1,5 +1,8 @@ open SymbolicDistTypes +let normal95confidencePoint = 1.6448536269514722 +// explained in website/docs/internal/ProcessingConfidenceIntervals + module Normal = { type t = normal let make = (mean: float, stdev: float): result => @@ -11,7 +14,7 @@ module Normal = { let from90PercentCI = (low, high) => { let mean = E.A.Floats.mean([low, high]) - let stdev = (high -. low) /. (2. *. 1.644854) + let stdev = (high -. low) /. (2. *. normal95confidencePoint) #Normal({mean: mean, stdev: stdev}) } let inv = (p, t: t) => Jstat.Normal.inv(p, t.mean, t.stdev) @@ -21,12 +24,12 @@ module Normal = { let add = (n1: t, n2: t) => { let mean = n1.mean +. n2.mean - let stdev = sqrt(n1.stdev ** 2. +. n2.stdev ** 2.) + let stdev = Js.Math.sqrt(n1.stdev ** 2. +. n2.stdev ** 2.) #Normal({mean: mean, stdev: stdev}) } let subtract = (n1: t, n2: t) => { let mean = n1.mean -. n2.mean - let stdev = sqrt(n1.stdev ** 2. +. n2.stdev ** 2.) + let stdev = Js.Math.sqrt(n1.stdev ** 2. +. n2.stdev ** 2.) #Normal({mean: mean, stdev: stdev}) } @@ -132,19 +135,22 @@ module Lognormal = { let mean = (t: t) => Ok(Jstat.Lognormal.mean(t.mu, t.sigma)) let sample = (t: t) => Jstat.Lognormal.sample(t.mu, t.sigma) let toString = ({mu, sigma}: t) => j`Lognormal($mu,$sigma)` + let from90PercentCI = (low, high) => { let logLow = Js.Math.log(low) let logHigh = Js.Math.log(high) let mu = E.A.Floats.mean([logLow, logHigh]) - let sigma = (logHigh -. logLow) /. (2.0 *. 1.645) + let sigma = (logHigh -. logLow) /. (2.0 *. normal95confidencePoint) #Lognormal({mu: mu, sigma: sigma}) } let fromMeanAndStdev = (mean, stdev) => { + // https://math.stackexchange.com/questions/2501783/parameters-of-a-lognormal-distribution + // https://wikiless.org/wiki/Log-normal_distribution?lang=en#Generation_and_parameters if stdev > 0.0 { - let variance = Js.Math.pow_float(~base=stdev, ~exp=2.0) - let meanSquared = Js.Math.pow_float(~base=mean, ~exp=2.0) - let mu = Js.Math.log(mean) -. 0.5 *. Js.Math.log(variance /. meanSquared +. 1.0) - let sigma = Js.Math.pow_float(~base=Js.Math.log(variance /. meanSquared +. 1.0), ~exp=0.5) + let variance = stdev ** 2. + let meanSquared = mean ** 2. + let mu = 2. *. Js.Math.log(mean) -. 0.5 *. Js.Math.log(variance +. meanSquared) + let sigma = Js.Math.sqrt(Js.Math.log(variance /. meanSquared +. 1.)) Ok(#Lognormal({mu: mu, sigma: sigma})) } else { Error("Lognormal standard deviation must be larger than 0") @@ -152,8 +158,9 @@ module Lognormal = { } let multiply = (l1, l2) => { + // https://wikiless.org/wiki/Log-normal_distribution?lang=en#Multiplication_and_division_of_independent,_log-normal_random_variables let mu = l1.mu +. l2.mu - let sigma = l1.sigma +. l2.sigma + let sigma = Js.Math.sqrt(l1.sigma ** 2. +. l2.sigma ** 2.) // m #Lognormal({mu: mu, sigma: sigma}) } let divide = (l1, l2) => { diff --git a/packages/website/docs/Internal/ProcessingConfidenceIntervals.md b/packages/website/docs/Internal/ProcessingConfidenceIntervals.md new file mode 100644 index 00000000..99e72e5a --- /dev/null +++ b/packages/website/docs/Internal/ProcessingConfidenceIntervals.md @@ -0,0 +1,32 @@ +# Processing confidence intervals + +This page explains what we are doing when we take a 95% confidence interval, and we get a mean and a standard deviation from it + +## For normals + +```js +module Normal = { + //... + let from90PercentCI = (low, high) => { + let mean = E.A.Floats.mean([low, high]) + let stdev = (high -. low) /. (2. *. 1.6448536269514722) + #Normal({mean: mean, stdev: stdev}) + } + //... +} +``` + +We know that for a normal with mean $\mu$ and standard deviation $\sigma$, + +$$ + +a \cdot Normal(\mu, \sigma) = Normal(a\cdot \mu, |a|\cdot \sigma) + + +$$ + +We can now look at the inverse cdf of a $Normal(0,1)$. We find that the 95% point is reached at $1.6448536269514722$. ([source](https://stackoverflow.com/questions/20626994/how-to-calculate-the-inverse-of-the-normal-cumulative-distribution-function-in-p)) This means that the 90% confidence interval is $[-1.6448536269514722, 1.6448536269514722]$, which has a width of $2 \cdot 1.6448536269514722$. + +So then, if we take a $Normal(0,1)$ and we multiply it by $\frac{(high -. low)}{(2. *. 1.6448536269514722)}$, it's 90% confidence interval will be multiplied by the same amount. Then we just have to shift it by the mean to get our target normal. + +## For lognormals