From fe5a42353ea2c07c0d77930a4ff7484af218f286 Mon Sep 17 00:00:00 2001 From: Sam Nolan Date: Tue, 12 Jul 2022 15:16:13 +1000 Subject: [PATCH] Add percentile graphed as environment option --- .../src/components/SquiggleItem.tsx | 4 +- .../src/components/SquigglePlayground.tsx | 17 ++++++ .../src/lib/distributionSpecBuilder.ts | 4 +- .../DistributionOperation_test.res | 6 +- .../squiggle-lang/__tests__/TestHelpers.res | 1 + packages/squiggle-lang/src/js/distribution.ts | 9 +-- packages/squiggle-lang/src/js/index.ts | 17 +++--- .../Distributions/DistributionOperation.res | 6 +- .../Distributions/DistributionOperation.resi | 1 + .../Distributions/DistributionTypes.res | 8 +-- .../rescript/Distributions/GenericDist.res | 16 +++-- .../rescript/Distributions/GenericDist.resi | 9 ++- .../PointSetDist/PointSetDist.res | 4 +- .../SymbolicDist/SymbolicDist.res | 60 +++++++++++-------- .../ReducerInterface_GenericDistribution.res | 4 +- .../src/rescript/Utility/Operation.res | 4 +- 16 files changed, 101 insertions(+), 69 deletions(-) diff --git a/packages/components/src/components/SquiggleItem.tsx b/packages/components/src/components/SquiggleItem.tsx index 48a8a0fb..e805e874 100644 --- a/packages/components/src/components/SquiggleItem.tsx +++ b/packages/components/src/components/SquiggleItem.tsx @@ -219,6 +219,7 @@ export const SquiggleItem: React.FC = ({ distributionPlotSettings={distributionPlotSettings} height={height} environment={{ + ...environment, sampleCount: environment.sampleCount / 10, xyPointLength: environment.xyPointLength / 10, }} @@ -234,6 +235,7 @@ export const SquiggleItem: React.FC = ({ distributionPlotSettings={distributionPlotSettings} height={height} environment={{ + ...environment, sampleCount: environment.sampleCount / 10, xyPointLength: environment.xyPointLength / 10, }} @@ -246,7 +248,7 @@ export const SquiggleItem: React.FC = ({
{Object.entries(expression.value) - .filter(([key, r]) => key !== "Math") + .filter(([key, _]) => key !== "Math") .map(([key, r]) => (
diff --git a/packages/components/src/components/SquigglePlayground.tsx b/packages/components/src/components/SquigglePlayground.tsx index 534be721..995cdc10 100644 --- a/packages/components/src/components/SquigglePlayground.tsx +++ b/packages/components/src/components/SquigglePlayground.tsx @@ -55,6 +55,7 @@ const schema = yup.object({}).shape({ .default(1000) .min(10) .max(10000), + percentile: yup.number().required().positive().default(0.9998).min(0).max(1), chartHeight: yup.number().required().positive().integer().default(350), leftSizePercent: yup .number() @@ -155,6 +156,20 @@ const SamplingSettings: React.FC<{ register: UseFormRegister }> = ({
+
+ +
+ + When converting symbolic distributions to PointSet distributions, what + percentile to sample the points within. + +
+
); @@ -436,6 +451,7 @@ export const SquigglePlayground: FC = ({ const { register, control } = useForm({ resolver: yupResolver(schema), defaultValues: { + percentile: 0.9998, sampleCount: 1000, xyPointLength: 1000, chartHeight: 150, @@ -468,6 +484,7 @@ export const SquigglePlayground: FC = ({ const env: environment = useMemo( () => ({ + percentile: Number(vars.percentile), sampleCount: Number(vars.sampleCount), xyPointLength: Number(vars.xyPointLength), }), diff --git a/packages/components/src/lib/distributionSpecBuilder.ts b/packages/components/src/lib/distributionSpecBuilder.ts index 4286dbdb..7a5bcd2f 100644 --- a/packages/components/src/lib/distributionSpecBuilder.ts +++ b/packages/components/src/lib/distributionSpecBuilder.ts @@ -42,7 +42,7 @@ export let linearYScale: LinearScale = { name: "yscale", type: "linear", range: "height", - zero: false, + zero: true, domain: { fields: [ { @@ -84,7 +84,7 @@ export let expYScale: PowScale = { type: "pow", exponent: 0.1, range: "height", - zero: false, + zero: true, nice: false, domain: { fields: [ diff --git a/packages/squiggle-lang/__tests__/Distributions/DistributionOperation_test.res b/packages/squiggle-lang/__tests__/Distributions/DistributionOperation_test.res index 60e74d74..3bd359f8 100644 --- a/packages/squiggle-lang/__tests__/Distributions/DistributionOperation_test.res +++ b/packages/squiggle-lang/__tests__/Distributions/DistributionOperation_test.res @@ -1,10 +1,6 @@ open Jest open Expect - -let env: DistributionOperation.env = { - sampleCount: 100, - xyPointLength: 100, -} +open TestHelpers let { normalDist5, diff --git a/packages/squiggle-lang/__tests__/TestHelpers.res b/packages/squiggle-lang/__tests__/TestHelpers.res index 71805c70..5a1aaa43 100644 --- a/packages/squiggle-lang/__tests__/TestHelpers.res +++ b/packages/squiggle-lang/__tests__/TestHelpers.res @@ -30,6 +30,7 @@ let {toFloat, toDist, toString, toError, fmap} = module(DistributionOperation.Ou let fnImage = (theFn, inps) => Js.Array.map(theFn, 
inps) let env: DistributionOperation.env = { + percentile: 0.9998, sampleCount: MagicNumbers.Environment.defaultSampleCount, xyPointLength: MagicNumbers.Environment.defaultXYPointLength, } diff --git a/packages/squiggle-lang/src/js/distribution.ts b/packages/squiggle-lang/src/js/distribution.ts index 9bd3723b..8fae2911 100644 --- a/packages/squiggle-lang/src/js/distribution.ts +++ b/packages/squiggle-lang/src/js/distribution.ts @@ -105,14 +105,7 @@ export class Distribution { } pointSet(): result { - let pointSet = toPointSet( - this.t, - { - xyPointLength: this.env.xyPointLength, - sampleCount: this.env.sampleCount, - }, - undefined - ); + let pointSet = toPointSet(this.t, this.env, undefined); if (pointSet.tag === "Ok") { let distribution = pointSet.value; if (distribution.tag === "Continuous") { diff --git a/packages/squiggle-lang/src/js/index.ts b/packages/squiggle-lang/src/js/index.ts index 5e4cf2c1..e1ced177 100644 --- a/packages/squiggle-lang/src/js/index.ts +++ b/packages/squiggle-lang/src/js/index.ts @@ -39,20 +39,17 @@ export type { result, shape, environment, lambdaValue, squiggleExpression }; export { parse } from "./parse"; -export let defaultSamplingInputs: environment = { - sampleCount: 10000, - xyPointLength: 10000, -}; - export function run( squiggleString: string, bindings?: externalBindings, - environment?: environment, + environment?: Partial, imports?: jsImports ): result { let b = bindings ? bindings : defaultBindings; let i = imports ? imports : defaultImports; - let e = environment ? environment : defaultEnvironment; + let e = environment + ? 
_.merge({}, defaultEnvironment, environment) + : defaultEnvironment; let res: result = evaluateUsingOptions( { externalBindings: mergeImportsWithBindings(b, i), environment: e }, squiggleString @@ -64,12 +61,14 @@ export function run( export function runPartial( squiggleString: string, bindings?: externalBindings, - environment?: environment, + environment?: Partial, imports?: jsImports ): result { let b = bindings ? bindings : defaultBindings; let i = imports ? imports : defaultImports; - let e = environment ? environment : defaultEnvironment; + let e = environment + ? _.merge({}, defaultEnvironment, environment) + : defaultEnvironment; return evaluatePartialUsingExternalBindings( squiggleString, diff --git a/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation.res b/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation.res index 25eb70f2..d91fa2e7 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation.res +++ b/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation.res @@ -5,11 +5,13 @@ type error = DistributionTypes.error // TODO: It could be great to use a cache for some calculations (basically, do memoization). Also, better analytics/tracking could go a long way. 
type env = { + percentile: float, sampleCount: int, xyPointLength: int, } let defaultEnv = { + percentile: 0.9998, sampleCount: MagicNumbers.Environment.defaultSampleCount, xyPointLength: MagicNumbers.Environment.defaultXYPointLength, } @@ -137,7 +139,7 @@ let rec run = (~env, functionCallInfo: functionCallInfo): outputType => { ->OutputLocal.fromResult | ToString(ToString) => dist->GenericDist.toString->String | ToString(ToSparkline(bucketCount)) => - GenericDist.toSparkline(dist, ~sampleCount, ~bucketCount, ()) + GenericDist.toSparkline(dist, ~percentile=env.percentile, ~sampleCount, ~bucketCount, ()) ->E.R2.fmap(r => String(r)) ->OutputLocal.fromResult | ToDist(Inspect) => { @@ -170,7 +172,7 @@ let rec run = (~env, functionCallInfo: functionCallInfo): outputType => { ->OutputLocal.fromResult | ToDist(ToPointSet) => dist - ->GenericDist.toPointSet(~xyPointLength, ~sampleCount, ()) + ->GenericDist.toPointSet(~percentile=env.percentile, ~xyPointLength, ~sampleCount, ()) ->E.R2.fmap(r => Dist(PointSet(r))) ->OutputLocal.fromResult | ToDist(Scale(#LogarithmWithThreshold(eps), f)) => diff --git a/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation.resi b/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation.resi index bfa7b3ad..c682860c 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation.resi +++ b/packages/squiggle-lang/src/rescript/Distributions/DistributionOperation.resi @@ -1,5 +1,6 @@ @genType type env = { + percentile: float, sampleCount: int, xyPointLength: int, } diff --git a/packages/squiggle-lang/src/rescript/Distributions/DistributionTypes.res b/packages/squiggle-lang/src/rescript/Distributions/DistributionTypes.res index a23c2cd6..92ca974b 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/DistributionTypes.res +++ b/packages/squiggle-lang/src/rescript/Distributions/DistributionTypes.res @@ -70,8 +70,8 @@ module DistributionOperation = { | #IntegralSum | #Mode | #Stdev - | 
#Min - | #Max + | #Min(float) + | #Max(float) | #Variance ] @@ -123,8 +123,8 @@ module DistributionOperation = { | ToFloat(#Cdf(r)) => `cdf(${E.Float.toFixed(r)})` | ToFloat(#Inv(r)) => `inv(${E.Float.toFixed(r)})` | ToFloat(#Mean) => `mean` - | ToFloat(#Min) => `min` - | ToFloat(#Max) => `max` + | ToFloat(#Min(_)) => `min` + | ToFloat(#Max(_)) => `max` | ToFloat(#Stdev) => `stdev` | ToFloat(#Variance) => `variance` | ToFloat(#Mode) => `mode` diff --git a/packages/squiggle-lang/src/rescript/Distributions/GenericDist.res b/packages/squiggle-lang/src/rescript/Distributions/GenericDist.res index addbdaa1..bbc1b84e 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/GenericDist.res +++ b/packages/squiggle-lang/src/rescript/Distributions/GenericDist.res @@ -108,7 +108,7 @@ let toFloatOperation = ( ) => { switch distToFloatOperation { | #IntegralSum => Ok(integralEndY(t)) - | (#Pdf(_) | #Cdf(_) | #Inv(_) | #Mean | #Sample | #Min | #Max) as op => { + | (#Pdf(_) | #Cdf(_) | #Inv(_) | #Mean | #Sample | #Min(_) | #Max(_)) as op => { let trySymbolicSolution = switch (t: t) { | Symbolic(r) => SymbolicDist.T.operate(op, r)->E.R.toOption | _ => None @@ -118,8 +118,8 @@ let toFloatOperation = ( | (SampleSet(sampleSet), #Mean) => SampleSetDist.mean(sampleSet)->Some | (SampleSet(sampleSet), #Sample) => SampleSetDist.sample(sampleSet)->Some | (SampleSet(sampleSet), #Inv(r)) => SampleSetDist.percentile(sampleSet, r)->Some - | (SampleSet(sampleSet), #Min) => SampleSetDist.min(sampleSet)->Some - | (SampleSet(sampleSet), #Max) => SampleSetDist.max(sampleSet)->Some + | (SampleSet(sampleSet), #Min(_)) => SampleSetDist.min(sampleSet)->Some + | (SampleSet(sampleSet), #Max(_)) => SampleSetDist.max(sampleSet)->Some | _ => None } @@ -150,6 +150,7 @@ let toFloatOperation = ( // Also, change the outputXYPoints/pointSetDistLength details let toPointSet = ( t, + ~percentile: float, ~xyPointLength, ~sampleCount, ~xSelection: 
DistributionTypes.DistributionOperation.pointsetXSelection=#ByWeight, @@ -157,7 +158,7 @@ let toPointSet = ( ): result => { switch (t: t) { | PointSet(pointSet) => Ok(pointSet) - | Symbolic(r) => Ok(SymbolicDist.T.toPointSetDist(~xSelection, xyPointLength, r)) + | Symbolic(r) => Ok(SymbolicDist.T.toPointSetDist(~percentile, ~xSelection, xyPointLength, r)) | SampleSet(r) => SampleSetDist.toPointSetDist( ~samples=r, @@ -177,9 +178,12 @@ let toPointSet = ( xyPointLength to be a bit longer than the eventual toSparkline downsampling. I chose 3 fairly arbitrarily. */ -let toSparkline = (t: t, ~sampleCount: int, ~bucketCount: int=20, ()): result => +let toSparkline = (t: t, ~percentile: float, ~sampleCount: int, ~bucketCount: int=20, ()): result< + string, + error, +> => t - ->toPointSet(~xSelection=#Linear, ~xyPointLength=bucketCount * 3, ~sampleCount, ()) + ->toPointSet(~percentile, ~xSelection=#Linear, ~xyPointLength=bucketCount * 3, ~sampleCount, ()) ->E.R.bind(r => r->PointSetDist.toSparkline(bucketCount)->E.R2.errMap(x => DistributionTypes.SparklineError(x)) ) diff --git a/packages/squiggle-lang/src/rescript/Distributions/GenericDist.resi b/packages/squiggle-lang/src/rescript/Distributions/GenericDist.resi index fd9afa58..6996b26e 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/GenericDist.resi +++ b/packages/squiggle-lang/src/rescript/Distributions/GenericDist.resi @@ -37,12 +37,19 @@ module Score: { @genType let toPointSet: ( t, + ~percentile: float, ~xyPointLength: int, ~sampleCount: int, ~xSelection: DistributionTypes.DistributionOperation.pointsetXSelection=?, unit, ) => result -let toSparkline: (t, ~sampleCount: int, ~bucketCount: int=?, unit) => result +let toSparkline: ( + t, + ~percentile: float, + ~sampleCount: int, + ~bucketCount: int=?, + unit, +) => result let truncate: ( t, diff --git a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res 
b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res index 74913d06..769c0fd7 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res +++ b/packages/squiggle-lang/src/rescript/Distributions/PointSetDist/PointSetDist.res @@ -254,8 +254,8 @@ let operate = (distToFloatOp: Operation.distToFloatOperation, s): float => | #Inv(f) => inv(f, s) | #Sample => sample(s) | #Mean => T.mean(s) - | #Min => T.minX(s) - | #Max => T.maxX(s) + | #Min(_) => T.minX(s) + | #Max(_) => T.maxX(s) } let toSparkline = (t: t, bucketCount): result => diff --git a/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res b/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res index 249deb02..d0b4769f 100644 --- a/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res +++ b/packages/squiggle-lang/src/rescript/Distributions/SymbolicDist/SymbolicDist.res @@ -331,9 +331,6 @@ module From90thPercentile = { } module T = { - let minCdfValue = 0.0001 - let maxCdfValue = 0.9999 - let pdf = (x, dist) => switch dist { | #Normal(n) => Normal.pdf(x, n) @@ -419,35 +416,39 @@ module T = { | #Bernoulli(n) => Bernoulli.toString(n) } - let min: symbolicDist => float = x => + let min = (~percentile: float, x: symbolicDist): float => { + let minCdf = (1. -. percentile) /. 2. 
switch x { | #Triangular({low}) => low - | #Exponential(n) => Exponential.inv(minCdfValue, n) - | #Cauchy(n) => Cauchy.inv(minCdfValue, n) - | #Normal(n) => Normal.inv(minCdfValue, n) - | #Lognormal(n) => Lognormal.inv(minCdfValue, n) - | #Logistic(n) => Logistic.inv(minCdfValue, n) - | #Gamma(n) => Gamma.inv(minCdfValue, n) + | #Exponential(n) => Exponential.inv(minCdf, n) + | #Cauchy(n) => Cauchy.inv(minCdf, n) + | #Normal(n) => Normal.inv(minCdf, n) + | #Lognormal(n) => Lognormal.inv(minCdf, n) + | #Logistic(n) => Logistic.inv(minCdf, n) + | #Gamma(n) => Gamma.inv(minCdf, n) | #Uniform({low}) => low | #Bernoulli(n) => Bernoulli.min(n) - | #Beta(n) => Beta.inv(minCdfValue, n) + | #Beta(n) => Beta.inv(minCdf, n) | #Float(n) => n } + } - let max: symbolicDist => float = x => + let max = (~percentile: float, x: symbolicDist): float => { + let maxCdf = 1. -. (1. -. percentile) /. 2. switch x { | #Triangular(n) => n.high - | #Exponential(n) => Exponential.inv(maxCdfValue, n) - | #Cauchy(n) => Cauchy.inv(maxCdfValue, n) - | #Normal(n) => Normal.inv(maxCdfValue, n) - | #Gamma(n) => Gamma.inv(maxCdfValue, n) - | #Lognormal(n) => Lognormal.inv(maxCdfValue, n) - | #Logistic(n) => Logistic.inv(maxCdfValue, n) - | #Beta(n) => Beta.inv(maxCdfValue, n) + | #Exponential(n) => Exponential.inv(maxCdf, n) + | #Cauchy(n) => Cauchy.inv(maxCdf, n) + | #Normal(n) => Normal.inv(maxCdf, n) + | #Gamma(n) => Gamma.inv(maxCdf, n) + | #Lognormal(n) => Lognormal.inv(maxCdf, n) + | #Logistic(n) => Logistic.inv(maxCdf, n) + | #Beta(n) => Beta.inv(maxCdf, n) | #Bernoulli(n) => Bernoulli.max(n) | #Uniform({high}) => high | #Float(n) => n } + } let mean: symbolicDist => result = x => switch x { @@ -469,15 +470,20 @@ module T = { | #Cdf(f) => Ok(cdf(f, s)) | #Pdf(f) => Ok(pdf(f, s)) | #Inv(f) => Ok(inv(f, s)) - | #Min => Ok(min(s)) - | #Max => Ok(max(s)) + | #Min(percentile) => Ok(min(~percentile, s)) + | #Max(percentile) => Ok(max(~percentile, s)) | #Sample => Ok(sample(s)) | #Mean => mean(s) } - 
let interpolateXs = (~xSelection: [#Linear | #ByWeight]=#Linear, dist: symbolicDist, n) => + let interpolateXs = ( + ~percentile: float, + ~xSelection: [#Linear | #ByWeight]=#Linear, + dist: symbolicDist, + n, + ) => { switch (xSelection, dist) { - | (#Linear, _) => E.A.Floats.range(min(dist), max(dist), n) + | (#Linear, _) => E.A.Floats.range(min(~percentile, dist), max(~percentile, dist), n) | (#ByWeight, #Uniform(n)) => // In `ByWeight mode, uniform distributions get special treatment because we need two x's // on either side for proper rendering (just left and right of the discontinuities). @@ -485,9 +491,12 @@ module T = { let dx = MagicNumbers.Epsilon.ten *. distance [n.low -. dx, n.low, n.low +. dx, n.high -. dx, n.high, n.high +. dx] | (#ByWeight, _) => - let ys = E.A.Floats.range(minCdfValue, maxCdfValue, n) + let minCdf = (1. -. percentile) /. 2. + let maxCdf = 1. -. minCdf + let ys = E.A.Floats.range(minCdf, maxCdf, n) ys |> E.A.fmap(y => inv(y, dist)) } + } /* Calling e.g. "Normal.operate" returns an optional that wraps a result. If the optional is None, there is no valid analytic solution. 
If it Some, it @@ -533,6 +542,7 @@ module T = { } let toPointSetDist = ( + ~percentile: float, ~xSelection=#ByWeight, sampleCount, d: symbolicDist, @@ -541,7 +551,7 @@ module T = { | #Float(v) => Float.toPointSetDist(v) | #Bernoulli(v) => Bernoulli.toPointSetDist(v) | _ => - let xs = interpolateXs(~xSelection, d, sampleCount) + let xs = interpolateXs(~percentile, ~xSelection, d, sampleCount) let ys = xs |> E.A.fmap(x => pdf(x, d)) Continuous(Continuous.make(~integralSumCache=Some(1.0), {xs: xs, ys: ys})) } diff --git a/packages/squiggle-lang/src/rescript/ReducerInterface/ReducerInterface_GenericDistribution.res b/packages/squiggle-lang/src/rescript/ReducerInterface/ReducerInterface_GenericDistribution.res index 6ca0a440..5681c416 100644 --- a/packages/squiggle-lang/src/rescript/ReducerInterface/ReducerInterface_GenericDistribution.res +++ b/packages/squiggle-lang/src/rescript/ReducerInterface/ReducerInterface_GenericDistribution.res @@ -216,8 +216,8 @@ let dispatchToGenericOutput = (call: IEV.functionCall, env: DistributionOperatio | "mean" => #Mean | "stdev" => #Stdev | "variance" => #Variance - | "min" => #Min - | "max" => #Max + | "min" => #Min(env.percentile) + | "max" => #Max(env.percentile) | "mode" => #Mode | _ => #Mean } diff --git a/packages/squiggle-lang/src/rescript/Utility/Operation.res b/packages/squiggle-lang/src/rescript/Utility/Operation.res index 8e22169c..70332f37 100644 --- a/packages/squiggle-lang/src/rescript/Utility/Operation.res +++ b/packages/squiggle-lang/src/rescript/Utility/Operation.res @@ -26,8 +26,8 @@ type distToFloatOperation = [ | #Inv(float) | #Mean | #Sample - | #Min - | #Max + | #Min(float) + | #Max(float) ] module Convolution = {