magic numbers in bandwidth; fromSamples
implementation
Value: [1e-3 to 4e-2]
This commit is contained in:
parent
bb8ed5ce4f
commit
8217801de3
|
@ -1,5 +1,5 @@
|
||||||
import { Distribution } from "../../src/js/index";
|
import { Distribution } from "../../src/js/index";
|
||||||
import { expectErrorToBeBounded, failDefault } from "./TestHelpers";
|
import { expectErrorToBeBounded, failDefault, testRun } from "./TestHelpers";
|
||||||
import * as fc from "fast-check";
|
import * as fc from "fast-check";
|
||||||
|
|
||||||
// Beware: float64Array makes it appear in an infinite loop.
|
// Beware: float64Array makes it appear in an infinite loop.
|
||||||
|
@ -212,3 +212,17 @@ describe("mean is mean", () => {
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("fromSamples function", () => {
|
||||||
|
test("gives a mean near the mean of the input", () => {
|
||||||
|
fc.assert(
|
||||||
|
fc.property(arrayGen(), (xs_) => {
|
||||||
|
let xs = Array.from(xs_);
|
||||||
|
let squiggleString = `x = fromSamples($xs); mean(x)`;
|
||||||
|
let squiggleResult = testRun(squiggleString, {}, { xs: xs });
|
||||||
|
let mean = xs.reduce((a, b) => a + b, 0.0) / xs.length;
|
||||||
|
expect(squiggleResult.value).toBeCloseTo(mean, 3);
|
||||||
|
})
|
||||||
|
);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
|
@ -31,6 +31,7 @@ import {
|
||||||
Constructors_isNormalized,
|
Constructors_isNormalized,
|
||||||
Constructors_toPointSet,
|
Constructors_toPointSet,
|
||||||
Constructors_toSampleSet,
|
Constructors_toSampleSet,
|
||||||
|
Constructors_fromSamples,
|
||||||
Constructors_truncate,
|
Constructors_truncate,
|
||||||
Constructors_inspect,
|
Constructors_inspect,
|
||||||
Constructors_toString,
|
Constructors_toString,
|
||||||
|
@ -404,6 +405,10 @@ export class Distribution {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fromSamples(n: Array<number>): result<Distribution, distributionError> {
|
||||||
|
return this.mapResultDist(Constructors_fromSamples({ env: this.env }, n));
|
||||||
|
}
|
||||||
|
|
||||||
truncate(
|
truncate(
|
||||||
left: number,
|
left: number,
|
||||||
right: number
|
right: number
|
||||||
|
|
|
@ -189,6 +189,12 @@ let rec run = (~env, functionCallInfo: functionCallInfo): outputType => {
|
||||||
->GenericDist.mixture(~scaleMultiplyFn=scaleMultiply, ~pointwiseAddFn=pointwiseAdd)
|
->GenericDist.mixture(~scaleMultiplyFn=scaleMultiply, ~pointwiseAddFn=pointwiseAdd)
|
||||||
->E.R2.fmap(r => Dist(r))
|
->E.R2.fmap(r => Dist(r))
|
||||||
->OutputLocal.fromResult
|
->OutputLocal.fromResult
|
||||||
|
| FromArray(xs) =>
|
||||||
|
xs
|
||||||
|
->SampleSetDist.make
|
||||||
|
->E.R2.fmap2(x => DistributionTypes.SampleSetError(x))
|
||||||
|
->E.R2.fmap(x => x->DistributionTypes.SampleSet->Dist)
|
||||||
|
->OutputLocal.fromResult
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -229,6 +235,7 @@ module Constructors = {
|
||||||
let isNormalized = (~env, dist) => C.isNormalized(dist)->run(~env)->toBoolR
|
let isNormalized = (~env, dist) => C.isNormalized(dist)->run(~env)->toBoolR
|
||||||
let toPointSet = (~env, dist) => C.toPointSet(dist)->run(~env)->toDistR
|
let toPointSet = (~env, dist) => C.toPointSet(dist)->run(~env)->toDistR
|
||||||
let toSampleSet = (~env, dist, n) => C.toSampleSet(dist, n)->run(~env)->toDistR
|
let toSampleSet = (~env, dist, n) => C.toSampleSet(dist, n)->run(~env)->toDistR
|
||||||
|
let fromSamples = (~env, xs) => C.fromSamples(xs)->run(~env)->toDistR
|
||||||
let truncate = (~env, dist, leftCutoff, rightCutoff) =>
|
let truncate = (~env, dist, leftCutoff, rightCutoff) =>
|
||||||
C.truncate(dist, leftCutoff, rightCutoff)->run(~env)->toDistR
|
C.truncate(dist, leftCutoff, rightCutoff)->run(~env)->toDistR
|
||||||
let inspect = (~env, dist) => C.inspect(dist)->run(~env)->toDistR
|
let inspect = (~env, dist) => C.inspect(dist)->run(~env)->toDistR
|
||||||
|
|
|
@ -61,6 +61,8 @@ module Constructors: {
|
||||||
@genType
|
@genType
|
||||||
let toSampleSet: (~env: env, genericDist, int) => result<genericDist, error>
|
let toSampleSet: (~env: env, genericDist, int) => result<genericDist, error>
|
||||||
@genType
|
@genType
|
||||||
|
let fromSamples: (~env: env, SampleSetDist.t) => result<genericDist, error>
|
||||||
|
@genType
|
||||||
let truncate: (~env: env, genericDist, option<float>, option<float>) => result<genericDist, error>
|
let truncate: (~env: env, genericDist, option<float>, option<float>) => result<genericDist, error>
|
||||||
@genType
|
@genType
|
||||||
let inspect: (~env: env, genericDist) => result<genericDist, error>
|
let inspect: (~env: env, genericDist) => result<genericDist, error>
|
||||||
|
|
|
@ -11,7 +11,7 @@ type error =
|
||||||
| NotYetImplemented
|
| NotYetImplemented
|
||||||
| Unreachable
|
| Unreachable
|
||||||
| DistributionVerticalShiftIsInvalid
|
| DistributionVerticalShiftIsInvalid
|
||||||
| TooFewSamples
|
| SampleSetError(SampleSetDist.sampleSetError)
|
||||||
| ArgumentError(string)
|
| ArgumentError(string)
|
||||||
| OperationError(Operation.Error.t)
|
| OperationError(Operation.Error.t)
|
||||||
| PointSetConversionError(SampleSetDist.pointsetConversionError)
|
| PointSetConversionError(SampleSetDist.pointsetConversionError)
|
||||||
|
@ -35,7 +35,8 @@ module Error = {
|
||||||
| DistributionVerticalShiftIsInvalid => "Distribution Vertical Shift is Invalid"
|
| DistributionVerticalShiftIsInvalid => "Distribution Vertical Shift is Invalid"
|
||||||
| ArgumentError(s) => `Argument Error ${s}`
|
| ArgumentError(s) => `Argument Error ${s}`
|
||||||
| LogarithmOfDistributionError(s) => `Logarithm of input error: ${s}`
|
| LogarithmOfDistributionError(s) => `Logarithm of input error: ${s}`
|
||||||
| TooFewSamples => "Too Few Samples"
|
| SampleSetError(TooFewSamples) => "Too Few Samples"
|
||||||
|
| SampleSetError(NonNumericInput(err)) => `Found a non-number in input: ${err}`
|
||||||
| OperationError(err) => Operation.Error.toString(err)
|
| OperationError(err) => Operation.Error.toString(err)
|
||||||
| PointSetConversionError(err) => SampleSetDist.pointsetConversionErrorToString(err)
|
| PointSetConversionError(err) => SampleSetDist.pointsetConversionErrorToString(err)
|
||||||
| SparklineError(err) => PointSetTypes.sparklineErrorToString(err)
|
| SparklineError(err) => PointSetTypes.sparklineErrorToString(err)
|
||||||
|
@ -47,10 +48,7 @@ module Error = {
|
||||||
let resultStringToResultError: result<'a, string> => result<'a, error> = n =>
|
let resultStringToResultError: result<'a, string> => result<'a, error> = n =>
|
||||||
n->E.R2.errMap(r => r->fromString)
|
n->E.R2.errMap(r => r->fromString)
|
||||||
|
|
||||||
let sampleErrorToDistErr = (err: SampleSetDist.sampleSetError): error =>
|
let sampleErrorToDistErr = (err: SampleSetDist.sampleSetError): error => SampleSetError(err)
|
||||||
switch err {
|
|
||||||
| TooFewSamples => TooFewSamples
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@genType
|
@genType
|
||||||
|
@ -100,6 +98,7 @@ module DistributionOperation = {
|
||||||
| FromDist(fromDist, genericDist)
|
| FromDist(fromDist, genericDist)
|
||||||
| FromFloat(fromDist, float)
|
| FromFloat(fromDist, float)
|
||||||
| Mixture(array<(genericDist, float)>)
|
| Mixture(array<(genericDist, float)>)
|
||||||
|
| FromArray(SampleSetDist.t)
|
||||||
|
|
||||||
let distCallToString = (distFunction: fromDist): string =>
|
let distCallToString = (distFunction: fromDist): string =>
|
||||||
switch distFunction {
|
switch distFunction {
|
||||||
|
@ -124,6 +123,7 @@ module DistributionOperation = {
|
||||||
switch d {
|
switch d {
|
||||||
| FromDist(f, _) | FromFloat(f, _) => distCallToString(f)
|
| FromDist(f, _) | FromFloat(f, _) => distCallToString(f)
|
||||||
| Mixture(_) => `mixture`
|
| Mixture(_) => `mixture`
|
||||||
|
| FromArray(_) => `samples`
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
module Constructors = {
|
module Constructors = {
|
||||||
|
@ -140,6 +140,7 @@ module Constructors = {
|
||||||
let isNormalized = (dist): t => FromDist(ToBool(IsNormalized), dist)
|
let isNormalized = (dist): t => FromDist(ToBool(IsNormalized), dist)
|
||||||
let toPointSet = (dist): t => FromDist(ToDist(ToPointSet), dist)
|
let toPointSet = (dist): t => FromDist(ToDist(ToPointSet), dist)
|
||||||
let toSampleSet = (dist, r): t => FromDist(ToDist(ToSampleSet(r)), dist)
|
let toSampleSet = (dist, r): t => FromDist(ToDist(ToSampleSet(r)), dist)
|
||||||
|
let fromSamples = (xs): t => FromArray(xs)
|
||||||
let truncate = (dist, left, right): t => FromDist(ToDist(Truncate(left, right)), dist)
|
let truncate = (dist, left, right): t => FromDist(ToDist(Truncate(left, right)), dist)
|
||||||
let inspect = (dist): t => FromDist(ToDist(Inspect), dist)
|
let inspect = (dist): t => FromDist(ToDist(Inspect), dist)
|
||||||
let toString = (dist): t => FromDist(ToString(ToString), dist)
|
let toString = (dist): t => FromDist(ToString(ToString), dist)
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
@genType
|
@genType
|
||||||
module Error = {
|
module Error = {
|
||||||
@genType
|
@genType
|
||||||
type sampleSetError = TooFewSamples
|
type sampleSetError = TooFewSamples | NonNumericInput(string)
|
||||||
|
|
||||||
let sampleSetErrorToString = (err: sampleSetError): string =>
|
let sampleSetErrorToString = (err: sampleSetError): string =>
|
||||||
switch err {
|
switch err {
|
||||||
| TooFewSamples => "Too few samples when constructing sample set"
|
| TooFewSamples => "Too few samples when constructing sample set"
|
||||||
|
| NonNumericInput(err) => `Found a non-number in input: ${err}`
|
||||||
}
|
}
|
||||||
|
|
||||||
@genType
|
@genType
|
||||||
|
|
|
@ -1,27 +1,30 @@
|
||||||
//The math here was taken from https://github.com/jasondavies/science.js/blob/master/src/stats/SampleSetDist_Bandwidth.js
|
//The math here was taken from https://github.com/jasondavies/science.js/blob/master/src/stats/SampleSetDist_Bandwidth.js
|
||||||
|
let {iqr_percentile, nrd0_lo_denominator, one, nrd0_coef, nrd_coef, nrd_fractionalPower} = module(
|
||||||
|
MagicNumbers.SampleSetBandwidth
|
||||||
|
)
|
||||||
let len = x => E.A.length(x) |> float_of_int
|
let len = x => E.A.length(x) |> float_of_int
|
||||||
|
|
||||||
let iqr = x => Jstat.percentile(x, 0.75, true) -. Jstat.percentile(x, 0.25, true)
|
let iqr = x =>
|
||||||
|
Jstat.percentile(x, iqr_percentile, true) -. Jstat.percentile(x, 1.0 -. iqr_percentile, true)
|
||||||
|
|
||||||
// Silverman, B. W. (1986) Density Estimation. London: Chapman and Hall.
|
// Silverman, B. W. (1986) Density Estimation. London: Chapman and Hall.
|
||||||
let nrd0 = x => {
|
let nrd0 = x => {
|
||||||
let hi = Js_math.sqrt(Jstat.variance(x))
|
let hi = Js_math.sqrt(Jstat.variance(x))
|
||||||
let lo = Js_math.minMany_float([hi, iqr(x) /. 1.34])
|
let lo = Js_math.minMany_float([hi, iqr(x) /. nrd0_lo_denominator])
|
||||||
let e = Js_math.abs_float(x[1])
|
let e = Js_math.abs_float(x[1])
|
||||||
let lo' = switch (lo, hi, e) {
|
let lo' = switch (lo, hi, e) {
|
||||||
| (lo, _, _) if !Js.Float.isNaN(lo) => lo
|
| (lo, _, _) if !Js.Float.isNaN(lo) => lo
|
||||||
| (_, hi, _) if !Js.Float.isNaN(hi) => hi
|
| (_, hi, _) if !Js.Float.isNaN(hi) => hi
|
||||||
| (_, _, e) if !Js.Float.isNaN(e) => e
|
| (_, _, e) if !Js.Float.isNaN(e) => e
|
||||||
| _ => 1.0
|
| _ => one
|
||||||
}
|
}
|
||||||
0.9 *. lo' *. Js.Math.pow_float(~base=len(x), ~exp=-0.2)
|
nrd0_coef *. lo' *. Js.Math.pow_float(~base=len(x), ~exp=nrd_fractionalPower)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scott, D. W. (1992) Multivariate Density Estimation: Theory, Practice, and Visualization. Wiley.
|
// Scott, D. W. (1992) Multivariate Density Estimation: Theory, Practice, and Visualization. Wiley.
|
||||||
let nrd = x => {
|
let nrd = x => {
|
||||||
let h = iqr(x) /. 1.34
|
let h = iqr(x) /. nrd0_lo_denominator
|
||||||
1.06 *.
|
nrd_coef *.
|
||||||
Js.Math.min_float(Js.Math.sqrt(Jstat.variance(x)), h) *.
|
Js.Math.min_float(Js.Math.sqrt(Jstat.variance(x)), h) *.
|
||||||
Js.Math.pow_float(~base=len(x), ~exp=-1.0 /. 5.0)
|
Js.Math.pow_float(~base=len(x), ~exp=nrd_fractionalPower)
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,3 +35,16 @@ module ToPointSet = {
|
||||||
*/
|
*/
|
||||||
let minDiscreteToKeep = samples => max(20, E.A.length(samples) / 50)
|
let minDiscreteToKeep = samples => max(20, E.A.length(samples) / 50)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
module SampleSetBandwidth = {
|
||||||
|
// Silverman, B. W. (1986) Density Estimation. London: Chapman and Hall.
|
||||||
|
// Scott, D. W. (1992) Multivariate Density Estimation: Theory, Practice, and Visualization. Wiley.
|
||||||
|
let iqr_percentile = 0.75
|
||||||
|
let iqr_percentile_complement = 1.0 -. iqr_percentile
|
||||||
|
let nrd0_lo_denominator = 1.34
|
||||||
|
let one = 1.0
|
||||||
|
let nrd0_coef = 0.9
|
||||||
|
|
||||||
|
let nrd_coef = 1.06
|
||||||
|
let nrd_fractionalPower = -0.2
|
||||||
|
}
|
||||||
|
|
|
@ -218,6 +218,17 @@ let dispatchToGenericOutput = (call: ExpressionValue.functionCall): option<
|
||||||
Helpers.toDistFn(ToSampleSet(Belt.Int.fromFloat(float)), dist)
|
Helpers.toDistFn(ToSampleSet(Belt.Int.fromFloat(float)), dist)
|
||||||
| ("toSampleSet", [EvDistribution(dist)]) =>
|
| ("toSampleSet", [EvDistribution(dist)]) =>
|
||||||
Helpers.toDistFn(ToSampleSet(MagicNumbers.Environment.defaultSampleCount), dist)
|
Helpers.toDistFn(ToSampleSet(MagicNumbers.Environment.defaultSampleCount), dist)
|
||||||
|
| ("fromSamples", [EvArray(arr)]) =>
|
||||||
|
Helpers.toDistFn(
|
||||||
|
ToSampleSet(MagicNumbers.Environment.defaultSampleCount),
|
||||||
|
arr
|
||||||
|
->Helpers.parseNumberArray
|
||||||
|
->E.R2.fmap2(x => SampleSetDist.NonNumericInput(x))
|
||||||
|
->E.R.bind(SampleSetDist.make)
|
||||||
|
->E.R2.fmap(x => DistributionTypes.SampleSet(x))
|
||||||
|
// Raising here isn't ideal. This: GenDistError(SampleSetError(NonNumericInput("Something wasn't a number"))) would be proper.
|
||||||
|
->E.R2.toExn("Something in the input wasn't a number"),
|
||||||
|
)
|
||||||
| ("inspect", [EvDistribution(dist)]) => Helpers.toDistFn(Inspect, dist)
|
| ("inspect", [EvDistribution(dist)]) => Helpers.toDistFn(Inspect, dist)
|
||||||
| ("truncateLeft", [EvDistribution(dist), EvNumber(float)]) =>
|
| ("truncateLeft", [EvDistribution(dist), EvNumber(float)]) =>
|
||||||
Helpers.toDistFn(Truncate(Some(float), None), dist)
|
Helpers.toDistFn(Truncate(Some(float), None), dist)
|
||||||
|
|
|
@ -307,6 +307,8 @@ module R2 = {
|
||||||
| Ok(x) => x->Ok
|
| Ok(x) => x->Ok
|
||||||
| Error(x) => x->f->Error
|
| Error(x) => x->f->Error
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let toExn = (a, b) => R.toExn(b, a)
|
||||||
}
|
}
|
||||||
|
|
||||||
let safe_fn_of_string = (fn, s: string): option<'a> =>
|
let safe_fn_of_string = (fn, s: string): option<'a> =>
|
||||||
|
|
Loading…
Reference in New Issue
Block a user