Merge pull request #1002 from quantified-uncertainty/sampleset-mixture

Sampleset mixture
This commit is contained in:
Quinn 2022-09-01 03:00:17 -04:00 committed by GitHub
commit e6d543daef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 189 additions and 114 deletions

View File

@ -204,7 +204,6 @@ jobs:
run: cd ../../ && yarn
- name: Build
run: yarn compile
# cli-lint:
# name: CLI lint
# runs-on: ubuntu-latest

View File

@ -0,0 +1,20 @@
open Jest
open Expect
// Registers a Jest test named `str` which asserts that `item1` equals `item2`.
// With `~only=true` the test is registered through `Only.test` instead of `test`.
let makeTest = (~only=false, str, item1, item2) => {
  // The same assertion thunk is used for both registration paths.
  let assertion = () => expect(item1)->toEqual(item2)
  if only {
    Only.test(str, assertion)
  } else {
    test(str, assertion)
  }
}
// Tests for the `Stdlib.Random.sample` binding.
describe("Stdlib", () => {
  // The result should contain exactly `size` elements.
  makeTest(
    "Length of Random.sample",
    Stdlib.Random.sample([1.0, 2.0], {probs: [0.5, 0.5], size: 10})->E.A.length,
    10,
  )
  // Membership check: every drawn value must be one of the inputs.
  // The previous version compared the sorted unique draws with [1.0, 2.0],
  // which fails whenever all ten draws happen to pick the same element
  // (probability 2 * 0.5^10), making the suite flaky. This check does not
  // depend on which elements the random draw selects.
  makeTest(
    "Random.sample returns only elements from input array",
    E.A.all(
      x => x == 1.0 || x == 2.0,
      Stdlib.Random.sample([1.0, 2.0], {probs: [0.5, 0.5], size: 10}),
    ),
    true,
  )
})

View File

@ -18,6 +18,7 @@
"benchmark": "ts-node benchmark/conversion_tests.ts",
"test": "jest",
"test:ts": "jest __tests__/TS/",
"test:stdlib": "jest __tests__/Stdlib_test.bs.js",
"test:rescript": "jest --modulePathIgnorePatterns=__tests__/TS/*",
"test:watch": "jest --watchAll",
"test:fnRegistry": "jest __tests__/SquiggleLibrary/SquiggleLibrary_FunctionRegistryLibrary_test.bs.js",

View File

@ -216,7 +216,7 @@ let rec run = (~env: env, functionCallInfo: functionCallInfo): outputType => {
| FromFloat(subFnName, x) => reCall(~functionCallInfo=FromFloat(subFnName, x), ())
| Mixture(dists) =>
dists
->GenericDist.mixture(~scaleMultiplyFn=scaleMultiply, ~pointwiseAddFn=pointwiseAdd)
->GenericDist.mixture(~scaleMultiplyFn=scaleMultiply, ~pointwiseAddFn=pointwiseAdd, ~env)
->E.R2.fmap(r => Dist(r))
->OutputLocal.fromResult
| FromSamples(xs) =>

View File

@ -499,15 +499,30 @@ let pointwiseCombinationFloat = (
m->E.R2.fmap(r => DistributionTypes.PointSet(r))
}
//Note: The result should always cumulatively sum to 1. This would be good to test.
//Note: If the inputs are not normalized, this will return poor results. The weights probably refer to the post-normalized forms. It would be good to apply a catch to this.
//TODO: The result should always cumulatively sum to 1. This would be good to test.
//TODO: If the inputs are not normalized, this will return poor results. The weights probably refer to the post-normalized forms. It would be good to apply a catch to this.
let mixture = (
values: array<(t, float)>,
~scaleMultiplyFn: scaleMultiplyFn,
~pointwiseAddFn: pointwiseAddFn,
~env: env,
) => {
if E.A.length(values) == 0 {
let allValuesAreSampleSet = v => E.A.all(((t, _)) => isSampleSetSet(t), v)
if E.A.isEmpty(values) {
Error(DistributionTypes.OtherError("Mixture error: mixture must have at least 1 element"))
} else if allValuesAreSampleSet(values) {
let withSampleSetValues = values->E.A2.fmap(((value, weight)) =>
switch value {
| SampleSet(sampleSet) => Ok((sampleSet, weight))
| _ => Error("Unreachable")
}->E.R2.toExn("Mixture coding error: SampleSet expected. This should be inaccessible.")
)
let sampleSetMixture = SampleSetDist.mixture(withSampleSetValues, env.sampleCount)
switch sampleSetMixture {
| Ok(sampleSet) => Ok(DistributionTypes.SampleSet(sampleSet))
| Error(err) => Error(DistributionTypes.Error.sampleErrorToDistErr(err))
}
} else {
let totalWeight = values->E.A2.fmap(E.Tuple2.second)->E.A.Floats.sum
let properlyWeightedValues =

View File

@ -81,6 +81,7 @@ let mixture: (
array<(t, float)>,
~scaleMultiplyFn: scaleMultiplyFn,
~pointwiseAddFn: pointwiseAddFn,
~env: env,
) => result<t, error>
let isSymbolic: t => bool

View File

@ -224,3 +224,8 @@ module T = Dist({
XYShape.Analysis.getVarianceDangerously(t, mean, getMeanOfSquares)
}
})
// Draws `n` values from the xs of the normalized shape of `t`, passing the
// normalized ys to `Stdlib.Random.sample` as the `probs` option.
let sampleN = (t: t, n): array<float> => {
  let shape = getShape(T.normalize(t))
  Stdlib.Random.sample(shape.xs, {probs: shape.ys, size: n})
}

View File

@ -257,3 +257,7 @@ let toSparkline = (t: t, bucketCount): result<string, PointSetTypes.sparklineErr
->E.O2.fmap(Continuous.downsampleEquallyOverX(bucketCount))
->E.O2.toResult(PointSetTypes.CannotSparklineDiscrete)
->E.R2.fmap(r => Continuous.getShape(r).ys->Sparklines.create())
// Wraps a value in the `Discrete` constructor of `t`.
let makeDiscrete = (d): t => {
  Discrete(d)
}

// Wraps a value in the `Continuous` constructor of `t`.
let makeContinuous = (d): t => {
  Continuous(d)
}

// Wraps a value in the `Mixed` constructor of `t`.
let makeMixed = (d): t => {
  Mixed(d)
}

View File

@ -132,6 +132,25 @@ let stdev = t => T.get(t)->E.A.Floats.stdev
// Variance of the underlying samples, as computed by `E.A.Floats.variance`.
let variance = t => E.A.Floats.variance(T.get(t))

// Percentile `f` of the underlying samples, as computed by `E.A.Floats.percentile`.
let percentile = (t, f) => E.A.Floats.percentile(T.get(t), f)
// Builds a mixture of weighted sample sets with `intendedLength` samples.
//
// For every output position, a component is picked at random (weights are
// divided by their sum before being handed to the discrete sampler), and the
// sample at that same position of the picked component is used.
//
// Raises with "Mixture unreachable error" if any lookup fails.
// NOTE(review): a lookup fails when a picked component holds fewer than
// `intendedLength` samples -- confirm callers never pass such sample sets.
let mixture = (values: array<(t, float)>, intendedLength: int) => {
  let weightSum = E.A.Floats.sum(E.A2.fmap(values, E.Tuple2.second))

  // (component index as float, normalized weight) pairs for the discrete sampler.
  let indexedWeights = Belt.Array.mapWithIndex(values, (i, (_, weight)) => (
    E.I.toFloat(i),
    weight /. weightSum,
  ))

  // One randomly chosen component index per output position.
  let componentChoices =
    indexedWeights
    ->XYShape.T.fromZippedArray
    ->Discrete.make
    ->Discrete.sampleN(intendedLength)

  // Raw sample arrays of each component, in input order.
  let componentSamples = values->E.A2.fmap(E.Tuple2.first)->E.A2.fmap(T.get)

  let pickedSamples = Belt.Array.mapWithIndex(componentChoices, (position, choice) =>
    E.A.get(componentSamples, E.Float.toInt(choice))->E.O.bind(E.A.get(_, position))
  )

  let allPicked = E.A.O.openIfAllSome(pickedSamples)
  allPicked->E.O2.toExn("Mixture unreachable error")->T.make
}
// Keeps only the samples that are greater than or equal to `f`, then rebuilds via `T.make`.
let truncateLeft = (t, f) => {
  let kept = E.A2.filter(T.get(t), x => x >= f)
  T.make(kept)
}

// Keeps only the samples that are less than or equal to `f`, then rebuilds via `T.make`.
let truncateRight = (t, f) => {
  let kept = E.A2.filter(T.get(t), x => x <= f)
  T.make(kept)
}

View File

@ -220,6 +220,7 @@ module I = {
// Adds one to an int.
let increment = value => {
  value + 1
}
// Subtracts one from an int.
let decrement = value => {
  value - 1
}
// Delegates to `Js.Int.toString`.
let toString = value => Js.Int.toString(value)
// Delegates to `Js.Int.toFloat`.
let toFloat = value => Js.Int.toFloat(value)
}
exception Assertion(string)

View File

@ -38,3 +38,12 @@ module Logistic = {
@module external variance: (float, float) => float = "@stdlib/stats/base/dists/logistic/variance"
let variance = variance
}
// Binding to the JavaScript module "@stdlib/random/sample".
module Random = {
// Options record passed as the second argument of `sample`.
type sampleArgs = {
// NOTE(review): presumably one selection probability per element of the
// input array, in the same order -- confirm against the @stdlib docs.
probs: array<float>,
// NOTE(review): presumably the number of elements to draw -- confirm
// against the @stdlib docs.
size: int,
}
// External declaration: takes the array to draw from and the options record,
// and is typed here as returning an array of floats.
@module external sample: (array<float>, sampleArgs) => array<float> = "@stdlib/random/sample"
// Re-exposes the external under the same name as an ordinary let binding.
let sample = sample
}

View File

@ -14,6 +14,7 @@ module.exports = {
},
resolve: {
extensions: [".tsx", ".ts", ".js"],
fallback: { buffer: ["@stdlib/buffer"] },
},
output: {
filename: "bundle.js",