First pass at nested multimodals, still needs lots of cleanup
This commit is contained in:
parent
a9d52e2c5c
commit
eb0ffdc6c3
|
@ -148,6 +148,10 @@ module MathAdtToDistDst = {
|
|||
Ok(`Simple(`Triangular({low, medium, high})))
|
||||
| _ => Error("Wrong number of variables in triangle distribution");
|
||||
|
||||
/*let add: array(arg) => result(SymbolicDist.bigDist, string) =
|
||||
fun
|
||||
| */
|
||||
|
||||
let multiModal =
|
||||
(
|
||||
args: array(result(SymbolicDist.bigDist, string)),
|
||||
|
@ -158,12 +162,15 @@ module MathAdtToDistDst = {
|
|||
args
|
||||
|> E.A.fmap(
|
||||
fun
|
||||
| Ok(`Simple(n)) => Ok(n)
|
||||
| Ok(`Simple(d)) => Ok(`Simple(d))
|
||||
| Ok(`PointwiseCombination(dists)) => Ok(`PointwiseCombination(dists))
|
||||
| Error(e) => Error(e)
|
||||
| Ok(k) => Error(SymbolicDist.toString(k)),
|
||||
| _ => Error("Unexpected dist")
|
||||
);
|
||||
|
||||
let firstWithError = dists |> Belt.Array.getBy(_, Belt.Result.isError);
|
||||
let withoutErrors = dists |> E.A.fmap(E.R.toOption) |> E.A.O.concatSomes;
|
||||
|
||||
switch (firstWithError) {
|
||||
| Some(Error(e)) => Error(e)
|
||||
| None when withoutErrors |> E.A.length == 0 =>
|
||||
|
@ -186,12 +193,12 @@ module MathAdtToDistDst = {
|
|||
)
|
||||
|> E.A.O.concatSomes
|
||||
let outputs = Samples.T.fromSamples(samples);
|
||||
let pdf = outputs.shape |> E.O.bind(_,Distributions.Shape.T.toContinuous)
|
||||
let pdf = outputs.shape |> E.O.bind(_,Distributions.Shape.T.toContinuous);
|
||||
let shape = pdf |> E.O.fmap(pdf => {
|
||||
let _pdf = Distributions.Continuous.T.scaleToIntegralSum(~cache=None, ~intendedSum=1.0, pdf);
|
||||
let cdf = Distributions.Continuous.T.integral(~cache=None, _pdf);
|
||||
SymbolicDist.ContinuousShape.make(_pdf, cdf)
|
||||
})
|
||||
});
|
||||
switch(shape){
|
||||
| Some(s) => Ok(`Simple(`ContinuousShape(s)))
|
||||
| None => Error("Rendering did not work")
|
||||
|
@ -238,6 +245,7 @@ module MathAdtToDistDst = {
|
|||
let dists = possibleDists |> E.A.fmap(functionParser);
|
||||
multiModal(dists, weights);
|
||||
}
|
||||
//| Fn({name: "add", args}) => add(args)
|
||||
| Fn({name}) => Error(name ++ ": function not supported")
|
||||
| _ => {
|
||||
Error("This type not currently supported");
|
||||
|
@ -255,19 +263,32 @@ module MathAdtToDistDst = {
|
|||
| Object(_) => Error("Object not valid as top level")
|
||||
);
|
||||
|
||||
let run = (r): result(SymbolicDist.bigDist, string) =>
|
||||
r |> MathAdtCleaner.run |> topLevel;
|
||||
let run = (r): result(SymbolicDist.bigDist, string) => {
|
||||
let o = r |> MathAdtCleaner.run |> topLevel;
|
||||
Js.log2("parser output", o);
|
||||
o
|
||||
};
|
||||
};
|
||||
|
||||
let fromString = str => {
|
||||
/* We feed the user-typed string into Mathjs.parseMath,
|
||||
which returns a JSON with (hopefully) a single-element array.
|
||||
This array element is the top-level node of a nested-object tree
|
||||
representing the functions/arguments/values/etc. in the string.
|
||||
|
||||
The function MathJsonToMathJsAdt then recursively unpacks this JSON into a typed data structure we can use.
|
||||
Inside of this function, MathAdtToDistDst is called whenever a distribution function is encountered.
|
||||
*/
|
||||
let mathJsToJson = Mathjs.parseMath(str);
|
||||
let mathJsParse =
|
||||
E.R.bind(mathJsToJson, r =>
|
||||
E.R.bind(mathJsToJson, r => {
|
||||
Js.log2("parsed", r);
|
||||
switch (MathJsonToMathJsAdt.run(r)) {
|
||||
| Some(r) => Ok(r)
|
||||
| None => Error("MathJsParse Error")
|
||||
}
|
||||
);
|
||||
});
|
||||
|
||||
let value = E.R.bind(mathJsParse, MathAdtToDistDst.run);
|
||||
value;
|
||||
};
|
|
@ -47,12 +47,44 @@ type dist = [
|
|||
| `Cauchy(cauchy)
|
||||
| `Triangular(triangular)
|
||||
| `ContinuousShape(continuousShape)
|
||||
| `Float(float)
|
||||
| `Float(float) // Dirac delta at x. Practically useful only in the context of multimodals.
|
||||
];
|
||||
|
||||
type pointwiseAdd = array((dist, float));
|
||||
/* Build a tree.
|
||||
|
||||
type bigDist = [ | `Simple(dist) | `PointwiseCombination(pointwiseAdd)];
|
||||
Multiple operations possible:
|
||||
|
||||
- PointwiseSum(Scalar, Scalar)
|
||||
- PointwiseSum(WeightedDist, WeightedDist)
|
||||
- PointwiseProduct(Scalar, Scalar)
|
||||
- PointwiseProduct(Scalar, WeightedDist)
|
||||
- PointwiseProduct(WeightedDist, WeightedDist)
|
||||
|
||||
- IndependentVariableSum(WeightedDist, WeightedDist) [i.e., convolution]
|
||||
- IndependentVariableProduct(WeightedDist, WeightedDist) [i.e. distribution product]
|
||||
*/
|
||||
|
||||
type weightedDist = (float, dist);
|
||||
|
||||
type bigDistTree =
|
||||
/* | DistLeaf(dist) */
|
||||
/* | ScalarLeaf(float) */
|
||||
/* | PointwiseScalarDistProduct(DistLeaf(d), ScalarLeaf(s)) */
|
||||
| WeightedDistLeaf(weightedDist)
|
||||
| PointwiseNormalizedDistSum(array(bigDistTree));
|
||||
|
||||
let rec treeIntegral = item => {
|
||||
switch (item) {
|
||||
| WeightedDistLeaf((w, d)) => w
|
||||
| PointwiseNormalizedDistSum(childTrees) =>
|
||||
childTrees |> E.A.fmap(treeIntegral) |> E.A.Floats.sum
|
||||
};
|
||||
};
|
||||
|
||||
/* bigDist can either be a single distribution, or a
|
||||
PointwiseCombination, i.e. an array of (dist, weight) tuples */
|
||||
type bigDist = [ | `Simple(dist) | `PointwiseCombination(pointwiseAdd)]
|
||||
and pointwiseAdd = array((bigDist, float));
|
||||
|
||||
module ContinuousShape = {
|
||||
type t = continuousShape;
|
||||
|
@ -255,7 +287,6 @@ module GenericSimple = {
|
|||
| `Uniform({high}) => high
|
||||
| `Float(n) => n;
|
||||
|
||||
|
||||
/* This function returns a list of x's at which to evaluate the overall distribution (for rendering).
|
||||
This function is called separately for each individual distribution.
|
||||
|
||||
|
@ -267,17 +298,16 @@ module GenericSimple = {
|
|||
*/
|
||||
let interpolateXs =
|
||||
(~xSelection: [ | `Linear | `ByWeight]=`Linear, dist: dist, sampleCount) => {
|
||||
|
||||
switch (xSelection, dist) {
|
||||
| (`Linear, _) => E.A.Floats.range(min(dist), max(dist), sampleCount)
|
||||
| (`ByWeight, `Uniform(n)) =>
|
||||
// In `ByWeight mode, uniform distributions get special treatment because we need two x's
|
||||
// on either side for proper rendering (just left and right of the discontinuities).
|
||||
let dx = 0.00001 *. (n.high -. n.low);
|
||||
[|n.low -. dx, n.low +. dx, n.high -. dx, n.high +. dx|]
|
||||
[|n.low -. dx, n.low +. dx, n.high -. dx, n.high +. dx|];
|
||||
| (`ByWeight, _) =>
|
||||
let ys = E.A.Floats.range(minCdfValue, maxCdfValue, sampleCount)
|
||||
ys |> E.A.fmap(y => inv(y, dist))
|
||||
let ys = E.A.Floats.range(minCdfValue, maxCdfValue, sampleCount);
|
||||
ys |> E.A.fmap(y => inv(y, dist));
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -299,90 +329,190 @@ module GenericSimple = {
|
|||
module PointwiseAddDistributionsWeighted = {
|
||||
type t = pointwiseAdd;
|
||||
|
||||
let normalizeWeights = (dists: t) => {
|
||||
let total = dists |> E.A.fmap(snd) |> E.A.Floats.sum;
|
||||
dists |> E.A.fmap(((a, b)) => (a, b /. total));
|
||||
let normalizeWeights = (weightedDists: t) => {
|
||||
let total = weightedDists |> E.A.fmap(snd) |> E.A.Floats.sum;
|
||||
weightedDists |> E.A.fmap(((d, w)) => (d, w /. total));
|
||||
};
|
||||
|
||||
let pdf = (x: float, dists: t) =>
|
||||
dists
|
||||
|> E.A.fmap(((e, w)) => GenericSimple.pdf(x, e) *. w)
|
||||
let rec pdf = (x: float, weightedNormalizedDists: t) =>
|
||||
weightedNormalizedDists
|
||||
|> E.A.fmap(((d, w)) => {
|
||||
switch (d) {
|
||||
| `PointwiseCombination(ts) => pdf(x, ts) *. w
|
||||
| `Simple(d) => GenericSimple.pdf(x, d) *. w
|
||||
}
|
||||
})
|
||||
|> E.A.Floats.sum;
|
||||
|
||||
let min = (dists: t) =>
|
||||
dists |> E.A.fmap(d => d |> fst |> GenericSimple.min) |> E.A.min;
|
||||
// TODO: perhaps rename into minCdfX?
|
||||
// TODO: how should nonexistent min values be handled? They should never happen
|
||||
let rec min = (dists: t) =>
|
||||
dists
|
||||
|> E.A.fmap(((d, w)) => {
|
||||
switch (d) {
|
||||
| `PointwiseCombination(ts) => E.O.toExn("Dist has no min", min(ts))
|
||||
| `Simple(d) => GenericSimple.min(d)
|
||||
}
|
||||
})
|
||||
|> E.A.min;
|
||||
|
||||
let max = (dists: t) =>
|
||||
dists |> E.A.fmap(d => d |> fst |> GenericSimple.max) |> E.A.max;
|
||||
// TODO: perhaps rename into minCdfX?
|
||||
let rec max = (dists: t) =>
|
||||
dists
|
||||
|> E.A.fmap(((d, w)) => {
|
||||
switch (d) {
|
||||
| `PointwiseCombination(ts) => E.O.toExn("Dist has no max", max(ts))
|
||||
| `Simple(d) => GenericSimple.max(d)
|
||||
}
|
||||
})
|
||||
|> E.A.max;
|
||||
|
||||
let discreteShape = (dists: t, sampleCount: int) => {
|
||||
|
||||
/*let rec discreteShape = (dists: t, sampleCount: int) => {
|
||||
let discrete =
|
||||
dists
|
||||
|> E.A.fmap(((r, e)) =>
|
||||
r
|
||||
|> E.A.fmap(((x, w)) => {
|
||||
switch (d) {
|
||||
| `Float(d) => Some((d, w)) // if the distribution is just a number, then the weight is considered the y
|
||||
| _ => None
|
||||
}
|
||||
})
|
||||
|> E.A.O.concatSomes
|
||||
|> E.A.fmap(((x, y)) =>
|
||||
({xs: [|x|], ys: [|y|]}: DistTypes.xyShape)
|
||||
)
|
||||
// take an array of xyShapes and combine them together
|
||||
//* r
|
||||
|> (
|
||||
fun
|
||||
| `Float(r) => Some((r, e))
|
||||
| _ => None
|
||||
)
|
||||
)
|
||||
|> E.A.O.concatSomes
|
||||
|> E.A.fmap(((x, y)) =>
|
||||
({xs: [|x|], ys: [|y|]}: DistTypes.xyShape)
|
||||
)
|
||||
)*/
|
||||
|> Distributions.Discrete.reduce((+.));
|
||||
discrete;
|
||||
};
|
||||
};*/
|
||||
|
||||
let continuousShape = (dists: t, sampleCount: int) => {
|
||||
let xs =
|
||||
|
||||
let rec findContinuousXs = (dists: t, sampleCount: int) => {
|
||||
// we need to go through the tree of distributions and, for the continuous ones, find the xs at which
|
||||
// later, all distributions will get evaluated.
|
||||
|
||||
// we want to accumulate a set of xs.
|
||||
let xs: array(float) =
|
||||
dists
|
||||
|> E.A.fmap(r =>
|
||||
r
|
||||
|> fst
|
||||
|> GenericSimple.interpolateXs(
|
||||
~xSelection=`ByWeight,
|
||||
_,
|
||||
sampleCount / (dists |> E.A.length),
|
||||
)
|
||||
)
|
||||
|> E.A.concatMany;
|
||||
xs |> Array.fast_sort(compare);
|
||||
let ys = xs |> E.A.fmap(pdf(_, dists));
|
||||
XYShape.T.fromArrays(xs, ys) |> Distributions.Continuous.make(`Linear, _);
|
||||
|> E.A.fold_left((accXs, (d, w)) => {
|
||||
switch (d) {
|
||||
| `Simple(t) when (GenericSimple.contType(t) == `Discrete) => accXs
|
||||
| `Simple(d) => {
|
||||
let xs = GenericSimple.interpolateXs(~xSelection=`ByWeight, d, sampleCount)
|
||||
|
||||
E.A.append(accXs, xs)
|
||||
}
|
||||
| `PointwiseCombination(ts) => {
|
||||
let xs = findContinuousXs(ts, sampleCount);
|
||||
E.A.append(accXs, xs)
|
||||
}
|
||||
}
|
||||
}, [||]);
|
||||
xs
|
||||
};
|
||||
|
||||
let toShape = (dists: t, sampleCount: int) => {
|
||||
/* Accumulate (accContShapes, accDistShapes), each of which is an array of {xs, ys} shapes. */
|
||||
let rec accumulateContAndDiscShapes = (dists: t, continuousXs: array(float), currentWeight) => {
|
||||
let normalized = normalizeWeights(dists);
|
||||
let continuous =
|
||||
|
||||
normalized
|
||||
|> E.A.filter(((r, _)) => GenericSimple.contType(r) == `Continuous)
|
||||
|> continuousShape(_, sampleCount);
|
||||
let discrete =
|
||||
normalized
|
||||
|> E.A.filter(((r, _)) => GenericSimple.contType(r) == `Discrete)
|
||||
|> discreteShape(_, sampleCount);
|
||||
let shape =
|
||||
MixedShapeBuilder.buildSimple(~continuous=Some(continuous), ~discrete);
|
||||
|> E.A.fold_left(((accContShapes: array(DistTypes.xyShape), accDiscShapes: array(DistTypes.xyShape)), (d, w)) => {
|
||||
switch (d) {
|
||||
|
||||
| `Simple(`Float(x)) => {
|
||||
let ds: DistTypes.xyShape = {xs: [|x|], ys: [|w *. currentWeight|]};
|
||||
(accContShapes, E.A.append(accDiscShapes, [|ds|]))
|
||||
}
|
||||
|
||||
| `Simple(d) when (GenericSimple.contType(d) == `Continuous) => {
|
||||
let ys = continuousXs |> E.A.fmap(x => GenericSimple.pdf(x, d) *. w *. currentWeight);
|
||||
let cs = XYShape.T.fromArrays(continuousXs, ys);
|
||||
|
||||
(E.A.append(accContShapes, [|cs|]), accDiscShapes)
|
||||
}
|
||||
|
||||
| `Simple(d) => (accContShapes, accDiscShapes) // default -- should never happen
|
||||
|
||||
| `PointwiseCombination(ts) => {
|
||||
let (cs, ds) = accumulateContAndDiscShapes(ts, continuousXs, w *. currentWeight);
|
||||
(E.A.append(accContShapes, cs), E.A.append(accDiscShapes, ds))
|
||||
}
|
||||
}
|
||||
|
||||
}, ([||]: array(DistTypes.xyShape), [||]: array(DistTypes.xyShape)))
|
||||
};
|
||||
|
||||
/*
|
||||
We will assume that each dist (of t) in the multimodal has a total of one.
|
||||
We can therefore normalize the weights of the parts.
|
||||
|
||||
However, a multimodal can consist of both discrete and continuous shapes.
|
||||
These need to be added and collected individually.
|
||||
*/
|
||||
let toShape = (dists: t, sampleCount: int) => {
|
||||
let continuousXs = findContinuousXs(dists, sampleCount);
|
||||
continuousXs |> Array.fast_sort(compare);
|
||||
|
||||
let (contShapes, distShapes) = accumulateContAndDiscShapes(dists, continuousXs, 1.0);
|
||||
|
||||
let combinedContinuous = contShapes
|
||||
|> E.A.fold_left((shapeAcc: DistTypes.xyShape, shape: DistTypes.xyShape) => {
|
||||
let ys = E.A.fmapi((i, y) => y +. shape.ys[i], shapeAcc.ys);
|
||||
{xs: continuousXs, ys: ys}
|
||||
}, {xs: continuousXs, ys: Array.make(Array.length(continuousXs), 0.0)})
|
||||
|> Distributions.Continuous.make(`Linear);
|
||||
|
||||
let combinedDiscrete = Distributions.Discrete.reduce((+.), distShapes)
|
||||
|
||||
let shape = MixedShapeBuilder.buildSimple(~continuous=Some(combinedContinuous), ~discrete=combinedDiscrete);
|
||||
|
||||
shape |> E.O.toExt("");
|
||||
};
|
||||
|
||||
let toString = (dists: t) => {
|
||||
let rec toString = (dists: t): string => {
|
||||
let distString =
|
||||
dists
|
||||
|> E.A.fmap(d => GenericSimple.toString(fst(d)))
|
||||
|> Js.Array.joinWith(",");
|
||||
let weights =
|
||||
dists
|
||||
|> E.A.fmap(d =>
|
||||
snd(d) |> Js.Float.toPrecisionWithPrecision(~digits=2)
|
||||
|> E.A.fmap(((d, _)) =>
|
||||
switch (d) {
|
||||
| `Simple(d) => GenericSimple.toString(d)
|
||||
| `PointwiseCombination(ts: t) => ts |> toString
|
||||
}
|
||||
)
|
||||
|> Js.Array.joinWith(",");
|
||||
|
||||
// mm(normal(0,1), normal(1,2)) => "multimodal(normal(0,1), normal(1,2), )
|
||||
|
||||
let weights =
|
||||
dists
|
||||
|> E.A.fmap(((_, w)) =>
|
||||
Js.Float.toPrecisionWithPrecision(w, ~digits=2)
|
||||
)
|
||||
|> Js.Array.joinWith(",");
|
||||
|
||||
{j|multimodal($distString, [$weights])|j};
|
||||
};
|
||||
};
|
||||
|
||||
// assume that recursive pointwiseNormalizedDistSums are the only type of operation there is.
|
||||
// in the original, it was a list of (dist, weight) tuples. Now, it's a tree of (dist, weight) tuples, just that every
|
||||
// dist can be either a GenericSimple or another PointwiseAdd.
|
||||
|
||||
/*let toString = (r: bigDistTree) => {
|
||||
switch (r) {
|
||||
| WeightedDistLeaf((w, d)) => GenericWeighted.toString(w) // "normal "
|
||||
| PointwiseNormalizedDistSum(childTrees) => childTrees |> E.A.fmap(toString) |> Js.Array.joinWith("")
|
||||
}
|
||||
}*/
|
||||
|
||||
let toString = (r: bigDist) =>
|
||||
// we need to recursively create the string representation of the tree.
|
||||
r
|
||||
|> (
|
||||
fun
|
||||
|
|
Loading…
Reference in New Issue
Block a user