squiggle/src/distPlus/expressionTree/MathJsParser.re

416 lines
14 KiB
ReasonML
Raw Normal View History

type inputVars = Belt.Map.String.t(ExpressionTypes.ExpressionTree.node);
2020-03-24 17:48:46 +00:00
module MathJsonToMathJsAdt = {
  /* Typed view of the mathjs parse tree, limited to the node kinds that
     `run` below knows how to decode. */
  type arg =
    | Symbol(string)
    | Value(float)
    | Fn(fn)
    | Array(array(arg))
    | Object(Js.Dict.t(arg))
  and fn = {
    name: string,
    args: array(arg),
  };

  /* Decodes one mathjs JSON node, and recursively its children, into an
     `arg`. The node kind is read from the "mathjs" field. Unknown kinds are
     logged and yield None; children that decode to None are dropped from
     their parent's args / items / properties. */
  let rec run = (j: Js.Json.t) => {
    open Json.Decode;
    let nodeKind = field("mathjs", string, j);
    switch (nodeKind) {
    | "FunctionNode" =>
      /* Args are decoded before the name, as in the original ordering. */
      let decodedArgs = field("args", array(run), j);
      let fnName = optional(field("fn", field("name", string)), j);
      fnName
      |> E.O.fmap(name =>
           Fn({name, args: E.A.O.concatSomes(decodedArgs)})
         );
    | "OperatorNode" =>
      let decodedArgs = field("args", array(run), j);
      let opName = field("fn", string, j);
      Some(Fn({name: opName, args: E.A.O.concatSomes(decodedArgs)}));
    | "ConstantNode" =>
      /* A constant whose "value" is not a float decodes to None. */
      j |> optional(field("value", float)) |> E.O.fmap(r => Value(r))
    | "ParenthesisNode" => field("content", run, j)
    | "ObjectNode" =>
      let properties = field("properties", dict(run), j);
      let decodedEntries =
        properties
        |> Js.Dict.entries
        |> E.A.fmap(((key, value)) => value |> E.O.fmap(v => (key, v)))
        |> E.A.O.concatSomes;
      Some(Object(Js.Dict.fromArray(decodedEntries)));
    | "ArrayNode" =>
      let items = j |> field("items", array(run));
      Some(Array(E.A.O.concatSomes(items)));
    | "SymbolNode" =>
      let symbolName = j |> field("name", string);
      Some(Symbol(symbolName));
    | n =>
      Js.log3("Couldn't parse mathjs node", j, n);
      None;
    };
  };
};
2020-03-24 17:48:46 +00:00
module MathAdtToDistDst = {
open MathJsonToMathJsAdt;
/* Looks `sym` up in the map of known variables. Returns the stored
   expression node, or an Error when the name is not present. */
let handleSymbol = (inputVars: inputVars, sym) => {
  let lookedUp = inputVars |> Belt.Map.String.get(_, sym);
  switch (lookedUp) {
  | None => Error("Couldn't find.")
  | Some(node) => Ok(node)
  };
};
2020-03-24 17:48:46 +00:00
module MathAdtCleaner = {
  /* Expands a magnitude suffix: K/k = 1e3, M/m = 1e6, B/b = 1e9, T/t = 1e12.
     Returns None for any other symbol. */
  let transformWithSymbol = (f: float, s: string) =>
    switch (s) {
    | "K"
    | "k" => Some(f *. 1000.)
    | "M"
    | "m" => Some(f *. 1000000.)
    | "B"
    | "b" => Some(f *. 1000000000.)
    | "T"
    | "t" => Some(f *. 1000000000000.)
    | _ => None
    };

  /* Folds numeric shorthand into plain values throughout the tree:
       multiply(Value(f), Symbol(suffix))  ->  Value(f * magnitude)
       unaryMinus(Value(f))                ->  Value(-f)
     Arguments are cleaned first and the rules are applied to the cleaned
     node. Previously the rules were only tested before recursing, so a node
     whose arguments became values only after cleaning, e.g.
     unaryMinus(multiply(5, k)) or multiply(unaryMinus(5), k), was left
     unsimplified. A multiply whose symbol is not a known suffix is kept as a
     function call. */
  let rec run = node =>
    switch (node) {
    | Fn({name, args}) => {
        let cleanedArgs = args |> E.A.fmap(run);
        switch (name, cleanedArgs) {
        | ("multiply", [|Value(f), Symbol(s)|]) =>
          transformWithSymbol(f, s)
          |> E.O.fmap(r => Value(r))
          |> E.O.default(Fn({name, args: cleanedArgs}))
        | ("unaryMinus", [|Value(f)|]) => Value((-1.0) *. f)
        | _ => Fn({name, args: cleanedArgs})
        };
      }
    | Array(args) => Array(args |> E.A.fmap(run))
    | Symbol(s) => Symbol(s)
    | Value(v) => Value(v)
    | Object(v) =>
      Object(
        v
        |> Js.Dict.entries
        |> E.A.fmap(((key, value)) => (key, run(value)))
        |> Js.Dict.fromArray,
      )
    };
};
2020-03-24 00:04:48 +00:00
/* normal(mean, stdev): both arguments must already be plain values. */
let normal =
    (args: array(arg)): result(ExpressionTypes.ExpressionTree.node, string) =>
  switch (args) {
  | [|Value(mean), Value(stdev)|] =>
    Ok(`SymbolicDist(`Normal({mean, stdev})))
  | _ => Error("Wrong number of variables in normal distribution")
  };
2020-03-24 00:04:48 +00:00
/* lognormal(mu, sigma), or lognormal({mean, stdev}) / lognormal({mu, sigma}).
   In the object form, a mean/stdev pair takes precedence over mu/sigma when
   both are present. */
let lognormal =
    (args: array(arg)): result(ExpressionTypes.ExpressionTree.node, string) =>
  switch (args) {
  | [|Value(mu), Value(sigma)|] =>
    Ok(`SymbolicDist(`Lognormal({mu, sigma})))
  | [|Object(fields)|] => {
      let lookup = key => Js.Dict.get(fields, key);
      switch (lookup("mean"), lookup("stdev"), lookup("mu"), lookup("sigma")) {
      | (Some(Value(mean)), Some(Value(stdev)), _, _) =>
        Ok(
          `SymbolicDist(
            SymbolicDist.Lognormal.fromMeanAndStdev(mean, stdev),
          ),
        )
      | (_, _, Some(Value(mu)), Some(Value(sigma))) =>
        Ok(`SymbolicDist(`Lognormal({mu, sigma})))
      | _ => Error("Lognormal distribution would need mean and stdev")
      };
    }
  | _ => Error("Wrong number of variables in lognormal distribution")
  };
2020-03-24 00:04:48 +00:00
2020-07-02 17:12:03 +00:00
/* `low to high`: builds a distribution from a 90% credible interval.
   - low <= 0 and low < high: a normal distribution
   - 0 < low < high: a lognormal distribution
   - otherwise, for two values: an error, since low must be below high
   Anything other than two plain values is rejected. The fallback message
   used to be copy-pasted from `lognormal` and named the wrong construct. */
let to_: array(arg) => result(ExpressionTypes.ExpressionTree.node, string) =
  fun
  | [|Value(low), Value(high)|] when low <= 0.0 && low < high =>
    Ok(`SymbolicDist(SymbolicDist.Normal.from90PercentCI(low, high)))
  | [|Value(low), Value(high)|] when low < high =>
    Ok(`SymbolicDist(SymbolicDist.Lognormal.from90PercentCI(low, high)))
  | [|Value(_), Value(_)|] =>
    Error("Low value must be less than high value.")
  | _ => Error("Wrong number of variables in 'to' expression");
2020-03-24 00:04:48 +00:00
/* uniform(low, high): both arguments must already be plain values.
   The fallback message used to be copy-pasted from `lognormal`; it now names
   the uniform distribution. */
let uniform:
  array(arg) => result(ExpressionTypes.ExpressionTree.node, string) =
  fun
  | [|Value(low), Value(high)|] =>
    Ok(`SymbolicDist(`Uniform({low, high})))
  | _ => Error("Wrong number of variables in uniform distribution");
2020-07-02 17:12:03 +00:00
/* beta(alpha, beta): both arguments must already be plain values.
   The fallback message used to be copy-pasted from `lognormal`; it now names
   the beta distribution. */
let beta: array(arg) => result(ExpressionTypes.ExpressionTree.node, string) =
  fun
  | [|Value(alpha), Value(beta)|] =>
    Ok(`SymbolicDist(`Beta({alpha, beta})))
  | _ => Error("Wrong number of variables in beta distribution");
/* exponential(rate): the single argument must already be a plain value. */
let exponential =
    (args: array(arg)): result(ExpressionTypes.ExpressionTree.node, string) =>
  switch (args) {
  | [|Value(rate)|] => Ok(`SymbolicDist(`Exponential({rate: rate})))
  | _ => Error("Wrong number of variables in Exponential distribution")
  };
/* cauchy(local, scale): both arguments must already be plain values. */
let cauchy =
    (args: array(arg)): result(ExpressionTypes.ExpressionTree.node, string) =>
  switch (args) {
  | [|Value(local), Value(scale)|] =>
    Ok(`SymbolicDist(`Cauchy({local, scale})))
  | _ => Error("Wrong number of variables in cauchy distribution")
  };
/* triangular(low, medium, high): three plain values that must be strictly
   increasing. */
let triangular =
    (args: array(arg)): result(ExpressionTypes.ExpressionTree.node, string) =>
  switch (args) {
  | [|Value(low), Value(medium), Value(high)|] => {
      let strictlyIncreasing = low < medium && medium < high;
      strictlyIncreasing
        ? Ok(`SymbolicDist(`Triangular({low, medium, high})))
        : Error("Triangular values must be increasing order");
    }
  | _ => Error("Wrong number of variables in triangle distribution")
  };
2020-03-25 15:12:39 +00:00
/* Builds a mixture from already-parsed components.
   - If any component failed to parse, the first such error is returned.
   - With no components at all, an error is returned.
   - Otherwise each component is vertically scaled by its weight (1.0 when
     no weight exists at that index), the scaled components are added
     pointwise from left to right, and the sum is wrapped in `Normalize. */
let multiModal =
    (
      args: array(result(ExpressionTypes.ExpressionTree.node, string)),
      weights: option(array(float)),
    ) => {
  let givenWeights = weights |> E.O.default([||]);
  let weightFor = index =>
    givenWeights |> E.A.get(_, index) |> E.O.default(1.0);

  let firstFailure = args |> Belt.Array.getBy(_, Belt.Result.isError);
  let parsed = args |> E.A.fmap(E.R.toOption) |> E.A.O.concatSomes;

  switch (firstFailure) {
  | Some(Error(e)) => Error(e)
  | Some(Ok(_))
  | None =>
    if (E.A.length(parsed) == 0) {
      Error("Multimodals need at least one input");
    } else {
      let scaled =
        parsed
        |> E.A.fmapi((index, dist) =>
             `VerticalScaling((
               `Multiply,
               dist,
               `SymbolicDist(`Float(weightFor(index))),
             ))
           );
      /* Non-empty here, so taking element 0 as the fold seed is safe. */
      let seed = E.A.unsafe_get(scaled, 0);
      let remaining = scaled |> Js.Array.sliceFrom(1);
      let pointwiseSum =
        remaining
        |> E.A.fold_left(
             (acc, x) => `PointwiseCombination((`Add, acc, x)),
             seed,
           );
      Ok(`Normalize(pointwiseSum));
    }
  };
};
// let arrayParser =
// (args: array(arg))
// : result(ExpressionTypes.ExpressionTree.node, string) => {
// let samples =
// args
// |> E.A.fmap(
// fun
// | Value(n) => Some(n)
// | _ => None,
// )
// |> E.A.O.concatSomes;
// let outputs = Samples.T.fromSamples(samples);
// let pdf =
// outputs.shape |> E.O.bind(_, Shape.T.toContinuous);
// let shape =
// pdf
// |> E.O.fmap(pdf => {
// let _pdf = Continuous.T.normalize(pdf);
// let cdf = Continuous.T.integral(~cache=None, _pdf);
// SymbolicDist.ContinuousShape.make(_pdf, cdf);
// });
// switch (shape) {
// | Some(s) => Ok(`SymbolicDist(`ContinuousShape(s)))
// | None => Error("Rendering did not work")
// };
// };
2020-07-01 22:05:35 +00:00
/* Turns an operator / helper-function call whose operands have already been
   parsed into the matching expression-tree node. If any operand is an Error,
   that error is what E.A.R.firstErrorOrOpen hands back and no node is built.
   Names or argument shapes not listed below yield
   "This type not currently supported". */
let operationParser =
    (
      name: string,
      args: array(result(ExpressionTypes.ExpressionTree.node, string)),
    ) => {
  let algebraic = (op, l, r) => Ok(`AlgebraicCombination((op, l, r)));
  let pointwise = (op, l, r) => Ok(`PointwiseCombination((op, l, r)));
  /* Shared shape of every two-operand operator: build on exactly two
     operands, otherwise fail with the operator-specific message. */
  let binary = (build, failure, operands) =>
    switch (operands) {
    | [|l, r|] => build(l, r)
    | _ => Error(failure)
    };

  E.A.R.firstErrorOrOpen(args)
  |> E.R.bind(_, operands =>
       switch (name) {
       | "add" =>
         binary(algebraic(`Add), "Addition needs two operands", operands)
       | "subtract" =>
         binary(
           algebraic(`Subtract),
           "Subtraction needs two operands",
           operands,
         )
       | "multiply" =>
         binary(
           algebraic(`Multiply),
           "Multiplication needs two operands",
           operands,
         )
       | "dotMultiply" =>
         binary(
           pointwise(`Multiply),
           "Dotwise multiplication needs two operands",
           operands,
         )
       | "rightLogShift" =>
         binary(
           pointwise(`Add),
           "Dotwise addition needs two operands",
           operands,
         )
       | "divide" =>
         binary(algebraic(`Divide), "Division needs two operands", operands)
       | "pow" => Error("Exponentiation is not yet supported.")
       | "leftTruncate" =>
         switch (operands) {
         | [|d, `SymbolicDist(`Float(lc))|] =>
           Ok(`Truncate((Some(lc), None, d)))
         | _ =>
           Error(
             "leftTruncate needs two arguments: the expression and the cutoff",
           )
         }
       | "rightTruncate" =>
         switch (operands) {
         | [|d, `SymbolicDist(`Float(rc))|] =>
           Ok(`Truncate((None, Some(rc), d)))
         | _ =>
           Error(
             "rightTruncate needs two arguments: the expression and the cutoff",
           )
         }
       | "truncate" =>
         switch (operands) {
         | [|d, `SymbolicDist(`Float(lc)), `SymbolicDist(`Float(rc))|] =>
           Ok(`Truncate((Some(lc), Some(rc), d)))
         | _ =>
           Error(
             "truncate needs three arguments: the expression and both cutoffs",
           )
         }
       | "pdf" =>
         switch (operands) {
         | [|d, `SymbolicDist(`Float(v))|] =>
           Ok(`FloatFromDist((`Pdf(v), d)))
         | _ => Error("This type not currently supported")
         }
       | "cdf" =>
         switch (operands) {
         | [|d, `SymbolicDist(`Float(v))|] =>
           Ok(`FloatFromDist((`Cdf(v), d)))
         | _ => Error("This type not currently supported")
         }
       | "inv" =>
         switch (operands) {
         | [|d, `SymbolicDist(`Float(v))|] =>
           Ok(`FloatFromDist((`Inv(v), d)))
         | _ => Error("This type not currently supported")
         }
       | "mean" =>
         switch (operands) {
         | [|d|] => Ok(`FloatFromDist((`Mean, d)))
         | _ => Error("This type not currently supported")
         }
       | "sample" =>
         switch (operands) {
         | [|d|] => Ok(`FloatFromDist((`Sample, d)))
         | _ => Error("This type not currently supported")
         }
       | _ => Error("This type not currently supported")
       }
     );
};
2020-07-01 22:05:35 +00:00
2020-07-01 22:47:49 +00:00
/* Dispatches a function/operator call by name.
   - Distribution constructors (normal, lognormal, ...) receive the raw,
     unparsed `args`.
   - "mm" (multimodal): if the last argument is an Array, its Value entries
     are taken as weights (non-Value entries are dropped) and the remaining
     arguments are parsed as the components; otherwise every argument is a
     component.
   - Operators and helper functions get their arguments parsed with
     `nodeParser` and are handed to `operationParser`.
   - Any other name is an error.
   `nodeParser` is passed in because it is defined later and calls back into
   this function. A leftover debug statement that logged the `parseArgs`
   thunk itself (not its result) on every call has been removed. */
let functionParser = (nodeParser, name, args) => {
  /* Kept lazy so arguments are only parsed for the operator branch. */
  let parseArgs = () => args |> E.A.fmap(nodeParser);
  switch (name) {
  | "normal" => normal(args)
  | "lognormal" => lognormal(args)
  | "uniform" => uniform(args)
  | "beta" => beta(args)
  | "to" => to_(args)
  | "exponential" => exponential(args)
  | "cauchy" => cauchy(args)
  | "triangular" => triangular(args)
  | "mm" =>
    let weights =
      args
      |> E.A.last
      |> E.O.bind(
           _,
           fun
           | Array(values) => Some(values)
           | _ => None,
         )
      |> E.O.fmap(o =>
           o
           |> E.A.fmap(
                fun
                | Value(r) => Some(r)
                | _ => None,
              )
           |> E.A.O.concatSomes
         );
    /* When a weights array is present it is not itself a component. */
    let possibleDists =
      E.O.isSome(weights)
        ? Belt.Array.slice(args, ~offset=0, ~len=E.A.length(args) - 1)
        : args;
    let dists = possibleDists |> E.A.fmap(nodeParser);
    multiModal(dists, weights);
  | "add"
  | "subtract"
  | "multiply"
  | "dotMultiply"
  | "rightLogShift"
  | "divide"
  | "pow"
  | "leftTruncate"
  | "rightTruncate"
  | "truncate"
  | "mean"
  | "inv"
  | "sample"
  | "cdf"
  | "pdf" => operationParser(name, parseArgs())
  | n => Error(n ++ "(...) is not currently supported")
  };
};
2020-03-24 17:48:46 +00:00
/* Converts one cleaned ADT node into an expression-tree node:
   values become float distributions, symbols are resolved against
   `inputVars`, and function calls go through `functionParser`, which
   recurses back into this parser for nested arguments. Arrays and objects
   are not valid here. */
let rec nodeParser = (inputVars, node) =>
  switch (node) {
  | Value(f) => Ok(`SymbolicDist(`Float(f)))
  | Symbol(s) => handleSymbol(inputVars, s)
  | Fn({name, args}) => functionParser(nodeParser(inputVars), name, args)
  | Array(_)
  | Object(_) => Error("This type not currently supported")
  };
/* Entry point for the root of the tree. Values and function calls are
   parsed normally, a bare symbol is resolved against `inputVars`, and
   arrays / objects are rejected at the top level. */
let topLevel = (inputVars, node) =>
  switch (node) {
  | Symbol(s) => handleSymbol(inputVars, s)
  | Value(_)
  | Fn(_) => nodeParser(inputVars, node)
  | Array(_) => Error("Array not valid as top level")
  | Object(_) => Error("Object not valid as top level")
  };
2020-03-24 00:04:48 +00:00
/* Cleans the ADT (numeric shorthand folding) and then parses it from the
   top level into an expression-tree node. */
let run =
    (inputVars, r): result(ExpressionTypes.ExpressionTree.node, string) => {
  let cleaned = MathAdtCleaner.run(r);
  topLevel(inputVars, cleaned);
};
2020-03-24 17:48:46 +00:00
};
2020-07-19 13:21:47 +00:00
/* The MathJs parser has no '.+' operator, but we want one as the natural
   companion of '.*'. As a workaround every '.+' in the input is rewritten to
   '>>>', which arrives from the parser as "rightLogShift" and is treated as
   pointwise addition. We don't expect to need the real log-shift operator
   anytime soon, so the tradeoff seems fine. */
let pointwiseToRightLogShift = str =>
  Js.String.replaceByRe([%re "/\.\+/g"], ">>>", str);
/* Parses a user-typed expression into an expression-tree node.
   Pipeline:
     1. '.+' is rewritten to '>>>' so the mathjs parser accepts it.
     2. Mathjs.parseMath produces the parse tree as JSON (wrapped in a
        result).
     3. MathJsonToMathJsAdt.run decodes that JSON into the typed `arg` ADT;
        a None there becomes Error("MathJsParse Error").
     4. MathAdtToDistDst.run cleans the ADT and converts it, resolving
        symbols against `inputVars`.
   The first failing step determines the returned Error. A leftover debug
   statement that logged the intermediate ADT on every call has been
   removed. */
let fromString2 = (inputVars: inputVars, str) => {
  let mathJsToJson = str |> pointwiseToRightLogShift |> Mathjs.parseMath;
  let mathJsParse =
    E.R.bind(mathJsToJson, r =>
      switch (MathJsonToMathJsAdt.run(r)) {
      | Some(r) => Ok(r)
      | None => Error("MathJsParse Error")
      }
    );
  E.R.bind(mathJsParse, MathAdtToDistDst.run(inputVars));
};
/* Same as fromString2, with the expression string as the first argument. */
let fromString = (str, vars: inputVars) => vars |> fromString2(_, str);