Merge pull request #15 from foretold-app/improvements/1102

Improvements/1102
This commit is contained in:
Ozzie Gooen 2020-03-03 13:29:26 +00:00 committed by GitHub
commit 7398877e55
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 240 additions and 81 deletions

View File

@ -45,18 +45,20 @@ module FieldString = {
module FieldNumber = {
[@react.component]
let make = (~field, ~label) => {
let make = (~field, ~label, ~min=0) => {
<Form.Field
field
render={({handleChange, error, value, validate}) =>
<Antd.Form.Item label={label |> E.ste}>
<Antd.InputNumber
value
onChange={e => {
e |> handleChange;
();
}}
onChange=handleChange
min
onBlur={_ => validate()}
parser={str => {
let a = str |> Js.Float.fromString |> int_of_float;
a < min ? min : a;
}}
/>
</Antd.Form.Item>
}
@ -66,19 +68,22 @@ module FieldNumber = {
module FieldFloat = {
[@react.component]
let make = (~field, ~label, ~className=Css.style([])) => {
let make =
(~field, ~label, ~className=Css.style([]), ~min=0., ~precision=2) => {
<Form.Field
field
render={({handleChange, error, value, validate}) =>
<Antd.Form.Item label={label |> E.ste}>
<Antd.InputFloat
value
onChange={e => {
e |> handleChange;
();
}}
precision
onChange=handleChange
onBlur={_ => validate()}
className
parser={str => {
let a = str |> Js.Float.fromString;
Js.Float.isNaN(a) ? min : a;
}}
/>
</Antd.Form.Item>
}
@ -440,16 +445,25 @@ let make = () => {
</Row>
<Row _type=`flex className=Styles.rows>
<Col span=4>
<FieldNumber field=FormConfig.SampleCount label="Sample Count" />
<FieldNumber
field=FormConfig.SampleCount
label="Sample Count"
min=100
/>
</Col>
<Col span=4>
<FieldNumber
field=FormConfig.OutputXYPoints
label="Output XY-points"
min=100
/>
</Col>
<Col span=4>
<FieldNumber field=FormConfig.TruncateTo label="Truncate To" />
<FieldNumber
field=FormConfig.TruncateTo
label="Truncate To"
min=10
/>
</Col>
</Row>
<Antd.Button

View File

@ -34,11 +34,15 @@ module Styles = {
module DemoDist = {
[@react.component]
let make = (~guesstimatorString: string) => {
let (ys, xs) = DistEditor.getPdfFromUserInput(guesstimatorString);
let (ys, xs, isEmpty) =
DistEditor.getPdfFromUserInput(guesstimatorString);
let continuous: DistTypes.xyShape = {xs, ys};
<Antd.Card title={"Distribution" |> E.ste}>
<div className=Styles.spacer />
<DistributionPlot continuous />
{isEmpty
? "Nothing to show. Try to change the distribution description."
|> E.ste
: <DistributionPlot continuous />}
</Antd.Card>;
};
};

View File

@ -68,14 +68,18 @@ export class CdfChartD3 {
* @returns {CdfChartD3}
*/
data(data) {
const continuousXs = _.get(data, 'continuous.xs', []);
const continuousYs = _.get(data, 'continuous.ys', []);
const discreteXs = _.get(data, 'discrete.xs', []);
const discreteYs = _.get(data, 'discrete.ys', []);
this.attrs.data = data;
this.attrs.data.continuous = data.continuous || {
xs: [],
ys: [],
this.attrs.data.continuous = {
xs: continuousXs,
ys: continuousYs,
};
this.attrs.data.discrete = data.discrete || {
xs: [],
ys: [],
this.attrs.data.discrete = {
xs: discreteXs,
ys: discreteYs,
};
return this;
}

View File

@ -1,3 +1,3 @@
[@bs.module "./main.js"]
external getPdfFromUserInput: string => (array(float), array(float)) =
external getPdfFromUserInput: string => (array(float), array(float), bool) =
"get_pdf_from_user_input";

View File

@ -1,11 +1,14 @@
// This module defines an abstract BinnedDistribution class, which
// should be implemented for each distribution. You need to decide
// how to bin the distribution (use _adabin unless there's a nicer
// way for your distr) and how to choose the distribution's support.
const _math = require("mathjs");
const math = _math.create(_math.all);
const jStat = require("jstat");
/**
* This module defines an abstract BinnedDistribution class, which
* should be implemented for each distribution. You need to decide
* how to bin the distribution (use _adabin unless there's a nicer
* way for your distr) and how to choose the distribution's support.
*/
math.import({
normal: jStat.normal,
beta: jStat.beta,
@ -14,6 +17,9 @@ math.import({
});
class BaseDistributionBinned {
/**
* @param args
*/
constructor(args) {
this._set_props();
this.max_bin_size = 0.5;
@ -30,11 +36,18 @@ class BaseDistributionBinned {
[this.pdf_vals, this.divider_pts] = this.bin();
}
/**
* this is hacky but class properties aren't always supported
* @private
*/
_set_props() {
// this is hacky but class properties aren't always supported
throw new Error("NotImplementedError");
}
/**
* @returns {(number[]|[*])[]}
* @private
*/
_adabin() {
let point = this.start_point;
let vals = [this.pdf_func(point)];
@ -78,6 +91,10 @@ class BaseDistributionBinned {
throw new Error("NotImplementedError");
}
/**
* @param args
* @returns {(any|(function(*=): *))[]}
*/
get_params_and_pdf_func(args) {
let args_str = args.toString() + ")";
let substr = this.name + ".pdf(x, " + args_str;
@ -95,11 +112,17 @@ class BaseDistributionBinned {
}
class NormalDistributionBinned extends BaseDistributionBinned {
/**
* @private
*/
_set_props() {
this.name = "normal";
this.param_names = ["mean", "std"];
}
/**
* @returns {(number|*)[]}
*/
get_bounds() {
return [
this.params.mean - 4 * this.params.std,
@ -107,22 +130,34 @@ class NormalDistributionBinned extends BaseDistributionBinned {
];
}
/**
* @returns {[[*], [*]]}
*/
bin() {
return this._adabin(this.params.std);
}
}
class UniformDistributionBinned extends BaseDistributionBinned {
/**
* @private
*/
_set_props() {
this.name = "uniform";
this.param_names = ["start_point", "end_point"];
this.num_bins = 200;
}
/**
* @returns {*[]}
*/
get_bounds() {
return [this.params.start_point, this.params.end_point];
}
/**
* @returns {(*[])[]}
*/
bin() {
let divider_pts = evenly_spaced_grid(
this.params.start_point,
@ -138,6 +173,9 @@ class UniformDistributionBinned extends BaseDistributionBinned {
}
class LogNormalDistributionBinned extends BaseDistributionBinned {
/**
* @private
*/
_set_props() {
this.name = "lognormal";
this.param_names = ["normal_mean", "normal_std"];
@ -145,6 +183,12 @@ class LogNormalDistributionBinned extends BaseDistributionBinned {
this.n_largest_bound_sample = 10;
}
/**
* @param samples
* @param n
* @returns {any}
* @private
*/
_nth_largest(samples, n) {
var largest_buffer = Array(n).fill(-Infinity);
for (const sample of samples) {
@ -159,6 +203,9 @@ class LogNormalDistributionBinned extends BaseDistributionBinned {
return largest_buffer[n - 1];
}
/**
* @returns {(*|any)[]}
*/
get_bounds() {
let samples = Array(this.n_bounds_samples)
.fill(0)
@ -169,11 +216,20 @@ class LogNormalDistributionBinned extends BaseDistributionBinned {
];
}
/**
* @returns {[[*], [*]]}
*/
bin() {
return this._adabin();
}
}
/**
* @param start
* @param stop
* @param numel
* @returns {*[]}
*/
function evenly_spaced_grid(start, stop, numel) {
return Array(numel)
.fill(0)

View File

@ -1,30 +1,57 @@
// The main algorithmic work is done by functions in this module.
// It also contains the main function, taking the user's string
// and returning pdf values and x's.
const _math = require("mathjs");
const bst = require("binary-search-tree");
const distrs = require("./distribution.js").distrs;
const parse = require("./parse.js");
const _math = require("mathjs");
const math = _math.create(_math.all);
const bst = require("binary-search-tree");
const NUM_MC_SAMPLES = 300;
const OUTPUT_GRID_NUMEL = 300;
/**
* The main algorithmic work is done by functions in this module.
* It also contains the main function, taking the user's string
* and returning pdf values and x's.
*/
/**
 * Builds `numel` equally spaced points beginning at `start`.
 * The step is `(stop - start) / numel`, so the grid is half-open:
 * `start` is included, `stop` itself is never emitted.
 *
 * @param start first grid point
 * @param stop exclusive upper end of the grid
 * @param numel number of points to generate
 * @returns {*[]} array of `numel` grid points
 */
function evenly_spaced_grid(start, stop, numel) {
  const grid = new Array(numel);
  const span = stop - start;
  for (let idx = 0; idx < numel; ++idx) {
    grid[idx] = start + (idx / numel) * span;
  }
  return grid;
}
/**
 * Turns distribution substrings such as "normal(0, 1)" into
 * distribution objects.
 *
 * Every substring is first parsed with `parse.get_distr_name_and_args`
 * into a `[name, args]` pair; then the class registered under `name`
 * in `distrs` is instantiated with `args`.
 *
 * @param substrings array of distribution strings
 * @returns {*} array of distribution instances, in input order
 */
function get_distributions(substrings) {
  // Parse everything up front so nothing is constructed if parsing fails.
  const parsed = substrings.map(parse.get_distr_name_and_args);
  const distributions = [];
  for (const entry of parsed) {
    const DistrClass = distrs[entry[0]];
    distributions.push(new DistrClass(entry[1]));
  }
  return distributions;
}
/**
* Update the binary search tree with bin points of
* deterministic_pdf transformed by transform_func
* (transform_func can be a stochastic function with parameters
* sampled from mc_distrs).
*
* @param transform_func
* @param deterministic_pdf
* @param mc_distrs
* @param track_idx
* @param num_mc_samples
* @param bst_pts_and_idxs
* @returns {(number)[]}
*/
function update_transformed_divider_points_bst(
transform_func,
deterministic_pdf,
@ -33,10 +60,6 @@ function update_transformed_divider_points_bst(
num_mc_samples,
bst_pts_and_idxs
) {
// update the binary search tree with bin points of
// deterministic_pdf transformed by transform_func
// (transform_func can be a stochastic function with parameters
// sampled from mc_distrs)
var transformed_pts = [];
var pdf_inner_idxs = [];
var factors = [];
@ -97,10 +120,17 @@ function update_transformed_divider_points_bst(
return [start_pt, end_pt];
}
/**
* Take the binary search tree with transformed bin points,
* and an array of pdf values associated with the bins,
* and return a pdf over an evenly spaced grid
*
* @param pdf_vals
* @param bst_pts_and_idxs
* @param output_grid
* @returns {[]}
*/
function get_final_pdf(pdf_vals, bst_pts_and_idxs, output_grid) {
// Take the binary search tree with transformed bin points,
// and an array of pdf values associated with the bins,
// and return a pdf over an evenly spaced grid
var offset = output_grid[1] / 2 - output_grid[0] / 2;
var active_intervals = new Map();
var active_endpoints = new bst.AVLTree();
@ -152,16 +182,25 @@ function get_final_pdf(pdf_vals, bst_pts_and_idxs, output_grid) {
return final_pdf_vals;
}
/**
* Entrypoint. Pass user input strings to this function,
* get the corresponding pdf values and input points back.
* If the pdf requires Monte Carlo (it contains a between-distr function)
* we first determine which distr to treat as deterministic
* and which to sample from. This is decided based on which
* choice gives the least variance.
*
* @param user_input_string
* @returns {([]|*[])[]}
*/
function get_pdf_from_user_input(user_input_string) {
// Entrypoint. Pass user input strings to this function,
// get the corresponding pdf values and input points back.
// If the pdf requires Monte Carlo (it contains a between-distr function)
// we first determine which distr to treat as deterministic
// and which to sample from. This is decided based on which
// choice gives the least variance.
try{
let parsed = parse.parse_initial_string(user_input_string);
let mm_args = parse.separate_mm_args(parsed.mm_args_string);
const is_mm = mm_args.distrs.length > 0;
if (!parsed.outer_string) return [[], [], true];
let tree = new bst.AVLTree();
let possible_start_pts = [];
let possible_end_pts = [];
@ -170,6 +209,7 @@ function get_pdf_from_user_input(user_input_string) {
let weights_sum = weights.reduce((a, b) => a + b);
weights = weights.map(x => x / weights_sum);
let n_iters = is_mm ? mm_args.distrs.length : 1;
for (let i = 0; i < n_iters; ++i) {
let distr_string = is_mm ? mm_args.distrs[i] : parsed.outer_string;
var [deterministic_pdf, mc_distrs] = choose_pdf_func(distr_string);
@ -186,13 +226,22 @@ function get_pdf_from_user_input(user_input_string) {
possible_end_pts.push(end_pt);
all_vals.push(deterministic_pdf.pdf_vals.map(x => x * weights[i]));
}
start_pt = Math.min(...possible_start_pts);
end_pt = Math.max(...possible_end_pts);
let output_grid = evenly_spaced_grid(start_pt, end_pt, OUTPUT_GRID_NUMEL);
let final_pdf_vals = get_final_pdf(all_vals, tree, output_grid);
return [final_pdf_vals, output_grid];
return [final_pdf_vals, output_grid, false];
} catch (e) {
return [[], [], true];
}
}
/**
* @param vals
* @returns {number}
*/
function variance(vals) {
var vari = 0;
for (let i = 0; i < vals[0].length; ++i) {
@ -209,14 +258,24 @@ function variance(vals) {
return vari;
}
/**
 * Separates one element from the rest of an array without mutating it.
 *
 * @param array source array (left untouched)
 * @param idx index of the element to pull out
 * @returns {*[]} a pair `[element, rest]`, where `rest` is a new array
 *   holding every other element in original order
 */
function pluck_from_array(array, idx) {
  const before = array.slice(0, idx);
  const after = array.slice(idx + 1);
  return [array[idx], before.concat(after)];
}
/**
* If distr_string requires MC, try all possible
* choices for the deterministic distribution,
* and pick the one with the least variance.
*
* @param distr_string
* @returns {(*|*[])[]|*[]}
*/
function choose_pdf_func(distr_string) {
// If distr_string requires MC, try all possible
// choices for the deterministic distribution,
// and pick the one with the least variance.
var variances = [];
let transform_func = get_grid_transform(distr_string);
let substrings = parse.get_distr_substrings(distr_string);
@ -259,6 +318,10 @@ function choose_pdf_func(distr_string) {
return [pdfs[best_idx], mc_distrs];
}
/**
* @param distr_string
* @returns {function(*): *}
*/
function get_grid_transform(distr_string) {
let substrings = parse.get_distr_substrings(distr_string);
let arg_strings = [];

View File

@ -1,7 +1,8 @@
// Functions for parsing/processing user input strings are here
const _math = require("mathjs");
const math = _math.create(_math.all);
// Functions for parsing/processing user input strings are here
const DISTR_REGEXS = [
/beta\(/g,
/(log)?normal\(/g,
@ -10,6 +11,11 @@ const DISTR_REGEXS = [
/uniform\(/g
];
/**
*
* @param user_input_string
* @returns {{mm_args_string: string, outer_string: string}}
*/
function parse_initial_string(user_input_string) {
let outer_output_string = "";
let mm_args_string = "";
@ -42,6 +48,10 @@ function parse_initial_string(user_input_string) {
};
}
/**
* @param mm_args_string
* @returns {{distrs: [], weights: string}}
*/
function separate_mm_args(mm_args_string) {
if (mm_args_string.endsWith(",")) {
mm_args_string = mm_args_string.slice(0, -1);
@ -68,6 +78,10 @@ function separate_mm_args(mm_args_string) {
};
}
/**
* @param distr_string
* @returns {[]}
*/
function get_distr_substrings(distr_string) {
let substrings = [];
for (let regex of DISTR_REGEXS) {
@ -92,6 +106,10 @@ function get_distr_substrings(distr_string) {
return substrings;
}
/**
* @param substr
* @returns {(string|*)[]}
*/
function get_distr_name_and_args(substr) {
let distr_name = "";
let args_str = "";