Compare commits
33 Commits
5c0294b4b1
...
7834c3baae
Author | SHA1 | Date | |
---|---|---|---|
7834c3baae | |||
5473a6aeda | |||
90b8804884 | |||
8675d98784 | |||
249a1ff434 | |||
1a3099b7e4 | |||
b9f64ec37b | |||
c19738709a | |||
50fb88db7c | |||
3befb4af4d | |||
cf76c92803 | |||
2d5378fd53 | |||
37a2dab610 | |||
608cbb2f68 | |||
25a27f7fc4 | |||
7ce4658d30 | |||
13194bc3ca | |||
710eb4267b | |||
4708c6f198 | |||
4959255947 | |||
4c4d053ab9 | |||
3950946d68 | |||
351f4c584d | |||
4983699308 | |||
ce119253b7 | |||
90e48d2249 | |||
fb21e4baa6 | |||
23bd623b66 | |||
fa73c33c27 | |||
2cddf557bf | |||
ffec4663fc | |||
e9bfd1f2ed | |||
dd8a1806bd |
BIN
C/out/samples
BIN
C/out/samples
Binary file not shown.
26
C/perf.txt
26
C/perf.txt
|
@ -1,26 +0,0 @@
|
|||
Samples: 884 of event 'cycles', Event count (approx.): 551167949
|
||||
Overhead Command Shared Object Symbol
|
||||
35.32% samples samples [.] xorshift32
|
||||
14.09% samples libgomp.so.1.0.0 [.] 0x000000000001d2ea
|
||||
12.04% samples libgomp.so.1.0.0 [.] 0x000000000001d132
|
||||
11.53% samples samples [.] mixture._omp_fn.0
|
||||
4.55% samples libm-2.31.so [.] __sin_fma
|
||||
4.24% samples samples [.] rand_0_to_1
|
||||
3.77% samples samples [.] random_to
|
||||
3.03% samples libm-2.31.so [.] __logf_fma
|
||||
1.61% samples libm-2.31.so [.] __expf_fma
|
||||
1.54% samples samples [.] split_array_sum._omp_fn.0
|
||||
1.38% samples samples [.] random_uniform
|
||||
0.94% samples samples [.] ur_normal
|
||||
0.91% samples libm-2.31.so [.] __ieee754_log_fma
|
||||
0.74% samples libgomp.so.1.0.0 [.] 0x000000000001d13d
|
||||
0.52% samples samples [.] sample_0
|
||||
0.41% samples libm-2.31.so [.] __sqrtf_finite@GLIBC_2.15
|
||||
0.38% samples samples [.] sample_1
|
||||
0.36% samples libgomp.so.1.0.0 [.] 0x000000000001d139
|
||||
0.36% samples libgomp.so.1.0.0 [.] 0x000000000001d2f5
|
||||
0.22% samples [kernel.kallsyms] [k] native_queued_spin_lock_slowpath
|
||||
0.18% samples [kernel.kallsyms] [k] _raw_spin_lock_irq
|
||||
0.18% samples samples [.] random_lognormal
|
||||
0.17% samples libgomp.so.1.0.0 [.] 0x000000000001d2f1
|
||||
|
BIN
C/perf/perf.data
Normal file
BIN
C/perf/perf.data
Normal file
Binary file not shown.
25
C/perf/perf.txt
Normal file
25
C/perf/perf.txt
Normal file
|
@ -0,0 +1,25 @@
|
|||
Samples: 806 of event 'cycles', Event count (approx.): 576095767
|
||||
Overhead Command Shared Object Symbol
|
||||
36.75% samples samples [.] xorshift32
|
||||
13.69% samples libgomp.so.1.0.0 [.] 0x000000000001d3ca
|
||||
12.84% samples samples [.] mixture._omp_fn.0
|
||||
10.70% samples libgomp.so.1.0.0 [.] 0x000000000001d212
|
||||
5.02% samples samples [.] rand_0_to_1
|
||||
4.05% samples libm-2.31.so [.] __sin_fma
|
||||
3.11% samples libm-2.31.so [.] __logf_fma
|
||||
2.79% samples samples [.] split_array_sum._omp_fn.0
|
||||
1.91% samples samples [.] random_to
|
||||
1.74% samples samples [.] random_uniform
|
||||
0.87% samples libm-2.31.so [.] __ieee754_log_fma
|
||||
0.81% samples samples [.] ur_normal
|
||||
0.74% samples libm-2.31.so [.] __expf_fma
|
||||
0.55% samples libgomp.so.1.0.0 [.] 0x000000000001d3d1
|
||||
0.53% samples libgomp.so.1.0.0 [.] 0x000000000001d21d
|
||||
0.35% samples samples [.] random_normal
|
||||
0.30% samples libm-2.31.so [.] __sqrtf
|
||||
0.28% samples samples [.] random_lognormal
|
||||
0.25% samples samples [.] sample_0
|
||||
0.24% samples [kernel.kallsyms] [k] 0xffffffffae72e205
|
||||
0.22% samples libgomp.so.1.0.0 [.] 0x000000000001d219
|
||||
0.18% samples [kernel.kallsyms] [k] 0xffffffffae2c2b83
|
||||
0.18% samples [kernel.kallsyms] [k] 0xffffffffae4f4da7
|
23
README.md
23
README.md
|
@ -25,15 +25,17 @@ The name of this repository is a pun on two meanings of "time to": "how much tim
|
|||
| Language | Time | Lines of code |
|
||||
|-----------------------------|-----------|---------------|
|
||||
| C (optimized, 16 threads) | 5ms | 249 |
|
||||
| squiggle.c | 37ms | 54 |
|
||||
| squiggle.c | 37ms | 54* |
|
||||
| Nim | 38ms | 84 |
|
||||
| Lua (LuaJIT) | 68ms | 82 |
|
||||
| OCaml (flambda) | 164ms | 123 |
|
||||
| Lua | 278ms | 82 |
|
||||
| C (naïve implementation) | 292ms | 149 |
|
||||
| Javascript (NodeJS) | 732ms | 69 |
|
||||
| Squiggle | 1,536s | 14* |
|
||||
| SquigglePy | 1.602s | 18* |
|
||||
| R | 7,000s | 49 |
|
||||
| Gavin Howard's bc | 15.9s | 59 |
|
||||
| Python (CPython) | 16,641s | 56 |
|
||||
|
||||
Time measurements taken with the [time](https://man7.org/linux/man-pages/man1/time.1.html) tool, using 1M samples. But different implementations use different algorithms and, occasionally, different time measuring methodologies (for the C, Nim and Lua implementations, I run the program 100 times and take the mean). Their speed was also measured under different loads in my machine. So I think that these time estimates are accurate within maybe ~2x or so.
|
||||
|
@ -118,17 +120,32 @@ sudo ln -sf luajit-2.1.0-beta3 /usr/local/bin/luajit
|
|||
|
||||
Overall I'm thinking that a combination of lua at least for scripting and ¿nim/C/tbd? for more serious programs could be quite powerful.
|
||||
|
||||
### OCaml
|
||||
|
||||
OCaml was like meeting an old and forgotten friend. I found its syntax a bit clunky, but could get accustomed to it. Its list matching is nice, O(n) list element accessing, not so much. Honestly, I wanted to really like it, but I'm not sure yet. And it's *slow* compared to C.
|
||||
|
||||
I restricted myself to using the standard library, but it's likely that using Jane Street's Base or Core would have produced less clunky code.
|
||||
|
||||
### bc
|
||||
|
||||
The beautiful thing about bc is that it's an arbitrary precision calculator:
|
||||
|
||||
- it's not going to get floating point overflows, unlike practically everything else. Try `1000000001.0 ** 1000000.0` in OCaml, and you will get infinity, try p(1000000000.0, 1000000.0) and you will get a large power of 10 in bc.
|
||||
- you can always trade get more precision (at the cost of longer running times). Could be useful if you were working with tricky long tails.
|
||||
|
||||
I decided to go with [Gavin Howard's bc](https://git.gavinhoward.com/gavin/bc), because I've been following the guy some time, and I respect him. It also had some crucial extensions, like a random number generator and
|
||||
|
||||
### Overall thoughts
|
||||
|
||||
Overall I don't think that this is a fair comparison of the languages intrinsically, because I'm just differentially good at them, because I've chosen to put more effort in ones than in others. But it is still useful to me personally, and perhaps mildly informative to others.
|
||||
|
||||
## Languages I may add later
|
||||
|
||||
- [x] bc (the standard posix calculator)
|
||||
- [x] OCaml
|
||||
- [ ] PyMC
|
||||
- [ ] bc (the standard posix calculator)
|
||||
- [ ] Zig
|
||||
- [ ] Rust
|
||||
- [ ] OCaml
|
||||
- [ ] Forth
|
||||
- [ ] Julia (TuringML, MCHammer)
|
||||
- [ ] Lisp
|
||||
|
|
27
bc/comments_stripped/estimate.bc
Normal file
27
bc/comments_stripped/estimate.bc
Normal file
|
@ -0,0 +1,27 @@
|
|||
p_a = 0.8
|
||||
p_b = 0.5
|
||||
p_c = p_a * p_b
|
||||
weights[0] = 1 - p_c
|
||||
weights[1] = p_c / 2
|
||||
weights[2] = p_c / 4
|
||||
weights[3] = p_c / 4
|
||||
define mixture(){
|
||||
p = sample_unit_uniform()
|
||||
if(p <= weights[0]){
|
||||
return 0
|
||||
}
|
||||
if(p <= (weights[0] + weights[1])){
|
||||
return 1
|
||||
}
|
||||
if(p<= (weights[0] + weights[1] + weights[2])){
|
||||
return sample_to(1, 3)
|
||||
}
|
||||
return sample_to(2, 10)
|
||||
}
|
||||
n_samples = 1000000
|
||||
sum=0
|
||||
for(i=0; i < n_samples; i++){
|
||||
sum += mixture()
|
||||
}
|
||||
sum/n_samples
|
||||
halt
|
32
bc/comments_stripped/squiggle.bc
Normal file
32
bc/comments_stripped/squiggle.bc
Normal file
|
@ -0,0 +1,32 @@
|
|||
scale = 16
|
||||
pi = 4 * atan(1)
|
||||
normal90confidence=1.64485362695
|
||||
define sample_unit_uniform(){
|
||||
return rand()/maxrand()
|
||||
}
|
||||
define sample_unit_normal(){
|
||||
u1=sample_unit_uniform()
|
||||
u2=sample_unit_uniform()
|
||||
z = sqrt(-2 * l(u1)) * sin(2 * pi * u2)
|
||||
return z
|
||||
}
|
||||
define sample_uniform(min, max){
|
||||
return (min + sample_unit_uniform()*(max-min))
|
||||
}
|
||||
define sample_normal(mean, sigma){
|
||||
return (mean + sigma * sample_unit_normal())
|
||||
}
|
||||
define sample_lognormal(logmean, logstd){
|
||||
return e(sample_normal(logmean, logstd))
|
||||
}
|
||||
define sample_normal_from_90_confidence_interval(low, high){
|
||||
mean = (high + low) / 2
|
||||
std = (high - low) / (2 * normal90confidence)
|
||||
return sample_normal(mean, std)
|
||||
}
|
||||
define sample_to(low, high){
|
||||
loglow = l(low)
|
||||
loghigh = l(high)
|
||||
return e(sample_normal_from_90_confidence_interval(loglow, loghigh))
|
||||
}
|
||||
|
34
bc/estimate.bc
Normal file
34
bc/estimate.bc
Normal file
|
@ -0,0 +1,34 @@
|
|||
p_a = 0.8
|
||||
p_b = 0.5
|
||||
p_c = p_a * p_b
|
||||
|
||||
weights[0] = 1 - p_c
|
||||
weights[1] = p_c / 2
|
||||
weights[2] = p_c / 4
|
||||
weights[3] = p_c / 4
|
||||
|
||||
/* We'll have to define the mixture manually */
|
||||
define mixture(){
|
||||
p = sample_unit_uniform()
|
||||
if(p <= weights[0]){
|
||||
return 0
|
||||
}
|
||||
if(p <= (weights[0] + weights[1])){
|
||||
return 1
|
||||
}
|
||||
if(p<= (weights[0] + weights[1] + weights[2])){
|
||||
return sample_to(1, 3)
|
||||
}
|
||||
return sample_to(2, 10)
|
||||
}
|
||||
|
||||
/* n_samples = 1000000 */
|
||||
n_samples = 1000000
|
||||
sum=0
|
||||
for(i=0; i < n_samples; i++){
|
||||
/* samples[i] = mixture() */
|
||||
sum += mixture()
|
||||
}
|
||||
sum/n_samples
|
||||
|
||||
halt
|
51
bc/extra/beta.bc
Normal file
51
bc/extra/beta.bc
Normal file
|
@ -0,0 +1,51 @@
|
|||
define sample_gamma(alpha){
|
||||
/*
|
||||
A Simple Method for Generating Gamma Variables, Marsaglia and Wan Tsang, 2001
|
||||
https://dl.acm.org/doi/pdf/10.1145/358407.358414
|
||||
see also the references/ folder
|
||||
Note that the Wikipedia page for the gamma distribution includes a scaling parameter
|
||||
k or beta
|
||||
https://en.wikipedia.org/wiki/Gamma_distribution
|
||||
such that gamma_k(alpha, k) = k * gamma(alpha)
|
||||
or gamma_beta(alpha, beta) = gamma(alpha) / beta
|
||||
So far I have not needed to use this, and thus the second parameter is by default 1.
|
||||
*/
|
||||
|
||||
if (alpha >= 1) {
|
||||
d = alpha - (1/3);
|
||||
c = 1.0 / sqrt(9.0 * d);
|
||||
while (1) {
|
||||
v=-1
|
||||
while(v<=0) {
|
||||
x = sample_unit_normal();
|
||||
v = 1 + c * x;
|
||||
}
|
||||
v = v * v * v;
|
||||
u = sample_unit_uniform();
|
||||
if (u < (1 - (0.0331 * (x * x * x * x)))) { /* Condition 1 */
|
||||
/*
|
||||
the 0.0331 doesn't inspire much confidence
|
||||
however, this isn't the whole story
|
||||
by knowing that Condition 1 implies condition 2
|
||||
we realize that this is just a way of making the algorithm faster
|
||||
i.e., of not using the logarithms
|
||||
*/
|
||||
return d * v;
|
||||
}
|
||||
if (log(u, 2) < ((0.5 * (x * x)) + (d * (1 - v + log(v, 2))))) { /* Condition 2 */
|
||||
return d * v;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return sample_gamma(1 + alpha) * p(sample_unit_uniform(), 1 / alpha);
|
||||
/* see note in p. 371 of https://dl.acm.org/doi/pdf/10.1145/358407.358414 */
|
||||
}
|
||||
}
|
||||
|
||||
define sample_beta(a, b)
|
||||
{
|
||||
/* See: https://en.wikipedia.org/wiki/Gamma_distribution#Related_distributions */
|
||||
gamma_a = sample_gamma(a);
|
||||
gamma_b = sample_gamma(b);
|
||||
return gamma_a / (gamma_a + gamma_b);
|
||||
}
|
7
bc/extra/scratchpad.bc
Normal file
7
bc/extra/scratchpad.bc
Normal file
|
@ -0,0 +1,7 @@
|
|||
define factorial(x){
|
||||
if(x > 1){
|
||||
return x * factorial(x-1)
|
||||
} else {
|
||||
return 1
|
||||
}
|
||||
}
|
7
bc/makefile
Normal file
7
bc/makefile
Normal file
|
@ -0,0 +1,7 @@
|
|||
SHELL := /bin/bash
|
||||
|
||||
compute:
|
||||
ghbc -l squiggle.bc estimate.bc
|
||||
|
||||
time:
|
||||
time ghbc -l squiggle.bc estimate.bc
|
44
bc/notes.md
Normal file
44
bc/notes.md
Normal file
|
@ -0,0 +1,44 @@
|
|||
## bc versions
|
||||
https://git.gavinhoward.com/gavin/bc/src/branch/master
|
||||
https://www.gnu.org/software/bc/manual/html_mono/bc.html
|
||||
https://pubs.opengroup.org/onlinepubs/9699919799.2018edition/utilities/bc.html
|
||||
|
||||
## gh-bc
|
||||
|
||||
To build
|
||||
./configure.sh -O3
|
||||
make
|
||||
sudo cp bin/bc /usr/bin/ghbc
|
||||
|
||||
Man, just feels nicer.
|
||||
rand()
|
||||
maxrand()
|
||||
|
||||
ghbc -l: include math functions, like log, sin
|
||||
|
||||
## gnu bc
|
||||
|
||||
--standard: Process exactly the POSIX bc language.
|
||||
Could define my own rng, and use arrays to do the seed thing
|
||||
|
||||
## Usage
|
||||
|
||||
Numbers are arbitrary precision numbers
|
||||
|
||||
length ( expression )
|
||||
scale (expression)
|
||||
scale=100
|
||||
define t(x) {
|
||||
return(2);
|
||||
}
|
||||
|
||||
Apparently posix bc only has one-letter functions, lol
|
||||
Extensions needed: multi-letter functions
|
||||
|
||||
## Decisions, decisions
|
||||
|
||||
Maybe target gh-bc, and then see about making it POSIX complicant later?
|
||||
|
||||
Decide between GH's bc, POSIX bc, and gnu bc
|
||||
- Start with POSIX for now
|
||||
- Can't do POSIX, one letter functions are too annoying
|
67
bc/squiggle.bc
Normal file
67
bc/squiggle.bc
Normal file
|
@ -0,0 +1,67 @@
|
|||
scale = 16
|
||||
/* seed = 12345678910 */
|
||||
pi = 4 * atan(1)
|
||||
normal90confidence=1.64485362695
|
||||
/* 1.6448536269514727148638489079916 */
|
||||
|
||||
/* Distribution & sampling functions */
|
||||
/* Unit distributions */
|
||||
define sample_unit_uniform(){
|
||||
return rand()/maxrand()
|
||||
}
|
||||
define sample_unit_normal(){
|
||||
u1=sample_unit_uniform()
|
||||
u2=sample_unit_uniform()
|
||||
z = sqrt(-2 * l(u1)) * sin(2 * pi * u2)
|
||||
return z
|
||||
}
|
||||
|
||||
/* Composite distributions */
|
||||
define sample_uniform(min, max){
|
||||
return (min + sample_unit_uniform()*(max-min))
|
||||
}
|
||||
define sample_normal(mean, sigma){
|
||||
return (mean + sigma * sample_unit_normal())
|
||||
}
|
||||
define sample_lognormal(logmean, logstd){
|
||||
return e(sample_normal(logmean, logstd))
|
||||
}
|
||||
define sample_normal_from_90_confidence_interval(low, high){
|
||||
/*
|
||||
Explanation of key idea:
|
||||
1. We know that the 90% confidence interval of the unit normal is
|
||||
[-1.6448536269514722, 1.6448536269514722]
|
||||
see e.g.: https://stackoverflow.com/questions/20626994/how-to-calculate-the-inverse-of-the-normal-cumulative-distribution-function-in-p
|
||||
or https://www.wolframalpha.com/input?i=N%5BInverseCDF%28normal%280%2C1%29%2C+0.05%29%2C%7B%E2%88%9E%2C100%7D%5D
|
||||
2. So if we take a unit normal and multiply it by
|
||||
L / 1.6448536269514722, its new 90% confidence interval will be
|
||||
[-L, L], i.e., length 2 * L
|
||||
3. Instead, if we want to get a confidence interval of length L,
|
||||
we should multiply the unit normal by
|
||||
L / (2 * 1.6448536269514722)
|
||||
Meaning that its standard deviation should be multiplied by that amount
|
||||
see: https://en.wikipedia.org/wiki/Normal_distribution?lang=en#Operations_on_a_single_normal_variable
|
||||
4. So we have learnt that Normal(0, L / (2 * 1.6448536269514722))
|
||||
has a 90% confidence interval of length L
|
||||
5. If we want a 90% confidence interval from high to low,
|
||||
we can set mean = (high + low)/2; the midpoint, and L = high-low,
|
||||
Normal([high + low]/2, [high - low]/(2 * 1.6448536269514722))
|
||||
*/
|
||||
mean = (high + low) / 2
|
||||
std = (high - low) / (2 * normal90confidence)
|
||||
return sample_normal(mean, std)
|
||||
}
|
||||
define sample_to(low, high){
|
||||
|
||||
/*
|
||||
Given a (positive) 90% confidence interval,
|
||||
returns a sample from a lognorma with a matching 90% c.i.
|
||||
Key idea: If we want a lognormal with 90% confidence interval [a, b]
|
||||
we need but get a normal with 90% confidence interval [log(a), log(b)].
|
||||
Then see code for sample_normal_from_90_confidence_interval
|
||||
*/
|
||||
loglow = l(low)
|
||||
loghigh = l(high)
|
||||
return e(sample_normal_from_90_confidence_interval(loglow, loghigh))
|
||||
}
|
||||
|
BIN
nim/samples
BIN
nim/samples
Binary file not shown.
38
ocaml/makefile
Normal file
38
ocaml/makefile
Normal file
|
@ -0,0 +1,38 @@
|
|||
# Compiler
|
||||
OC=ocamlopt
|
||||
# ocamlopt: platform-specific, faster
|
||||
# ocamlc: platform-independent intermediate representation, run with ocamlrun
|
||||
FAST=-O3 -unsafe # install flambda with opam
|
||||
PROF=-g
|
||||
|
||||
SRC=samples.ml
|
||||
OUT=./out/samples
|
||||
|
||||
build: $(SRC)
|
||||
$(OC) $(SRC) -o $(OUT)
|
||||
mv samples.cmi samples.cmx samples.o ./out/
|
||||
|
||||
run:
|
||||
$(OUT)
|
||||
|
||||
fast:
|
||||
$(OC) $(FAST) $(SRC) -o $(OUT)
|
||||
mv samples.cmi samples.cmx samples.o ./out/
|
||||
|
||||
time:
|
||||
bash -c "time $(OUT)"
|
||||
|
||||
profile:
|
||||
$(OC) $(PERF) $(SRC) -o $(OUT)
|
||||
mv samples.cmi samples.cmx samples.o ./out/
|
||||
sudo perf record -g $(OUT)
|
||||
sudo perf report
|
||||
rm perf.data
|
||||
|
||||
switch-opam-fast:
|
||||
opam switch create 4.11.2+flambda
|
||||
eval $(opam env)
|
||||
|
||||
switch-opam-5.1.0:
|
||||
opam switch create 5.1.0
|
||||
eval $(opam env)
|
BIN
ocaml/out/samples
Executable file
BIN
ocaml/out/samples
Executable file
Binary file not shown.
BIN
ocaml/out/samples.cmi
Normal file
BIN
ocaml/out/samples.cmi
Normal file
Binary file not shown.
BIN
ocaml/out/samples.cmx
Normal file
BIN
ocaml/out/samples.cmx
Normal file
Binary file not shown.
BIN
ocaml/out/samples.o
Normal file
BIN
ocaml/out/samples.o
Normal file
Binary file not shown.
123
ocaml/samples.ml
Normal file
123
ocaml/samples.ml
Normal file
|
@ -0,0 +1,123 @@
|
|||
(* Constants *)
|
||||
let pi = acos (-1.)
|
||||
let normal_95_ci_length = 1.6448536269514722
|
||||
|
||||
(* List manipulation helpers *)
|
||||
let sumFloats xs = List.fold_left(fun acc x -> acc +. x) 0.0 xs
|
||||
|
||||
let normalizeXs xs =
|
||||
let sum_xs = sumFloats xs in
|
||||
List.map(fun x -> x /. sum_xs) xs
|
||||
|
||||
let cumsumXs xs =
|
||||
let _, cum_sum = List.fold_left(fun (sum, ys) x ->
|
||||
let new_sum = sum +. x in
|
||||
new_sum, ys @ [new_sum]
|
||||
) (0.0, []) xs in
|
||||
cum_sum
|
||||
|
||||
let rec nth xs (n: int) =
|
||||
match xs with
|
||||
| [] -> Error "nth function finds no match"
|
||||
| y :: ys -> if n = 0 then Ok(y) else nth ys (n-1)
|
||||
(*
|
||||
Note that this is O(n) access.
|
||||
That is the cost of using the nice match syntax,
|
||||
which is not possible with OCaml arrays
|
||||
*)
|
||||
|
||||
let findIndex xs test =
|
||||
let rec recursiveHelper ys i =
|
||||
match ys with
|
||||
| [] -> Error "findIndex doesn't find an index"
|
||||
| z :: zs -> if test z then Ok(i) else recursiveHelper zs (i+1)
|
||||
in
|
||||
recursiveHelper xs 0
|
||||
|
||||
let unwind xs =
|
||||
let rec tailRecursiveHelper ys acc =
|
||||
match ys with
|
||||
| [] -> Ok(acc)
|
||||
| Error e :: _ -> Error e
|
||||
| Ok(y) :: ys -> tailRecursiveHelper ys (y :: acc)
|
||||
in
|
||||
tailRecursiveHelper xs []
|
||||
|
||||
let unwindSum xs =
|
||||
let rec tailRecursiveHelper ys sum =
|
||||
match ys with
|
||||
| [] -> Ok(sum)
|
||||
| Error e :: _ -> Error e
|
||||
| Ok(y) :: ys -> tailRecursiveHelper ys (y +. sum)
|
||||
in
|
||||
tailRecursiveHelper xs 0.0
|
||||
|
||||
(* Array helpers *)
|
||||
let unwindSumArray xs =
|
||||
Array.fold_left(fun acc x ->
|
||||
(
|
||||
match acc, x with
|
||||
| Error e, _ -> Error e
|
||||
| _, Error e -> Error e
|
||||
| Ok(sum), Ok(y) -> Ok(sum +. y)
|
||||
)
|
||||
) (Ok 0.0) xs
|
||||
|
||||
let sumFloats xs = List.fold_left(fun acc x -> acc +. x) 0.0 xs
|
||||
|
||||
(* Basic samplers *)
|
||||
let sampleZeroToOne () : float = Random.float 1.0
|
||||
|
||||
let sampleStandardNormal (): float =
|
||||
let u1 = sampleZeroToOne () in
|
||||
let u2 = sampleZeroToOne () in
|
||||
let z = sqrt(-2.0 *. log(u1)) *. sin(2.0 *. pi *. u2) in
|
||||
z
|
||||
|
||||
let sampleNormal mean std = mean +. std *. (sampleStandardNormal ())
|
||||
|
||||
let sampleLognormal logmean logstd = exp(sampleNormal logmean logstd)
|
||||
|
||||
let sampleTo low high =
|
||||
let loglow = log(low) in
|
||||
let loghigh = log(high) in
|
||||
let logmean = (loglow +. loghigh) /. 2.0 in
|
||||
let logstd = (loghigh -. loglow) /. (2.0 *. normal_95_ci_length ) in
|
||||
sampleLognormal logmean logstd
|
||||
|
||||
let mixture (samplers: (unit -> float) list) (weights: float list): (float, string) result =
|
||||
if (List.length samplers <> List.length weights)
|
||||
then Error "in mixture function, List.length samplers != List.length weights"
|
||||
else
|
||||
let normalized_weights = normalizeXs weights in
|
||||
let cumsummed_normalized_weights = cumsumXs normalized_weights in
|
||||
let p = sampleZeroToOne () in
|
||||
let chosenSamplerIndex = findIndex cumsummed_normalized_weights (fun x -> p < x) in
|
||||
let sampler = match chosenSamplerIndex with
|
||||
| Error e -> Error e
|
||||
| Ok(i) -> nth samplers i
|
||||
in
|
||||
let sample = match sampler with
|
||||
| Error e -> Error e
|
||||
| Ok(f) -> Ok(f ())
|
||||
in
|
||||
sample
|
||||
|
||||
let () =
|
||||
let sample0 () = 0. in
|
||||
let sample1 () = 1. in
|
||||
let sampleFew () = sampleTo 1. 3. in
|
||||
let sampleMany () = sampleTo 2. 10. in
|
||||
let p1 = 0.8 in
|
||||
let p2 = 0.5 in
|
||||
let p3 = p1 *. p2 in
|
||||
let weights = [ 1. -. p3; p3 /. 2.; p3 /. 4.; p3/. 4. ] in
|
||||
let sampler () = mixture [ sample0; sample1; sampleFew; sampleMany ] weights in
|
||||
let n = 1_000_000 in
|
||||
let samples = Array.init n (fun _ -> sampler ()) in
|
||||
match unwindSumArray samples with
|
||||
| Error err -> Printf.printf "Error %s\n" err
|
||||
| Ok(sum) -> (
|
||||
let mean = sum /. float_of_int(n) in
|
||||
Printf.printf "Mean: %f\n" mean
|
||||
)
|
21
time.txt
21
time.txt
|
@ -138,3 +138,24 @@ Requires /bin/time, found on GNU/Linux systems
|
|||
Running 100x and taking avg time example
|
||||
Time using 1 thread: 37.60ms
|
||||
|
||||
## squiggle.ml
|
||||
|
||||
— make fast && time make run
|
||||
ocamlopt -O3 -unsafe samples.ml -o ./out/samples
|
||||
mv samples.cmi samples.cmx samples.o ./out/
|
||||
./out/samples
|
||||
Mean: 0.884629
|
||||
|
||||
real 0m0.164s
|
||||
user 0m0.159s
|
||||
sys 0m0.004s
|
||||
|
||||
## squiggle.bc
|
||||
|
||||
make time
|
||||
time ghbc -l squiggle.bc estimate.bc
|
||||
.8882048395568279
|
||||
|
||||
real 0m15.906s
|
||||
user 0m15.900s
|
||||
sys 0m0.000s
|
||||
|
|
Loading…
Reference in New Issue
Block a user