Compare commits
	
		
			33 Commits
		
	
	
		
			5c0294b4b1
			...
			7834c3baae
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 7834c3baae | |||
| 5473a6aeda | |||
| 90b8804884 | |||
| 8675d98784 | |||
| 249a1ff434 | |||
| 1a3099b7e4 | |||
| b9f64ec37b | |||
| c19738709a | |||
| 50fb88db7c | |||
| 3befb4af4d | |||
| cf76c92803 | |||
| 2d5378fd53 | |||
| 37a2dab610 | |||
| 608cbb2f68 | |||
| 25a27f7fc4 | |||
| 7ce4658d30 | |||
| 13194bc3ca | |||
| 710eb4267b | |||
| 4708c6f198 | |||
| 4959255947 | |||
| 4c4d053ab9 | |||
| 3950946d68 | |||
| 351f4c584d | |||
| 4983699308 | |||
| ce119253b7 | |||
| 90e48d2249 | |||
| fb21e4baa6 | |||
| 23bd623b66 | |||
| fa73c33c27 | |||
| 2cddf557bf | |||
| ffec4663fc | |||
| e9bfd1f2ed | |||
| dd8a1806bd | 
							
								
								
									
										
											BIN
										
									
								
								C/out/samples
									
									
									
									
									
								
							
							
						
						
									
										
											BIN
										
									
								
								C/out/samples
									
									
									
									
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										26
									
								
								C/perf.txt
									
									
									
									
									
								
							
							
						
						
									
										26
									
								
								C/perf.txt
									
									
									
									
									
								
							| 
						 | 
				
			
			@ -1,26 +0,0 @@
 | 
			
		|||
Samples: 884  of event 'cycles', Event count (approx.): 551167949
 | 
			
		||||
Overhead  Command  Shared Object      Symbol
 | 
			
		||||
  35.32%  samples  samples            [.] xorshift32
 | 
			
		||||
  14.09%  samples  libgomp.so.1.0.0   [.] 0x000000000001d2ea
 | 
			
		||||
  12.04%  samples  libgomp.so.1.0.0   [.] 0x000000000001d132
 | 
			
		||||
  11.53%  samples  samples            [.] mixture._omp_fn.0
 | 
			
		||||
   4.55%  samples  libm-2.31.so       [.] __sin_fma
 | 
			
		||||
   4.24%  samples  samples            [.] rand_0_to_1
 | 
			
		||||
   3.77%  samples  samples            [.] random_to
 | 
			
		||||
   3.03%  samples  libm-2.31.so       [.] __logf_fma
 | 
			
		||||
   1.61%  samples  libm-2.31.so       [.] __expf_fma
 | 
			
		||||
   1.54%  samples  samples            [.] split_array_sum._omp_fn.0
 | 
			
		||||
   1.38%  samples  samples            [.] random_uniform
 | 
			
		||||
   0.94%  samples  samples            [.] ur_normal
 | 
			
		||||
   0.91%  samples  libm-2.31.so       [.] __ieee754_log_fma
 | 
			
		||||
   0.74%  samples  libgomp.so.1.0.0   [.] 0x000000000001d13d
 | 
			
		||||
   0.52%  samples  samples            [.] sample_0
 | 
			
		||||
   0.41%  samples  libm-2.31.so       [.] __sqrtf_finite@GLIBC_2.15
 | 
			
		||||
   0.38%  samples  samples            [.] sample_1
 | 
			
		||||
   0.36%  samples  libgomp.so.1.0.0   [.] 0x000000000001d139
 | 
			
		||||
   0.36%  samples  libgomp.so.1.0.0   [.] 0x000000000001d2f5
 | 
			
		||||
   0.22%  samples  [kernel.kallsyms]  [k] native_queued_spin_lock_slowpath
 | 
			
		||||
   0.18%  samples  [kernel.kallsyms]  [k] _raw_spin_lock_irq
 | 
			
		||||
   0.18%  samples  samples            [.] random_lognormal
 | 
			
		||||
   0.17%  samples  libgomp.so.1.0.0   [.] 0x000000000001d2f1
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										
											BIN
										
									
								
								C/perf/perf.data
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								C/perf/perf.data
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										25
									
								
								C/perf/perf.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										25
									
								
								C/perf/perf.txt
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,25 @@
 | 
			
		|||
Samples: 806  of event 'cycles', Event count (approx.): 576095767
 | 
			
		||||
Overhead  Command  Shared Object      Symbol
 | 
			
		||||
  36.75%  samples  samples            [.] xorshift32
 | 
			
		||||
  13.69%  samples  libgomp.so.1.0.0   [.] 0x000000000001d3ca
 | 
			
		||||
  12.84%  samples  samples            [.] mixture._omp_fn.0
 | 
			
		||||
  10.70%  samples  libgomp.so.1.0.0   [.] 0x000000000001d212
 | 
			
		||||
   5.02%  samples  samples            [.] rand_0_to_1
 | 
			
		||||
   4.05%  samples  libm-2.31.so       [.] __sin_fma
 | 
			
		||||
   3.11%  samples  libm-2.31.so       [.] __logf_fma
 | 
			
		||||
   2.79%  samples  samples            [.] split_array_sum._omp_fn.0
 | 
			
		||||
   1.91%  samples  samples            [.] random_to
 | 
			
		||||
   1.74%  samples  samples            [.] random_uniform
 | 
			
		||||
   0.87%  samples  libm-2.31.so       [.] __ieee754_log_fma
 | 
			
		||||
   0.81%  samples  samples            [.] ur_normal
 | 
			
		||||
   0.74%  samples  libm-2.31.so       [.] __expf_fma
 | 
			
		||||
   0.55%  samples  libgomp.so.1.0.0   [.] 0x000000000001d3d1
 | 
			
		||||
   0.53%  samples  libgomp.so.1.0.0   [.] 0x000000000001d21d
 | 
			
		||||
   0.35%  samples  samples            [.] random_normal
 | 
			
		||||
   0.30%  samples  libm-2.31.so       [.] __sqrtf
 | 
			
		||||
   0.28%  samples  samples            [.] random_lognormal
 | 
			
		||||
   0.25%  samples  samples            [.] sample_0
 | 
			
		||||
   0.24%  samples  [kernel.kallsyms]  [k] 0xffffffffae72e205
 | 
			
		||||
   0.22%  samples  libgomp.so.1.0.0   [.] 0x000000000001d219
 | 
			
		||||
   0.18%  samples  [kernel.kallsyms]  [k] 0xffffffffae2c2b83
 | 
			
		||||
   0.18%  samples  [kernel.kallsyms]  [k] 0xffffffffae4f4da7
 | 
			
		||||
							
								
								
									
										23
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										23
									
								
								README.md
									
									
									
									
									
								
							| 
						 | 
				
			
			@ -25,15 +25,17 @@ The name of this repository is a pun on two meanings of "time to": "how much tim
 | 
			
		|||
| Language                    | Time      | Lines of code |
 | 
			
		||||
|-----------------------------|-----------|---------------|
 | 
			
		||||
| C (optimized, 16 threads)   | 5ms       | 249  |
 | 
			
		||||
| squiggle.c                  | 37ms      | 54   | 
 | 
			
		||||
| squiggle.c                  | 37ms      | 54*  | 
 | 
			
		||||
| Nim                         | 38ms      | 84   |
 | 
			
		||||
| Lua (LuaJIT)                | 68ms      | 82   |
 | 
			
		||||
| OCaml (flambda)             | 164ms     | 123  |
 | 
			
		||||
| Lua                         | 278ms     | 82   |
 | 
			
		||||
| C (naïve implementation)    | 292ms     | 149  |
 | 
			
		||||
| Javascript (NodeJS)         | 732ms     | 69   |
 | 
			
		||||
| Squiggle                    | 1,536s    | 14*  |
 | 
			
		||||
| SquigglePy                  | 1.602s    | 18*  |
 | 
			
		||||
| R                           | 7,000s    | 49   |
 | 
			
		||||
| Gavin Howard's bc           | 15.9s     | 59   |
 | 
			
		||||
| Python (CPython)            | 16,641s   | 56   |
 | 
			
		||||
 | 
			
		||||
Time measurements taken with the [time](https://man7.org/linux/man-pages/man1/time.1.html) tool, using 1M samples. But different implementations use different algorithms and, occasionally, different time measuring methodologies (for the C, Nim and Lua implementations, I run the program 100 times and take the mean). Their speed was also measured under different loads in my machine. So I think that these time estimates are accurate within maybe ~2x or so.
 | 
			
		||||
| 
						 | 
				
			
			@ -118,17 +120,32 @@ sudo ln -sf luajit-2.1.0-beta3 /usr/local/bin/luajit
 | 
			
		|||
 | 
			
		||||
Overall I'm thinking that a combination of lua at least for scripting and ¿nim/C/tbd? for more serious programs could be quite powerful.
 | 
			
		||||
 | 
			
		||||
### OCaml
 | 
			
		||||
 | 
			
		||||
OCaml was like meeting an old and forgotten friend. I found its syntax a bit clunky, but could get accustomed to it. Its list matching is nice, O(n) list element accessing, not so much. Honestly, I wanted to really like it, but I'm not sure yet. And it's *slow* compared to C.
 | 
			
		||||
 | 
			
		||||
I restricted myself to using the standard library, but it's likely that using Jane Street's Base or Core would have produced less clunky code.
 | 
			
		||||
 | 
			
		||||
### bc
 | 
			
		||||
 | 
			
		||||
The beautiful thing about bc is that it's an arbitrary precision calculator:
 | 
			
		||||
 | 
			
		||||
- it's not going to get floating point overflows, unlike practically everything else. Try `1000000001.0 ** 1000000.0` in OCaml, and you will get infinity, try p(1000000000.0, 1000000.0) and you will get a large power of 10 in bc.
 | 
			
		||||
- you can always trade get more precision (at the cost of longer running times). Could be useful if you were working with tricky long tails.
 | 
			
		||||
 | 
			
		||||
I decided to go with [Gavin Howard's bc](https://git.gavinhoward.com/gavin/bc), because I've been following the guy some time, and I respect him. It also had some crucial extensions, like a random number generator and 
 | 
			
		||||
 | 
			
		||||
### Overall thoughts
 | 
			
		||||
 | 
			
		||||
Overall I don't think that this is a fair comparison of the languages intrinsically, because I'm just differentially good at them, because I've chosen to put more effort in ones than in others. But it is still useful to me personally, and perhaps mildly informative to others. 
 | 
			
		||||
 | 
			
		||||
## Languages I may add later
 | 
			
		||||
 | 
			
		||||
- [x] bc (the standard posix calculator)
 | 
			
		||||
- [x] OCaml
 | 
			
		||||
- [ ] PyMC
 | 
			
		||||
- [ ] bc (the standard posix calculator)
 | 
			
		||||
- [ ] Zig
 | 
			
		||||
- [ ] Rust
 | 
			
		||||
- [ ] OCaml
 | 
			
		||||
- [ ] Forth
 | 
			
		||||
- [ ] Julia (TuringML, MCHammer) 
 | 
			
		||||
- [ ] Lisp
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										27
									
								
								bc/comments_stripped/estimate.bc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								bc/comments_stripped/estimate.bc
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,27 @@
 | 
			
		|||
p_a = 0.8
 | 
			
		||||
p_b = 0.5
 | 
			
		||||
p_c = p_a * p_b
 | 
			
		||||
weights[0] = 1 - p_c
 | 
			
		||||
weights[1] = p_c / 2
 | 
			
		||||
weights[2] = p_c / 4
 | 
			
		||||
weights[3] = p_c / 4
 | 
			
		||||
define mixture(){
 | 
			
		||||
  p = sample_unit_uniform()
 | 
			
		||||
  if(p <= weights[0]){
 | 
			
		||||
    return 0
 | 
			
		||||
  } 
 | 
			
		||||
  if(p <= (weights[0] + weights[1])){
 | 
			
		||||
    return 1
 | 
			
		||||
  }
 | 
			
		||||
  if(p<= (weights[0] + weights[1] + weights[2])){
 | 
			
		||||
    return sample_to(1, 3)
 | 
			
		||||
  }
 | 
			
		||||
  return sample_to(2, 10)
 | 
			
		||||
}
 | 
			
		||||
n_samples = 1000000
 | 
			
		||||
sum=0
 | 
			
		||||
for(i=0; i < n_samples; i++){
 | 
			
		||||
  sum += mixture()
 | 
			
		||||
}
 | 
			
		||||
sum/n_samples
 | 
			
		||||
halt
 | 
			
		||||
							
								
								
									
										32
									
								
								bc/comments_stripped/squiggle.bc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										32
									
								
								bc/comments_stripped/squiggle.bc
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,32 @@
 | 
			
		|||
scale = 16
 | 
			
		||||
pi = 4 * atan(1)
 | 
			
		||||
normal90confidence=1.64485362695
 | 
			
		||||
define sample_unit_uniform(){
 | 
			
		||||
  return rand()/maxrand()
 | 
			
		||||
}
 | 
			
		||||
define sample_unit_normal(){
 | 
			
		||||
  u1=sample_unit_uniform()
 | 
			
		||||
  u2=sample_unit_uniform()
 | 
			
		||||
  z = sqrt(-2 * l(u1)) * sin(2 * pi * u2)
 | 
			
		||||
  return z
 | 
			
		||||
}
 | 
			
		||||
define sample_uniform(min, max){
 | 
			
		||||
  return (min + sample_unit_uniform()*(max-min))
 | 
			
		||||
}
 | 
			
		||||
define sample_normal(mean, sigma){
 | 
			
		||||
  return (mean + sigma * sample_unit_normal())
 | 
			
		||||
}
 | 
			
		||||
define sample_lognormal(logmean, logstd){
 | 
			
		||||
  return e(sample_normal(logmean, logstd))
 | 
			
		||||
}
 | 
			
		||||
define sample_normal_from_90_confidence_interval(low, high){
 | 
			
		||||
  mean = (high + low) / 2
 | 
			
		||||
  std = (high - low) / (2 * normal90confidence)
 | 
			
		||||
  return sample_normal(mean, std)
 | 
			
		||||
}
 | 
			
		||||
define sample_to(low, high){
 | 
			
		||||
  loglow = l(low)
 | 
			
		||||
  loghigh = l(high)
 | 
			
		||||
  return e(sample_normal_from_90_confidence_interval(loglow, loghigh))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										34
									
								
								bc/estimate.bc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								bc/estimate.bc
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,34 @@
 | 
			
		|||
p_a = 0.8
 | 
			
		||||
p_b = 0.5
 | 
			
		||||
p_c = p_a * p_b
 | 
			
		||||
 | 
			
		||||
weights[0] = 1 - p_c
 | 
			
		||||
weights[1] = p_c / 2
 | 
			
		||||
weights[2] = p_c / 4
 | 
			
		||||
weights[3] = p_c / 4
 | 
			
		||||
 | 
			
		||||
/* We'll have to define the mixture manually */
 | 
			
		||||
define mixture(){
 | 
			
		||||
  p = sample_unit_uniform()
 | 
			
		||||
  if(p <= weights[0]){
 | 
			
		||||
    return 0
 | 
			
		||||
  } 
 | 
			
		||||
  if(p <= (weights[0] + weights[1])){
 | 
			
		||||
    return 1
 | 
			
		||||
  }
 | 
			
		||||
  if(p<= (weights[0] + weights[1] + weights[2])){
 | 
			
		||||
    return sample_to(1, 3)
 | 
			
		||||
  }
 | 
			
		||||
  return sample_to(2, 10)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* n_samples = 1000000 */
 | 
			
		||||
n_samples = 1000000
 | 
			
		||||
sum=0
 | 
			
		||||
for(i=0; i < n_samples; i++){
 | 
			
		||||
  /* samples[i] = mixture() */
 | 
			
		||||
  sum += mixture()
 | 
			
		||||
}
 | 
			
		||||
sum/n_samples
 | 
			
		||||
 | 
			
		||||
halt
 | 
			
		||||
							
								
								
									
										51
									
								
								bc/extra/beta.bc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										51
									
								
								bc/extra/beta.bc
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,51 @@
 | 
			
		|||
define sample_gamma(alpha){
 | 
			
		||||
  /*
 | 
			
		||||
  A Simple Method for Generating Gamma Variables, Marsaglia and Wan Tsang, 2001
 | 
			
		||||
  https://dl.acm.org/doi/pdf/10.1145/358407.358414
 | 
			
		||||
  see also the references/ folder
 | 
			
		||||
  Note that the Wikipedia page for the gamma distribution includes a scaling parameter
 | 
			
		||||
  k or beta
 | 
			
		||||
  https://en.wikipedia.org/wiki/Gamma_distribution
 | 
			
		||||
  such that gamma_k(alpha, k) = k * gamma(alpha)
 | 
			
		||||
  or gamma_beta(alpha, beta) = gamma(alpha) / beta
 | 
			
		||||
  So far I have not needed to use this, and thus the second parameter is by default 1.
 | 
			
		||||
  */
 | 
			
		||||
 | 
			
		||||
  if (alpha >= 1) {
 | 
			
		||||
    d = alpha - (1/3);
 | 
			
		||||
    c = 1.0 / sqrt(9.0 * d);
 | 
			
		||||
    while (1) {
 | 
			
		||||
      v=-1
 | 
			
		||||
      while(v<=0) {
 | 
			
		||||
        x = sample_unit_normal();
 | 
			
		||||
        v = 1 + c * x;
 | 
			
		||||
      }
 | 
			
		||||
      v = v * v * v;
 | 
			
		||||
      u = sample_unit_uniform();
 | 
			
		||||
      if (u < (1 - (0.0331 * (x * x * x * x)))) { /* Condition 1 */
 | 
			
		||||
        /* 
 | 
			
		||||
        the 0.0331 doesn't inspire much confidence
 | 
			
		||||
        however, this isn't the whole story
 | 
			
		||||
        by knowing that Condition 1 implies condition 2
 | 
			
		||||
        we realize that this is just a way of making the algorithm faster
 | 
			
		||||
        i.e., of not using the logarithms
 | 
			
		||||
        */
 | 
			
		||||
        return d * v;
 | 
			
		||||
      }
 | 
			
		||||
      if (log(u, 2) < ((0.5 * (x * x)) + (d * (1 - v + log(v, 2))))) { /* Condition 2 */
 | 
			
		||||
        return d * v;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  } else {
 | 
			
		||||
    return sample_gamma(1 + alpha) * p(sample_unit_uniform(), 1 / alpha);
 | 
			
		||||
    /* see note in p. 371 of https://dl.acm.org/doi/pdf/10.1145/358407.358414 */
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
define sample_beta(a, b)
 | 
			
		||||
{
 | 
			
		||||
    /* See: https://en.wikipedia.org/wiki/Gamma_distribution#Related_distributions */
 | 
			
		||||
    gamma_a = sample_gamma(a);
 | 
			
		||||
    gamma_b = sample_gamma(b);
 | 
			
		||||
    return gamma_a / (gamma_a + gamma_b);
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										7
									
								
								bc/extra/scratchpad.bc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								bc/extra/scratchpad.bc
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,7 @@
 | 
			
		|||
define factorial(x){
 | 
			
		||||
  if(x > 1){
 | 
			
		||||
    return x * factorial(x-1)
 | 
			
		||||
  } else {
 | 
			
		||||
     return 1
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										7
									
								
								bc/makefile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								bc/makefile
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,7 @@
 | 
			
		|||
SHELL := /bin/bash
 | 
			
		||||
 | 
			
		||||
compute:
 | 
			
		||||
	ghbc -l squiggle.bc estimate.bc
 | 
			
		||||
 | 
			
		||||
time:
 | 
			
		||||
	time ghbc -l squiggle.bc estimate.bc
 | 
			
		||||
							
								
								
									
										44
									
								
								bc/notes.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								bc/notes.md
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,44 @@
 | 
			
		|||
## bc versions
 | 
			
		||||
https://git.gavinhoward.com/gavin/bc/src/branch/master
 | 
			
		||||
https://www.gnu.org/software/bc/manual/html_mono/bc.html
 | 
			
		||||
https://pubs.opengroup.org/onlinepubs/9699919799.2018edition/utilities/bc.html
 | 
			
		||||
 | 
			
		||||
## gh-bc
 | 
			
		||||
 | 
			
		||||
To build
 | 
			
		||||
./configure.sh -O3
 | 
			
		||||
make
 | 
			
		||||
sudo cp bin/bc /usr/bin/ghbc
 | 
			
		||||
 | 
			
		||||
Man, just feels nicer.
 | 
			
		||||
rand()
 | 
			
		||||
maxrand()
 | 
			
		||||
 | 
			
		||||
ghbc -l: include math functions, like log, sin 
 | 
			
		||||
 | 
			
		||||
## gnu bc
 | 
			
		||||
 | 
			
		||||
--standard: Process exactly the POSIX bc language.
 | 
			
		||||
Could define my own rng, and use arrays to do the seed thing
 | 
			
		||||
 | 
			
		||||
## Usage
 | 
			
		||||
 | 
			
		||||
Numbers are arbitrary precision numbers
 | 
			
		||||
 | 
			
		||||
length ( expression )
 | 
			
		||||
scale (expression)
 | 
			
		||||
scale=100
 | 
			
		||||
define t(x) {
 | 
			
		||||
  return(2);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
Apparently posix bc only has one-letter functions, lol
 | 
			
		||||
Extensions needed: multi-letter functions 
 | 
			
		||||
 | 
			
		||||
## Decisions, decisions
 | 
			
		||||
 | 
			
		||||
Maybe target gh-bc, and then see about making it POSIX complicant later?
 | 
			
		||||
 | 
			
		||||
Decide between GH's bc, POSIX bc, and gnu bc
 | 
			
		||||
- Start with POSIX for now 
 | 
			
		||||
- Can't do POSIX, one letter functions are too annoying
 | 
			
		||||
							
								
								
									
										67
									
								
								bc/squiggle.bc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										67
									
								
								bc/squiggle.bc
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,67 @@
 | 
			
		|||
scale = 16
 | 
			
		||||
/* seed = 12345678910 */
 | 
			
		||||
pi = 4 * atan(1)
 | 
			
		||||
normal90confidence=1.64485362695
 | 
			
		||||
/* 1.6448536269514727148638489079916 */
 | 
			
		||||
 | 
			
		||||
/* Distribution & sampling functions */
 | 
			
		||||
/* Unit distributions */
 | 
			
		||||
define sample_unit_uniform(){
 | 
			
		||||
  return rand()/maxrand()
 | 
			
		||||
}
 | 
			
		||||
define sample_unit_normal(){
 | 
			
		||||
  u1=sample_unit_uniform()
 | 
			
		||||
  u2=sample_unit_uniform()
 | 
			
		||||
  z = sqrt(-2 * l(u1)) * sin(2 * pi * u2)
 | 
			
		||||
  return z
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Composite distributions */
 | 
			
		||||
define sample_uniform(min, max){
 | 
			
		||||
  return (min + sample_unit_uniform()*(max-min))
 | 
			
		||||
}
 | 
			
		||||
define sample_normal(mean, sigma){
 | 
			
		||||
  return (mean + sigma * sample_unit_normal())
 | 
			
		||||
}
 | 
			
		||||
define sample_lognormal(logmean, logstd){
 | 
			
		||||
  return e(sample_normal(logmean, logstd))
 | 
			
		||||
}
 | 
			
		||||
define sample_normal_from_90_confidence_interval(low, high){
 | 
			
		||||
  /*
 | 
			
		||||
  Explanation of key idea:
 | 
			
		||||
  1. We know that the 90% confidence interval of the unit normal is
 | 
			
		||||
  [-1.6448536269514722, 1.6448536269514722]
 | 
			
		||||
  see e.g.: https://stackoverflow.com/questions/20626994/how-to-calculate-the-inverse-of-the-normal-cumulative-distribution-function-in-p
 | 
			
		||||
  or https://www.wolframalpha.com/input?i=N%5BInverseCDF%28normal%280%2C1%29%2C+0.05%29%2C%7B%E2%88%9E%2C100%7D%5D
 | 
			
		||||
  2. So if we take a unit normal and multiply it by
 | 
			
		||||
  L / 1.6448536269514722, its new 90% confidence interval will be
 | 
			
		||||
  [-L, L], i.e., length 2 * L
 | 
			
		||||
  3. Instead, if we want to get a confidence interval of length L,
 | 
			
		||||
  we should multiply the unit normal by
 | 
			
		||||
  L / (2 * 1.6448536269514722)
 | 
			
		||||
  Meaning that its standard deviation should be multiplied by that amount
 | 
			
		||||
  see: https://en.wikipedia.org/wiki/Normal_distribution?lang=en#Operations_on_a_single_normal_variable
 | 
			
		||||
  4. So we have learnt that Normal(0, L / (2 * 1.6448536269514722))
 | 
			
		||||
  has a 90% confidence interval of length L
 | 
			
		||||
  5. If we want a 90% confidence interval from high to low,
 | 
			
		||||
  we can set mean = (high + low)/2; the midpoint, and L = high-low,
 | 
			
		||||
  Normal([high + low]/2, [high - low]/(2 * 1.6448536269514722))
 | 
			
		||||
  */
 | 
			
		||||
  mean = (high + low) / 2
 | 
			
		||||
  std = (high - low) / (2 * normal90confidence)
 | 
			
		||||
  return sample_normal(mean, std)
 | 
			
		||||
}
 | 
			
		||||
define sample_to(low, high){
 | 
			
		||||
 | 
			
		||||
  /*
 | 
			
		||||
  Given a (positive) 90% confidence interval,
 | 
			
		||||
  returns a sample from a lognorma with a matching 90% c.i.
 | 
			
		||||
  Key idea: If we want a lognormal with 90% confidence interval [a, b]
 | 
			
		||||
  we need but get a normal with 90% confidence interval [log(a), log(b)].
 | 
			
		||||
  Then see code for sample_normal_from_90_confidence_interval
 | 
			
		||||
  */
 | 
			
		||||
  loglow = l(low)
 | 
			
		||||
  loghigh = l(high)
 | 
			
		||||
  return e(sample_normal_from_90_confidence_interval(loglow, loghigh))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										
											BIN
										
									
								
								nim/samples
									
									
									
									
									
								
							
							
						
						
									
										
											BIN
										
									
								
								nim/samples
									
									
									
									
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										38
									
								
								ocaml/makefile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								ocaml/makefile
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,38 @@
 | 
			
		|||
# Compiler
 | 
			
		||||
OC=ocamlopt
 | 
			
		||||
# ocamlopt: platform-specific, faster
 | 
			
		||||
# ocamlc: platform-independent intermediate representation, run with ocamlrun
 | 
			
		||||
FAST=-O3 -unsafe # install flambda with opam
 | 
			
		||||
PROF=-g
 | 
			
		||||
 | 
			
		||||
SRC=samples.ml
 | 
			
		||||
OUT=./out/samples
 | 
			
		||||
 | 
			
		||||
build: $(SRC)
 | 
			
		||||
	$(OC) $(SRC) -o $(OUT)
 | 
			
		||||
	mv samples.cmi samples.cmx samples.o ./out/
 | 
			
		||||
 | 
			
		||||
run:
 | 
			
		||||
	$(OUT)
 | 
			
		||||
 | 
			
		||||
fast:
 | 
			
		||||
	$(OC) $(FAST) $(SRC) -o $(OUT)
 | 
			
		||||
	mv samples.cmi samples.cmx samples.o ./out/
 | 
			
		||||
 | 
			
		||||
time:
 | 
			
		||||
	bash -c "time $(OUT)"
 | 
			
		||||
 | 
			
		||||
profile:
 | 
			
		||||
	$(OC) $(PERF) $(SRC) -o $(OUT)
 | 
			
		||||
	mv samples.cmi samples.cmx samples.o ./out/
 | 
			
		||||
	sudo perf record -g $(OUT)
 | 
			
		||||
	sudo perf report
 | 
			
		||||
	rm perf.data
 | 
			
		||||
 | 
			
		||||
switch-opam-fast:
 | 
			
		||||
	opam switch create 4.11.2+flambda 
 | 
			
		||||
	eval $(opam env)
 | 
			
		||||
 | 
			
		||||
switch-opam-5.1.0:
 | 
			
		||||
	opam switch create 5.1.0 
 | 
			
		||||
	eval $(opam env)
 | 
			
		||||
							
								
								
									
										
											BIN
										
									
								
								ocaml/out/samples
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								ocaml/out/samples
									
									
									
									
									
										Executable file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								ocaml/out/samples.cmi
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								ocaml/out/samples.cmi
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								ocaml/out/samples.cmx
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								ocaml/out/samples.cmx
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								ocaml/out/samples.o
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								ocaml/out/samples.o
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										123
									
								
								ocaml/samples.ml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										123
									
								
								ocaml/samples.ml
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,123 @@
 | 
			
		|||
(* Constants *)
 | 
			
		||||
let pi = acos (-1.)
 | 
			
		||||
let normal_95_ci_length = 1.6448536269514722
 | 
			
		||||
 | 
			
		||||
(* List manipulation helpers *)
 | 
			
		||||
let sumFloats xs = List.fold_left(fun acc x -> acc +. x) 0.0 xs
 | 
			
		||||
 | 
			
		||||
let normalizeXs xs = 
 | 
			
		||||
  let sum_xs = sumFloats xs in 
 | 
			
		||||
  List.map(fun x -> x /. sum_xs) xs
 | 
			
		||||
 | 
			
		||||
let cumsumXs xs = 
 | 
			
		||||
  let _, cum_sum = List.fold_left(fun (sum, ys) x -> 
 | 
			
		||||
    let new_sum = sum +. x in
 | 
			
		||||
    new_sum, ys @ [new_sum]
 | 
			
		||||
  ) (0.0, []) xs in
 | 
			
		||||
  cum_sum
 | 
			
		||||
 | 
			
		||||
let rec nth xs (n: int) = 
 | 
			
		||||
  match xs with
 | 
			
		||||
  | [] -> Error "nth function finds no match"
 | 
			
		||||
  | y :: ys -> if n = 0 then Ok(y) else nth ys (n-1)
 | 
			
		||||
  (* 
 | 
			
		||||
     Note that this is O(n) access.
 | 
			
		||||
     That is the cost of using the nice match syntax,
 | 
			
		||||
     which is not possible with OCaml arrays
 | 
			
		||||
  *)
 | 
			
		||||
 | 
			
		||||
let findIndex xs test = 
 | 
			
		||||
  let rec recursiveHelper ys i = 
 | 
			
		||||
    match ys with
 | 
			
		||||
      | [] -> Error "findIndex doesn't find an index"
 | 
			
		||||
      | z :: zs -> if test z then Ok(i) else recursiveHelper zs (i+1)
 | 
			
		||||
    in
 | 
			
		||||
  recursiveHelper xs 0
 | 
			
		||||
 | 
			
		||||
let unwind xs = 
 | 
			
		||||
  let rec tailRecursiveHelper ys acc =
 | 
			
		||||
    match ys with
 | 
			
		||||
    | [] -> Ok(acc)
 | 
			
		||||
    | Error e :: _ -> Error e
 | 
			
		||||
    | Ok(y) :: ys -> tailRecursiveHelper ys (y :: acc) 
 | 
			
		||||
  in 
 | 
			
		||||
  tailRecursiveHelper xs []
 | 
			
		||||
  
 | 
			
		||||
let unwindSum xs = 
 | 
			
		||||
  let rec tailRecursiveHelper ys sum =
 | 
			
		||||
    match ys with
 | 
			
		||||
    | [] -> Ok(sum)
 | 
			
		||||
    | Error e :: _ -> Error e
 | 
			
		||||
    | Ok(y) :: ys -> tailRecursiveHelper ys (y +. sum) 
 | 
			
		||||
  in 
 | 
			
		||||
  tailRecursiveHelper xs 0.0
 | 
			
		||||
 | 
			
		||||
(* Array helpers *)
 | 
			
		||||
let unwindSumArray xs = 
 | 
			
		||||
  Array.fold_left(fun acc x -> 
 | 
			
		||||
    (
 | 
			
		||||
      match acc, x with
 | 
			
		||||
      | Error e, _ -> Error e
 | 
			
		||||
      | _, Error e -> Error e
 | 
			
		||||
      | Ok(sum), Ok(y) -> Ok(sum +. y)
 | 
			
		||||
    )
 | 
			
		||||
  ) (Ok 0.0) xs
 | 
			
		||||
 | 
			
		||||
let sumFloats xs = List.fold_left(fun acc x -> acc +. x) 0.0 xs
 | 
			
		||||
 | 
			
		||||
(* Basic samplers *)
 | 
			
		||||
let sampleZeroToOne () : float = Random.float 1.0
 | 
			
		||||
 | 
			
		||||
let sampleStandardNormal (): float = 
 | 
			
		||||
  let u1 = sampleZeroToOne () in 
 | 
			
		||||
  let u2 = sampleZeroToOne () in 
 | 
			
		||||
  let z = sqrt(-2.0 *. log(u1)) *. sin(2.0 *. pi *. u2) in 
 | 
			
		||||
  z
 | 
			
		||||
 | 
			
		||||
let sampleNormal mean std = mean +. std *. (sampleStandardNormal ())
 | 
			
		||||
 | 
			
		||||
let sampleLognormal logmean logstd = exp(sampleNormal logmean logstd)
 | 
			
		||||
 | 
			
		||||
let sampleTo low high = 
 | 
			
		||||
  let loglow = log(low) in
 | 
			
		||||
  let loghigh = log(high) in
 | 
			
		||||
  let logmean = (loglow +. loghigh) /. 2.0 in
 | 
			
		||||
  let logstd = (loghigh -. loglow) /. (2.0 *. normal_95_ci_length ) in
 | 
			
		||||
  sampleLognormal logmean logstd
 | 
			
		||||
 | 
			
		||||
let mixture (samplers: (unit -> float) list) (weights: float list): (float, string) result = 
 | 
			
		||||
   if (List.length samplers <> List.length weights) 
 | 
			
		||||
    then Error "in mixture function, List.length samplers != List.length weights"
 | 
			
		||||
  else
 | 
			
		||||
    let normalized_weights = normalizeXs weights in
 | 
			
		||||
    let cumsummed_normalized_weights = cumsumXs normalized_weights in
 | 
			
		||||
    let p = sampleZeroToOne () in
 | 
			
		||||
    let chosenSamplerIndex = findIndex cumsummed_normalized_weights (fun x -> p < x) in
 | 
			
		||||
    let sampler = match chosenSamplerIndex with
 | 
			
		||||
      | Error e -> Error e
 | 
			
		||||
      | Ok(i) -> nth samplers i
 | 
			
		||||
    in
 | 
			
		||||
    let sample = match sampler with
 | 
			
		||||
      | Error e -> Error e
 | 
			
		||||
      | Ok(f) -> Ok(f ())
 | 
			
		||||
    in 
 | 
			
		||||
    sample
 | 
			
		||||
 | 
			
		||||
let () =
 | 
			
		||||
  let sample0 () = 0. in 
 | 
			
		||||
  let sample1 () = 1. in
 | 
			
		||||
  let sampleFew () = sampleTo 1. 3. in
 | 
			
		||||
  let sampleMany () = sampleTo 2. 10. in 
 | 
			
		||||
  let p1 = 0.8 in 
 | 
			
		||||
  let p2 = 0.5 in 
 | 
			
		||||
  let p3 = p1 *. p2 in 
 | 
			
		||||
  let weights = [ 1. -. p3; p3 /. 2.; p3 /. 4.; p3/. 4. ] in
 | 
			
		||||
  let sampler () = mixture [ sample0; sample1; sampleFew; sampleMany ] weights in
 | 
			
		||||
  let n = 1_000_000 in 
 | 
			
		||||
  let samples = Array.init n (fun _ -> sampler ()) in 
 | 
			
		||||
  match unwindSumArray samples with
 | 
			
		||||
    | Error err -> Printf.printf "Error %s\n" err
 | 
			
		||||
    | Ok(sum) -> (
 | 
			
		||||
        let mean = sum /. float_of_int(n) in 
 | 
			
		||||
        Printf.printf "Mean: %f\n" mean
 | 
			
		||||
      )
 | 
			
		||||
							
								
								
									
										21
									
								
								time.txt
									
									
									
									
									
								
							
							
						
						
									
										21
									
								
								time.txt
									
									
									
									
									
								
							| 
						 | 
				
			
			@ -138,3 +138,24 @@ Requires /bin/time, found on GNU/Linux systems
 | 
			
		|||
Running 100x and taking avg time example
 | 
			
		||||
Time using 1 thread: 37.60ms
 | 
			
		||||
 | 
			
		||||
## squiggle.ml
 | 
			
		||||
 | 
			
		||||
— make fast && time make run
 | 
			
		||||
ocamlopt -O3 -unsafe  samples.ml -o ./out/samples
 | 
			
		||||
mv samples.cmi samples.cmx samples.o ./out/
 | 
			
		||||
./out/samples
 | 
			
		||||
Mean: 0.884629
 | 
			
		||||
 | 
			
		||||
real    0m0.164s
 | 
			
		||||
user    0m0.159s
 | 
			
		||||
sys     0m0.004s
 | 
			
		||||
 | 
			
		||||
## squiggle.bc
 | 
			
		||||
 | 
			
		||||
make time
 | 
			
		||||
time ghbc -l squiggle.bc estimate.bc
 | 
			
		||||
.8882048395568279
 | 
			
		||||
 | 
			
		||||
real    0m15.906s
 | 
			
		||||
user    0m15.900s
 | 
			
		||||
sys     0m0.000s
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue
	
	Block a user