improve nim code, change README
This commit is contained in:
parent
8acdc283a2
commit
47e2a25490
27
README.md
27
README.md
|
@ -2,17 +2,17 @@
|
||||||
|
|
||||||
## About
|
## About
|
||||||
|
|
||||||
This repository contains example of very simple code to manipulate samples in various programming languages. It implements this estimate:
|
This repository contains examples of very simple code to manipulate samples in various programming languages. It implements this platonic estimate:
|
||||||
|
|
||||||
```
|
```
|
||||||
p_a = 0.8
|
p_a = 0.8
|
||||||
p_b = 0.5
|
p_b = 0.5
|
||||||
p_c = p_a * p_b
|
p_c = p_a * p_b
|
||||||
|
|
||||||
dists = [0, 1, 1 to 3, 2 to 10] # each dist represented as 1M samples
|
dists = [0, 1, 1 to 3, 2 to 10]
|
||||||
weights = [(1 - p_c), p_c/2, p_c/4, p_c/4 ]
|
weights = [(1 - p_c), p_c/2, p_c/4, p_c/4 ]
|
||||||
|
|
||||||
result = mixture(dists, weights)
|
result = mixture(dists, weights) # should be 1M samples
|
||||||
mean(result)
|
mean(result)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -33,16 +33,27 @@ With the [time](https://man7.org/linux/man-pages/man1/time.1.html) tool, using 1
|
||||||
|
|
||||||
| Language | Time |
|
| Language | Time |
|
||||||
|----------------------|-----------|
|
|----------------------|-----------|
|
||||||
| Nim | 0m0.153s |
|
| Nim | 0m0.068s |
|
||||||
| C | 0m0,442s |
|
| C | 0m0.292s |
|
||||||
| Node | 0m0,732s |
|
| Javascript (NodeJS) | 0m0,732s |
|
||||||
| Squiggle | 0m1,536s |
|
| Squiggle | 0m1,536s |
|
||||||
| R | 0m7,000s |
|
| R | 0m7,000s |
|
||||||
| Python (CPython) | 0m16,641s |
|
| Python (CPython) | 0m16,641s |
|
||||||
|
|
||||||
I was very surprised that Node/Squiggle code was almost as fast as the raw C code. For the Python code, it's possible that the lack of speed is more a function of me not being as familiar with Python. It's also very possible that the code would run faster with [PyPy](https://doc.pypy.org).
|
## Notes
|
||||||
|
|
||||||
I was also really happy with trying [Nim](https://nim-lang.org/). The version which beats all others is just the fastest "danger" compilation of Nim (the "release" compilation is 0m0.183s instead). The Nim version has the particularity that I define the normal function from scratch, using the [Box–Muller transform](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form). For Nim I also have a version of the code which takes around 4 seconds, where I define some very inefficient sine & logarithm functions to feed into the Box-Muller method, because it felt like fun to really write a botec tool really from scratch.
|
I was really happy trying [Nim](https://nim-lang.org/), and as a result the Nim code is a bit more optimized and engineered:
|
||||||
|
|
||||||
|
1. It is using the fastest "danger" compilation mode.
|
||||||
|
2. It has some optimizations: I don't compute 1M samples for each distribution, but instead pass functions around and compute the 1M samples at the end.
|
||||||
|
3. I define the normal function from scratch, using the [Box–Muller transform](https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form).
|
||||||
|
4. I also have a version in which I define the logarithm and sine functions themselves in Nim to feed into the Box-Muller method, but it is much slower.
|
||||||
|
|
||||||
|
Without 1. and 2., the Nim code takes 0m0.183s instead. But I don't think that these are unfair advantages: I liked trying out Nim and therefore put more love into the code, and this seems like it could be a recurring factor.
|
||||||
|
|
||||||
|
For C, I enabled the `-Ofast` compilation flag. Without it, it instead takes ~0.4 seconds. Initially, before I enabled the `-Ofast` flag, I was surprised that the Node and Squiggle code were comparable to the C code. Using [bun](https://bun.sh/) instead of node is actually a bit slower.
|
||||||
|
|
||||||
|
For the Python code, it's possible that the lack of speed is more a function of me not being as familiar with Python. It's also very possible that the code would run faster with [PyPy](https://doc.pypy.org).
|
||||||
|
|
||||||
## Languages I may add later
|
## Languages I may add later
|
||||||
|
|
||||||
|
|
BIN
nim/samples
BIN
nim/samples
Binary file not shown.
|
@ -37,13 +37,9 @@ proc to(low: float, high: float): float =
|
||||||
|
|
||||||
## Manipulate samples
|
## Manipulate samples
|
||||||
|
|
||||||
proc make_samples(f: () -> float, n: int): seq[float] =
|
proc mixture(fs: seq[proc (): float{.nimcall.}], ps: seq[float], n: int): seq[float] =
|
||||||
result = toSeq(1..n).map(_ => f())
|
|
||||||
return result
|
|
||||||
|
|
||||||
proc mixture(sxs: seq[seq[float]], ps: seq[float], n: int): seq[float] =
|
assert fs.len == ps.len
|
||||||
|
|
||||||
assert sxs.len == ps.len
|
|
||||||
|
|
||||||
var ws: seq[float]
|
var ws: seq[float]
|
||||||
var sum = 0.0
|
var sum = 0.0
|
||||||
|
@ -52,23 +48,23 @@ proc mixture(sxs: seq[seq[float]], ps: seq[float], n: int): seq[float] =
|
||||||
ws.add(sum)
|
ws.add(sum)
|
||||||
ws = ws.map(w => w/sum)
|
ws = ws.map(w => w/sum)
|
||||||
|
|
||||||
proc get_mixture_sample(): float =
|
var samples: seq[float]
|
||||||
let r = rand(1.0)
|
let rs = toSeq(1..n).map(_=>rand(1.0))
|
||||||
var i = ws.len - 1
|
for i in 0..(n-1):
|
||||||
for j, w in ws:
|
let r = rs[i]
|
||||||
|
var j = ws.len - 1
|
||||||
|
for k, w in ws:
|
||||||
if r < w:
|
if r < w:
|
||||||
i = j
|
j = k
|
||||||
break
|
break
|
||||||
## only occasion when ^ doesn't assign i
|
## only occasion when ^ doesn't assign j
|
||||||
## is when r is exactly 1
|
## is when r is exactly 1
|
||||||
## which would correspond to choosing the last item in ws
|
## which would correspond to choosing the last item in ws
|
||||||
## which is why i is initialized to ws.len
|
## which is why j is initialized to ws.len - 1
|
||||||
let xs = sxs[i]
|
let f = fs[j]
|
||||||
let l = xs.len-1
|
samples.add(f())
|
||||||
let k = rand(0..l)
|
return samples
|
||||||
return xs[k]
|
|
||||||
|
|
||||||
return toSeq(1..n).map(_ => get_mixture_sample())
|
|
||||||
|
|
||||||
## Actual model
|
## Actual model
|
||||||
|
|
||||||
|
@ -80,9 +76,8 @@ let p_c = p_a * p_b
|
||||||
|
|
||||||
let weights = @[ 1.0 - p_c, p_c/2.0, p_c/4.0, p_c/4.0 ]
|
let weights = @[ 1.0 - p_c, p_c/2.0, p_c/4.0, p_c/4.0 ]
|
||||||
|
|
||||||
let fs = [ () => 0.0, () => 1.0, () => to(1.0, 3.0), () => to(2.0, 10.0) ]
|
let fs = @[ proc (): float = 0.0, proc (): float = 1.0, proc (): float = to(1.0, 3.0), proc (): float = to(2.0, 10.0)]
|
||||||
let dists = fs.map(f => make_samples(f, n))
|
let result = mixture(fs, weights, n)
|
||||||
let result = mixture(dists, weights, n)
|
|
||||||
let mean_result = foldl(result, a + b, 0.0) / float(result.len)
|
let mean_result = foldl(result, a + b, 0.0) / float(result.len)
|
||||||
|
|
||||||
# echo result
|
# echo result
|
||||||
|
|
30
time.txt
30
time.txt
|
@ -1,11 +1,19 @@
|
||||||
# C
|
# C
|
||||||
|
|
||||||
|
## normal compilation
|
||||||
0.888458
|
0.888458
|
||||||
|
|
||||||
real 0m0,442s
|
real 0m0,442s
|
||||||
user 0m0,378s
|
user 0m0,378s
|
||||||
sys 0m0,064s
|
sys 0m0,064s
|
||||||
|
|
||||||
|
## -Ofast
|
||||||
|
0.888458
|
||||||
|
|
||||||
|
real 0m0.292s
|
||||||
|
user 0m0.266s
|
||||||
|
sys 0m0.026s
|
||||||
|
|
||||||
# Squiggle
|
# Squiggle
|
||||||
|
|
||||||
real 0m1,536s
|
real 0m1,536s
|
||||||
|
@ -39,22 +47,22 @@ sys 0m0,052s
|
||||||
## Nim
|
## Nim
|
||||||
|
|
||||||
nim c --verbosity:0 samples.nim && time ./samples --verbosity:0 && echo
|
nim c --verbosity:0 samples.nim && time ./samples --verbosity:0 && echo
|
||||||
0.8881633539025908
|
0.8860780498240779
|
||||||
|
|
||||||
real 0m0.706s
|
real 0m0.234s
|
||||||
user 0m0.685s
|
user 0m0.214s
|
||||||
sys 0m0.020s
|
sys 0m0.020s
|
||||||
|
|
||||||
nim c --verbosity:0 -d:release samples.nim && time ./samples --verbosity:0 && echo
|
nim c --verbosity:0 -d:release samples.nim && time ./samples --verbosity:0 && echo
|
||||||
0.8861663545062978
|
0.884035098700204
|
||||||
|
|
||||||
real 0m0.184s
|
real 0m0.074s
|
||||||
user 0m0.151s
|
user 0m0.043s
|
||||||
sys 0m0.032s
|
sys 0m0.031s
|
||||||
|
|
||||||
nim c --verbosity:0 -d:danger samples.nim && time ./samples --verbosity:0
|
nim c --verbosity:0 -d:danger samples.nim && time ./samples --verbosity:0
|
||||||
0.8879220244477399
|
0.8892827195895541
|
||||||
|
|
||||||
real 0m0.158s
|
real 0m0.068s
|
||||||
user 0m0.130s
|
user 0m0.048s
|
||||||
sys 0m0.028s
|
sys 0m0.020s
|
||||||
|
|
Loading…
Reference in New Issue
Block a user