Compare commits

5 Commits

15 changed files with 53 additions and 365 deletions

Binary file not shown.

@@ -1,50 +0,0 @@
#include "../../squiggle.h"
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

// Estimate functions
float sample_0(uint32_t* seed)
{
    return 0;
}

float sample_1(uint32_t* seed)
{
    return 1;
}

float sample_few(uint32_t* seed)
{
    return random_to(1, 3, seed);
}

float sample_many(uint32_t* seed)
{
    return random_to(2, 10, seed);
}

int main()
{
    // set randomness seed
    uint32_t* seed = malloc(sizeof(uint32_t));
    *seed = 1000; // xorshift can't start with 0

    float p_a = 0.8;
    float p_b = 0.5;
    float p_c = p_a * p_b; // 0.4

    int n_dists = 4;
    float weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 }; // { 0.6, 0.2, 0.1, 0.1 }, sums to 1
    float (*samplers[])(uint32_t*) = { sample_0, sample_1, sample_few, sample_many };

    float result_one = mixture(samplers, weights, n_dists, seed);
    printf("result_one: %f\n", result_one);

    free(seed);
    return 0;
}

/*
Aggregation mechanisms:
- Quantiles (requires a sort)
- Sum
- Average
- Std
*/

@@ -1,53 +0,0 @@
# Interface:
# make
# make build
# make format
# make run

# Compiler
CC=gcc
# CC=tcc # <= faster compilation

# Main file
SRC=example.c
OUTPUT=example

## Dependencies
MATH=-lm

## Flags
DEBUG= #'-g'
STANDARD=-std=c99
WARNINGS=-Wall
OPTIMIZED=-O3 #-Ofast
# OPENMP=-fopenmp

## Formatter
STYLE_BLUEPRINT=webkit
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)

## make build
build: $(SRC)
	$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)

format: $(SRC)
	$(FORMATTER) $(SRC)

run: $(SRC) $(OUTPUT)
	OMP_NUM_THREADS=1 ./$(OUTPUT) && echo

## /usr/bin/time reports total seconds for the 100 runs, so 1000 * t / 100 is the average in milliseconds
time-linux:
	@echo "Requires /bin/time, found on GNU/Linux systems" && echo
	@echo "Running 100x and taking avg time $(OUTPUT)"
	@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do ./$(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo

## Profiling
profile-linux:
	echo "Requires perf, which depends on the kernel version, and might be in the linux-tools package or similar"
	echo "Must be run as sudo"
	$(CC) $(SRC) $(MATH) -o $(OUTPUT)
	sudo perf record ./$(OUTPUT)
	sudo perf report
	rm perf.data

@@ -1,59 +0,0 @@
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include "../../squiggle.h"

// Estimate functions
float sample_0(uint32_t* seed)
{
    return 0;
}

float sample_1(uint32_t* seed)
{
    return 1;
}

float sample_few(uint32_t* seed)
{
    return random_to(1, 3, seed);
}

float sample_many(uint32_t* seed)
{
    return random_to(2, 10, seed);
}

int main()
{
    // set randomness seed
    uint32_t* seed = malloc(sizeof(uint32_t));
    *seed = 1000; // xorshift can't start with 0

    float p_a = 0.8;
    float p_b = 0.5;
    float p_c = p_a * p_b;

    int n_dists = 4;
    float weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
    float (*samplers[])(uint32_t*) = { sample_0, sample_1, sample_few, sample_many };

    int n_samples = 1000000;
    float* result_many = (float*)malloc(n_samples * sizeof(float));
    for (int i = 0; i < n_samples; i++) {
        result_many[i] = mixture(samplers, weights, n_dists, seed);
    }

    // print only the first 100 of the 1M samples
    printf("result_many: [");
    for (int i = 0; i < 100; i++) {
        printf("%.2f, ", result_many[i]);
    }
    printf("]\n");

    free(result_many);
    free(seed);
    return 0;
}

/*
Aggregation mechanisms:
- Quantiles (requires a sort)
- Sum
- Average
- Std
*/
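None of the aggregation mechanisms listed in that closing comment are implemented in the example. As a rough sketch of the first one (quantiles, which requires a sort), assuming the `result_many` array from `main` above; the `compare_floats` and `quantile` helpers are hypothetical, not part of squiggle.h:

```c
#include <stdlib.h>
#include <string.h>

// qsort comparator for floats
int compare_floats(const void* a, const void* b)
{
    float fa = *(const float*)a;
    float fb = *(const float*)b;
    return (fa > fb) - (fa < fb);
}

// Sort a copy of the samples, then read a quantile off by rank.
// Sketch only: no interpolation between ranks, no error handling.
float quantile(float* xs, int n, float q)
{
    float* ys = malloc(n * sizeof(float));
    memcpy(ys, xs, n * sizeof(float));
    qsort(ys, n, sizeof(float), compare_floats);
    float result = ys[(int)(q * (n - 1))];
    free(ys);
    return result;
}
```

For instance, `quantile(result_many, n_samples, 0.05)` and `quantile(result_many, n_samples, 0.95)` would bracket a rough 90% interval of the mixture.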

@@ -1,53 +0,0 @@
# Interface:
# make
# make build
# make format
# make run

# Compiler
CC=gcc
# CC=tcc # <= faster compilation

# Main file
SRC=example.c
OUTPUT=example

## Dependencies
MATH=-lm

## Flags
DEBUG= #'-g'
STANDARD=-std=c99
WARNINGS=-Wall
OPTIMIZED=-O3 #-Ofast
# OPENMP=-fopenmp

## Formatter
STYLE_BLUEPRINT=webkit
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)

## make build
build: $(SRC)
	$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)

format: $(SRC)
	$(FORMATTER) $(SRC)

run: $(SRC) $(OUTPUT)
	OMP_NUM_THREADS=1 ./$(OUTPUT) && echo

## /usr/bin/time reports total seconds for the 100 runs, so 1000 * t / 100 is the average in milliseconds
time-linux:
	@echo "Requires /bin/time, found on GNU/Linux systems" && echo
	@echo "Running 100x and taking avg time $(OUTPUT)"
	@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do ./$(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo

## Profiling
profile-linux:
	echo "Requires perf, which depends on the kernel version, and might be in the linux-tools package or similar"
	echo "Must be run as sudo"
	$(CC) $(SRC) $(MATH) -o $(OUTPUT)
	sudo perf record ./$(OUTPUT)
	sudo perf report
	rm perf.data

@@ -1,109 +0,0 @@
#include <math.h>
#include <stdint.h>
#include <stdlib.h>

const float PI = 3.14159265358979323846;

// Pseudo Random number generator
uint32_t xorshift32(uint32_t* seed)
{
    // Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
    // See <https://stackoverflow.com/questions/53886131/how-does-xorshift32-works>
    // <https://en.wikipedia.org/wiki/Xorshift>
    // Also some drama: <https://www.pcg-random.org/posts/on-vignas-pcg-critique.html>, <https://prng.di.unimi.it/>
    uint32_t x = *seed;
    x ^= x << 13;
    x ^= x >> 17;
    x ^= x << 5;
    return *seed = x;
}

// Distribution & sampling functions
float rand_0_to_1(uint32_t* seed)
{
    return ((float)xorshift32(seed)) / ((float)UINT32_MAX);
}

float rand_float(float max, uint32_t* seed)
{
    return rand_0_to_1(seed) * max;
}

float ur_normal(uint32_t* seed)
{
    // Box-Muller transform: turns two uniform samples into a standard normal sample
    float u1 = rand_0_to_1(seed);
    float u2 = rand_0_to_1(seed);
    float z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2);
    return z;
}

float random_uniform(float from, float to, uint32_t* seed)
{
    return rand_0_to_1(seed) * (to - from) + from;
}

float random_normal(float mean, float sigma, uint32_t* seed)
{
    return (mean + sigma * ur_normal(seed));
}

float random_lognormal(float logmean, float logsigma, uint32_t* seed)
{
    return expf(random_normal(logmean, logsigma, seed));
}

float random_to(float low, float high, uint32_t* seed)
{
    // Interprets (low, high) as the 90% confidence interval of a lognormal,
    // i.e., low and high sit 1.6449 standard deviations below and above
    // the mean in log-space
    const float NORMAL95CONFIDENCE = 1.6448536269514722;
    float loglow = logf(low);
    float loghigh = logf(high);
    float logmean = (loglow + loghigh) / 2;
    float logsigma = (loghigh - loglow) / (2.0 * NORMAL95CONFIDENCE);
    return random_lognormal(logmean, logsigma, seed);
}

// Array helpers
float array_sum(float* array, int length)
{
    float output = 0.0;
    for (int i = 0; i < length; i++) {
        output += array[i];
    }
    return output;
}

void array_cumsum(float* array_to_sum, float* array_cumsummed, int length)
{
    array_cumsummed[0] = array_to_sum[0];
    for (int i = 1; i < length; i++) {
        array_cumsummed[i] = array_cumsummed[i - 1] + array_to_sum[i];
    }
}

// Mixture function
float mixture(float (*samplers[])(uint32_t*), float* weights, int n_dists, uint32_t* seed)
{
    // You can see a simpler version of this function in the git history
    // or in C-02-better-algorithm-one-thread/

    // Normalize the weights and compute their cumulative sum
    float sum_weights = array_sum(weights, n_dists);
    float* cumsummed_normalized_weights = (float*)malloc(n_dists * sizeof(float));
    cumsummed_normalized_weights[0] = weights[0] / sum_weights;
    for (int i = 1; i < n_dists; i++) {
        cumsummed_normalized_weights[i] = cumsummed_normalized_weights[i - 1] + weights[i] / sum_weights;
    }

    // Draw a uniform sample and pick the sampler whose cumulative weight it falls under
    float p = random_uniform(0, 1, seed);
    float result = 0; // initialized so we don't return garbage if rounding leaves p above the last cumulative weight
    for (int k = 0; k < n_dists; k++) {
        if (p < cumsummed_normalized_weights[k]) {
            result = samplers[k](seed);
            break;
        }
    }
    free(cumsummed_normalized_weights);
    return result;
}
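One way to sanity-check `random_to` is to count what fraction of a million samples falls inside [low, high]: by the 90% confidence interval construction above, it should come out near 0.9. A minimal sketch, assuming a standalone test file placed next to squiggle.h (this file is hypothetical, not part of the diff):

```c
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "squiggle.h"

int main()
{
    uint32_t* seed = malloc(sizeof(uint32_t));
    *seed = 1000; // xorshift can't start with 0

    int n = 1000 * 1000;
    int inside = 0;
    for (int i = 0; i < n; i++) {
        float x = random_to(1, 3, seed);
        if (x >= 1 && x <= 3)
            inside++;
    }
    // should print something close to 0.90
    printf("fraction inside [1, 3]: %f\n", (double)inside / n);

    free(seed);
    return 0;
}
```

Compile with the same flags as the examples, e.g. `gcc -O3 test.c -lm -o test`.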

@@ -1,9 +0,0 @@
- [ ] Add example for only one sample
- [ ] Add example for many samples
- [ ] Use gcc extension to define functions nested inside main.
- [ ] Use OpenMP for acceleration
- [ ] Chain various mixture functions
- [ ] Have some more complicated & realistic example
- [ ] Add summarization functions, like mean, std, 90% ci (or all c.i.?); see the sketch after this list
- [ ] Add beta distribution
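For that summarization item, here is a minimal sketch of what mean and standard deviation could look like, written in the style of squiggle.h's array helpers. The names `array_mean` and `array_std` are hypothetical; they are not in squiggle.h:

```c
#include <math.h>

// Hypothetical summarization helpers, following the array_sum style
float array_mean(float* array, int length)
{
    float sum = 0.0;
    for (int i = 0; i < length; i++) {
        sum += array[i];
    }
    return sum / length;
}

float array_std(float* array, int length)
{
    // population standard deviation: sqrt of the mean squared deviation
    float mean = array_mean(array, length);
    float sum_sq = 0.0;
    for (int i = 0; i < length; i++) {
        float deviation = array[i] - mean;
        sum_sq += deviation * deviation;
    }
    return sqrtf(sum_sq / length);
}
```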

@@ -16,24 +16,16 @@ result = mixture(dists, weights) # should be 1M samples
 mean(result)
 ```
 
-I don't particularly care about the speed of this particular example, but rather think that the speed in this simple example would be indicative of the speed when considering 100x or 1000x more complicated models. As of now, it may also be useful for checking the validity of simple estimations.
+I don't particularly care about the speed of this particular example. However, I think that how fast some approach is on this simple example might be indicative of that approach's speed when considering 100x or 1000x more complicated models. Having many different approaches computing the same model may also be useful for checking the validity of simple estimations.
 
-The title of this repository is a pun on two meanings of "time to": "how much time does it take to do x", and "let's do x". "BOTEC" stands for "back of the envelope calculation".
+The name of this repository is a pun on two meanings of "time to": "how much time does it take to do x", and "let's do x". "BOTEC" stands for "back of the envelope calculation".
-
-## Current languages
-
-- [x] C
-- [x] Javascript (NodeJS)
-- [x] Squiggle
-- [x] R
-- [x] Python
-- [x] Nim
 
 ## Comparison table
 
 | Language                    | Time      | Lines of code |
 |-----------------------------|-----------|---------------|
 | C (optimized, 16 threads)   | 5ms       | 249           |
+| squiggle.c                  | 37ms      | 54            |
 | Nim                         | 38ms      | 84            |
 | Lua (LuaJIT)                | 68ms      | 82            |
 | Lua                         | 278ms     | 82            |
@@ -46,7 +38,7 @@ The title of this repository is a pun on two meanings of "time to": "how much ti
 
 Time measurements taken with the [time](https://man7.org/linux/man-pages/man1/time.1.html) tool, using 1M samples. But different implementations use different algorithms and, occasionally, different time measuring methodologies (for the C, Nim and Lua implementations, I run the program 100 times and take the mean). Their speed was also measured under different loads in my machine. So I think that these time estimates are accurate within maybe ~2x or so.
 
-Note that the number of lines is much shorter for Squiggle and SquigglePy because I'm just counting the lines needed to get Squiggle and SquigglePy to output a model, not the lines inside them.
+Note that the number of lines is much shorter for Squiggle, SquigglePy and squiggle.c because I'm just counting the lines needed to get these libraries to output a model, not the lines inside them.
 
 ## Notes
@@ -83,11 +75,15 @@ Once the code was at 6.6ms, there was a 0.6ms gain possible by using OMP better,
 
 Although the above paragraphs were written in the first person, the C code was written together with Jorge Sierra, who translated the algorithmic improvements from nim to it and added the initial multithreading support.
 
+### squiggle.c
+
+squiggle.c is a minimalistic library focused on understandability and being self-contained. It grew from the initial C code in this repository. You can see the code for the library [here](https://git.nunosempere.com/personal/squiggle.c), and the code for the example we are discussing [here](https://git.nunosempere.com/personal/squiggle.c/src/branch/master/examples/02_many_samples_time_to_botec).
+
 ### NodeJS and Squiggle
 
-Using [bun](https://bun.sh/) instead of node is actually a bit slower. Also, both the NodeJS and the Squiggle code use [stdlib](https://stdlib.io/) in their innards, which has a bunch of interleaved functions that make the code slower. It's possible that not using that external library could make the code faster, but at the same time, the js approach does seem to be to use external libraries whenever possible.
+Using [bun](https://bun.sh/) instead of node is actually a bit slower. Also, both the NodeJS and the Squiggle code use [stdlib](https://stdlib.io/) in their innards, which has a bunch of interleaved functions that make the code slower. It's possible that not using that external library could make the code faster. But at the same time, the js approach does seem to be to use external libraries whenever possible.
 
-I am not particularly sure that the Squiggle code is actually producing 1M samples, but also have no particular plan to debug this.
+I am not particularly sure that the Squiggle code is actually producing 1M samples, but I am also not in a rush to debug this.
 
 ### Python
@@ -95,7 +91,7 @@ For the Python code, it's possible that the lack of speed is more a function of
 
 ### R
 
-R has a warm place in my heart from back in the day, and it has predefined functions to do everything. It was particularly fast to write for me, though not particularly fast to run :) However, I do recall that R does have some multithreading support; it wasn't used.
+R has a warm place in my heart from back in the day, and it has predefined functions to do everything. It was particularly fast to write for me, though not particularly fast to run :) R has some multithreading support, which I didn't use.
 
 ### Lua
@@ -111,7 +107,7 @@ stack traceback:
 make: *** [makefile:14: run] Error 1
 ```
 
-I also appreciated the speedup when using the LuaJIT interpreter. You can install it with commands similar to
+I also appreciated the speedup when using the LuaJIT interpreter. You can install LuaJIT with commands similar to:
 
 ```
 git clone https://luajit.org/git/luajit.git
@@ -128,31 +124,21 @@ Overall I don't think that this is a fair comparison of the languages intrinsica
 
 ## Languages I may add later
 
-- [ ] Julia (TuringML)
+- [ ] Zig
 - [ ] Rust
+- [ ] OCaml
+- [ ] Forth
+- [ ] Julia (TuringML, MCHammer)
 - [ ] Lisp
 - [ ] Go
-- [ ] Zig
-- [ ] Forth
 - [ ] sh/bash, lol?
-- [ ] OCaml
+- [ ] Dagger (<https://usedagger.com/>)
 - [ ] Haskell
 - [ ] OpenCL/CUDA (e.g., as in <https://www.eriksmistad.no/getting-started-with-opencl-and-gpu-computing/>). Seems like it would be overkill, and also the code would be way more complex. But maybe worth trying?
-- [-] Stan => As far as I can tell, Stan is designed to generate samples from the posterior distribution given some data, not to create data by drawing samples from an arbitrary distribution.
+- ~~[ ] Stan => As far as I can tell, Stan is designed to generate samples from the posterior distribution given some data, not to create data by drawing samples from an arbitrary distribution.~~
 - [ ] Maybe still worth reversing the process?
 - ... and suggestions welcome
-
-## Languages added so far
-
-- [x] Squiggle
-- [x] Javascript (NodeJS)
-- [x] Python (CPython)
-- [x] R
-- [x] C
-- [x] Nim
-- [x] SquigglePy
-- [x] Lua
 
 ## Roadmap
 
 The future of this project is uncertain. In most worlds, I simply forget about this repository.

python/beta/beta.py Normal file

@@ -0,0 +1,6 @@
import numpy as np

# draw and discard; this file only measures sampling speed
for i in range(1000 * 1000):
    x = np.random.beta(1, 2)

@@ -0,0 +1,9 @@
import numpy as np

n = 1000 * 1000

def sample_beta_1_2():
    return np.random.beta(1, 2)

a = np.array([sample_beta_1_2() for _ in range(n)])
print(np.mean(a))

@@ -0,0 +1,10 @@
import squigglepy as sq
import numpy as np

a = sq.to(1, 3)
b = a / 2
c = b / a
c_samples = sq.sample(c, 10)
print(c_samples)

@@ -128,3 +128,13 @@ Requires /bin/time, found on GNU/Linux systems
 Running 100x and taking avg time of: luajit samples.lua
 Time: 68.60ms
 
+## squiggle.c
+
+$ make time-linux
+
+Requires /bin/time, found on GNU/Linux systems
+
+Running 100x and taking avg time example
+Time using 1 thread: 37.60ms
+