Compare commits

5 Commits

15 changed files with 53 additions and 365 deletions

Binary file not shown.

@@ -1,50 +0,0 @@
#include "../../squiggle.h"
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

// Estimate functions
float sample_0(uint32_t* seed)
{
    return 0;
}

float sample_1(uint32_t* seed)
{
    return 1;
}

float sample_few(uint32_t* seed)
{
    return random_to(1, 3, seed);
}

float sample_many(uint32_t* seed)
{
    return random_to(2, 10, seed);
}

int main()
{
    // set randomness seed
    uint32_t* seed = malloc(sizeof(uint32_t));
    *seed = 1000; // xorshift can't start with 0

    float p_a = 0.8;
    float p_b = 0.5;
    float p_c = p_a * p_b; // 0.4

    int n_dists = 4;
    float weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 }; // { 0.6, 0.2, 0.1, 0.1 }, sums to 1
    float (*samplers[])(uint32_t*) = { sample_0, sample_1, sample_few, sample_many };

    float result_one = mixture(samplers, weights, n_dists, seed);
    printf("result_one: %f\n", result_one);

    free(seed);
    return 0;
}

/*
Aggregation mechanisms:
- Quantiles (requires a sort)
- Sum
- Average
- Std
*/

@@ -1,53 +0,0 @@
# Interface:
# make
# make build
# make format
# make run

# Compiler
CC=gcc
# CC=tcc # <= faster compilation

# Main file
SRC=example.c
OUTPUT=example

## Dependencies
MATH=-lm

## Flags
DEBUG= #'-g'
STANDARD=-std=c99
WARNINGS=-Wall
OPTIMIZED=-O3 #-Ofast
# OPENMP=-fopenmp

## Formatter
STYLE_BLUEPRINT=webkit
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)

## make build
build: $(SRC)
	$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)

format: $(SRC)
	$(FORMATTER) $(SRC)

run: $(SRC) $(OUTPUT)
	OMP_NUM_THREADS=1 ./$(OUTPUT) && echo

## /usr/bin/time reports total seconds for the 100 runs, so 1000 * t / 100 is the average in milliseconds
time-linux:
	@echo "Requires /bin/time, found on GNU/Linux systems" && echo
	@echo "Running 100x and taking avg time $(OUTPUT)"
	@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do ./$(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo

## Profiling
profile-linux:
	echo "Requires perf, which depends on the kernel version, and might be in the linux-tools package or similar"
	echo "Must be run as sudo"
	$(CC) $(SRC) $(MATH) -o $(OUTPUT)
	sudo perf record ./$(OUTPUT)
	sudo perf report
	rm perf.data

@@ -1,59 +0,0 @@
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include "../../squiggle.h"

// Estimate functions
float sample_0(uint32_t* seed)
{
    return 0;
}

float sample_1(uint32_t* seed)
{
    return 1;
}

float sample_few(uint32_t* seed)
{
    return random_to(1, 3, seed);
}

float sample_many(uint32_t* seed)
{
    return random_to(2, 10, seed);
}

int main()
{
    // set randomness seed
    uint32_t* seed = malloc(sizeof(uint32_t));
    *seed = 1000; // xorshift can't start with 0

    float p_a = 0.8;
    float p_b = 0.5;
    float p_c = p_a * p_b;

    int n_dists = 4;
    float weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
    float (*samplers[])(uint32_t*) = { sample_0, sample_1, sample_few, sample_many };

    int n_samples = 1000000;
    float* result_many = (float*)malloc(n_samples * sizeof(float));
    for (int i = 0; i < n_samples; i++) {
        result_many[i] = mixture(samplers, weights, n_dists, seed);
    }

    // print only the first 100 of the 1M samples
    printf("result_many: [");
    for (int i = 0; i < 100; i++) {
        printf("%.2f, ", result_many[i]);
    }
    printf("]\n");

    free(result_many);
    free(seed);
    return 0;
}

/*
Aggregation mechanisms:
- Quantiles (requires a sort)
- Sum
- Average
- Std
*/
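None of the aggregation mechanisms listed in that closing comment are implemented in the example. As a rough sketch of the first one (quantiles, which requires a sort), assuming the `result_many` array from `main` above; the `compare_floats` and `quantile` helpers are hypothetical, not part of squiggle.h:

```c
#include <stdlib.h>
#include <string.h>

// qsort comparator for floats
int compare_floats(const void* a, const void* b)
{
    float fa = *(const float*)a;
    float fb = *(const float*)b;
    return (fa > fb) - (fa < fb);
}

// Sort a copy of the samples, then read a quantile off by rank.
// Sketch only: no interpolation between ranks, no error handling.
float quantile(float* xs, int n, float q)
{
    float* ys = malloc(n * sizeof(float));
    memcpy(ys, xs, n * sizeof(float));
    qsort(ys, n, sizeof(float), compare_floats);
    float result = ys[(int)(q * (n - 1))];
    free(ys);
    return result;
}
```

For instance, `quantile(result_many, n_samples, 0.05)` and `quantile(result_many, n_samples, 0.95)` would bracket a rough 90% interval of the mixture.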

@@ -1,53 +0,0 @@
# Interface:
# make
# make build
# make format
# make run

# Compiler
CC=gcc
# CC=tcc # <= faster compilation

# Main file
SRC=example.c
OUTPUT=example

## Dependencies
MATH=-lm

## Flags
DEBUG= #'-g'
STANDARD=-std=c99
WARNINGS=-Wall
OPTIMIZED=-O3 #-Ofast
# OPENMP=-fopenmp

## Formatter
STYLE_BLUEPRINT=webkit
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)

## make build
build: $(SRC)
	$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)

format: $(SRC)
	$(FORMATTER) $(SRC)

run: $(SRC) $(OUTPUT)
	OMP_NUM_THREADS=1 ./$(OUTPUT) && echo

## /usr/bin/time reports total seconds for the 100 runs, so 1000 * t / 100 is the average in milliseconds
time-linux:
	@echo "Requires /bin/time, found on GNU/Linux systems" && echo
	@echo "Running 100x and taking avg time $(OUTPUT)"
	@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do ./$(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo

## Profiling
profile-linux:
	echo "Requires perf, which depends on the kernel version, and might be in the linux-tools package or similar"
	echo "Must be run as sudo"
	$(CC) $(SRC) $(MATH) -o $(OUTPUT)
	sudo perf record ./$(OUTPUT)
	sudo perf report
	rm perf.data

@@ -1,109 +0,0 @@
#include <math.h>
#include <stdint.h>
#include <stdlib.h>

const float PI = 3.14159265358979323846;

// Pseudo Random number generator
uint32_t xorshift32(uint32_t* seed)
{
    // Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
    // See <https://stackoverflow.com/questions/53886131/how-does-xorshift32-works>
    // <https://en.wikipedia.org/wiki/Xorshift>
    // Also some drama: <https://www.pcg-random.org/posts/on-vignas-pcg-critique.html>, <https://prng.di.unimi.it/>
    uint32_t x = *seed;
    x ^= x << 13;
    x ^= x >> 17;
    x ^= x << 5;
    return *seed = x;
}

// Distribution & sampling functions
float rand_0_to_1(uint32_t* seed)
{
    return ((float)xorshift32(seed)) / ((float)UINT32_MAX);
}

float rand_float(float max, uint32_t* seed)
{
    return rand_0_to_1(seed) * max;
}

float ur_normal(uint32_t* seed)
{
    // Box-Muller transform: turns two uniform samples into a standard normal sample
    float u1 = rand_0_to_1(seed);
    float u2 = rand_0_to_1(seed);
    float z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2);
    return z;
}

float random_uniform(float from, float to, uint32_t* seed)
{
    return rand_0_to_1(seed) * (to - from) + from;
}

float random_normal(float mean, float sigma, uint32_t* seed)
{
    return (mean + sigma * ur_normal(seed));
}

float random_lognormal(float logmean, float logsigma, uint32_t* seed)
{
    return expf(random_normal(logmean, logsigma, seed));
}

float random_to(float low, float high, uint32_t* seed)
{
    // Interprets (low, high) as the 90% confidence interval of a lognormal,
    // i.e., low and high sit 1.6449 standard deviations below and above
    // the mean in log-space
    const float NORMAL95CONFIDENCE = 1.6448536269514722;
    float loglow = logf(low);
    float loghigh = logf(high);
    float logmean = (loglow + loghigh) / 2;
    float logsigma = (loghigh - loglow) / (2.0 * NORMAL95CONFIDENCE);
    return random_lognormal(logmean, logsigma, seed);
}

// Array helpers
float array_sum(float* array, int length)
{
    float output = 0.0;
    for (int i = 0; i < length; i++) {
        output += array[i];
    }
    return output;
}

void array_cumsum(float* array_to_sum, float* array_cumsummed, int length)
{
    array_cumsummed[0] = array_to_sum[0];
    for (int i = 1; i < length; i++) {
        array_cumsummed[i] = array_cumsummed[i - 1] + array_to_sum[i];
    }
}

// Mixture function
float mixture(float (*samplers[])(uint32_t*), float* weights, int n_dists, uint32_t* seed)
{
    // You can see a simpler version of this function in the git history
    // or in C-02-better-algorithm-one-thread/

    // Normalize the weights and compute their cumulative sum
    float sum_weights = array_sum(weights, n_dists);
    float* cumsummed_normalized_weights = (float*)malloc(n_dists * sizeof(float));
    cumsummed_normalized_weights[0] = weights[0] / sum_weights;
    for (int i = 1; i < n_dists; i++) {
        cumsummed_normalized_weights[i] = cumsummed_normalized_weights[i - 1] + weights[i] / sum_weights;
    }

    // Draw a uniform sample and pick the sampler whose cumulative weight it falls under
    float p = random_uniform(0, 1, seed);
    float result = 0; // initialized so we don't return garbage if rounding leaves p above the last cumulative weight
    for (int k = 0; k < n_dists; k++) {
        if (p < cumsummed_normalized_weights[k]) {
            result = samplers[k](seed);
            break;
        }
    }
    free(cumsummed_normalized_weights);
    return result;
}
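One way to sanity-check `random_to` is to count what fraction of a million samples falls inside [low, high]: by the 90% confidence interval construction above, it should come out near 0.9. A minimal sketch, assuming a standalone test file placed next to squiggle.h (this file is hypothetical, not part of the diff):

```c
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "squiggle.h"

int main()
{
    uint32_t* seed = malloc(sizeof(uint32_t));
    *seed = 1000; // xorshift can't start with 0

    int n = 1000 * 1000;
    int inside = 0;
    for (int i = 0; i < n; i++) {
        float x = random_to(1, 3, seed);
        if (x >= 1 && x <= 3)
            inside++;
    }
    // should print something close to 0.90
    printf("fraction inside [1, 3]: %f\n", (double)inside / n);

    free(seed);
    return 0;
}
```

Compile with the same flags as the examples, e.g. `gcc -O3 test.c -lm -o test`.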

@@ -1,9 +0,0 @@
- [ ] Add example for only one sample
- [ ] Add example for many samples
- [ ] Use gcc extension to define functions nested inside main.
- [ ] Use OpenMP for acceleration
- [ ] Chain various mixture functions
- [ ] Have some more complicated & realistic example
- [ ] Add summarization functions, like mean, std, 90% ci (or all c.i.?); see the sketch after this list
- [ ] Add beta distribution
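For that summarization item, here is a minimal sketch of what mean and standard deviation could look like, written in the style of squiggle.h's array helpers. The names `array_mean` and `array_std` are hypothetical; they are not in squiggle.h:

```c
#include <math.h>

// Hypothetical summarization helpers, following the array_sum style
float array_mean(float* array, int length)
{
    float sum = 0.0;
    for (int i = 0; i < length; i++) {
        sum += array[i];
    }
    return sum / length;
}

float array_std(float* array, int length)
{
    // population standard deviation: sqrt of the mean squared deviation
    float mean = array_mean(array, length);
    float sum_sq = 0.0;
    for (int i = 0; i < length; i++) {
        float deviation = array[i] - mean;
        sum_sq += deviation * deviation;
    }
    return sqrtf(sum_sq / length);
}
```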

@@ -16,24 +16,16 @@ result = mixture(dists, weights) # should be 1M samples
 mean(result)
 ```
 
-I don't particularly care about the speed of this particular example, but rather think that the speed in this simple example would be indicative of the speed when considering 100x or 1000x more complicated models. As of now, it may also be useful for checking the validity of simple estimations.
+I don't particularly care about the speed of this particular example. However, I think that how fast some approach is on this simple example might be indicative of that approach's speed when considering 100x or 1000x more complicated models. Having many different approaches computing the same model may also be useful for checking the validity of simple estimations.
 
-The title of this repository is a pun on two meanings of "time to": "how much time does it take to do x", and "let's do x". "BOTEC" stands for "back of the envelope calculation".
+The name of this repository is a pun on two meanings of "time to": "how much time does it take to do x", and "let's do x". "BOTEC" stands for "back of the envelope calculation".
-
-## Current languages
-
-- [x] C
-- [x] Javascript (NodeJS)
-- [x] Squiggle
-- [x] R
-- [x] Python
-- [x] Nim
 
 ## Comparison table
 
 | Language                    | Time      | Lines of code |
 |-----------------------------|-----------|---------------|
 | C (optimized, 16 threads)   | 5ms       | 249           |
+| squiggle.c                  | 37ms      | 54            |
 | Nim                         | 38ms      | 84            |
 | Lua (LuaJIT)                | 68ms      | 82            |
 | Lua                         | 278ms     | 82            |
@@ -46,7 +38,7 @@ The title of this repository is a pun on two meanings of "time to": "how much ti
 
 Time measurements taken with the [time](https://man7.org/linux/man-pages/man1/time.1.html) tool, using 1M samples. But different implementations use different algorithms and, occasionally, different time measuring methodologies (for the C, Nim and Lua implementations, I run the program 100 times and take the mean). Their speed was also measured under different loads in my machine. So I think that these time estimates are accurate within maybe ~2x or so.
 
-Note that the number of lines is much shorter for Squiggle and SquigglePy because I'm just counting the lines needed to get Squiggle and SquigglePy to output a model, not the lines inside them.
+Note that the number of lines is much shorter for Squiggle, SquigglePy and squiggle.c because I'm just counting the lines needed to get these libraries to output a model, not the lines inside them.
 
 ## Notes
@@ -83,11 +75,15 @@ Once the code was at 6.6ms, there was a 0.6ms gain possible by using OMP better,
 
 Although the above paragraphs were written in the first person, the C code was written together with Jorge Sierra, who translated the algorithmic improvements from nim to it and added the initial multithreading support.
 
+### squiggle.c
+
+squiggle.c is a minimalistic library focused on understandability and being self-contained. It grew from the initial C code in this repository. You can see the code for the library [here](https://git.nunosempere.com/personal/squiggle.c), and the code for the example we are discussing [here](https://git.nunosempere.com/personal/squiggle.c/src/branch/master/examples/02_many_samples_time_to_botec).
+
 ### NodeJS and Squiggle
 
-Using [bun](https://bun.sh/) instead of node is actually a bit slower. Also, both the NodeJS and the Squiggle code use [stdlib](https://stdlib.io/) in their innards, which has a bunch of interleaved functions that make the code slower. It's possible that not using that external library could make the code faster, but at the same time, the js approach does seem to be to use external libraries whenever possible.
+Using [bun](https://bun.sh/) instead of node is actually a bit slower. Also, both the NodeJS and the Squiggle code use [stdlib](https://stdlib.io/) in their innards, which has a bunch of interleaved functions that make the code slower. It's possible that not using that external library could make the code faster. But at the same time, the js approach does seem to be to use external libraries whenever possible.
 
-I am not particularly sure that the Squiggle code is actually producing 1M samples, but also have no particular plan to debug this.
+I am not particularly sure that the Squiggle code is actually producing 1M samples, but I am also not in a rush to debug this.
 
 ### Python
@@ -95,7 +91,7 @@ For the Python code, it's possible that the lack of speed is more a function of
 
 ### R
 
-R has a warm place in my heart from back in the day, and it has predefined functions to do everything. It was particularly fast to write for me, though not particularly fast to run :) However, I do recall that R does have some multithreading support; it wasn't used.
+R has a warm place in my heart from back in the day, and it has predefined functions to do everything. It was particularly fast to write for me, though not particularly fast to run :) R has some multithreading support, which I didn't use.
 
 ### Lua
@@ -111,7 +107,7 @@ stack traceback:
 make: *** [makefile:14: run] Error 1
 ```
 
-I also appreciated the speedup when using the LuaJIT interpreter. You can install it with commands similar to
+I also appreciated the speedup when using the LuaJIT interpreter. You can install LuaJIT with commands similar to:
 
 ```
 git clone https://luajit.org/git/luajit.git
@@ -128,31 +124,21 @@ Overall I don't think that this is a fair comparison of the languages intrinsica
 
 ## Languages I may add later
 
-- [ ] Julia (TuringML)
+- [ ] Zig
 - [ ] Rust
+- [ ] OCaml
+- [ ] Forth
+- [ ] Julia (TuringML, MCHammer)
 - [ ] Lisp
 - [ ] Go
-- [ ] Zig
-- [ ] Forth
 - [ ] sh/bash, lol?
-- [ ] OCaml
+- [ ] Dagger (<https://usedagger.com/>)
 - [ ] Haskell
 - [ ] OpenCL/CUDA (e.g., as in <https://www.eriksmistad.no/getting-started-with-opencl-and-gpu-computing/>). Seems like it would be overkill, and also the code would be way more complex. But maybe worth trying?
-- [-] Stan => As far as I can tell, Stan is designed to generate samples from the posterior distribution given some data, not to create data by drawing samples from an arbitrary distribution.
+- ~~[ ] Stan => As far as I can tell, Stan is designed to generate samples from the posterior distribution given some data, not to create data by drawing samples from an arbitrary distribution.~~
 - [ ] Maybe still worth reversing the process?
 - ... and suggestions welcome
-
-## Languages added so far
-
-- [x] Squiggle
-- [x] Javascript (NodeJS)
-- [x] Python (CPython)
-- [x] R
-- [x] C
-- [x] Nim
-- [x] SquigglePy
-- [x] Lua
 
 ## Roadmap
 
 The future of this project is uncertain. In most worlds, I simply forget about this repository.

python/beta/beta.py Normal file

@@ -0,0 +1,6 @@
import numpy as np

# draw and discard; this file only measures sampling speed
for i in range(1000 * 1000):
    x = np.random.beta(1, 2)

@@ -0,0 +1,9 @@
import numpy as np

n = 1000 * 1000

def sample_beta_1_2():
    return np.random.beta(1, 2)

a = np.array([sample_beta_1_2() for _ in range(n)])
print(np.mean(a))

@@ -0,0 +1,10 @@
import squigglepy as sq
import numpy as np

a = sq.to(1, 3)
b = a / 2
c = b / a
c_samples = sq.sample(c, 10)
print(c_samples)

@@ -128,3 +128,13 @@ Requires /bin/time, found on GNU/Linux systems
 Running 100x and taking avg time of: luajit samples.lua
 Time: 68.60ms
 
+## squiggle.c
+
+$ make time-linux
+
+Requires /bin/time, found on GNU/Linux systems
+
+Running 100x and taking avg time example
+Time using 1 thread: 37.60ms
+