move squiggle.c to its own repo, fix readme typo.

2023-08-17 14:19:32 +02:00 · 2023-08-17 14:19:32 +02:00 · 4e079e9015
commit 4e079e9015
parent 3b29ad7e45
12 changed files with 26 additions and 334 deletions
--- a/C/squiggle_c/examples/01_one_sample/example
+++ b/C/squiggle_c/examples/01_one_sample/example
--- a/C/squiggle_c/examples/01_one_sample/example.c
+++ b/C/squiggle_c/examples/01_one_sample/example.c
@ -1,50 +0,0 @@
 #include "../../squiggle.h"
 #include <stdint.h>
 #include <stdlib.h>
 #include <stdio.h>
 // Estimate functions
 float sample_0(uint32_t* seed)
 {
    return 0;
 }
 float sample_1(uint32_t* seed)
 {
    return 1;
 }
 float sample_few(uint32_t* seed)
 {
    return random_to(1, 3, seed);
 }
 float sample_many(uint32_t* seed)
 {
    return random_to(2, 10, seed);
 }
 int main(){
    // set randomness seed
 		uint32_t* seed = malloc(sizeof(uint32_t));
 		*seed = 1000; // xorshift can't start with 0
    float p_a = 0.8;
    float p_b = 0.5;
    float p_c = p_a * p_b;
    int n_dists = 4;
    float weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
    float (*samplers[])(uint32_t*) = { sample_0, sample_1, sample_few, sample_many };
    float result_one = mixture(samplers, weights, n_dists, seed);
 		printf("result_one: %f\n", result_one);
 }
 /* 
 Aggregation mechanisms:
 - Quantiles (requires a sort)
 - Sum 
 - Average
 - Std
 */
--- a/C/squiggle_c/examples/01_one_sample/makefile
+++ b/C/squiggle_c/examples/01_one_sample/makefile
@ -1,53 +0,0 @@
 # Interface: 
 #   make
 #   make build
 #   make format
 #   make run
 # Compiler
 CC=gcc
 # CC=tcc # <= faster compilation
 # Main file
 SRC=example.c
 OUTPUT=example
 ## Dependencies
 MATH=-lm
 ## Flags
 DEBUG= #'-g'
 STANDARD=-std=c99
 WARNINGS=-Wall
 OPTIMIZED=-O3  #-Ofast
 # OPENMP=-fopenmp
 ## Formatter
 STYLE_BLUEPRINT=webkit
 FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
 ## make build
 build: $(SRC)
 	$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)
 format: $(SRC)
 	$(FORMATTER) $(SRC)
 run: $(SRC) $(OUTPUT)
 	OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
 time-linux: 
 	@echo "Requires /bin/time, found on GNU/Linux systems" && echo
 	@echo "Running 100x and taking avg time $(OUTPUT)"
 	@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
 ## Profiling
 profile-linux: 
 	echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
 	echo "Must be run as sudo"
 	$(CC) $(SRC) $(MATH) -o $(OUTPUT)
 	sudo perf record $(OUTPUT)
 	sudo perf report
 	rm perf.data
--- a/C/squiggle_c/examples/02_many_samples/example
+++ b/C/squiggle_c/examples/02_many_samples/example
--- a/C/squiggle_c/examples/02_many_samples/example.c
+++ b/C/squiggle_c/examples/02_many_samples/example.c
@ -1,59 +0,0 @@
 #include <stdint.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include "../../squiggle.h"
 // Estimate functions
 float sample_0(uint32_t* seed)
 {
    return 0;
 }
 float sample_1(uint32_t* seed)
 {
    return 1;
 }
 float sample_few(uint32_t* seed)
 {
    return random_to(1, 3, seed);
 }
 float sample_many(uint32_t* seed)
 {
    return random_to(2, 10, seed);
 }
 int main(){
    // set randomness seed
 		uint32_t* seed = malloc(sizeof(uint32_t));
 		*seed = 1000; // xorshift can't start with 0
    float p_a = 0.8;
    float p_b = 0.5;
    float p_c = p_a * p_b;
    int n_dists = 4;
    float weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
    float (*samplers[])(uint32_t*) = { sample_0, sample_1, sample_few, sample_many };
 		int n_samples = 1000000;
 		float* result_many = (float *) malloc(n_samples * sizeof(float));
 		for(int i=0; i<n_samples; i++){
      result_many[i] = mixture(samplers, weights, n_dists, seed);
 		}
 		printf("result_many: [");
 		for(int i=0; i<100; i++){
 		  printf("%.2f, ", result_many[i]);
 		}
 		printf("]\n");
 }
 /* 
 Aggregation mechanisms:
 - Quantiles (requires a sort)
 - Sum 
 - Average
 - Std
 */
--- a/C/squiggle_c/examples/02_many_samples/makefile
+++ b/C/squiggle_c/examples/02_many_samples/makefile
@ -1,53 +0,0 @@
 # Interface: 
 #   make
 #   make build
 #   make format
 #   make run
 # Compiler
 CC=gcc
 # CC=tcc # <= faster compilation
 # Main file
 SRC=example.c
 OUTPUT=example
 ## Dependencies
 MATH=-lm
 ## Flags
 DEBUG= #'-g'
 STANDARD=-std=c99
 WARNINGS=-Wall
 OPTIMIZED=-O3  #-Ofast
 # OPENMP=-fopenmp
 ## Formatter
 STYLE_BLUEPRINT=webkit
 FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
 ## make build
 build: $(SRC)
 	$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)
 format: $(SRC)
 	$(FORMATTER) $(SRC)
 run: $(SRC) $(OUTPUT)
 	OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
 time-linux: 
 	@echo "Requires /bin/time, found on GNU/Linux systems" && echo
 	@echo "Running 100x and taking avg time $(OUTPUT)"
 	@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
 ## Profiling
 profile-linux: 
 	echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
 	echo "Must be run as sudo"
 	$(CC) $(SRC) $(MATH) -o $(OUTPUT)
 	sudo perf record $(OUTPUT)
 	sudo perf report
 	rm perf.data
--- a/C/squiggle_c/squiggle.h
+++ b/C/squiggle_c/squiggle.h
@ -1,109 +0,0 @@
 #include <math.h>
 #include <stdint.h>
 #include <stdlib.h>
 const float PI = 3.14159265358979323846;
 // Pseudo Random number generator
 uint32_t xorshift32
 (uint32_t* seed)
 {
 	// Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
  // See <https://stackoverflow.com/questions/53886131/how-does-xorshift32-works>
  // https://en.wikipedia.org/wiki/Xorshift
 	// Also some drama: <https://www.pcg-random.org/posts/on-vignas-pcg-critique.html>, <https://prng.di.unimi.it/>
 	uint32_t x = *seed;
 	x ^= x << 13;
 	x ^= x >> 17;
 	x ^= x << 5;
 	return *seed = x;
 }
 // Distribution & sampling functions
 float rand_0_to_1(uint32_t* seed){
 	return ((float) xorshift32(seed)) / ((float) UINT32_MAX);
 }
 float rand_float(float max, uint32_t* seed)
 {
    return rand_0_to_1(seed) * max;
 }
 float ur_normal(uint32_t* seed)
 {
    float u1 = rand_0_to_1(seed);
    float u2 = rand_0_to_1(seed);
    float z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2);
    return z;
 }
 float random_uniform(float from, float to, uint32_t* seed)
 {
    return rand_0_to_1(seed) * (to - from) + from;
 }
 float random_normal(float mean, float sigma, uint32_t* seed)
 {
    return (mean + sigma * ur_normal(seed));
 }
 float random_lognormal(float logmean, float logsigma, uint32_t* seed)
 {
    return expf(random_normal(logmean, logsigma, seed));
 }
 float random_to(float low, float high, uint32_t* seed)
 {
    const float NORMAL95CONFIDENCE = 1.6448536269514722;
    float loglow = logf(low);
    float loghigh = logf(high);
    float logmean = (loglow + loghigh) / 2;
    float logsigma = (loghigh - loglow) / (2.0 * NORMAL95CONFIDENCE);
    return random_lognormal(logmean, logsigma, seed);
 }
 // Array helpers
 float array_sum(float* array, int length)
 {
    float output = 0.0;
    for (int i = 0; i < length; i++) {
        output += array[i];
    }
    return output;
 }
 void array_cumsum(float* array_to_sum, float* array_cumsummed, int length)
 {
    array_cumsummed[0] = array_to_sum[0];
    for (int i = 1; i < length; i++) {
        array_cumsummed[i] = array_cumsummed[i - 1] + array_to_sum[i];
    }
 }
 // Mixture function
 float mixture(float (*samplers[])(uint32_t*), float* weights, int n_dists, uint32_t* seed)
 {
    // You can see a simpler version of this function in the git history
    // or in C-02-better-algorithm-one-thread/
    float sum_weights = array_sum(weights, n_dists);
 		float* cumsummed_normalized_weights = (float*) malloc(n_dists * sizeof(float));
 		cumsummed_normalized_weights[0] = weights[0]/sum_weights;
    for (int i = 1; i < n_dists; i++) {
        cumsummed_normalized_weights[i] = cumsummed_normalized_weights[i - 1] + weights[i]/sum_weights;
    }
 		float p = random_uniform(0, 1, seed);
 		float result;
 		for (int k = 0; k < n_dists; k++) {
 				if (p < cumsummed_normalized_weights[k]) {
 						result = samplers[k](seed);
 						break;
 				}
 		}
 		free(cumsummed_normalized_weights);
 		return result;
 }
--- a/C/squiggle_c/to-do.md
+++ b/C/squiggle_c/to-do.md
@ -1,9 +0,0 @@
 - [ ] Add example for only one sample
 - [ ] Add example for many samples
 - [ ] Use gcc extension to define functions nested inside main.
 - [ ] Use OpenMP for acceleration
 - [ ] Chain various mixture functions
 - [ ] Have some more complicated & realistic example
 - [ ] Add summarization functions, like mean, std, 90% ci (or all c.i.?)
 - [ ] Add beta distribution
--- a/README.md
+++ b/README.md
@ -16,7 +16,7 @@ result = mixture(dists, weights) # should be 1M samples
 mean(result)
 ```
-I don't particularly care about the speed of this particular example, but rather think that the speed in this simple exaxmple would be indicative of the speed when considering 100x or 1000x more complicated models. As of now, it may also be useful for checking the validity of simple estimations. 
+I don't particularly care about the speed of this particular example, but rather think that the speed in this simple example would be indicative of the speed when considering 100x or 1000x more complicated models. As of now, it may also be useful for checking the validity of simple estimations. 
 The title of this repository is a pun on two meanings of "time to": "how much time does it take to do x", and "let's do x". "BOTEC" stands for "back of the envelope calculation".
--- a/python/beta/beta.py
+++ b/python/beta/beta.py
@ -0,0 +1,6 @@
 import numpy as np
 for i in range(1000* 1000):
    x=np.random.beta(1,2)
--- a/python/beta/beta_array.py
+++ b/python/beta/beta_array.py
@ -0,0 +1,9 @@
 import numpy as np
 n = 1000 * 1000
 def sample_beta_1_2():
    return np.random.beta(1,2)
 a = np.array([sample_beta_1_2() for _ in range(n)])
 print(np.mean(a))
--- a/squigglepy/scratchpad/samples_correlated.py
+++ b/squigglepy/scratchpad/samples_correlated.py
@ -0,0 +1,10 @@
 import squigglepy as sq
 import numpy as np
 a = sq.to(1, 3)
 b = a / 2 
 c = b / a 
 c_samples = sq.sample(c, 10)
 print(c_samples)