move squiggle.c to its own repo, fix readme typo.

2023-08-17 14:19:32 +02:00 · 2023-08-17 14:19:32 +02:00 · 4e079e9015
commit 4e079e9015
parent 3b29ad7e45
12 changed files with 26 additions and 334 deletions
--- a/C/squiggle_c/examples/01_one_sample/example
+++ b/C/squiggle_c/examples/01_one_sample/example
--- a/C/squiggle_c/examples/01_one_sample/example.c
+++ b/C/squiggle_c/examples/01_one_sample/example.c
@ -1,50 +0,0 @@
-#include "../../squiggle.h"
-#include <stdint.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-// Estimate functions
-float sample_0(uint32_t* seed)
-{
-    return 0;
-}
-
-float sample_1(uint32_t* seed)
-{
-    return 1;
-}
-
-float sample_few(uint32_t* seed)
-{
-    return random_to(1, 3, seed);
-}
-
-float sample_many(uint32_t* seed)
-{
-    return random_to(2, 10, seed);
-}
-
-int main(){
-    // set randomness seed
-		uint32_t* seed = malloc(sizeof(uint32_t));
-		*seed = 1000; // xorshift can't start with 0
-
-    float p_a = 0.8;
-    float p_b = 0.5;
-    float p_c = p_a * p_b;
-
-    int n_dists = 4;
-    float weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
-    float (*samplers[])(uint32_t*) = { sample_0, sample_1, sample_few, sample_many };
-
-    float result_one = mixture(samplers, weights, n_dists, seed);
-		printf("result_one: %f\n", result_one);
-}
-
-/* 
-Aggregation mechanisms:
- Quantiles (requires a sort)
- Sum 
- Average
- Std
-*/
--- a/C/squiggle_c/examples/01_one_sample/makefile
+++ b/C/squiggle_c/examples/01_one_sample/makefile
@ -1,53 +0,0 @@
-# Interface: 
-#   make
-#   make build
-#   make format
-#   make run
-
-# Compiler
-CC=gcc
-# CC=tcc # <= faster compilation
-
-# Main file
-SRC=example.c
-OUTPUT=example
-
-## Dependencies
-MATH=-lm
-
-## Flags
-DEBUG= #'-g'
-STANDARD=-std=c99
-WARNINGS=-Wall
-OPTIMIZED=-O3  #-Ofast
-# OPENMP=-fopenmp
-
-## Formatter
-STYLE_BLUEPRINT=webkit
-FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
-
-## make build
-build: $(SRC)
-	$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)
-
-format: $(SRC)
-	$(FORMATTER) $(SRC)
-
-run: $(SRC) $(OUTPUT)
-	OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
-
-time-linux: 
-	@echo "Requires /bin/time, found on GNU/Linux systems" && echo
-	
-	@echo "Running 100x and taking avg time $(OUTPUT)"
-	@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
-
-## Profiling
-
-profile-linux: 
-	echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
-	echo "Must be run as sudo"
-	$(CC) $(SRC) $(MATH) -o $(OUTPUT)
-	sudo perf record $(OUTPUT)
-	sudo perf report
-	rm perf.data
--- a/C/squiggle_c/examples/02_many_samples/example
+++ b/C/squiggle_c/examples/02_many_samples/example
--- a/C/squiggle_c/examples/02_many_samples/example.c
+++ b/C/squiggle_c/examples/02_many_samples/example.c
@ -1,59 +0,0 @@
-#include <stdint.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include "../../squiggle.h"
-
-// Estimate functions
-float sample_0(uint32_t* seed)
-{
-    return 0;
-}
-
-float sample_1(uint32_t* seed)
-{
-    return 1;
-}
-
-float sample_few(uint32_t* seed)
-{
-    return random_to(1, 3, seed);
-}
-
-float sample_many(uint32_t* seed)
-{
-    return random_to(2, 10, seed);
-}
-
-int main(){
-    // set randomness seed
-		uint32_t* seed = malloc(sizeof(uint32_t));
-		*seed = 1000; // xorshift can't start with 0
-
-    float p_a = 0.8;
-    float p_b = 0.5;
-    float p_c = p_a * p_b;
-
-    int n_dists = 4;
-    float weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
-    float (*samplers[])(uint32_t*) = { sample_0, sample_1, sample_few, sample_many };
-
-		int n_samples = 1000000;
-		float* result_many = (float *) malloc(n_samples * sizeof(float));
-		for(int i=0; i<n_samples; i++){
-      result_many[i] = mixture(samplers, weights, n_dists, seed);
-		}
-		
-		printf("result_many: [");
-		for(int i=0; i<100; i++){
-		  printf("%.2f, ", result_many[i]);
-		}
-		printf("]\n");
-}
-
-/* 
-Aggregation mechanisms:
- Quantiles (requires a sort)
- Sum 
- Average
- Std
-*/
--- a/C/squiggle_c/examples/02_many_samples/makefile
+++ b/C/squiggle_c/examples/02_many_samples/makefile
@ -1,53 +0,0 @@
-# Interface: 
-#   make
-#   make build
-#   make format
-#   make run
-
-# Compiler
-CC=gcc
-# CC=tcc # <= faster compilation
-
-# Main file
-SRC=example.c
-OUTPUT=example
-
-## Dependencies
-MATH=-lm
-
-## Flags
-DEBUG= #'-g'
-STANDARD=-std=c99
-WARNINGS=-Wall
-OPTIMIZED=-O3  #-Ofast
-# OPENMP=-fopenmp
-
-## Formatter
-STYLE_BLUEPRINT=webkit
-FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
-
-## make build
-build: $(SRC)
-	$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)
-
-format: $(SRC)
-	$(FORMATTER) $(SRC)
-
-run: $(SRC) $(OUTPUT)
-	OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
-
-time-linux: 
-	@echo "Requires /bin/time, found on GNU/Linux systems" && echo
-	
-	@echo "Running 100x and taking avg time $(OUTPUT)"
-	@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
-
-## Profiling
-
-profile-linux: 
-	echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
-	echo "Must be run as sudo"
-	$(CC) $(SRC) $(MATH) -o $(OUTPUT)
-	sudo perf record $(OUTPUT)
-	sudo perf report
-	rm perf.data
--- a/C/squiggle_c/squiggle.h
+++ b/C/squiggle_c/squiggle.h
@ -1,109 +0,0 @@
-#include <math.h>
-#include <stdint.h>
-#include <stdlib.h>
-
-const float PI = 3.14159265358979323846;
-
-// Pseudo Random number generator
-
-uint32_t xorshift32
-(uint32_t* seed)
-{
-	// Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
-  // See <https://stackoverflow.com/questions/53886131/how-does-xorshift32-works>
-  // https://en.wikipedia.org/wiki/Xorshift
-	// Also some drama: <https://www.pcg-random.org/posts/on-vignas-pcg-critique.html>, <https://prng.di.unimi.it/>
-
-	uint32_t x = *seed;
-	x ^= x << 13;
-	x ^= x >> 17;
-	x ^= x << 5;
-	return *seed = x;
-}
-
-// Distribution & sampling functions
-
-float rand_0_to_1(uint32_t* seed){
-	return ((float) xorshift32(seed)) / ((float) UINT32_MAX);
-}
-
-float rand_float(float max, uint32_t* seed)
-{
-    return rand_0_to_1(seed) * max;
-}
-
-float ur_normal(uint32_t* seed)
-{
-    float u1 = rand_0_to_1(seed);
-    float u2 = rand_0_to_1(seed);
-    float z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2);
-    return z;
-}
-
-float random_uniform(float from, float to, uint32_t* seed)
-{
-    return rand_0_to_1(seed) * (to - from) + from;
-}
-
-float random_normal(float mean, float sigma, uint32_t* seed)
-{
-    return (mean + sigma * ur_normal(seed));
-}
-
-float random_lognormal(float logmean, float logsigma, uint32_t* seed)
-{
-    return expf(random_normal(logmean, logsigma, seed));
-}
-
-float random_to(float low, float high, uint32_t* seed)
-{
-    const float NORMAL95CONFIDENCE = 1.6448536269514722;
-    float loglow = logf(low);
-    float loghigh = logf(high);
-    float logmean = (loglow + loghigh) / 2;
-    float logsigma = (loghigh - loglow) / (2.0 * NORMAL95CONFIDENCE);
-    return random_lognormal(logmean, logsigma, seed);
-}
-
-// Array helpers
-float array_sum(float* array, int length)
-{
-    float output = 0.0;
-    for (int i = 0; i < length; i++) {
-        output += array[i];
-    }
-    return output;
-}
-
-void array_cumsum(float* array_to_sum, float* array_cumsummed, int length)
-{
-    array_cumsummed[0] = array_to_sum[0];
-    for (int i = 1; i < length; i++) {
-        array_cumsummed[i] = array_cumsummed[i - 1] + array_to_sum[i];
-    }
-}
-
-// Mixture function
-float mixture(float (*samplers[])(uint32_t*), float* weights, int n_dists, uint32_t* seed)
-{
-    // You can see a simpler version of this function in the git history
-    // or in C-02-better-algorithm-one-thread/
-    float sum_weights = array_sum(weights, n_dists);
-		float* cumsummed_normalized_weights = (float*) malloc(n_dists * sizeof(float));
-		cumsummed_normalized_weights[0] = weights[0]/sum_weights;
-    for (int i = 1; i < n_dists; i++) {
-        cumsummed_normalized_weights[i] = cumsummed_normalized_weights[i - 1] + weights[i]/sum_weights;
-    }
-
-		float p = random_uniform(0, 1, seed);
-		float result;
-		for (int k = 0; k < n_dists; k++) {
-				if (p < cumsummed_normalized_weights[k]) {
-						result = samplers[k](seed);
-						break;
-				}
-		}
-
-		free(cumsummed_normalized_weights);
-		return result;
-}
--- a/C/squiggle_c/to-do.md
+++ b/C/squiggle_c/to-do.md
@ -1,9 +0,0 @@
-
- [ ] Add example for only one sample
- [ ] Add example for many samples
- [ ] Use gcc extension to define functions nested inside main.
- [ ] Use OpenMP for acceleration
- [ ] Chain various mixture functions
- [ ] Have some more complicated & realistic example
- [ ] Add summarization functions, like mean, std, 90% ci (or all c.i.?)
- [ ] Add beta distribution
--- a/README.md
+++ b/README.md
@ -16,7 +16,7 @@ result = mixture(dists, weights) # should be 1M samples
 mean(result)
 ```

-I don't particularly care about the speed of this particular example, but rather think that the speed in this simple exaxmple would be indicative of the speed when considering 100x or 1000x more complicated models. As of now, it may also be useful for checking the validity of simple estimations. 
+I don't particularly care about the speed of this particular example, but rather think that the speed in this simple example would be indicative of the speed when considering 100x or 1000x more complicated models. As of now, it may also be useful for checking the validity of simple estimations. 

 The title of this repository is a pun on two meanings of "time to": "how much time does it take to do x", and "let's do x". "BOTEC" stands for "back of the envelope calculation".

--- a/python/beta/beta.py
+++ b/python/beta/beta.py
@ -0,0 +1,6 @@
+
+import numpy as np
+
+for i in range(1000* 1000):
+    x=np.random.beta(1,2)
+
--- a/python/beta/beta_array.py
+++ b/python/beta/beta_array.py
@ -0,0 +1,9 @@
+import numpy as np
+
+n = 1000 * 1000
+
+def sample_beta_1_2():
+    return np.random.beta(1,2)
+
+a = np.array([sample_beta_1_2() for _ in range(n)])
+print(np.mean(a))
--- a/squigglepy/scratchpad/samples_correlated.py
+++ b/squigglepy/scratchpad/samples_correlated.py
@ -0,0 +1,10 @@
+import squigglepy as sq
+import numpy as np
+
+a = sq.to(1, 3)  # presumably a squigglepy distribution spanning roughly 1 to 3 — TODO confirm
+b = a / 2  # derived from a
+c = b / a  # NOTE(review): looks like a check of whether both uses of `a` share one draw (c == 0.5) — confirm
+
+c_samples = sq.sample(c, 10)  # second argument is presumably the number of samples — verify
+
+print(c_samples)