diff --git a/C/squiggle_c/examples/01_one_sample/example b/C/squiggle_c/examples/01_one_sample/example
deleted file mode 100755
index b3db352f..00000000
Binary files a/C/squiggle_c/examples/01_one_sample/example and /dev/null differ
diff --git a/C/squiggle_c/examples/01_one_sample/example.c b/C/squiggle_c/examples/01_one_sample/example.c
deleted file mode 100644
index bbea2ebb..00000000
--- a/C/squiggle_c/examples/01_one_sample/example.c
+++ /dev/null
@@ -1,50 +0,0 @@
-#include "../../squiggle.h"
-#include <stdint.h>
-#include <stdlib.h>
-#include <stdio.h>
-
-// Estimate functions
-float sample_0(uint32_t* seed)
-{
-    return 0;
-}
-
-float sample_1(uint32_t* seed)
-{
-    return 1;
-}
-
-float sample_few(uint32_t* seed)
-{
-    return random_to(1, 3, seed);
-}
-
-float sample_many(uint32_t* seed)
-{
-    return random_to(2, 10, seed);
-}
-
-int main(){
-    // set randomness seed
-    uint32_t* seed = malloc(sizeof(uint32_t));
-    *seed = 1000; // xorshift can't start with 0
-
-    float p_a = 0.8;
-    float p_b = 0.5;
-    float p_c = p_a * p_b;
-
-    int n_dists = 4;
-    float weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
-    float (*samplers[])(uint32_t*) = { sample_0, sample_1, sample_few, sample_many };
-
-    float result_one = mixture(samplers, weights, n_dists, seed);
-    printf("result_one: %f\n", result_one);
-}
-
-/*
-Aggregation mechanisms:
-- Quantiles (requires a sort)
-- Sum
-- Average
-- Std
-*/
diff --git a/C/squiggle_c/examples/01_one_sample/makefile b/C/squiggle_c/examples/01_one_sample/makefile
deleted file mode 100644
index 302a1499..00000000
--- a/C/squiggle_c/examples/01_one_sample/makefile
+++ /dev/null
@@ -1,53 +0,0 @@
-# Interface:
-# make
-# make build
-# make format
-# make run
-
-# Compiler
-CC=gcc
-# CC=tcc # <= faster compilation
-
-# Main file
-SRC=example.c
-OUTPUT=example
-
-## Dependencies
-MATH=-lm
-
-## Flags
-DEBUG= #'-g'
-STANDARD=-std=c99
-WARNINGS=-Wall
-OPTIMIZED=-O3 #-Ofast
-# OPENMP=-fopenmp
-
-## Formatter
-STYLE_BLUEPRINT=webkit
-FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
-
-## make build
-build: $(SRC)
-	$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)
-
-format: $(SRC)
-	$(FORMATTER) $(SRC)
-
-run: $(SRC) $(OUTPUT)
-	OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
-
-time-linux:
-	@echo "Requires /bin/time, found on GNU/Linux systems" && echo
-
-	@echo "Running 100x and taking avg time $(OUTPUT)"
-	@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
-
-## Profiling
-
-profile-linux:
-	echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
-	echo "Must be run as sudo"
-	$(CC) $(SRC) $(MATH) -o $(OUTPUT)
-	sudo perf record $(OUTPUT)
-	sudo perf report
-	rm perf.data
diff --git a/C/squiggle_c/examples/02_many_samples/example b/C/squiggle_c/examples/02_many_samples/example
deleted file mode 100755
index b3db352f..00000000
Binary files a/C/squiggle_c/examples/02_many_samples/example and /dev/null differ
diff --git a/C/squiggle_c/examples/02_many_samples/example.c b/C/squiggle_c/examples/02_many_samples/example.c
deleted file mode 100644
index 001d7321..00000000
--- a/C/squiggle_c/examples/02_many_samples/example.c
+++ /dev/null
@@ -1,59 +0,0 @@
-#include <stdint.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include "../../squiggle.h"
-
-// Estimate functions
-float sample_0(uint32_t* seed)
-{
-    return 0;
-}
-
-float sample_1(uint32_t* seed)
-{
-    return 1;
-}
-
-float sample_few(uint32_t* seed)
-{
-    return random_to(1, 3, seed);
-}
-
-float sample_many(uint32_t* seed)
-{
-    return random_to(2, 10, seed);
-}
-
-int main(){
-    // set randomness seed
-    uint32_t* seed = malloc(sizeof(uint32_t));
-    *seed = 1000; // xorshift can't start with 0
-
-    float p_a = 0.8;
-    float p_b = 0.5;
-    float p_c = p_a * p_b;
-
-    int n_dists = 4;
-    float weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
-    float (*samplers[])(uint32_t*) = { sample_0, sample_1, sample_few, sample_many };
-
-    int n_samples = 1000000;
-    float* result_many = (float *) malloc(n_samples * sizeof(float));
-    for(int i=0; i<n_samples; i++){
-        result_many[i] = mixture(samplers, weights, n_dists, seed);
-    }
-}
diff --git a/C/squiggle_c/examples/02_many_samples/makefile b/C/squiggle_c/examples/02_many_samples/makefile
deleted file mode 100644
--- a/C/squiggle_c/examples/02_many_samples/makefile
+++ /dev/null
@@ -1,53 +0,0 @@
-# Interface:
-# make
-# make build
-# make format
-# make run
-
-# Compiler
-CC=gcc
-# CC=tcc # <= faster compilation
-
-# Main file
-SRC=example.c
-OUTPUT=example
-
-## Dependencies
-MATH=-lm
-
-## Flags
-DEBUG= #'-g'
-STANDARD=-std=c99
-WARNINGS=-Wall
-OPTIMIZED=-O3 #-Ofast
-# OPENMP=-fopenmp
-
-## Formatter
-STYLE_BLUEPRINT=webkit
-FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
-
-## make build
-build: $(SRC)
-	$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)
-
-format: $(SRC)
-	$(FORMATTER) $(SRC)
-
-run: $(SRC) $(OUTPUT)
-	OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
-
-time-linux:
-	@echo "Requires /bin/time, found on GNU/Linux systems" && echo
-
-	@echo "Running 100x and taking avg time $(OUTPUT)"
-	@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
-
-## Profiling
-
-profile-linux:
-	echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
-	echo "Must be run as sudo"
-	$(CC) $(SRC) $(MATH) -o $(OUTPUT)
-	sudo perf record $(OUTPUT)
-	sudo perf report
-	rm perf.data
diff --git a/C/squiggle_c/squiggle.h b/C/squiggle_c/squiggle.h
deleted file mode 100644
index 5409e01c..00000000
--- a/C/squiggle_c/squiggle.h
+++ /dev/null
@@ -1,109 +0,0 @@
-#include <math.h>
-#include <stdint.h>
-#include <stdlib.h>
-
-const float PI = 3.14159265358979323846;
-
-// Pseudo Random number generator
-
-uint32_t xorshift32
-(uint32_t* seed)
-{
-    // Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
-    // See
-    // https://en.wikipedia.org/wiki/Xorshift
-    // Also some drama: ,
-
-    uint32_t x = *seed;
-    x ^= x << 13;
-    x ^= x >> 17;
-    x ^= x << 5;
-    return *seed = x;
-}
-
-// Distribution & sampling functions
-
-float rand_0_to_1(uint32_t* seed){
-    return ((float) xorshift32(seed)) / ((float) UINT32_MAX);
-}
-
-float rand_float(float max, uint32_t* seed)
-{
-    return rand_0_to_1(seed) * max;
-}
-
-float ur_normal(uint32_t* seed)
-{
-    float u1 = rand_0_to_1(seed);
-    float u2 = rand_0_to_1(seed);
-    float z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2);
-    return z;
-}
-
-float random_uniform(float from, float to, uint32_t* seed)
-{
-    return rand_0_to_1(seed) * (to - from) + from;
-}
-
-float random_normal(float mean, float sigma, uint32_t* seed)
-{
-    return (mean + sigma * ur_normal(seed));
-}
-
-float random_lognormal(float logmean, float logsigma, uint32_t* seed)
-{
-    return expf(random_normal(logmean, logsigma, seed));
-}
-
-float random_to(float low, float high, uint32_t* seed)
-{
-    const float NORMAL95CONFIDENCE = 1.6448536269514722;
-    float loglow = logf(low);
-    float loghigh = logf(high);
-    float logmean = (loglow + loghigh) / 2;
-    float logsigma = (loghigh - loglow) / (2.0 * NORMAL95CONFIDENCE);
-    return random_lognormal(logmean, logsigma, seed);
-}
-
-// Array helpers
-float array_sum(float* array, int length)
-{
-    float output = 0.0;
-    for (int i = 0; i < length; i++) {
-        output += array[i];
-    }
-    return output;
-}
-
-void array_cumsum(float* array_to_sum, float* array_cumsummed, int length)
-{
-    array_cumsummed[0] = array_to_sum[0];
-    for (int i = 1; i < length; i++) {
-        array_cumsummed[i] = array_cumsummed[i - 1] + array_to_sum[i];
-    }
-}
-
-// Mixture function
-float mixture(float (*samplers[])(uint32_t*), float* weights, int n_dists, uint32_t* seed)
-{
-    // You can see a simpler version of this function in the git history
-    // or in C-02-better-algorithm-one-thread/
-    float sum_weights = array_sum(weights, n_dists);
-    float* cumsummed_normalized_weights = (float*) malloc(n_dists * sizeof(float));
-    cumsummed_normalized_weights[0] = weights[0]/sum_weights;
-    for (int i = 1; i < n_dists; i++) {
-        cumsummed_normalized_weights[i] = cumsummed_normalized_weights[i - 1] + weights[i]/sum_weights;
-    }
-
-    float p = random_uniform(0, 1, seed);
-    float result;
-    for (int k = 0; k < n_dists; k++) {
-        if (p < cumsummed_normalized_weights[k]) {
-            result = samplers[k](seed);
-            break;
-        }
-    }
-
-    free(cumsummed_normalized_weights);
-    return result;
-}
diff --git a/C/squiggle_c/to-do.md b/C/squiggle_c/to-do.md
deleted file mode 100644
index 3be7ffb1..00000000
--- a/C/squiggle_c/to-do.md
+++ /dev/null
@@ -1,9 +0,0 @@
-
-- [ ] Add example for only one sample
-- [ ] Add example for many samples
-- [ ] Use gcc extension to define functions nested inside main.
-- [ ] Use OpenMP for acceleration
-- [ ] Chain various mixture functions
-- [ ] Have some more complicated & realistic example
-- [ ] Add summarization functions, like mean, std, 90% ci (or all c.i.?)
-- [ ] Add beta distribution
diff --git a/README.md b/README.md
index b9806588..9a8ef576 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@
 result = mixture(dists, weights) # should be 1M samples
 mean(result)
 ```
 
-I don't particularly care about the speed of this particular example, but rather think that the speed in this simple exaxmple would be indicative of the speed when considering 100x or 1000x more complicated models. As of now, it may also be useful for checking the validity of simple estimations.
+I don't particularly care about the speed of this particular example, but rather think that the speed in this simple example would be indicative of the speed when considering 100x or 1000x more complicated models. As of now, it may also be useful for checking the validity of simple estimations.
 
 The title of this repository is a pun on two meanings of "time to": "how much time does it take to do x", and "let's do x". "BOTEC" stands for "back of the envelope calculation".
diff --git a/python/beta/beta.py b/python/beta/beta.py
new file mode 100644
index 00000000..58bffe7e
--- /dev/null
+++ b/python/beta/beta.py
@@ -0,0 +1,6 @@
+
+import numpy as np
+
+for i in range(1000* 1000):
+    x=np.random.beta(1,2)
+
diff --git a/python/beta/beta_array.py b/python/beta/beta_array.py
new file mode 100644
index 00000000..d182931d
--- /dev/null
+++ b/python/beta/beta_array.py
@@ -0,0 +1,9 @@
+import numpy as np
+
+n = 1000 * 1000
+
+def sample_beta_1_2():
+    return np.random.beta(1,2)
+
+a = np.array([sample_beta_1_2() for _ in range(n)])
+print(np.mean(a))
diff --git a/squigglepy/scratchpad/samples_correlated.py b/squigglepy/scratchpad/samples_correlated.py
new file mode 100644
index 00000000..2e790c59
--- /dev/null
+++ b/squigglepy/scratchpad/samples_correlated.py
@@ -0,0 +1,10 @@
+import squigglepy as sq
+import numpy as np
+
+a = sq.to(1, 3)
+b = a / 2
+c = b / a
+
+c_samples = sq.sample(c, 10)
+
+print(c_samples)
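
For reference, the deleted C examples above boil down to a four-component mixture: a point mass at 0, a point mass at 1, and two lognormals whose 90% confidence intervals are [1, 3] and [2, 10], with weights (1 - p_c, p_c/2, p_c/4, p_c/4) for p_c = 0.8 * 0.5. A minimal NumPy sketch of the same model (illustrative only, not code from this repository) can serve as a cross-check for the Python ports:

```python
import numpy as np

NORMAL95CONFIDENCE = 1.6448536269514722  # same constant as in squiggle.h
rng = np.random.default_rng(1000)

def random_to(low, high, n):
    # Lognormal whose 90% confidence interval is [low, high],
    # mirroring random_to() in the deleted squiggle.h
    logmean = (np.log(low) + np.log(high)) / 2
    logsigma = (np.log(high) - np.log(low)) / (2 * NORMAL95CONFIDENCE)
    return rng.lognormal(logmean, logsigma, n)

n = 1_000_000
p_a, p_b = 0.8, 0.5
p_c = p_a * p_b
weights = np.array([1 - p_c, p_c / 2, p_c / 4, p_c / 4])

# One row per mixture component: 0, 1, "few" (1 to 3), "many" (2 to 10)
components = np.stack([
    np.zeros(n),
    np.ones(n),
    random_to(1, 3, n),
    random_to(2, 10, n),
])

# Pick a component for each sample according to the weights
choice = rng.choice(4, size=n, p=weights)
result = components[choice, np.arange(n)]
print(result.mean())
```

With these weights roughly 60% of draws are exactly 0 and 20% are exactly 1, which is worth keeping in mind when eyeballing the resulting mean.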