diff --git a/README.md b/README.md index 7b46a40..fd2af7a 100644 --- a/README.md +++ b/README.md @@ -345,8 +345,8 @@ It emits one warning about something I already took care of, so by default I've - [x] Add prototypes - [x] Use named structs - [x] Add to header file - - [ ] Test results - [ ] Provide example - - [ ] Add conversion between 90%ci and parameters. + - [ ] Test results + - [ ] Add conversion between 90% ci and parameters. - [ ] Move to own file? Or signpost in file? - [ ] Disambiguate sample_laplace--successes vs failures || successes vs total trials as two distinct and differently named functions diff --git a/examples/11_algebra/example b/examples/11_algebra/example new file mode 100755 index 0000000..1ac404b Binary files /dev/null and b/examples/11_algebra/example differ diff --git a/examples/11_algebra/example.c b/examples/11_algebra/example.c new file mode 100644 index 0000000..b26d0ad --- /dev/null +++ b/examples/11_algebra/example.c @@ -0,0 +1,26 @@ +#include "../../squiggle.h" +#include +#include +#include +#include + +int main() +{ + // set randomness seed + uint64_t* seed = malloc(sizeof(uint64_t)); + *seed = 1000; // xorshift can't start with 0 + + normal_params n1 = { .mean = 1.0, .std = 3.0 }; + normal_params n2 = { .mean = 2.0, .std = 4.0 }; + normal_params sn = algebra_sum_normals(n1, n2); + printf("The sum of Normal(%f, %f) and Normal(%f, %f) is Normal(%f, %f)\n", + n1.mean, n1.std, n2.mean, n2.std, sn.mean, sn.std); + + lognormal_params ln1 = { .logmean = 1.0, .logstd = 3.0 }; + lognormal_params ln2 = { .logmean = 2.0, .logstd = 4.0 }; + lognormal_params sln = algebra_product_lognormals(ln1, ln2); + printf("The product of Lognormal(%f, %f) and Lognormal(%f, %f) is Lognormal(%f, %f)\n", + ln1.logmean, ln1.logstd, ln2.logmean, ln2.logstd, sln.logmean, sln.logstd); + + free(seed); +} diff --git a/examples/11_algebra/makefile b/examples/11_algebra/makefile new file mode 100644 index 0000000..ef385f7 --- /dev/null +++ b/examples/11_algebra/makefile @@ -0,0 +1,53 @@ +# Interface: +# make +# make build +# make format +# make run + +# Compiler +CC=gcc +# CC=tcc # <= faster compilation + +# Main file +SRC=example.c ../../squiggle.c +OUTPUT=example + +## Dependencies +MATH=-lm + +## Flags +DEBUG= #'-g' +STANDARD=-std=c99 +WARNINGS=-Wall +OPTIMIZED=-O3 #-Ofast +# OPENMP=-fopenmp + +## Formatter +STYLE_BLUEPRINT=webkit +FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT) + +## make build +build: $(SRC) + $(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT) + +format: $(SRC) + $(FORMATTER) $(SRC) + +run: $(SRC) $(OUTPUT) + OMP_NUM_THREADS=1 ./$(OUTPUT) && echo + +time-linux: + @echo "Requires /bin/time, found on GNU/Linux systems" && echo + + @echo "Running 100x and taking avg time $(OUTPUT)" + @t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo + +## Profiling + +profile-linux: + echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar" + echo "Must be run as sudo" + $(CC) $(SRC) $(MATH) -o $(OUTPUT) + sudo perf record ./$(OUTPUT) + sudo perf report + rm perf.data diff --git a/squiggle.c b/squiggle.c index 1e03bb2..3bde4da 100644 --- a/squiggle.c +++ b/squiggle.c @@ -78,28 +78,29 @@ double sample_lognormal(double logmean, double logstd, uint64_t* seed) return exp(sample_normal(logmean, logstd, seed)); } -inline double sample_normal_from_95_confidence_interval(double low, double high, uint64_t* seed){ +inline double sample_normal_from_95_confidence_interval(double low, double high, uint64_t* seed) +{ // Explanation of key idea: - // 1. We know that the 90% confidence interval of the unit normal is + // 1. We know that the 90% confidence interval of the unit normal is // [-1.6448536269514722, 1.6448536269514722] // see e.g.: https://stackoverflow.com/questions/20626994/how-to-calculate-the-inverse-of-the-normal-cumulative-distribution-function-in-p // 2. So if we take a unit normal and multiply it by - // L / 1.6448536269514722, its new 90% confidence interval will be + // L / 1.6448536269514722, its new 90% confidence interval will be // [-L, L], i.e., length 2 * L - // 3. Instead, if we want to get a confidence interval of length L, - // we should multiply the unit normal by + // 3. Instead, if we want to get a confidence interval of length L, + // we should multiply the unit normal by // L / (2 * 1.6448536269514722) // Meaning that its standard deviation should be multiplied by that amount // see: https://en.wikipedia.org/wiki/Normal_distribution?lang=en#Operations_on_a_single_normal_variable // 4. So we have learnt that Normal(0, L / (2 * 1.6448536269514722)) // has a 90% confidence interval of length L - // 5. If we want a 90% confidence interval from high to low, - // we can set mean = (high + low)/2; the midpoint, and L = high-low, + // 5. If we want a 90% confidence interval from high to low, + // we can set mean = (high + low)/2; the midpoint, and L = high-low, // Normal([high + low]/2, [high - low]/(2 * 1.6448536269514722)) const double NORMAL95CONFIDENCE = 1.6448536269514722; double mean = (high + low) / 2.0; - double std = (high - low) / (2.0 * NORMAL95CONFIDENCE ); - return sample_normal(mean, std); + double std = (high - low) / (2.0 * NORMAL95CONFIDENCE); + return sample_normal(mean, std, seed); } double sample_to(double low, double high, uint64_t* seed) @@ -111,7 +112,7 @@ double sample_to(double low, double high, uint64_t* seed) // Then see code for sample_normal_from_95_confidence_interval double loglow = logf(low); double loghigh = logf(high); - return exp(sample_normal_from_95_confidence_interval(loglow, loghigh)); + return exp(sample_normal_from_95_confidence_interval(loglow, loghigh, seed)); } double sample_gamma(double alpha, uint64_t* seed) @@ -165,9 +166,10 @@ double sample_beta(double a, double b, uint64_t* seed) return gamma_a / (gamma_a + gamma_b); } -double sample_laplace(double successes, double failures, uint64_t* seed){ - // see - return sample_beta(successes + 1, failures + 1, seed); +double sample_laplace(double successes, double failures, uint64_t* seed) +{ + // see + return sample_beta(successes + 1, failures + 1, seed); } // Array helpers @@ -470,10 +472,9 @@ struct c_i get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* se return result; } -// # Small algebra system +// # Small algebra manipulations -// Do algebra over lognormals and normals -// here I discover named structs, +// here I discover named structs, // which mean that I don't have to be typing // struct blah all the time. typedef struct normal_params_t { @@ -481,11 +482,6 @@ typedef struct normal_params_t { double std; } normal_params; -typedef struct lognormal_params_t { - double logmean; - double logstd; -} lognormal_params; - normal_params algebra_sum_normals(normal_params a, normal_params b) { normal_params result = { @@ -494,16 +490,11 @@ normal_params algebra_sum_normals(normal_params a, normal_params b) }; return result; } -normal_params algebra_shift_normal(normal_params a, double shift) -{ - normal_params result = { - .mean = a.mean + shift, - .std = a.std, - }; - return result; -} -// Also add stretching +typedef struct lognormal_params_t { + double logmean; + double logstd; +} lognormal_params; lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params b) { @@ -513,11 +504,3 @@ lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params }; return result; } -lognormal_params algebra_scale_lognormal(lognormal_params a, double k) -{ - lognormal_params result = { - .logmean = a.logmean + k, - .logstd = a.logstd, - }; - return result; -} diff --git a/squiggle.h b/squiggle.h index 75a9fca..50ba120 100644 --- a/squiggle.h +++ b/squiggle.h @@ -58,24 +58,18 @@ struct c_i { }; struct c_i get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed); -// small algebra system +// small algebra manipulations typedef struct normal_params_t { double mean; double std; } normal_params; +normal_params algebra_sum_normals(normal_params a, normal_params b); typedef struct lognormal_params_t { double logmean; double logstd; } lognormal_params; - - -normal_params algebra_sum_normals(normal_params a, normal_params b); -normal_params algebra_shift_normal(normal_params a, double shift); - - lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params b); -lognormal_params algebra_scale_lognormal(lognormal_params a, double k); #endif