From 9c1909595586181381ecbee8f0101b24ec3acf81 Mon Sep 17 00:00:00 2001 From: NunoSempere Date: Mon, 24 Jul 2023 00:37:45 +0200 Subject: [PATCH] add serious model, add template. --- README.md | 14 ++++--- examples/00_example_template/example.c | 14 +++++++ examples/00_example_template/makefile | 53 ++++++++++++++++++++++++ squiggle.c | 56 +++++++++++++------------- 4 files changed, 104 insertions(+), 33 deletions(-) create mode 100644 examples/00_example_template/example.c create mode 100644 examples/00_example_template/makefile diff --git a/README.md b/README.md index 090c9c3..a1384c7 100644 --- a/README.md +++ b/README.md @@ -20,12 +20,14 @@ A self-contained C99 library that provides a subset of [Squiggle](https://www.sq You can follow some example usage in the examples/ folder -1. In the first example, we define a small model, and draw one sample from it -2. In the second example, we define a small model, and return many samples -3. In the third example, we use a gcc extension—nested functions—to rewrite the code from point 2. in a more linear way. -4. In the fourth example, we define some simple cdfs, and we draw samples from those cdfs. We see that this approach is slower than using the built-in samplers, e.g., the normal sampler. -5. In the fifth example, we define the cdf for the beta distribution, and we draw samples from it. -6. In the sixth example, we take samples from simple gamma and beta distributions, using the samplers provided by this library. +1. In the 1st example, we define a small model, and draw one sample from it +2. In the 2nd example, we define a small model, and return many samples +3. In the 3rd example, we use a gcc extension—nested functions—to rewrite the code from point 2. in a more linear way. +4. In the 4th example, we define some simple cdfs, and we draw samples from those cdfs. We see that this approach is slower than using the built-in samplers, e.g., the normal sampler. +5. In the 5th example, we define the cdf for the beta distribution, and we draw samples from it. +6. In the 6th example, we take samples from simple gamma and beta distributions, using the samplers provided by this library. +7. In the 7th example, we get the 90% confidence interval of a beta distribution +8. The 8th example translates the models from Eli and Nuño from [Samotsvety Nuclear Risk Forecasts — March 2022](https://forum.nunosempere.com/posts/KRFXjCqqfGQAYirm5/samotsvety-nuclear-risk-forecasts-march-2022#Nu_o_Sempere) into squiggle.c, then creates a mixture from both, and returns the mean probability of death per month and the 90% confidence interval. ## Commentary diff --git a/examples/00_example_template/example.c b/examples/00_example_template/example.c new file mode 100644 index 0000000..1938a7b --- /dev/null +++ b/examples/00_example_template/example.c @@ -0,0 +1,14 @@ +#include "../../squiggle.h" +#include +#include +#include + +int main() +{ + // set randomness seed + uint64_t* seed = malloc(sizeof(uint64_t)); + *seed = 1000; // xorshift can't start with 0 + + + free(seed); +} diff --git a/examples/00_example_template/makefile b/examples/00_example_template/makefile new file mode 100644 index 0000000..ef385f7 --- /dev/null +++ b/examples/00_example_template/makefile @@ -0,0 +1,53 @@ +# Interface: +# make +# make build +# make format +# make run + +# Compiler +CC=gcc +# CC=tcc # <= faster compilation + +# Main file +SRC=example.c ../../squiggle.c +OUTPUT=example + +## Dependencies +MATH=-lm + +## Flags +DEBUG= #'-g' +STANDARD=-std=c99 +WARNINGS=-Wall +OPTIMIZED=-O3 #-Ofast +# OPENMP=-fopenmp + +## Formatter +STYLE_BLUEPRINT=webkit +FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT) + +## make build +build: $(SRC) + $(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT) + +format: $(SRC) + $(FORMATTER) $(SRC) + +run: $(SRC) $(OUTPUT) + OMP_NUM_THREADS=1 ./$(OUTPUT) && echo + +time-linux: + @echo "Requires /bin/time, found on GNU/Linux systems" && echo + + @echo "Running 100x and taking avg time $(OUTPUT)" + @t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo + +## Profiling + +profile-linux: + echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar" + echo "Must be run as sudo" + $(CC) $(SRC) $(MATH) -o $(OUTPUT) + sudo perf record ./$(OUTPUT) + sudo perf report + rm perf.data diff --git a/squiggle.c b/squiggle.c index 3ddb416..8f77636 100644 --- a/squiggle.c +++ b/squiggle.c @@ -94,12 +94,12 @@ double sample_gamma(double alpha, uint64_t* seed) // A Simple Method for Generating Gamma Variables, Marsaglia and Wan Tsang, 2001 // https://dl.acm.org/doi/pdf/10.1145/358407.358414 // see also the references/ folder - // Note that the Wikipedia page for the gamma distribution includes a scaling parameter - // k or beta - // https://en.wikipedia.org/wiki/Gamma_distribution - // such that gamma_k(alpha, k) = k * gamma(alpha) - // or gamma_beta(alpha, beta) = gamma(alpha) / beta - // So far I have not needed to use this, and thus the second parameter is by default 1. + // Note that the Wikipedia page for the gamma distribution includes a scaling parameter + // k or beta + // https://en.wikipedia.org/wiki/Gamma_distribution + // such that gamma_k(alpha, k) = k * gamma(alpha) + // or gamma_beta(alpha, beta) = gamma(alpha) / beta + // So far I have not needed to use this, and thus the second parameter is by default 1. if (alpha >= 1) { double d, c, x, v, u; d = alpha - 1.0 / 3.0; @@ -386,38 +386,40 @@ struct box sampler_cdf_double(double cdf(double), uint64_t* seed) // Get confidence intervals, given a sampler struct c_i { - float low; - float high; + float low; + float high; }; -int compare_doubles(const void *p, const void *q) { - // https://wikiless.esmailelbob.xyz/wiki/Qsort?lang=en - double x = *(const double *)p; - double y = *(const double *)q; +int compare_doubles(const void* p, const void* q) +{ + // https://wikiless.esmailelbob.xyz/wiki/Qsort?lang=en + double x = *(const double*)p; + double y = *(const double*)q; /* Avoid return x - y, which can cause undefined behaviour because of signed integer overflow. */ if (x < y) - return -1; // Return -1 if you want ascending, 1 if you want descending order. + return -1; // Return -1 if you want ascending, 1 if you want descending order. else if (x > y) - return 1; // Return 1 if you want ascending, -1 if you want descending order. + return 1; // Return 1 if you want ascending, -1 if you want descending order. return 0; } -struct c_i get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed){ - int n = 100 * 1000; - double* samples_array = malloc(n * sizeof(double)); - for(int i=0; i