add serious model, add template.

2023-07-24 00:37:45 +02:00 · 2023-07-24 00:37:45 +02:00 · 9c19095955
commit 9c19095955
parent 84399e60a2
4 changed files with 104 additions and 33 deletions
--- a/README.md
+++ b/README.md
@ -20,12 +20,14 @@ A self-contained C99 library that provides a subset of [Squiggle](https://www.sq

 You can follow some example usage in the examples/ folder

-1. In the first example, we define a small model, and draw one sample from it
-2. In the second example, we define a small model, and return many samples
-3. In the third example, we use a gcc extension—nested functions—to rewrite the code from point 2. in a more linear way.
-4. In the fourth example, we define some simple cdfs, and we draw samples from those cdfs. We see that this approach is slower than using the built-in samplers, e.g., the normal sampler.
-5. In the fifth example, we define the cdf for the beta distribution, and we draw samples from it. 
-6. In the sixth example, we take samples from simple gamma and beta distributions, using the samplers provided by this library.
+1. In the 1st example, we define a small model, and draw one sample from it
+2. In the 2nd example, we define a small model, and return many samples
+3. In the 3rd example, we use a gcc extension—nested functions—to rewrite the code from point 2. in a more linear way.
+4. In the 4th example, we define some simple cdfs, and we draw samples from those cdfs. We see that this approach is slower than using the built-in samplers, e.g., the normal sampler.
+5. In the 5th example, we define the cdf for the beta distribution, and we draw samples from it. 
+6. In the 6th example, we take samples from simple gamma and beta distributions, using the samplers provided by this library.
+7. In the 7th example, we get the 90% confidence interval of a beta distribution
+8. The 8th example translates the models from Eli and Nuño from [Samotsvety Nuclear Risk Forecasts — March 2022](https://forum.nunosempere.com/posts/KRFXjCqqfGQAYirm5/samotsvety-nuclear-risk-forecasts-march-2022#Nu_o_Sempere) into squiggle.c, then creates a mixture from both, and returns the mean probability of death per month and the 90% confidence interval.

 ## Commentary

--- a/examples/00_example_template/example.c
+++ b/examples/00_example_template/example.c
@ -0,0 +1,14 @@
+#include "../../squiggle.h"
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int main()
+{
+    // set randomness seed
+    uint64_t* seed = malloc(sizeof(uint64_t));
+    *seed = 1000; // xorshift can't start with 0
+
+		
+    free(seed);
+}
--- a/examples/00_example_template/makefile
+++ b/examples/00_example_template/makefile
@ -0,0 +1,53 @@
+# Interface: 
+#   make
+#   make build
+#   make format
+#   make run
+
+# Compiler
+CC=gcc
+# CC=tcc # <= faster compilation
+
+# Main file
+SRC=example.c ../../squiggle.c
+OUTPUT=example
+
+## Dependencies
+MATH=-lm
+
+## Flags
+DEBUG= #'-g'
+STANDARD=-std=c99
+WARNINGS=-Wall
+OPTIMIZED=-O3  #-Ofast
+# OPENMP=-fopenmp
+
+## Formatter
+STYLE_BLUEPRINT=webkit
+FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
+
+## make build
+build: $(SRC)
+	$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)
+
+format: $(SRC)
+	$(FORMATTER) $(SRC)
+
+run: $(SRC) $(OUTPUT)
+	OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
+
+time-linux: 
+	@echo "Requires /bin/time, found on GNU/Linux systems" && echo
+	
+	@echo "Running 100x and taking avg time $(OUTPUT)"
+	@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
+
+## Profiling
+
+profile-linux: 
+	echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
+	echo "Must be run as sudo"
+	$(CC) $(SRC) $(MATH) -o $(OUTPUT)
+	sudo perf record ./$(OUTPUT)
+	sudo perf report
+	rm perf.data
--- a/squiggle.c
+++ b/squiggle.c
@ -94,12 +94,12 @@ double sample_gamma(double alpha, uint64_t* seed)
    // A Simple Method for Generating Gamma Variables, Marsaglia and Wan Tsang, 2001
    // https://dl.acm.org/doi/pdf/10.1145/358407.358414
    // see also the references/ folder
-		// Note that the Wikipedia page for the gamma distribution includes a scaling parameter
-		// k or beta
-		// https://en.wikipedia.org/wiki/Gamma_distribution
-		// such that gamma_k(alpha, k) = k * gamma(alpha)
-		// or gamma_beta(alpha, beta) = gamma(alpha) / beta
-		// So far I have not needed to use this, and thus the second parameter is by default 1.
+    // Note that the Wikipedia page for the gamma distribution includes a scaling parameter
+    // k or beta
+    // https://en.wikipedia.org/wiki/Gamma_distribution
+    // such that gamma_k(alpha, k) = k * gamma(alpha)
+    // or gamma_beta(alpha, beta) = gamma(alpha) / beta
+    // So far I have not needed to use this, and thus the second parameter is by default 1.
    if (alpha >= 1) {
        double d, c, x, v, u;
        d = alpha - 1.0 / 3.0;
@ -386,38 +386,40 @@ struct box sampler_cdf_double(double cdf(double), uint64_t* seed)
 // Get confidence intervals, given a sampler

 struct c_i {
-	float low;
-	float high;
+    float low;
+    float high;
 };
-int compare_doubles(const void *p, const void *q) {
-		// https://wikiless.esmailelbob.xyz/wiki/Qsort?lang=en
-    double x = *(const double *)p;
-    double y = *(const double *)q;
+int compare_doubles(const void* p, const void* q)
+{
+    // https://wikiless.esmailelbob.xyz/wiki/Qsort?lang=en
+    double x = *(const double*)p;
+    double y = *(const double*)q;

    /* Avoid return x - y, which can cause undefined behaviour
       because of signed integer overflow. */
    if (x < y)
-        return -1;  // Return -1 if you want ascending, 1 if you want descending order. 
+        return -1; // Return -1 if you want ascending, 1 if you want descending order.
    else if (x > y)
-        return 1;   // Return 1 if you want ascending, -1 if you want descending order.
+        return 1; // Return 1 if you want ascending, -1 if you want descending order.

    return 0;
 }
-struct c_i get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed){
-	int n = 100 * 1000;
-	double* samples_array = malloc(n * sizeof(double));
-	for(int i=0; i<n; i++){
-		samples_array[i] = sampler(seed);
-	}
-	qsort(samples_array, n, sizeof(double), compare_doubles);
+struct c_i get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed)
+{
+    int n = 100 * 1000;
+    double* samples_array = malloc(n * sizeof(double));
+    for (int i = 0; i < n; i++) {
+        samples_array[i] = sampler(seed);
+    }
+    qsort(samples_array, n, sizeof(double), compare_doubles);

-	struct c_i result = {
-		.low = samples_array[5000], 
-		.high =samples_array[94999],
-	};
-	free(samples_array);
+    struct c_i result = {
+        .low = samples_array[5000],
+        .high = samples_array[94999],
+    };
+    free(samples_array);

-	return result;
+    return result;
 }

 /* Could also define other variations, e.g.,