add serious model, add template.

This commit is contained in:
NunoSempere 2023-07-24 00:37:45 +02:00
parent 84399e60a2
commit 9c19095955
4 changed files with 104 additions and 33 deletions

View File

@ -20,12 +20,14 @@ A self-contained C99 library that provides a subset of [Squiggle](https://www.sq
You can follow some example usage in the examples/ folder
1. In the first example, we define a small model, and draw one sample from it
2. In the second example, we define a small model, and return many samples
3. In the third example, we use a gcc extension—nested functions—to rewrite the code from point 2. in a more linear way.
4. In the fourth example, we define some simple cdfs, and we draw samples from those cdfs. We see that this approach is slower than using the built-in samplers, e.g., the normal sampler.
5. In the fifth example, we define the cdf for the beta distribution, and we draw samples from it.
6. In the sixth example, we take samples from simple gamma and beta distributions, using the samplers provided by this library.
1. In the 1st example, we define a small model, and draw one sample from it
2. In the 2nd example, we define a small model, and return many samples
3. In the 3rd example, we use a gcc extension—nested functions—to rewrite the code from point 2. in a more linear way.
4. In the 4th example, we define some simple cdfs, and we draw samples from those cdfs. We see that this approach is slower than using the built-in samplers, e.g., the normal sampler.
5. In the 5th example, we define the cdf for the beta distribution, and we draw samples from it.
6. In the 6th example, we take samples from simple gamma and beta distributions, using the samplers provided by this library.
7. In the 7th example, we get the 90% confidence interval of a beta distribution
8. The 8th example translates the models from Eli and Nuño from [Samotsvety Nuclear Risk Forecasts — March 2022](https://forum.nunosempere.com/posts/KRFXjCqqfGQAYirm5/samotsvety-nuclear-risk-forecasts-march-2022#Nu_o_Sempere) into squiggle.c, then creates a mixture from both, and returns the mean probability of death per month and the 90% confidence interval.
## Commentary

View File

@ -0,0 +1,14 @@
#include "../../squiggle.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
int main()
{
// set randomness seed
uint64_t* seed = malloc(sizeof(uint64_t));
*seed = 1000; // xorshift can't start with 0
free(seed);
}

View File

@ -0,0 +1,53 @@
# Interface:
# make
# make build
# make format
# make run
# Compiler
CC=gcc
# CC=tcc # <= faster compilation
# Main file
SRC=example.c ../../squiggle.c
OUTPUT=example
## Dependencies
MATH=-lm
## Flags
DEBUG= #'-g'
STANDARD=-std=c99
WARNINGS=-Wall
OPTIMIZED=-O3 #-Ofast
# OPENMP=-fopenmp
## Formatter
STYLE_BLUEPRINT=webkit
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
## make build
build: $(SRC)
$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)
format: $(SRC)
$(FORMATTER) $(SRC)
run: $(SRC) $(OUTPUT)
OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
time-linux:
@echo "Requires /bin/time, found on GNU/Linux systems" && echo
@echo "Running 100x and taking avg time $(OUTPUT)"
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
## Profiling
profile-linux:
echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
echo "Must be run as sudo"
$(CC) $(SRC) $(MATH) -o $(OUTPUT)
sudo perf record ./$(OUTPUT)
sudo perf report
rm perf.data

View File

@ -94,12 +94,12 @@ double sample_gamma(double alpha, uint64_t* seed)
// A Simple Method for Generating Gamma Variables, Marsaglia and Wan Tsang, 2001
// https://dl.acm.org/doi/pdf/10.1145/358407.358414
// see also the references/ folder
// Note that the Wikipedia page for the gamma distribution includes a scaling parameter
// k or beta
// https://en.wikipedia.org/wiki/Gamma_distribution
// such that gamma_k(alpha, k) = k * gamma(alpha)
// or gamma_beta(alpha, beta) = gamma(alpha) / beta
// So far I have not needed to use this, and thus the second parameter is by default 1.
// Note that the Wikipedia page for the gamma distribution includes a scaling parameter
// k or beta
// https://en.wikipedia.org/wiki/Gamma_distribution
// such that gamma_k(alpha, k) = k * gamma(alpha)
// or gamma_beta(alpha, beta) = gamma(alpha) / beta
// So far I have not needed to use this, and thus the second parameter is by default 1.
if (alpha >= 1) {
double d, c, x, v, u;
d = alpha - 1.0 / 3.0;
@ -386,38 +386,40 @@ struct box sampler_cdf_double(double cdf(double), uint64_t* seed)
// Get confidence intervals, given a sampler
struct c_i {
float low;
float high;
float low;
float high;
};
int compare_doubles(const void *p, const void *q) {
// https://wikiless.esmailelbob.xyz/wiki/Qsort?lang=en
double x = *(const double *)p;
double y = *(const double *)q;
int compare_doubles(const void* p, const void* q)
{
// https://wikiless.esmailelbob.xyz/wiki/Qsort?lang=en
double x = *(const double*)p;
double y = *(const double*)q;
/* Avoid return x - y, which can cause undefined behaviour
because of signed integer overflow. */
if (x < y)
return -1; // Return -1 if you want ascending, 1 if you want descending order.
return -1; // Return -1 if you want ascending, 1 if you want descending order.
else if (x > y)
return 1; // Return 1 if you want ascending, -1 if you want descending order.
return 1; // Return 1 if you want ascending, -1 if you want descending order.
return 0;
}
struct c_i get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed){
int n = 100 * 1000;
double* samples_array = malloc(n * sizeof(double));
for(int i=0; i<n; i++){
samples_array[i] = sampler(seed);
}
qsort(samples_array, n, sizeof(double), compare_doubles);
struct c_i get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed)
{
int n = 100 * 1000;
double* samples_array = malloc(n * sizeof(double));
for (int i = 0; i < n; i++) {
samples_array[i] = sampler(seed);
}
qsort(samples_array, n, sizeof(double), compare_doubles);
struct c_i result = {
.low = samples_array[5000],
.high =samples_array[94999],
};
free(samples_array);
struct c_i result = {
.low = samples_array[5000],
.high = samples_array[94999],
};
free(samples_array);
return result;
return result;
}
/* Could also define other variations, e.g.,