Compare commits
7 Commits
2a9d3bf135
...
4f32ccbd21
Author | SHA1 | Date | |
---|---|---|---|
4f32ccbd21 | |||
a48b15f171 | |||
9a56a63c61 | |||
1d5e0a6a7f | |||
1bc7c90fcf | |||
3d3a3f0045 | |||
2a39a04c69 |
Binary file not shown.
1
C/alt/04-factor-out-paralellization/why.md
Normal file
1
C/alt/04-factor-out-paralellization/why.md
Normal file
|
@ -0,0 +1 @@
|
||||||
|
So that the mixture distribution could be composable with other functions in squiggle.c
|
37
C/alt/05-refactor-still-split-array/desiderata.md
Normal file
37
C/alt/05-refactor-still-split-array/desiderata.md
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
Instead of
|
||||||
|
|
||||||
|
```C
|
||||||
|
#pragma omp parallel private(i, sample_index, split_array_length)
|
||||||
|
{
|
||||||
|
#pragma omp for
|
||||||
|
for (i = 0; i < n_threads; i++) {
|
||||||
|
split_array_length = split_array_get_length(i, N, n_threads);
|
||||||
|
for (int j = 0; j < split_array_length; j++) {
|
||||||
|
results[i][j] = sampler(seeds[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
Algo como
|
||||||
|
|
||||||
|
```C
|
||||||
|
#pragma omp parallel private(i, sample_index, bounds)
|
||||||
|
{
|
||||||
|
#pragma omp for
|
||||||
|
for (i = 0; i < n_threads; i++) {
|
||||||
|
int bounds[2] = split_array_get_bounds(i, N, n_threads);
|
||||||
|
for (int j = bound[0]; j < bounds[1] + 1; j++) {
|
||||||
|
// o j < bounds[1], no se si el +1 va a ser más elegante
|
||||||
|
// dentro o fuera, aunque algo me dice que dentro
|
||||||
|
results[j] = sampler(seeds[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
Por qué? Porque esto hace que la estructura subyacente sea un solo array,
|
||||||
|
lo cual implica que no *tengo* que utilizar split_array_functions especializadas
|
||||||
|
si no quiero
|
107
C/alt/05-refactor-still-split-array/makefile
Normal file
107
C/alt/05-refactor-still-split-array/makefile
Normal file
|
@ -0,0 +1,107 @@
|
||||||
|
# Interface:
|
||||||
|
# make
|
||||||
|
# make build
|
||||||
|
# make format
|
||||||
|
# make run
|
||||||
|
|
||||||
|
# Compiler
|
||||||
|
CC=gcc
|
||||||
|
# CC=tcc # <= faster compilation
|
||||||
|
|
||||||
|
# Main file
|
||||||
|
SRC=samples.c
|
||||||
|
OUTPUT=out/samples
|
||||||
|
|
||||||
|
SRC_ONE_THREAD=./samples-one-thread.c
|
||||||
|
OUTPUT_ONE_THREAD=out/samples-one-thread
|
||||||
|
|
||||||
|
## Dependencies
|
||||||
|
# Has no dependencies
|
||||||
|
MATH=-lm
|
||||||
|
|
||||||
|
## Flags
|
||||||
|
DEBUG= #'-g'
|
||||||
|
STANDARD=-std=c99
|
||||||
|
WARNINGS=-Wall
|
||||||
|
OPTIMIZED=-O3 #-O3 actually gives better performance than -Ofast, at least for this version
|
||||||
|
OPENMP=-fopenmp
|
||||||
|
|
||||||
|
## Formatter
|
||||||
|
STYLE_BLUEPRINT=webkit
|
||||||
|
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
|
||||||
|
|
||||||
|
## make build
|
||||||
|
build: $(SRC)
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(OPENMP) $(MATH) -o $(OUTPUT)
|
||||||
|
|
||||||
|
static:
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(OPENMP) $(MATH) -o $(OUTPUT)
|
||||||
|
|
||||||
|
format: $(SRC)
|
||||||
|
$(FORMATTER) $(SRC)
|
||||||
|
|
||||||
|
run: $(SRC) $(OUTPUT)
|
||||||
|
OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
|
||||||
|
|
||||||
|
multi:
|
||||||
|
OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
|
||||||
|
OMP_NUM_THREADS=2 ./$(OUTPUT) && echo
|
||||||
|
OMP_NUM_THREADS=4 ./$(OUTPUT) && echo
|
||||||
|
OMP_NUM_THREADS=8 ./$(OUTPUT) && echo
|
||||||
|
OMP_NUM_THREADS=16 ./$(OUTPUT) && echo
|
||||||
|
|
||||||
|
## Timing
|
||||||
|
|
||||||
|
time-linux:
|
||||||
|
@echo "Requires /bin/time, found on GNU/Linux systems" && echo
|
||||||
|
|
||||||
|
@echo "Running 100x and taking avg time: OMP_NUM_THREADS=1 $(OUTPUT)"
|
||||||
|
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do OMP_NUM_THREADS=1 $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
|
||||||
|
|
||||||
|
@echo "Running 100x and taking avg time: OMP_NUM_THREADS=2 $(OUTPUT)"
|
||||||
|
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do OMP_NUM_THREADS=2 $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 2 threads: |" | sed 's|$$|ms|' && echo
|
||||||
|
|
||||||
|
@echo "Running 100x and taking avg time: OMP_NUM_THREADS=4 $(OUTPUT)"
|
||||||
|
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do OMP_NUM_THREADS=4 $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time for 4 threads: |" | sed 's|$$|ms|' && echo
|
||||||
|
|
||||||
|
@echo "Running 100x and taking avg time: OMP_NUM_THREADS=8 $(OUTPUT)"
|
||||||
|
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do OMP_NUM_THREADS=8 $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 8 threads: |" | sed 's|$$|ms|' && echo
|
||||||
|
|
||||||
|
@echo "Running 100x and taking avg time: OMP_NUM_THREADS=16 $(OUTPUT)"
|
||||||
|
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do OMP_NUM_THREADS=16 $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 16 threads: |" | sed 's|$$|ms|' && echo
|
||||||
|
|
||||||
|
time-linux-fastest:
|
||||||
|
@echo "Running 100x and taking avg time: OMP_NUM_THREADS=32 $(OUTPUT)"
|
||||||
|
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do OMP_NUM_THREADS=16 $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 16 threads: |" | sed 's|$$|ms|' && echo
|
||||||
|
|
||||||
|
time-linux-simple:
|
||||||
|
@echo "Requires /bin/time, found on GNU/Linux systems" && echo
|
||||||
|
OMP_NUM_THREADS=1 /bin/time -f "Time: %es" ./$(OUTPUT) && echo
|
||||||
|
OMP_NUM_THREADS=2 /bin/time -f "Time: %es" ./$(OUTPUT) && echo
|
||||||
|
OMP_NUM_THREADS=4 /bin/time -f "Time: %es" ./$(OUTPUT) && echo
|
||||||
|
OMP_NUM_THREADS=8 /bin/time -f "Time: %es" ./$(OUTPUT) && echo
|
||||||
|
OMP_NUM_THREADS=16 /bin/time -f "Time: %es" ./$(OUTPUT) && echo
|
||||||
|
OMP_NUM_THREADS=32 /bin/time -f "Time: %es" ./$(OUTPUT) && echo
|
||||||
|
|
||||||
|
## Profiling
|
||||||
|
|
||||||
|
profile-linux:
|
||||||
|
echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
|
||||||
|
echo "Must be run as sudo"
|
||||||
|
$(CC) $(SRC) $(OPENMP) $(MATH) -o $(OUTPUT)
|
||||||
|
# ./$(OUTPUT)
|
||||||
|
# gprof:
|
||||||
|
# gprof $(OUTPUT) gmon.out > analysis.txt
|
||||||
|
# rm gmon.out
|
||||||
|
# vim analysis.txt
|
||||||
|
# rm analysis.txt
|
||||||
|
# perf:
|
||||||
|
OMP_NUM_THREADS=16 sudo perf record $(OUTPUT)
|
||||||
|
sudo perf report
|
||||||
|
rm perf.data
|
||||||
|
|
||||||
|
|
||||||
|
## Install
|
||||||
|
debian-install-dependencies:
|
||||||
|
sudo apt-get install libomp-dev
|
||||||
|
|
BIN
C/alt/05-refactor-still-split-array/out/samples
Executable file
BIN
C/alt/05-refactor-still-split-array/out/samples
Executable file
Binary file not shown.
283
C/alt/05-refactor-still-split-array/samples.c
Normal file
283
C/alt/05-refactor-still-split-array/samples.c
Normal file
|
@ -0,0 +1,283 @@
|
||||||
|
#include <math.h>
|
||||||
|
#include <omp.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
const float PI = 3.14159265358979323846;
|
||||||
|
|
||||||
|
#define N_SAMPLES (1024 * 1000 )
|
||||||
|
|
||||||
|
//Array helpers
|
||||||
|
|
||||||
|
void array_print(float* array, int length)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < length; i++) {
|
||||||
|
printf("item[%d] = %f\n", i, array[i]);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
float array_sum(float* array, int length)
|
||||||
|
{
|
||||||
|
float output = 0.0;
|
||||||
|
for (int i = 0; i < length; i++) {
|
||||||
|
output += array[i];
|
||||||
|
}
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
|
void array_cumsum(float* array_to_sum, float* array_cumsummed, int length)
|
||||||
|
{
|
||||||
|
array_cumsummed[0] = array_to_sum[0];
|
||||||
|
for (int i = 1; i < length; i++) {
|
||||||
|
array_cumsummed[i] = array_cumsummed[i - 1] + array_to_sum[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Split array helpers
|
||||||
|
int split_array_get_length(int index, int total_length, int n_threads)
|
||||||
|
{
|
||||||
|
return (total_length % n_threads > index ? total_length / n_threads + 1 : total_length / n_threads);
|
||||||
|
}
|
||||||
|
|
||||||
|
void split_array_allocate(float** meta_array, int length, int divide_into)
|
||||||
|
{
|
||||||
|
int split_array_length;
|
||||||
|
|
||||||
|
for (int i = 0; i < divide_into; i++) {
|
||||||
|
split_array_length = split_array_get_length(i, length, divide_into);
|
||||||
|
meta_array[i] = malloc(split_array_length * sizeof(float));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void split_array_free(float** meta_array, int divided_into)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < divided_into; i++) {
|
||||||
|
free(meta_array[i]);
|
||||||
|
}
|
||||||
|
free(meta_array);
|
||||||
|
}
|
||||||
|
|
||||||
|
float split_array_sum(float** meta_array, int length, int divided_into)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
float output = 0;
|
||||||
|
|
||||||
|
#pragma omp parallel for reduction(+ \
|
||||||
|
: output)
|
||||||
|
for (int i = 0; i < divided_into; i++) {
|
||||||
|
float own_partial_sum = 0;
|
||||||
|
int split_array_length = split_array_get_length(i, length, divided_into);
|
||||||
|
for (int j = 0; j < split_array_length; j++) {
|
||||||
|
own_partial_sum += meta_array[i][j];
|
||||||
|
}
|
||||||
|
output += own_partial_sum;
|
||||||
|
}
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pseudo Random number generator
|
||||||
|
|
||||||
|
uint32_t xorshift32(uint32_t* seed)
|
||||||
|
{
|
||||||
|
// Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RN_SAMPLESGs"
|
||||||
|
// See <https://stackoverflow.com/questions/53886131/how-does-xorshift32-works>
|
||||||
|
// https://en.wikipedia.org/wiki/Xorshift
|
||||||
|
// Also some drama: <https://www.pcg-random.org/posts/on-vignas-pcg-critique.html>, <https://prng.di.unimi.it/>
|
||||||
|
|
||||||
|
uint32_t x = *seed;
|
||||||
|
x ^= x << 13;
|
||||||
|
x ^= x >> 17;
|
||||||
|
x ^= x << 5;
|
||||||
|
return *seed = x;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Distribution & sampling functions
|
||||||
|
|
||||||
|
float rand_0_to_1(uint32_t* seed)
|
||||||
|
{
|
||||||
|
return ((float)xorshift32(seed)) / ((float)UINT32_MAX);
|
||||||
|
/*
|
||||||
|
uint32_t x = *seed;
|
||||||
|
x ^= x << 13;
|
||||||
|
x ^= x >> 17;
|
||||||
|
x ^= x << 5;
|
||||||
|
return ((float)(*seed = x))/((float) UIN_SAMPLEST32_MAX);
|
||||||
|
*/
|
||||||
|
// previously:
|
||||||
|
// ((float)rand_r(seed) / (float)RAN_SAMPLESD_MAX)
|
||||||
|
// and before that: rand, but it wasn't thread-safe.
|
||||||
|
// See: <https://stackoverflow.com/questions/43151361/how-to-create-thread-safe-random-number-generator-in-c-using-rand-r> for why to use rand_r:
|
||||||
|
// rand() is not thread-safe, as it relies on (shared) hidden seed.
|
||||||
|
}
|
||||||
|
|
||||||
|
float rand_float(float max, uint32_t* seed)
|
||||||
|
{
|
||||||
|
return rand_0_to_1(seed) * max;
|
||||||
|
}
|
||||||
|
|
||||||
|
float ur_normal(uint32_t* seed)
|
||||||
|
{
|
||||||
|
float u1 = rand_0_to_1(seed);
|
||||||
|
float u2 = rand_0_to_1(seed);
|
||||||
|
float z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2);
|
||||||
|
return z;
|
||||||
|
}
|
||||||
|
|
||||||
|
float random_uniform(float from, float to, uint32_t* seed)
|
||||||
|
{
|
||||||
|
return rand_0_to_1(seed) * (to - from) + from;
|
||||||
|
}
|
||||||
|
|
||||||
|
float random_normal(float mean, float sigma, uint32_t* seed)
|
||||||
|
{
|
||||||
|
return (mean + sigma * ur_normal(seed));
|
||||||
|
}
|
||||||
|
|
||||||
|
float random_lognormal(float logmean, float logsigma, uint32_t* seed)
|
||||||
|
{
|
||||||
|
return expf(random_normal(logmean, logsigma, seed));
|
||||||
|
}
|
||||||
|
|
||||||
|
float random_to(float low, float high, uint32_t* seed)
|
||||||
|
{
|
||||||
|
const float N_SAMPLESORMAL95CON_SAMPLESFIDEN_SAMPLESCE = 1.6448536269514722;
|
||||||
|
float loglow = logf(low);
|
||||||
|
float loghigh = logf(high);
|
||||||
|
float logmean = (loglow + loghigh) / 2;
|
||||||
|
float logsigma = (loghigh - loglow) / (2.0 * N_SAMPLESORMAL95CON_SAMPLESFIDEN_SAMPLESCE);
|
||||||
|
return random_lognormal(logmean, logsigma, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mixture function
|
||||||
|
|
||||||
|
float mixture(float (*samplers[])(uint32_t*), float* weights, int n_dists, uint32_t* seed)
|
||||||
|
{
|
||||||
|
|
||||||
|
// You can see a slightly simpler version of this function in the git history
|
||||||
|
// or in alt/C-02-better-algorithm-one-thread/
|
||||||
|
float sum_weights = array_sum(weights, n_dists);
|
||||||
|
float* cumsummed_normalized_weights = malloc(n_dists * sizeof(float));
|
||||||
|
cumsummed_normalized_weights[0] = weights[0] / sum_weights;
|
||||||
|
for (int i = 1; i < n_dists; i++) {
|
||||||
|
cumsummed_normalized_weights[i] = cumsummed_normalized_weights[i - 1] + weights[i] / sum_weights;
|
||||||
|
}
|
||||||
|
|
||||||
|
//create var holders
|
||||||
|
float p1, result;
|
||||||
|
int sample_index, i, own_length;
|
||||||
|
p1 = random_uniform(0, 1, seed);
|
||||||
|
for (int i = 0; i < n_dists; i++) {
|
||||||
|
if (p1 < cumsummed_normalized_weights[i]) {
|
||||||
|
result = samplers[i](seed);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(cumsummed_normalized_weights);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parallization function
|
||||||
|
void paralellize(float (*sampler)(uint32_t* seed), float* results, int n_threads, int n_samples){
|
||||||
|
|
||||||
|
if((N_SAMPLES % n_threads) != 0){
|
||||||
|
fprintf(stderr, "Number of samples isn't divisible by number of threads, aborting\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
int n_samples_per_thread = N_SAMPLES / n_threads;
|
||||||
|
|
||||||
|
float** split_results = malloc(n_threads * sizeof(float*));
|
||||||
|
for(int i=0; i<n_threads; i++){
|
||||||
|
split_results[i] = malloc(n_samples_per_thread * sizeof(float));
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t** seeds = malloc(n_threads * sizeof(uint32_t*));
|
||||||
|
for (uint32_t i = 0; i < n_threads; i++) {
|
||||||
|
seeds[i] = malloc(sizeof(uint32_t));
|
||||||
|
*seeds[i] = i + 1; // xorshift can't start with 0
|
||||||
|
}
|
||||||
|
|
||||||
|
int i;
|
||||||
|
#pragma omp parallel private(i)
|
||||||
|
{
|
||||||
|
#pragma omp for
|
||||||
|
for (i = 0; i < n_threads; i++) {
|
||||||
|
// split_array_length = split_array_get_length(i, N_SAMPLES, n_threads);
|
||||||
|
for (int j = 0; j < n_samples_per_thread; j++) {
|
||||||
|
split_results[i][j] = sampler(seeds[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int i=0; i<n_threads; i++){
|
||||||
|
int lower_bound = i * (n_samples / n_threads);
|
||||||
|
int upper_bound = ((i+1) * (n_samples / n_threads)) - 1;
|
||||||
|
// printf("Lower bound: %d, upper bound: %d\n", lower_bound, upper_bound);
|
||||||
|
for(int j=lower_bound; j<upper_bound; j++){
|
||||||
|
results[j] = split_results[i][j-lower_bound];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < n_threads; i++) {
|
||||||
|
free(seeds[i]);
|
||||||
|
}
|
||||||
|
free(seeds);
|
||||||
|
|
||||||
|
for(int i=0; i<n_threads; i++){
|
||||||
|
free(split_results[i]);
|
||||||
|
}
|
||||||
|
free(split_results);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Functions used for the BOTEC.
|
||||||
|
// Their type has to be the same, as we will be passing them around.
|
||||||
|
|
||||||
|
float sample_0(uint32_t* seed)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
float sample_1(uint32_t* seed)
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
float sample_few(uint32_t* seed)
|
||||||
|
{
|
||||||
|
return random_to(1, 3, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
float sample_many(uint32_t* seed)
|
||||||
|
{
|
||||||
|
return random_to(2, 10, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
float sample_mixture(uint32_t* seed){
|
||||||
|
float p_a, p_b, p_c;
|
||||||
|
|
||||||
|
// Initialize variables
|
||||||
|
p_a = 0.8;
|
||||||
|
p_b = 0.5;
|
||||||
|
p_c = p_a * p_b;
|
||||||
|
|
||||||
|
// Generate mixture
|
||||||
|
int n_dists = 4;
|
||||||
|
float weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
|
||||||
|
float (*samplers[])(uint32_t*) = { sample_0, sample_1, sample_few, sample_many };
|
||||||
|
|
||||||
|
return mixture(samplers, weights, n_dists, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
int n_threads = omp_get_max_threads();
|
||||||
|
float* split_array_results = malloc(N_SAMPLES * sizeof(float));
|
||||||
|
|
||||||
|
paralellize(sample_mixture, split_array_results, n_threads, N_SAMPLES);
|
||||||
|
printf("Sum(split_array_results, N_SAMPLES)/N_SAMPLES = %f\n",
|
||||||
|
array_sum(split_array_results, N_SAMPLES) / N_SAMPLES);
|
||||||
|
|
||||||
|
free(split_array_results);
|
||||||
|
return 0;
|
||||||
|
}
|
17
C/alt/05-refactor-still-split-array/time.txt
Normal file
17
C/alt/05-refactor-still-split-array/time.txt
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
Requires /bin/time, found on GNU/Linux systems
|
||||||
|
|
||||||
|
Running 100x and taking avg time: OMP_NUM_THREADS=1 out/samples
|
||||||
|
Time using 1 thread: 35.60ms
|
||||||
|
|
||||||
|
Running 100x and taking avg time: OMP_NUM_THREADS=2 out/samples
|
||||||
|
Time using 2 threads: 38.40ms
|
||||||
|
|
||||||
|
Running 100x and taking avg time: OMP_NUM_THREADS=4 out/samples
|
||||||
|
Time for 4 threads: 26.30ms
|
||||||
|
|
||||||
|
Running 100x and taking avg time: OMP_NUM_THREADS=8 out/samples
|
||||||
|
Time using 8 threads: 15.30ms
|
||||||
|
|
||||||
|
Running 100x and taking avg time: OMP_NUM_THREADS=16 out/samples
|
||||||
|
Time using 16 threads: 10.40ms
|
||||||
|
|
37
C/alt/06-refactor-one-array/desiderata.md
Normal file
37
C/alt/06-refactor-one-array/desiderata.md
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
Instead of
|
||||||
|
|
||||||
|
```C
|
||||||
|
#pragma omp parallel private(i, sample_index, split_array_length)
|
||||||
|
{
|
||||||
|
#pragma omp for
|
||||||
|
for (i = 0; i < n_threads; i++) {
|
||||||
|
split_array_length = split_array_get_length(i, N, n_threads);
|
||||||
|
for (int j = 0; j < split_array_length; j++) {
|
||||||
|
results[i][j] = sampler(seeds[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
Algo como
|
||||||
|
|
||||||
|
```C
|
||||||
|
#pragma omp parallel private(i, sample_index, bounds)
|
||||||
|
{
|
||||||
|
#pragma omp for
|
||||||
|
for (i = 0; i < n_threads; i++) {
|
||||||
|
int bounds[2] = split_array_get_bounds(i, N, n_threads);
|
||||||
|
for (int j = bound[0]; j < bounds[1] + 1; j++) {
|
||||||
|
// o j < bounds[1], no se si el +1 va a ser más elegante
|
||||||
|
// dentro o fuera, aunque algo me dice que dentro
|
||||||
|
results[j] = sampler(seeds[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
Por qué? Porque esto hace que la estructura subyacente sea un solo array,
|
||||||
|
lo cual implica que no *tengo* que utilizar split_array_functions especializadas
|
||||||
|
si no quiero
|
107
C/alt/06-refactor-one-array/makefile
Normal file
107
C/alt/06-refactor-one-array/makefile
Normal file
|
@ -0,0 +1,107 @@
|
||||||
|
# Interface:
|
||||||
|
# make
|
||||||
|
# make build
|
||||||
|
# make format
|
||||||
|
# make run
|
||||||
|
|
||||||
|
# Compiler
|
||||||
|
CC=gcc
|
||||||
|
# CC=clang
|
||||||
|
# CC=tcc # <= faster compilation
|
||||||
|
|
||||||
|
# Main file
|
||||||
|
SRC=samples.c
|
||||||
|
OUTPUT=out/samples
|
||||||
|
|
||||||
|
SRC_ONE_THREAD=./samples-one-thread.c
|
||||||
|
OUTPUT_ONE_THREAD=out/samples-one-thread
|
||||||
|
|
||||||
|
## Dependencies
|
||||||
|
# Has no dependencies
|
||||||
|
MATH=-lm
|
||||||
|
|
||||||
|
## Flags
|
||||||
|
DEBUG= #'-g'
|
||||||
|
STANDARD=-std=c99
|
||||||
|
WARNINGS=-Wall
|
||||||
|
OPTIMIZED=-O3 #-O3 actually gives better performance than -Ofast, at least for this version
|
||||||
|
OPENMP=-fopenmp
|
||||||
|
|
||||||
|
## Formatter
|
||||||
|
STYLE_BLUEPRINT=webkit
|
||||||
|
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
|
||||||
|
|
||||||
|
## make build
|
||||||
|
build: $(SRC)
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(OPENMP) $(MATH) -o $(OUTPUT)
|
||||||
|
|
||||||
|
static:
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(OPENMP) $(MATH) -o $(OUTPUT)
|
||||||
|
|
||||||
|
format: $(SRC)
|
||||||
|
$(FORMATTER) $(SRC)
|
||||||
|
|
||||||
|
run: $(SRC) $(OUTPUT)
|
||||||
|
OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
|
||||||
|
|
||||||
|
multi:
|
||||||
|
OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
|
||||||
|
OMP_NUM_THREADS=2 ./$(OUTPUT) && echo
|
||||||
|
OMP_NUM_THREADS=4 ./$(OUTPUT) && echo
|
||||||
|
OMP_NUM_THREADS=8 ./$(OUTPUT) && echo
|
||||||
|
OMP_NUM_THREADS=16 ./$(OUTPUT) && echo
|
||||||
|
|
||||||
|
## Timing
|
||||||
|
|
||||||
|
time-linux:
|
||||||
|
@echo "Requires /bin/time, found on GNU/Linux systems" && echo
|
||||||
|
|
||||||
|
@echo "Running 100x and taking avg time: OMP_NUM_THREADS=1 $(OUTPUT)"
|
||||||
|
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do OMP_NUM_THREADS=1 $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
|
||||||
|
|
||||||
|
@echo "Running 100x and taking avg time: OMP_NUM_THREADS=2 $(OUTPUT)"
|
||||||
|
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do OMP_NUM_THREADS=2 $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 2 threads: |" | sed 's|$$|ms|' && echo
|
||||||
|
|
||||||
|
@echo "Running 100x and taking avg time: OMP_NUM_THREADS=4 $(OUTPUT)"
|
||||||
|
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do OMP_NUM_THREADS=4 $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time for 4 threads: |" | sed 's|$$|ms|' && echo
|
||||||
|
|
||||||
|
@echo "Running 100x and taking avg time: OMP_NUM_THREADS=8 $(OUTPUT)"
|
||||||
|
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do OMP_NUM_THREADS=8 $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 8 threads: |" | sed 's|$$|ms|' && echo
|
||||||
|
|
||||||
|
@echo "Running 100x and taking avg time: OMP_NUM_THREADS=16 $(OUTPUT)"
|
||||||
|
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do OMP_NUM_THREADS=16 $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 16 threads: |" | sed 's|$$|ms|' && echo
|
||||||
|
|
||||||
|
time-linux-fastest:
|
||||||
|
@echo "Running 100x and taking avg time: OMP_NUM_THREADS=16 $(OUTPUT)"
|
||||||
|
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do OMP_NUM_THREADS=16 $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 16 threads: |" | sed 's|$$|ms|' && echo
|
||||||
|
|
||||||
|
time-linux-simple:
|
||||||
|
@echo "Requires /bin/time, found on GNU/Linux systems" && echo
|
||||||
|
OMP_NUM_THREADS=1 /bin/time -f "Time: %es" ./$(OUTPUT) && echo
|
||||||
|
OMP_NUM_THREADS=2 /bin/time -f "Time: %es" ./$(OUTPUT) && echo
|
||||||
|
OMP_NUM_THREADS=4 /bin/time -f "Time: %es" ./$(OUTPUT) && echo
|
||||||
|
OMP_NUM_THREADS=8 /bin/time -f "Time: %es" ./$(OUTPUT) && echo
|
||||||
|
OMP_NUM_THREADS=16 /bin/time -f "Time: %es" ./$(OUTPUT) && echo
|
||||||
|
|
||||||
|
## Profiling
|
||||||
|
|
||||||
|
profile-linux:
|
||||||
|
echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
|
||||||
|
echo "Must be run as sudo"
|
||||||
|
$(CC) $(SRC) $(OPENMP) $(MATH) -o $(OUTPUT)
|
||||||
|
# ./$(OUTPUT)
|
||||||
|
# gprof:
|
||||||
|
# gprof $(OUTPUT) gmon.out > analysis.txt
|
||||||
|
# rm gmon.out
|
||||||
|
# vim analysis.txt
|
||||||
|
# rm analysis.txt
|
||||||
|
# perf:
|
||||||
|
OMP_NUM_THREADS=16 sudo perf record $(OUTPUT)
|
||||||
|
sudo perf report
|
||||||
|
rm perf.data
|
||||||
|
|
||||||
|
|
||||||
|
## Install
|
||||||
|
debian-install-dependencies:
|
||||||
|
sudo apt-get install libomp-dev
|
||||||
|
|
BIN
C/alt/06-refactor-one-array/out/samples
Executable file
BIN
C/alt/06-refactor-one-array/out/samples
Executable file
Binary file not shown.
220
C/alt/06-refactor-one-array/samples.c
Normal file
220
C/alt/06-refactor-one-array/samples.c
Normal file
|
@ -0,0 +1,220 @@
|
||||||
|
#include <math.h>
|
||||||
|
#include <omp.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
const float PI = 3.14159265358979323846;
|
||||||
|
|
||||||
|
#define N_SAMPLES (1024 * 1000)
|
||||||
|
|
||||||
|
//Array helpers
|
||||||
|
void array_print(float* array, int length)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < length; i++) {
|
||||||
|
printf("item[%d] = %f\n", i, array[i]);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
float array_sum(float* array, int length)
|
||||||
|
{
|
||||||
|
float output = 0.0;
|
||||||
|
for (int i = 0; i < length; i++) {
|
||||||
|
output += array[i];
|
||||||
|
}
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
|
void array_cumsum(float* array_to_sum, float* array_cumsummed, int length)
|
||||||
|
{
|
||||||
|
array_cumsummed[0] = array_to_sum[0];
|
||||||
|
for (int i = 1; i < length; i++) {
|
||||||
|
array_cumsummed[i] = array_cumsummed[i - 1] + array_to_sum[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pseudo Random number generator
|
||||||
|
|
||||||
|
uint32_t xorshift32(uint32_t* seed)
|
||||||
|
{
|
||||||
|
// Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RN_SAMPLESGs"
|
||||||
|
// See <https://stackoverflow.com/questions/53886131/how-does-xorshift32-works>
|
||||||
|
// https://en.wikipedia.org/wiki/Xorshift
|
||||||
|
// Also some drama: <https://www.pcg-random.org/posts/on-vignas-pcg-critique.html>, <https://prng.di.unimi.it/>
|
||||||
|
|
||||||
|
uint32_t x = *seed;
|
||||||
|
x ^= x << 13;
|
||||||
|
x ^= x >> 17;
|
||||||
|
x ^= x << 5;
|
||||||
|
return *seed = x;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Distribution & sampling functions
|
||||||
|
|
||||||
|
float rand_0_to_1(uint32_t* seed)
|
||||||
|
{
|
||||||
|
return ((float)xorshift32(seed)) / ((float)UINT32_MAX);
|
||||||
|
/*
|
||||||
|
uint32_t x = *seed;
|
||||||
|
x ^= x << 13;
|
||||||
|
x ^= x >> 17;
|
||||||
|
x ^= x << 5;
|
||||||
|
return ((float)(*seed = x))/((float) UIN_SAMPLEST32_MAX);
|
||||||
|
*/
|
||||||
|
// previously:
|
||||||
|
// ((float)rand_r(seed) / (float)RAN_SAMPLESD_MAX)
|
||||||
|
// and before that: rand, but it wasn't thread-safe.
|
||||||
|
// See: <https://stackoverflow.com/questions/43151361/how-to-create-thread-safe-random-number-generator-in-c-using-rand-r> for why to use rand_r:
|
||||||
|
// rand() is not thread-safe, as it relies on (shared) hidden seed.
|
||||||
|
}
|
||||||
|
|
||||||
|
float rand_float(float max, uint32_t* seed)
|
||||||
|
{
|
||||||
|
return rand_0_to_1(seed) * max;
|
||||||
|
}
|
||||||
|
|
||||||
|
float ur_normal(uint32_t* seed)
|
||||||
|
{
|
||||||
|
float u1 = rand_0_to_1(seed);
|
||||||
|
float u2 = rand_0_to_1(seed);
|
||||||
|
float z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2);
|
||||||
|
return z;
|
||||||
|
}
|
||||||
|
|
||||||
|
float random_uniform(float from, float to, uint32_t* seed)
|
||||||
|
{
|
||||||
|
return rand_0_to_1(seed) * (to - from) + from;
|
||||||
|
}
|
||||||
|
|
||||||
|
float random_normal(float mean, float sigma, uint32_t* seed)
|
||||||
|
{
|
||||||
|
return (mean + sigma * ur_normal(seed));
|
||||||
|
}
|
||||||
|
|
||||||
|
float random_lognormal(float logmean, float logsigma, uint32_t* seed)
|
||||||
|
{
|
||||||
|
return expf(random_normal(logmean, logsigma, seed));
|
||||||
|
}
|
||||||
|
|
||||||
|
float random_to(float low, float high, uint32_t* seed)
|
||||||
|
{
|
||||||
|
const float N_SAMPLESORMAL95CON_SAMPLESFIDEN_SAMPLESCE = 1.6448536269514722;
|
||||||
|
float loglow = logf(low);
|
||||||
|
float loghigh = logf(high);
|
||||||
|
float logmean = (loglow + loghigh) / 2;
|
||||||
|
float logsigma = (loghigh - loglow) / (2.0 * N_SAMPLESORMAL95CON_SAMPLESFIDEN_SAMPLESCE);
|
||||||
|
return random_lognormal(logmean, logsigma, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mixture function
|
||||||
|
|
||||||
|
float mixture(float (*samplers[])(uint32_t*), float* weights, int n_dists, uint32_t* seed)
|
||||||
|
{
|
||||||
|
|
||||||
|
// You can see a slightly simpler version of this function in the git history
|
||||||
|
// or in alt/C-02-better-algorithm-one-thread/
|
||||||
|
float sum_weights = array_sum(weights, n_dists);
|
||||||
|
float* cumsummed_normalized_weights = malloc(n_dists * sizeof(float));
|
||||||
|
cumsummed_normalized_weights[0] = weights[0] / sum_weights;
|
||||||
|
for (int i = 1; i < n_dists; i++) {
|
||||||
|
cumsummed_normalized_weights[i] = cumsummed_normalized_weights[i - 1] + weights[i] / sum_weights;
|
||||||
|
}
|
||||||
|
|
||||||
|
//create var holders
|
||||||
|
float p1, result;
|
||||||
|
int sample_index, i, own_length;
|
||||||
|
p1 = random_uniform(0, 1, seed);
|
||||||
|
for (int i = 0; i < n_dists; i++) {
|
||||||
|
if (p1 < cumsummed_normalized_weights[i]) {
|
||||||
|
result = samplers[i](seed);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(cumsummed_normalized_weights);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parallization function
|
||||||
|
void paralellize(float (*sampler)(uint32_t* seed), float* results, int n_threads, int n_samples){
|
||||||
|
if((N_SAMPLES % n_threads) != 0){
|
||||||
|
fprintf(stderr, "Number of samples isn't divisible by number of threads, aborting\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
// int n_samples_per_thread = N_SAMPLES / n_thread;
|
||||||
|
uint32_t** seeds = malloc(n_threads * sizeof(uint32_t*));
|
||||||
|
for (uint32_t i = 0; i < n_threads; i++) {
|
||||||
|
seeds[i] = malloc(sizeof(uint32_t));
|
||||||
|
*seeds[i] = i + 1; // xorshift can't start with 0
|
||||||
|
}
|
||||||
|
|
||||||
|
int i;
|
||||||
|
#pragma omp parallel private(i)
|
||||||
|
{
|
||||||
|
#pragma omp for
|
||||||
|
for (i = 0; i < n_threads; i++) {
|
||||||
|
int lower_bound = i * (n_samples / n_threads);
|
||||||
|
int upper_bound = ((i+1) * (n_samples / n_threads)) - 1;
|
||||||
|
// printf("Lower bound: %d, upper bound: %d\n", lower_bound, upper_bound);
|
||||||
|
for (int j = lower_bound; j < upper_bound; j++) {
|
||||||
|
results[j] = sampler(seeds[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < n_threads; i++) {
|
||||||
|
free(seeds[i]);
|
||||||
|
}
|
||||||
|
free(seeds);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Functions used for the BOTEC.
|
||||||
|
// Their type has to be the same, as we will be passing them around.
|
||||||
|
|
||||||
|
float sample_0(uint32_t* seed)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
float sample_1(uint32_t* seed)
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
float sample_few(uint32_t* seed)
|
||||||
|
{
|
||||||
|
return random_to(1, 3, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
float sample_many(uint32_t* seed)
|
||||||
|
{
|
||||||
|
return random_to(2, 10, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
float sample_mixture(uint32_t* seed){
|
||||||
|
float p_a, p_b, p_c;
|
||||||
|
|
||||||
|
// Initialize variables
|
||||||
|
p_a = 0.8;
|
||||||
|
p_b = 0.5;
|
||||||
|
p_c = p_a * p_b;
|
||||||
|
|
||||||
|
// Generate mixture
|
||||||
|
int n_dists = 4;
|
||||||
|
float weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
|
||||||
|
float (*samplers[])(uint32_t*) = { sample_0, sample_1, sample_few, sample_many };
|
||||||
|
|
||||||
|
return mixture(samplers, weights, n_dists, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
int n_threads = omp_get_max_threads();
|
||||||
|
float* split_array_results = malloc(N_SAMPLES * sizeof(float));
|
||||||
|
|
||||||
|
paralellize(sample_mixture, split_array_results, n_threads, N_SAMPLES);
|
||||||
|
printf("Sum(split_array_results, N_SAMPLES)/N_SAMPLES = %f\n", array_sum(split_array_results, N_SAMPLES) / N_SAMPLES);
|
||||||
|
|
||||||
|
free(split_array_results);
|
||||||
|
return 0;
|
||||||
|
}
|
17
C/alt/06-refactor-one-array/time.txt
Normal file
17
C/alt/06-refactor-one-array/time.txt
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
Requires /bin/time, found on GNU/Linux systems
|
||||||
|
|
||||||
|
Running 100x and taking avg time: OMP_NUM_THREADS=1 out/samples
|
||||||
|
Time using 1 thread: 32.30ms
|
||||||
|
|
||||||
|
Running 100x and taking avg time: OMP_NUM_THREADS=2 out/samples
|
||||||
|
Time using 2 threads: 30.80ms
|
||||||
|
|
||||||
|
Running 100x and taking avg time: OMP_NUM_THREADS=4 out/samples
|
||||||
|
Time for 4 threads: 30.50ms
|
||||||
|
|
||||||
|
Running 100x and taking avg time: OMP_NUM_THREADS=8 out/samples
|
||||||
|
Time using 8 threads: 12.90ms
|
||||||
|
|
||||||
|
Running 100x and taking avg time: OMP_NUM_THREADS=16 out/samples
|
||||||
|
Time using 16 threads: 9.10ms
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
SHELL := /bin/bash ## <= required to use time
|
||||||
# Interface:
|
# Interface:
|
||||||
# make
|
# make
|
||||||
# make build
|
# make build
|
||||||
|
@ -43,6 +44,9 @@ format: $(SRC)
|
||||||
run: $(SRC) $(OUTPUT)
|
run: $(SRC) $(OUTPUT)
|
||||||
OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
|
OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
|
||||||
|
|
||||||
|
run-16: $(SRC) $(OUTPUT)
|
||||||
|
OMP_NUM_THREADS=16 ./$(OUTPUT) && echo
|
||||||
|
|
||||||
multi:
|
multi:
|
||||||
OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
|
OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
|
||||||
OMP_NUM_THREADS=2 ./$(OUTPUT) && echo
|
OMP_NUM_THREADS=2 ./$(OUTPUT) && echo
|
||||||
|
|
24
README.md
24
README.md
|
@ -24,19 +24,17 @@ The name of this repository is a pun on two meanings of "time to": "how much tim
|
||||||
|
|
||||||
| Language | Time | Lines of code |
|
| Language | Time | Lines of code |
|
||||||
|-----------------------------|-----------|---------------|
|
|-----------------------------|-----------|---------------|
|
||||||
| C (optimized, 16 threads) | 5ms | 249 |
|
| C | 5.6ms | 252 |
|
||||||
| squiggle.c | 37ms | 54* |
|
| squiggle.c | 12.7ms | 29* |
|
||||||
| Nim | 38ms | 84 |
|
| Nim | 40.8ms | 84 |
|
||||||
| Lua (LuaJIT) | 68ms | 82 |
|
| Lua (LuaJIT) | 69.9ms | 82 |
|
||||||
| OCaml (flambda) | 164ms | 123 |
|
| OCaml (flambda) | 187.9ms | 123 |
|
||||||
| Lua | 278ms | 82 |
|
| Squiggle (bun) | 0.386s | 14* |
|
||||||
| C (naïve implementation) | 292ms | 149 |
|
| Javascript (node) | 0.445s | 69 |
|
||||||
| Javascript (NodeJS) | 732ms | 69 |
|
| SquigglePy (v0.27) | 1.507s | 18* |
|
||||||
| Squiggle | 1,536s | 14* |
|
| R (3.6.1) | 4.508s | 49 |
|
||||||
| SquigglePy | 1.602s | 18* |
|
| Python 3.9 | 11.879s | 56 |
|
||||||
| R | 7,000s | 49 |
|
| Gavin Howard's bc | 15.960s | 101 |
|
||||||
| Gavin Howard's bc | 15.9s | 59 |
|
|
||||||
| Python (CPython) | 16,641s | 56 |
|
|
||||||
|
|
||||||
Time measurements taken with the [time](https://man7.org/linux/man-pages/man1/time.1.html) tool, using 1M samples. But different implementations use different algorithms and, occasionally, different time measuring methodologies (for the C, Nim and Lua implementations, I run the program 100 times and take the mean). Their speed was also measured under different loads in my machine. So I think that these time estimates are accurate within maybe ~2x or so.
|
Time measurements taken with the [time](https://man7.org/linux/man-pages/man1/time.1.html) tool, using 1M samples. But different implementations use different algorithms and, occasionally, different time measuring methodologies (for the C, Nim and Lua implementations, I run the program 100 times and take the mean). Their speed was also measured under different loads in my machine. So I think that these time estimates are accurate within maybe ~2x or so.
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
SHELL := /bin/bash
|
SHELL := /bin/bash
|
||||||
|
|
||||||
compute:
|
run:
|
||||||
ghbc -l squiggle.bc estimate.bc
|
ghbc -l squiggle.bc estimate.bc
|
||||||
|
|
||||||
time:
|
time:
|
||||||
|
|
25
makefile
Normal file
25
makefile
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
SHELL := /bin/bash ## <= required to use time
|
||||||
|
MAKEFLAGS += --no-print-directory
|
||||||
|
|
||||||
|
build-all:
|
||||||
|
cd C && make build
|
||||||
|
cd nim && make fast
|
||||||
|
cd ocaml && make fast
|
||||||
|
cd squiggle.c && make build
|
||||||
|
|
||||||
|
time-all:
|
||||||
|
@echo "# bc" && cd bc && make time && echo && echo
|
||||||
|
@echo "# C" && cd C && make time-linux-fastest && echo && echo
|
||||||
|
@echo "# js (bun)" && cd js && time bun samples.js && echo && echo
|
||||||
|
@echo "# js (node)" && cd js && time node samples.js && echo && echo
|
||||||
|
@echo "# lua (luajit)" && cd lua && make time-linux && echo && echo
|
||||||
|
@echo "# nim" && cd nim && make time-linux && echo && echo
|
||||||
|
@echo "# ocaml" && cd ocaml && make time-linux && echo && echo
|
||||||
|
@echo "# Python (3.9)" && cd python && time python3.9 samples.py && echo && echo
|
||||||
|
@echo "# R (3.6.1)" && cd R && time r samples.R && echo && echo
|
||||||
|
@echo "# Squiggle (0.8.6)" && cd squiggle && make time-linux && echo && echo
|
||||||
|
@echo "# SquigglePy (0.27)" && cd squigglepy && make time && echo && echo
|
||||||
|
@echo "# squiggle.c" && cd squiggle.c && make time-linux && echo && echo
|
||||||
|
|
||||||
|
record:
|
||||||
|
make time-all > time.txt 2>&1
|
|
@ -1,6 +1,9 @@
|
||||||
SHELL := /bin/bash ## <= required to use time
|
SHELL := /bin/bash ## <= required to use time
|
||||||
VERBOSE=--verbosity:0
|
VERBOSE=--verbosity:0
|
||||||
|
|
||||||
|
fast: samples.nim
|
||||||
|
nim c $(VERBOSE) -d:danger samples.nim
|
||||||
|
|
||||||
build: samples.nim
|
build: samples.nim
|
||||||
nim c $(VERBOSE) samples.nim
|
nim c $(VERBOSE) samples.nim
|
||||||
|
|
||||||
|
|
BIN
nim/samples
BIN
nim/samples
Binary file not shown.
|
@ -22,6 +22,11 @@ fast:
|
||||||
time:
|
time:
|
||||||
bash -c "time $(OUT)"
|
bash -c "time $(OUT)"
|
||||||
|
|
||||||
|
time-linux:
|
||||||
|
@echo "Requires /bin/time, found on GNU/Linux systems" && echo
|
||||||
|
@echo "Running 100x and taking avg time of: $(OUTPUT)"
|
||||||
|
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do ./out/samples; done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time: |" | sed 's|$$|ms|' && echo
|
||||||
|
|
||||||
profile:
|
profile:
|
||||||
$(OC) $(PERF) $(SRC) -o $(OUT)
|
$(OC) $(PERF) $(SRC) -o $(OUT)
|
||||||
mv samples.cmi samples.cmx samples.o ./out/
|
mv samples.cmi samples.cmx samples.o ./out/
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
16
squiggle.c/makefile
Normal file
16
squiggle.c/makefile
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
OUTPUT=./samples
|
||||||
|
|
||||||
|
install:
|
||||||
|
rm -r squiggle_c
|
||||||
|
git clone https://git.nunosempere.com/personal/squiggle.c
|
||||||
|
mv squiggle.c squiggle_c
|
||||||
|
sudo rm -r squiggle_c/.git
|
||||||
|
cp -r squiggle_c/examples/more/12_time_to_botec_parallel/example.c samples.c
|
||||||
|
sed -i 's|../../..|squiggle_c|' samples.c
|
||||||
|
|
||||||
|
build:
|
||||||
|
gcc -O3 samples.c ./squiggle_c/squiggle.c ./squiggle_c/squiggle_more.c -lm -fopenmp -o $(OUTPUT)
|
||||||
|
|
||||||
|
time-linux:
|
||||||
|
@echo "Running 100x and taking avg time: OMP_NUM_THREADS=16 $(OUTPUT)"
|
||||||
|
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do OMP_NUM_THREADS=16 $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 16 threads: |" | sed 's|$$|ms|' && echo
|
BIN
squiggle.c/samples
Executable file
BIN
squiggle.c/samples
Executable file
Binary file not shown.
29
squiggle.c/samples.c
Normal file
29
squiggle.c/samples.c
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
#include "squiggle_c/squiggle.h"
|
||||||
|
#include "squiggle_c/squiggle_more.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
double p_a = 0.8;
|
||||||
|
double p_b = 0.5;
|
||||||
|
double p_c = p_a * p_b;
|
||||||
|
|
||||||
|
double sample_0(uint64_t* seed){ return 0; }
|
||||||
|
double sample_1(uint64_t* seed) { return 1; }
|
||||||
|
double sample_few(uint64_t* seed) { return sample_to(1, 3, seed); }
|
||||||
|
double sample_many(uint64_t* seed) { return sample_to(2, 10, seed); }
|
||||||
|
|
||||||
|
int n_dists = 4;
|
||||||
|
double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
|
||||||
|
double (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many };
|
||||||
|
double sampler_result(uint64_t* seed) {
|
||||||
|
return sample_mixture(samplers, weights, n_dists, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
int n_samples = 1000 * 1000, n_threads = 16;
|
||||||
|
double* results = malloc(n_samples * sizeof(double));
|
||||||
|
parallel_sampler(sampler_result, results, n_threads, n_samples);
|
||||||
|
printf("Avg: %f\n", array_sum(results, n_samples)/n_samples);
|
||||||
|
free(results);
|
||||||
|
}
|
7
squiggle.c/squiggle_c/LICENSE.txt
Normal file
7
squiggle.c/squiggle_c/LICENSE.txt
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
Copyright 2023 and ss., Nuño Sempere
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
378
squiggle.c/squiggle_c/README.md
Normal file
378
squiggle.c/squiggle_c/README.md
Normal file
|
@ -0,0 +1,378 @@
|
||||||
|
# squiggle.c
|
||||||
|
|
||||||
|
squiggle.c is a [grug-brained](https://grugbrain.dev/) self-contained C99 library that provides functions for simple Monte Carlo estimation, based on [Squiggle](https://www.squiggle-language.com/).
|
||||||
|
|
||||||
|
## Why C?
|
||||||
|
|
||||||
|
- Because it is fast
|
||||||
|
- Because I enjoy it
|
||||||
|
- Because C is honest
|
||||||
|
- Because it will last long
|
||||||
|
- Because it can fit in my head
|
||||||
|
- Because if you can implement something in C, you can implement it anywhere else
|
||||||
|
- Because it can be made faster if need be
|
||||||
|
- e.g., with a multi-threading library like OpenMP,
|
||||||
|
- o by implementing faster but more complex algorithms
|
||||||
|
- or more simply, by inlining the sampling functions (adding an `inline` directive before their function declaration)
|
||||||
|
- **Because there are few abstractions between it and machine code** (C => assembly => machine code with gcc, or C => machine code, with tcc), leading to fewer errors beyond the programmer's control.
|
||||||
|
|
||||||
|
## Getting started
|
||||||
|
|
||||||
|
You can follow some example usage in the examples/ folder
|
||||||
|
|
||||||
|
1. In the [1st example](examples/01_one_sample/example.c), we define a small model, and draw one sample from it
|
||||||
|
2. In the [2nd example](examples/02_many_samples/example.c), we define a small model, and return many samples
|
||||||
|
3. In the [3rd example](examples/03_gcc_nested_function/example.c), we use a gcc extension—nested functions—to rewrite the code from point 2. in a more linear way.
|
||||||
|
4. In the [4th example](examples/04_sample_from_cdf_simple/example.c), we define some simple cdfs, and we draw samples from those cdfs. We see that this approach is slower than using the built-in samplers, e.g., the normal sampler.
|
||||||
|
5. In the [5th example](examples/05_sample_from_cdf_beta/example.c), we define the cdf for the beta distribution, and we draw samples from it.
|
||||||
|
6. In the [6th example](examples/06_gamma_beta/example.c), we take samples from simple gamma and beta distributions, using the samplers provided by this library.
|
||||||
|
7. In the [7th example](examples/07_ci_beta/example.c), we get the 90% confidence interval of a beta distribution
|
||||||
|
8. The [8th example](examples/08_nuclear_war/example.c) translates the models from Eli and Nuño from [Samotsvety Nuclear Risk Forecasts — March 2022](https://forum.nunosempere.com/posts/KRFXjCqqfGQAYirm5/samotsvety-nuclear-risk-forecasts-march-2022#Nu_o_Sempere) into squiggle.c, then creates a mixture from both, and returns the mean probability of death per month and the 90% confidence interval.
|
||||||
|
8. The [9th example](examples/09_burn_10kg_fat/example.c) estimates how many minutes per day I would have to jump rope in order to lose 10kg of fat in half a year.
|
||||||
|
|
||||||
|
## Commentary
|
||||||
|
|
||||||
|
### squiggle.c is short
|
||||||
|
|
||||||
|
[squiggle.c](squiggle.c) is less than 600 lines of C, with a core of <250 lines. The reader could just read it and grasp its contents.
|
||||||
|
|
||||||
|
### Core strategy
|
||||||
|
|
||||||
|
This library provides some basic building blocks. The recommended strategy is to:
|
||||||
|
|
||||||
|
1. Define sampler functions, which take a seed, and return 1 sample
|
||||||
|
2. Compose those sampler functions to define your estimation model
|
||||||
|
3. At the end, call the last sampler function many times to generate many samples from your model
|
||||||
|
|
||||||
|
### Cdf auxiliary functions
|
||||||
|
|
||||||
|
|
||||||
|
### Nested functions and compilation with tcc.
|
||||||
|
|
||||||
|
GCC has an extension which allows a program to define a function inside another function. This makes squiggle.c code more linear and nicer to read, at the cost of becoming dependent on GCC and hence sacrificing portability and increasing compilation times. Conversely, compiling with tcc (tiny c compiler) is almost instantaneous, but leads to longer execution times and doesn't allow for nested functions.
|
||||||
|
|
||||||
|
| GCC | tcc |
|
||||||
|
| --- | --- |
|
||||||
|
| slower compilation | faster compilation |
|
||||||
|
| allows nested functions | doesn't allow nested functions |
|
||||||
|
| faster execution | slower execution |
|
||||||
|
|
||||||
|
My recommendation would be to use tcc while drawing a small number of samples for fast iteration, and then using gcc for the final version with lots of samples, and possibly with nested functions for ease of reading by others.
|
||||||
|
|
||||||
|
### Guarantees and licensing
|
||||||
|
|
||||||
|
- I offer no guarantees about stability, correctness, performance, etc. I might, for instance, abandon the version in C and rewrite it in Zig, Nim or Rust.
|
||||||
|
- This project mostly exists for my own usage & for my own amusement.
|
||||||
|
- Caution! Think carefully before using this project for anything important
|
||||||
|
- If you wanted to pay me to provide some stability or correctness, guarantees, or to tweak this library for your own usage, or to teach you how to use it, you could do so [here](https://nunosempere.com/consulting). Although this theoretical possibility exists, I don't I don't anticipate that this would be a good idea on most cases.
|
||||||
|
|
||||||
|
This project is released under the MIT license, a permissive open-source license. You can see it in the LICENSE.txt file.
|
||||||
|
|
||||||
|
### Design choices
|
||||||
|
|
||||||
|
This code should aim to be correct, then simple, then fast.
|
||||||
|
|
||||||
|
- It should be correct. The user should be able to rely on it and not think about whether errors come from the library.
|
||||||
|
- Nonetheless, the user should understand the limitations of sampling-based methods. See the section on [Tests and the long tail of the lognormal](https://git.nunosempere.com/personal/squiggle.c#tests-and-the-long-tail-of-the-lognormal) for a discussion of how sampling is bad at capturing some aspects of distributions with long tails.
|
||||||
|
- It should be clear, conceptually simple. Simple for me to implement, simple for others to understand.
|
||||||
|
- It should be fast. But when speed conflicts with simplicity, choose simplicity. For example, there might be several possible algorithms to sample a distribution, each of which is faster over part of the domain. In that case, it's conceptually simpler to just pick one algorithm, and pay the—normally small—performance penalty. In any case, though, the code should still be *way faster* than Python.
|
||||||
|
|
||||||
|
Note that being terse, or avoiding verbosity, is a non-goal. This is in part because of the constraints that C imposes. But it also aids with clarity and conceptual simplicity, as the issue of correlated samples illustrates in the next section.
|
||||||
|
|
||||||
|
### Correlated samples
|
||||||
|
|
||||||
|
In the original [squiggle](https://www.squiggle-language.com/) language, there is some ambiguity about what this code means:
|
||||||
|
|
||||||
|
```js
|
||||||
|
a = 1 to 10
|
||||||
|
b = 2 * a
|
||||||
|
c = b/a
|
||||||
|
c
|
||||||
|
```
|
||||||
|
|
||||||
|
Likewise in [squigglepy](https://github.com/rethinkpriorities/squigglepy):
|
||||||
|
|
||||||
|
```python
|
||||||
|
import squigglepy as sq
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
a = sq.to(1, 3)
|
||||||
|
b = 2 * a
|
||||||
|
c = b / a
|
||||||
|
|
||||||
|
c_samples = sq.sample(c, 10)
|
||||||
|
|
||||||
|
print(c_samples)
|
||||||
|
```
|
||||||
|
|
||||||
|
Should `c` be equal to `2`? or should it be equal to 2 times the expected distribution of the ratio of two independent draws from a (`2 * a/a`, as it were)?
|
||||||
|
|
||||||
|
In squiggle.c, this ambiguity doesn't exist, at the cost of much greater overhead & verbosity:
|
||||||
|
|
||||||
|
```c
|
||||||
|
// correlated samples
|
||||||
|
// gcc -O3 correlated.c squiggle.c -lm -o correlated
|
||||||
|
|
||||||
|
#include "squiggle.h"
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
int main(){
|
||||||
|
// set randomness seed
|
||||||
|
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
|
*seed = 1000; // xorshift can't start with a seed of 0
|
||||||
|
|
||||||
|
double a = sample_to(1, 10, seed);
|
||||||
|
double b = 2 * a;
|
||||||
|
double c = b / a;
|
||||||
|
|
||||||
|
printf("a: %f, b: %f, c: %f\n", a, b, c);
|
||||||
|
// a: 0.607162, b: 1.214325, c: 0.500000
|
||||||
|
|
||||||
|
free(seed);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
vs
|
||||||
|
|
||||||
|
```c
|
||||||
|
// uncorrelated samples
|
||||||
|
// gcc -O3 uncorrelated.c ../../squiggle.c -lm -o uncorrelated
|
||||||
|
|
||||||
|
#include "squiggle.h"
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
double draw_xyz(uint64_t* seed){
|
||||||
|
// function could also be placed inside main with gcc nested functions extension.
|
||||||
|
return sample_to(1, 20, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int main(){
|
||||||
|
// set randomness seed
|
||||||
|
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
|
*seed = 1000; // xorshift can't start with a seed of 0
|
||||||
|
|
||||||
|
double a = draw_xyz(seed);
|
||||||
|
double b = 2 * draw_xyz(seed);
|
||||||
|
double c = b / a;
|
||||||
|
|
||||||
|
printf("a: %f, b: %f, c: %f\n", a, b, c);
|
||||||
|
// a: 0.522484, b: 10.283501, c: 19.681936
|
||||||
|
|
||||||
|
free(seed)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Tests and the long tail of the lognormal
|
||||||
|
|
||||||
|
Distribution functions can be tested with:
|
||||||
|
|
||||||
|
```sh
|
||||||
|
cd tests
|
||||||
|
make && make run
|
||||||
|
```
|
||||||
|
|
||||||
|
`make verify` is an alias that runs all the tests and just displays the ones that are failing.
|
||||||
|
|
||||||
|
These tests are somewhat rudimentary: they get between 1M and 10M samples from a given sampling function, and check that their mean and standard deviations correspond to what they should theoretically should be.
|
||||||
|
|
||||||
|
If you run `make run` (or `make verify`), you will see errors such as these:
|
||||||
|
|
||||||
|
```
|
||||||
|
[-] Mean test for normal(47211.047473, 682197.019012) NOT passed.
|
||||||
|
Mean of normal(47211.047473, 682197.019012): 46933.673278, vs expected mean: 47211.047473
|
||||||
|
delta: -277.374195, relative delta: -0.005910
|
||||||
|
|
||||||
|
[-] Std test for lognormal(4.584666, 2.180816) NOT passed.
|
||||||
|
Std of lognormal(4.584666, 2.180816): 11443.588861, vs expected std: 11342.434900
|
||||||
|
delta: 101.153961, relative delta: 0.008839
|
||||||
|
|
||||||
|
[-] Std test for to(13839.861856, 897828.354318) NOT passed.
|
||||||
|
Std of to(13839.861856, 897828.354318): 495123.630575, vs expected std: 498075.002499
|
||||||
|
delta: -2951.371925, relative delta: -0.005961
|
||||||
|
```
|
||||||
|
|
||||||
|
These tests I wouldn't worry about. Due to luck of the draw, their relative error is a bit over 0.005, or 0.5%, and so the test fails. But it would surprise me if that had some meaningful practical implication.
|
||||||
|
|
||||||
|
The errors that should raise some worry are:
|
||||||
|
|
||||||
|
```
|
||||||
|
[-] Mean test for lognormal(1.210013, 4.766882) NOT passed.
|
||||||
|
Mean of lognormal(1.210013, 4.766882): 342337.257677, vs expected mean: 288253.061628
|
||||||
|
delta: 54084.196049, relative delta: 0.157985
|
||||||
|
[-] Std test for lognormal(1.210013, 4.766882) NOT passed.
|
||||||
|
Std of lognormal(1.210013, 4.766882): 208107782.972184, vs expected std: 24776840217.604111
|
||||||
|
delta: -24568732434.631927, relative delta: -118.057730
|
||||||
|
|
||||||
|
[-] Mean test for lognormal(-0.195240, 4.883106) NOT passed.
|
||||||
|
Mean of lognormal(-0.195240, 4.883106): 87151.733198, vs expected mean: 123886.818303
|
||||||
|
delta: -36735.085104, relative delta: -0.421507
|
||||||
|
[-] Std test for lognormal(-0.195240, 4.883106) NOT passed.
|
||||||
|
Std of lognormal(-0.195240, 4.883106): 33837426.331671, vs expected std: 18657000192.914921
|
||||||
|
delta: -18623162766.583248, relative delta: -550.371727
|
||||||
|
|
||||||
|
[-] Mean test for lognormal(0.644931, 4.795860) NOT passed.
|
||||||
|
Mean of lognormal(0.644931, 4.795860): 125053.904456, vs expected mean: 188163.894101
|
||||||
|
delta: -63109.989645, relative delta: -0.504662
|
||||||
|
[-] Std test for lognormal(0.644931, 4.795860) NOT passed.
|
||||||
|
Std of lognormal(0.644931, 4.795860): 39976300.711166, vs expected std: 18577298706.170452
|
||||||
|
delta: -18537322405.459286, relative delta: -463.707799
|
||||||
|
```
|
||||||
|
|
||||||
|
What is happening in this case is that you are taking a normal, like `normal(-0.195240, 4.883106)`, and you are exponentiating it to arrive at a lognormal. But `normal(-0.195240, 4.883106)` is going to have some noninsignificant weight on, say, 18. But `exp(18) = 39976300`, and points like it are going to end up a nontrivial amount to the analytical mean and standard deviation, even though they have little probability mass.
|
||||||
|
|
||||||
|
The reader can also check that for more plausible real-world values, like those fitting a lognormal to a really wide 90% confidence interval from 10 to 10k, errors aren't eggregious:
|
||||||
|
|
||||||
|
```
|
||||||
|
[x] Mean test for to(10.000000, 10000.000000) PASSED
|
||||||
|
[-] Std test for to(10.000000, 10000.000000) NOT passed.
|
||||||
|
Std of to(10.000000, 10000.000000): 23578.091775, vs expected std: 25836.381819
|
||||||
|
delta: -2258.290043, relative delta: -0.095779
|
||||||
|
```
|
||||||
|
|
||||||
|
Overall, I would caution that if you really care about the very far tails of distributions, you might want to instead use tools which can do some of the analytical manipulations for you, like the original Squiggle, Simple Squiggle (both linked below), or even doing lognormal multiplication by hand, relying on the fact that two lognormals multiplied together result in another lognormal with known shape.
|
||||||
|
|
||||||
|
In fact, squiggle.c does have a few functions for algebraic manipulations of simple distributions at the end of squiggle.c. But these are pretty rudimentary, and I don't know whether I'll end up expanding or deleting them.
|
||||||
|
|
||||||
|
### Results of running clang-tidy
|
||||||
|
|
||||||
|
clang-tidy is a utility to detect common errors in C/C++. You can run it with:
|
||||||
|
|
||||||
|
```
|
||||||
|
make tidy
|
||||||
|
```
|
||||||
|
|
||||||
|
It emits one warning about something I already took care of, so by default I've suppressed it. I think this is good news in terms of making me more confident that this simple library is correct :).
|
||||||
|
|
||||||
|
### Division between core functions and extraneous expansions
|
||||||
|
|
||||||
|
This library differentiates between core functions, which are pretty tightly scoped, and expansions and convenience functions, which are more meandering. Expansions are in `extra.c` and `extra.h`. To use them, take care to link them:
|
||||||
|
|
||||||
|
```
|
||||||
|
// In your C source file
|
||||||
|
#include "extra.h"
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
# When compiling:
|
||||||
|
gcc -std=c99 -Wall -O3 example.c squiggle.c extra.c -lm -o ./example
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Extra: Cdf auxiliary functions
|
||||||
|
|
||||||
|
I provide some Take a cdf, and return a sample from the distribution produced by that cdf. This might make it easier to program models, at the cost of a 20x to 60x slowdown in the parts of the code that use it.
|
||||||
|
|
||||||
|
#### Extra: Error propagation vs exiting on error
|
||||||
|
|
||||||
|
The process of taking a cdf and returning a sample might fail, e.g., it's a Newton method which might fail to converge because of cdf artifacts. The cdf itself might also fail, e.g., if a distribution only accepts a range of parameters, but is fed parameters outside that range.
|
||||||
|
|
||||||
|
This library provides two approaches:
|
||||||
|
|
||||||
|
1. Print the line and function in which the error occured, then exit on error
|
||||||
|
2. In situations where there might be an error, return a struct containing either the correct value or an error message:
|
||||||
|
|
||||||
|
```C
|
||||||
|
struct box {
|
||||||
|
int empty;
|
||||||
|
double content;
|
||||||
|
char* error_msg;
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
The first approach produces terser programs but might not scale. The second approach seems like it could lead to more robust programmes, but is more verbose.
|
||||||
|
|
||||||
|
Behaviour on error can be toggled by the `EXIT_ON_ERROR` variable. This library also provides a convenient macro, `PROCESS_ERROR`, to make error handling in either case much terser—see the usage in example 4 in the examples/ folder.
|
||||||
|
|
||||||
|
Overall, I'd describe the error handling capabilities of this library as pretty rudimentary. For example, this program might fail in surprising ways if you ask for a lognormal with negative standard deviation, because I haven't added error checking for that case yet.
|
||||||
|
|
||||||
|
|
||||||
|
## Related projects
|
||||||
|
|
||||||
|
- [Squiggle](https://www.squiggle-language.com/)
|
||||||
|
- [SquigglePy](https://github.com/rethinkpriorities/squigglepy)
|
||||||
|
- [Simple Squiggle](https://nunosempere.com/blog/2022/04/17/simple-squiggle/)
|
||||||
|
- [time to botec](https://github.com/NunoSempere/time-to-botec)
|
||||||
|
- [beta]()
|
||||||
|
|
||||||
|
## To do list
|
||||||
|
|
||||||
|
- [ ] Document paralellism
|
||||||
|
- [ ] Document confidence intervals
|
||||||
|
- [ ] Point out that, even though the C standard is ambiguous about this, this code assumes that doubles are 64 bit precision (otherwise the xorshift should be different).
|
||||||
|
- [ ] Document rudimentary algebra manipulations for normal/lognormal
|
||||||
|
- [ ] Think through whether to delete cdf => samples function
|
||||||
|
- [ ] Think through whether to:
|
||||||
|
- simplify and just abort on error
|
||||||
|
- complexify and use boxes for everything
|
||||||
|
- leave as is
|
||||||
|
- [ ] Systematize references
|
||||||
|
- [ ] Support all distribution functions in <https://www.squiggle-language.com/docs/Api/Dist>
|
||||||
|
- [ ] do so efficiently
|
||||||
|
- [ ] Add more functions to do algebra and get the 90% c.i. of normals, lognormals, betas, etc.
|
||||||
|
- Think through which of these make sense.
|
||||||
|
- [ ] Disambiguate sample_laplace--successes vs failures || successes vs total trials as two distinct and differently named functions
|
||||||
|
|
||||||
|
## Done
|
||||||
|
|
||||||
|
- [x] Add example for only one sample
|
||||||
|
- [x] Add example for many samples
|
||||||
|
- [ ] ~~Add a custom preprocessor to allow simple nested functions that don't rely on local scope?~~
|
||||||
|
- [x] Use gcc extension to define functions nested inside main.
|
||||||
|
- [x] Chain various `sample_mixture` functions
|
||||||
|
- [x] Add beta distribution
|
||||||
|
- See <https://stats.stackexchange.com/questions/502146/how-does-numpy-generate-samples-from-a-beta-distribution> for a faster method.
|
||||||
|
- [ ] ~~Use OpenMP for acceleration~~
|
||||||
|
- [x] Add function to get sample when given a cdf
|
||||||
|
- [x] Don't have a single header file.
|
||||||
|
- [x] Structure project a bit better
|
||||||
|
- [x] Simplify `PROCESS_ERROR` macro
|
||||||
|
- [x] Add README
|
||||||
|
- [x] Schema: a function which takes a sample and manipulates it,
|
||||||
|
- [x] and at the end, an array of samples.
|
||||||
|
- [x] Explain boxes
|
||||||
|
- [x] Explain nested functions
|
||||||
|
- [x] Explain exit on error
|
||||||
|
- [x] Explain individual examples
|
||||||
|
- [x] Rename functions to something more self-explanatory, e.g,. `sample_unit_normal`.
|
||||||
|
- [x] Add summarization functions: mean, std
|
||||||
|
- [x] Add sampling from a gamma distribution
|
||||||
|
- https://dl.acm.org/doi/pdf/10.1145/358407.358414
|
||||||
|
- [x] Explain correlated samples
|
||||||
|
- [ ] ~~Add tests in Stan?~~
|
||||||
|
- [x] Test summary statistics for each of the distributions.
|
||||||
|
- [x] For uniform
|
||||||
|
- [x] For normal
|
||||||
|
- [x] For lognormal
|
||||||
|
- [x] For lognormal (to syntax)
|
||||||
|
- [x] For beta distribution
|
||||||
|
- [x] Clarify gamma/standard gamma
|
||||||
|
- [x] Add efficient sampling from a beta distribution
|
||||||
|
- https://dl.acm.org/doi/10.1145/358407.358414
|
||||||
|
- https://link.springer.com/article/10.1007/bf02293108
|
||||||
|
- https://stats.stackexchange.com/questions/502146/how-does-numpy-generate-samples-from-a-beta-distribution
|
||||||
|
- https://github.com/numpy/numpy/blob/5cae51e794d69dd553104099305e9f92db237c53/numpy/random/src/distributions/distributions.c
|
||||||
|
- [x] Pontificate about lognormal tests
|
||||||
|
- [x] Give warning about sampling-based methods.
|
||||||
|
- [x] Have some more complicated & realistic example
|
||||||
|
- [x] Add summarization functions: 90% ci (or all c.i.?)
|
||||||
|
- [x] Link to the examples in the examples section.
|
||||||
|
- [x] Add a few functions for doing simple algebra on normals, and lognormals
|
||||||
|
- [x] Add prototypes
|
||||||
|
- [x] Use named structs
|
||||||
|
- [x] Add to header file
|
||||||
|
- [x] Provide example algebra
|
||||||
|
- [x] Add conversion between 90% ci and parameters.
|
||||||
|
- [x] Use that conversion in conjuction with small algebra.
|
||||||
|
- [x] Consider ergonomics of using ci instead of c_i
|
||||||
|
- [x] use named struct instead
|
||||||
|
- [x] demonstrate and document feeding a struct directly to a function; my_function((struct c_i){.low = 1, .high = 2});
|
||||||
|
- [ ] Consider desirability of defining shortcuts for those functions. Adds a level of magic, though.
|
||||||
|
- [ ] Test results
|
||||||
|
- [x] Move to own file? Or signpost in file? => signposted in file.
|
||||||
|
- [x] Write twitter thread: now [here](https://twitter.com/NunoSempere/status/1707041153210564959); retweets appreciated.
|
||||||
|
- [ ] ~~Think about whether to write a simple version of this for [uxn](https://100r.co/site/uxn.html), a minimalistic portable programming stack which, sadly, doesn't have doubles (64 bit floats)~~
|
BIN
squiggle.c/squiggle_c/examples/core/00_example_template/example
Executable file
BIN
squiggle.c/squiggle_c/examples/core/00_example_template/example
Executable file
Binary file not shown.
|
@ -0,0 +1,15 @@
|
||||||
|
#include "../../../squiggle.h"
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
// set randomness seed
|
||||||
|
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
|
*seed = 1000; // xorshift can't start with 0
|
||||||
|
|
||||||
|
// ...
|
||||||
|
|
||||||
|
free(seed);
|
||||||
|
}
|
BIN
squiggle.c/squiggle_c/examples/core/01_one_sample/example
Executable file
BIN
squiggle.c/squiggle_c/examples/core/01_one_sample/example
Executable file
Binary file not shown.
44
squiggle.c/squiggle_c/examples/core/01_one_sample/example.c
Normal file
44
squiggle.c/squiggle_c/examples/core/01_one_sample/example.c
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
#include "../../../squiggle.h"
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
// Estimate functions
|
||||||
|
double sample_0(uint64_t* seed)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
double sample_1(uint64_t* seed)
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
double sample_few(uint64_t* seed)
|
||||||
|
{
|
||||||
|
return sample_to(1, 3, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
double sample_many(uint64_t* seed)
|
||||||
|
{
|
||||||
|
return sample_to(2, 10, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
// set randomness seed
|
||||||
|
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
|
*seed = 1000; // xorshift can't start with 0
|
||||||
|
|
||||||
|
double p_a = 0.8;
|
||||||
|
double p_b = 0.5;
|
||||||
|
double p_c = p_a * p_b;
|
||||||
|
|
||||||
|
int n_dists = 4;
|
||||||
|
double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
|
||||||
|
double (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many };
|
||||||
|
|
||||||
|
double result_one = sample_mixture(samplers, weights, n_dists, seed);
|
||||||
|
printf("result_one: %f\n", result_one);
|
||||||
|
free(seed);
|
||||||
|
}
|
BIN
squiggle.c/squiggle_c/examples/core/02_time_to_botec/example
Executable file
BIN
squiggle.c/squiggle_c/examples/core/02_time_to_botec/example
Executable file
Binary file not shown.
|
@ -0,0 +1,55 @@
|
||||||
|
#include "../../../squiggle.h"
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
// Estimate functions
|
||||||
|
double sample_0(uint64_t* seed)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
double sample_1(uint64_t* seed)
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
double sample_few(uint64_t* seed)
|
||||||
|
{
|
||||||
|
return sample_to(1, 3, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
double sample_many(uint64_t* seed)
|
||||||
|
{
|
||||||
|
return sample_to(2, 10, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
// set randomness seed
|
||||||
|
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
|
*seed = 1000; // xorshift can't start with 0
|
||||||
|
|
||||||
|
double p_a = 0.8;
|
||||||
|
double p_b = 0.5;
|
||||||
|
double p_c = p_a * p_b;
|
||||||
|
|
||||||
|
int n_dists = 4;
|
||||||
|
double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
|
||||||
|
double (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many };
|
||||||
|
|
||||||
|
int n_samples = 1000000;
|
||||||
|
double* result_many = (double*)malloc(n_samples * sizeof(double));
|
||||||
|
for (int i = 0; i < n_samples; i++) {
|
||||||
|
result_many[i] = sample_mixture(samplers, weights, n_dists, seed);
|
||||||
|
}
|
||||||
|
printf("Mean: %f\n", array_mean(result_many, n_samples));
|
||||||
|
|
||||||
|
// printf("result_many: [");
|
||||||
|
// for(int i=0; i<100; i++){
|
||||||
|
// printf("%.2f, ", result_many[i]);
|
||||||
|
// }
|
||||||
|
// printf("]\n");
|
||||||
|
|
||||||
|
free(seed);
|
||||||
|
}
|
BIN
squiggle.c/squiggle_c/examples/core/03_gcc_nested_function/example
Executable file
BIN
squiggle.c/squiggle_c/examples/core/03_gcc_nested_function/example
Executable file
Binary file not shown.
|
@ -0,0 +1,39 @@
|
||||||
|
#include "../../../squiggle.h"
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
// set randomness seed
|
||||||
|
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
|
*seed = 1000; // xorshift can't start with 0
|
||||||
|
|
||||||
|
double p_a = 0.8;
|
||||||
|
double p_b = 0.5;
|
||||||
|
double p_c = p_a * p_b;
|
||||||
|
|
||||||
|
int n_dists = 4;
|
||||||
|
|
||||||
|
// These are nested functions. They will not compile without gcc.
|
||||||
|
double sample_0(uint64_t * seed) { return 0; }
|
||||||
|
double sample_1(uint64_t * seed) { return 1; }
|
||||||
|
double sample_few(uint64_t * seed) { return sample_to(1, 3, seed); }
|
||||||
|
double sample_many(uint64_t * seed) { return sample_to(2, 10, seed); }
|
||||||
|
|
||||||
|
double (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many };
|
||||||
|
double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
|
||||||
|
|
||||||
|
int n_samples = 1000000;
|
||||||
|
double* result_many = (double*)malloc(n_samples * sizeof(double));
|
||||||
|
for (int i = 0; i < n_samples; i++) {
|
||||||
|
result_many[i] = sample_mixture(samplers, weights, n_dists, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("result_many: [");
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
printf("%.2f, ", result_many[i]);
|
||||||
|
}
|
||||||
|
printf("]\n");
|
||||||
|
free(seed);
|
||||||
|
}
|
BIN
squiggle.c/squiggle_c/examples/core/04_gamma_beta/example
Executable file
BIN
squiggle.c/squiggle_c/examples/core/04_gamma_beta/example
Executable file
Binary file not shown.
44
squiggle.c/squiggle_c/examples/core/04_gamma_beta/example.c
Normal file
44
squiggle.c/squiggle_c/examples/core/04_gamma_beta/example.c
Normal file
|
@ -0,0 +1,44 @@
|
||||||
|
#include "../../../squiggle.h"
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
// Estimate functions
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
// set randomness seed
|
||||||
|
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
|
*seed = 1000; // xorshift can't start with 0
|
||||||
|
|
||||||
|
int n = 1000 * 1000;
|
||||||
|
/*
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
double gamma_0 = sample_gamma(0.0, seed);
|
||||||
|
// printf("sample_gamma(0.0): %f\n", gamma_0);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
*/
|
||||||
|
|
||||||
|
double* gamma_1_array = malloc(sizeof(double) * n);
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
double gamma_1 = sample_gamma(1.0, seed);
|
||||||
|
// printf("sample_gamma(1.0): %f\n", gamma_1);
|
||||||
|
gamma_1_array[i] = gamma_1;
|
||||||
|
}
|
||||||
|
printf("gamma(1) summary statistics = mean: %f, std: %f\n", array_mean(gamma_1_array, n), array_std(gamma_1_array, n));
|
||||||
|
free(gamma_1_array);
|
||||||
|
printf("\n");
|
||||||
|
|
||||||
|
double* beta_1_2_array = malloc(sizeof(double) * n);
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
double beta_1_2 = sample_beta(1, 2.0, seed);
|
||||||
|
// printf("sample_beta(1.0, 2.0): %f\n", beta_1_2);
|
||||||
|
beta_1_2_array[i] = beta_1_2;
|
||||||
|
}
|
||||||
|
printf("beta(1,2) summary statistics: mean: %f, std: %f\n", array_mean(beta_1_2_array, n), array_std(beta_1_2_array, n));
|
||||||
|
free(beta_1_2_array);
|
||||||
|
printf("\n");
|
||||||
|
|
||||||
|
free(seed);
|
||||||
|
}
|
BIN
squiggle.c/squiggle_c/examples/core/05_hundred_lognormals/example
Executable file
BIN
squiggle.c/squiggle_c/examples/core/05_hundred_lognormals/example
Executable file
Binary file not shown.
|
@ -0,0 +1,18 @@
|
||||||
|
#include "../../../squiggle.h"
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
// Estimate functions
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
// set randomness seed
|
||||||
|
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
|
*seed = 1000; // xorshift can't start with 0
|
||||||
|
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
double sample = sample_lognormal(0, 10, seed);
|
||||||
|
printf("%f\n", sample);
|
||||||
|
}
|
||||||
|
free(seed);
|
||||||
|
}
|
|
@ -0,0 +1 @@
|
||||||
|
./example | sort -h
|
85
squiggle.c/squiggle_c/examples/core/makefile
Normal file
85
squiggle.c/squiggle_c/examples/core/makefile
Normal file
|
@ -0,0 +1,85 @@
|
||||||
|
# Interface:
|
||||||
|
# make all
|
||||||
|
# make format-all
|
||||||
|
# make run-all
|
||||||
|
# make one DIR=01_one_sample
|
||||||
|
# make format-one DIR=01_one_sample
|
||||||
|
# make run-one DIR=01_one_sample
|
||||||
|
# make time-linux-one DIR=01_one_sample
|
||||||
|
# make profile-one DIR=01_one_sample
|
||||||
|
|
||||||
|
# Compiler
|
||||||
|
CC=gcc
|
||||||
|
# CC=tcc # <= faster compilation
|
||||||
|
|
||||||
|
# Main file
|
||||||
|
SRC=example.c
|
||||||
|
OUTPUT=example
|
||||||
|
|
||||||
|
## Dependencies
|
||||||
|
SQUIGGLE=../../squiggle.c
|
||||||
|
MATH=-lm
|
||||||
|
DEPS=$(SQUIGGLE) $(MATH)
|
||||||
|
|
||||||
|
## Flags
|
||||||
|
DEBUG= #'-g'
|
||||||
|
STANDARD=-std=c99
|
||||||
|
WARNINGS=-Wall
|
||||||
|
OPTIMIZED=-O3 #-Ofast
|
||||||
|
|
||||||
|
## Formatter
|
||||||
|
STYLE_BLUEPRINT=webkit
|
||||||
|
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
|
||||||
|
|
||||||
|
## make all
|
||||||
|
all:
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) 00_example_template/$(SRC) $(DEPS) -o 00_example_template/$(OUTPUT)
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) 01_one_sample/$(SRC) $(DEPS) -o 01_one_sample/$(OUTPUT)
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) 02_time_to_botec/$(SRC) $(DEPS) -o 02_time_to_botec/$(OUTPUT)
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) 03_gcc_nested_function/$(SRC) $(DEPS) -o 03_gcc_nested_function/$(OUTPUT)
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) 04_gamma_beta/$(SRC) $(DEPS) -o 04_gamma_beta/$(OUTPUT)
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) 05_hundred_lognormals/$(SRC) $(DEPS) -o 05_hundred_lognormals/$(OUTPUT)
|
||||||
|
|
||||||
|
format-all:
|
||||||
|
$(FORMATTER) 00_example_template/$(SRC)
|
||||||
|
$(FORMATTER) 01_one_sample/$(SRC)
|
||||||
|
$(FORMATTER) 02_time_to_botec/$(SRC)
|
||||||
|
$(FORMATTER) 03_gcc_nested_function/$(SRC)
|
||||||
|
$(FORMATTER) 04_gamma_beta/$(SRC)
|
||||||
|
$(FORMATTER) 05_hundred_lognormals/$(SRC)
|
||||||
|
|
||||||
|
run-all:
|
||||||
|
00_example_template/$(OUTPUT)
|
||||||
|
01_one_sample/$(OUTPUT)
|
||||||
|
02_time_to_botec/$(OUTPUT)
|
||||||
|
03_gcc_nested_function/$(OUTPUT)
|
||||||
|
04_gamma_beta/$(OUTPUT)
|
||||||
|
05_hundred_lognormals/$(OUTPUT)
|
||||||
|
|
||||||
|
## make one DIR=01_one_sample
|
||||||
|
one: $(DIR)/$(SRC)
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) $(DIR)/$(SRC) $(DEPS) -o $(DIR)/$(OUTPUT)
|
||||||
|
|
||||||
|
## make format-one DIR=01_one_sample
|
||||||
|
format-one: $(DIR)/$(SRC)
|
||||||
|
$(FORMATTER) $(DIR)/$(SRC)
|
||||||
|
|
||||||
|
## make run-one DIR=01_one_sample
|
||||||
|
run-one: $(DIR)/$(OUTPUT)
|
||||||
|
$(DIR)/$(OUTPUT) && echo
|
||||||
|
|
||||||
|
## make time-linux-one DIR=01_one_sample
|
||||||
|
time-linux-one: $(DIR)/$(OUTPUT)
|
||||||
|
@echo "Requires /bin/time, found on GNU/Linux systems" && echo
|
||||||
|
@echo "Running 100x and taking avg time $(DIR)/$(OUTPUT)"
|
||||||
|
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do $(DIR)/$(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
|
||||||
|
|
||||||
|
## e.g., make profile-linux-one DIR=01_one_sample
|
||||||
|
profile-linux-one:
|
||||||
|
echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
|
||||||
|
echo "Must be run as sudo"
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) $(DIR)/$(SRC) $(DEPS) -o $(DIR)/$(OUTPUT)
|
||||||
|
# $(CC) $(SRC) $(DEPS) -o $(OUTPUT)
|
||||||
|
sudo perf record $(DIR)/$(OUTPUT)
|
||||||
|
sudo perf report
|
||||||
|
rm perf.data
|
BIN
squiggle.c/squiggle_c/examples/more/00_example_template/example
Executable file
BIN
squiggle.c/squiggle_c/examples/more/00_example_template/example
Executable file
Binary file not shown.
|
@ -0,0 +1,16 @@
|
||||||
|
#include "../../../squiggle.h"
|
||||||
|
#include "../../../squiggle_more.h"
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
// set randomness seed
|
||||||
|
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
|
*seed = 1000; // xorshift can't start with 0
|
||||||
|
|
||||||
|
// ...
|
||||||
|
|
||||||
|
free(seed);
|
||||||
|
}
|
BIN
squiggle.c/squiggle_c/examples/more/01_sample_from_cdf/example
Executable file
BIN
squiggle.c/squiggle_c/examples/more/01_sample_from_cdf/example
Executable file
Binary file not shown.
103
squiggle.c/squiggle_c/examples/more/01_sample_from_cdf/example.c
Normal file
103
squiggle.c/squiggle_c/examples/more/01_sample_from_cdf/example.c
Normal file
|
@ -0,0 +1,103 @@
|
||||||
|
#include "../../../squiggle.h"
|
||||||
|
#include "../../../squiggle_more.h"
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <time.h>
|
||||||
|
|
||||||
|
#define NUM_SAMPLES 1000000
|
||||||
|
|
||||||
|
// Example cdf
|
||||||
|
double cdf_uniform_0_1(double x)
|
||||||
|
{
|
||||||
|
if (x < 0) {
|
||||||
|
return 0;
|
||||||
|
} else if (x > 1) {
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
double cdf_squared_0_1(double x)
|
||||||
|
{
|
||||||
|
if (x < 0) {
|
||||||
|
return 0;
|
||||||
|
} else if (x > 1) {
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
return x * x;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
double cdf_normal_0_1(double x)
|
||||||
|
{
|
||||||
|
double mean = 0;
|
||||||
|
double std = 1;
|
||||||
|
return 0.5 * (1 + erf((x - mean) / (std * sqrt(2)))); // erf from math.h
|
||||||
|
}
|
||||||
|
|
||||||
|
// Some testers
|
||||||
|
void test_inverse_cdf_double(char* cdf_name, double cdf_double(double))
|
||||||
|
{
|
||||||
|
struct box result = inverse_cdf_double(cdf_double, 0.5);
|
||||||
|
if (result.empty) {
|
||||||
|
printf("Inverse for %s not calculated\n", cdf_name);
|
||||||
|
exit(1);
|
||||||
|
} else {
|
||||||
|
printf("Inverse of %s at %f is: %f\n", cdf_name, 0.5, result.content);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_and_time_sampler_double(char* cdf_name, double cdf_double(double), uint64_t* seed)
|
||||||
|
{
|
||||||
|
printf("\nGetting some samples from %s:\n", cdf_name);
|
||||||
|
clock_t begin = clock();
|
||||||
|
for (int i = 0; i < NUM_SAMPLES; i++) {
|
||||||
|
struct box sample = sampler_cdf_double(cdf_double, seed);
|
||||||
|
if (sample.empty) {
|
||||||
|
printf("Error in sampler function for %s", cdf_name);
|
||||||
|
} else {
|
||||||
|
// printf("%f\n", sample.content);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
clock_t end = clock();
|
||||||
|
double time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
|
||||||
|
printf("Time spent: %f\n", time_spent);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
// Test inverse cdf double
|
||||||
|
test_inverse_cdf_double("cdf_uniform_0_1", cdf_uniform_0_1);
|
||||||
|
test_inverse_cdf_double("cdf_squared_0_1", cdf_squared_0_1);
|
||||||
|
test_inverse_cdf_double("cdf_normal_0_1", cdf_normal_0_1);
|
||||||
|
|
||||||
|
// Testing samplers
|
||||||
|
// set randomness seed
|
||||||
|
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
|
*seed = 1000; // xorshift can't start with 0
|
||||||
|
|
||||||
|
// Test double sampler
|
||||||
|
test_and_time_sampler_double("cdf_uniform_0_1", cdf_uniform_0_1, seed);
|
||||||
|
test_and_time_sampler_double("cdf_squared_0_1", cdf_squared_0_1, seed);
|
||||||
|
test_and_time_sampler_double("cdf_normal_0_1", cdf_normal_0_1, seed);
|
||||||
|
|
||||||
|
// Get some normal samples using a previous approach
|
||||||
|
printf("\nGetting some samples from sample_unit_normal\n");
|
||||||
|
|
||||||
|
clock_t begin_2 = clock();
|
||||||
|
double* normal_samples = malloc(NUM_SAMPLES * sizeof(double));
|
||||||
|
for (int i = 0; i < NUM_SAMPLES; i++) {
|
||||||
|
normal_samples[i] = sample_unit_normal(seed);
|
||||||
|
// printf("%f\n", normal_sample);
|
||||||
|
}
|
||||||
|
|
||||||
|
clock_t end_2 = clock();
|
||||||
|
double time_spent_2 = (double)(end_2 - begin_2) / CLOCKS_PER_SEC;
|
||||||
|
printf("Time spent: %f\n", time_spent_2);
|
||||||
|
|
||||||
|
free(seed);
|
||||||
|
return 0;
|
||||||
|
}
|
BIN
squiggle.c/squiggle_c/examples/more/02_sample_from_cdf_beta/example
Executable file
BIN
squiggle.c/squiggle_c/examples/more/02_sample_from_cdf_beta/example
Executable file
Binary file not shown.
|
@ -0,0 +1,169 @@
|
||||||
|
#include "../../../squiggle.h"
|
||||||
|
#include "../../../squiggle_more.h"
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <time.h>
|
||||||
|
|
||||||
|
#define NUM_SAMPLES 10000
|
||||||
|
#define STOP_BETA 1.0e-8
|
||||||
|
#define TINY_BETA 1.0e-30
|
||||||
|
|
||||||
|
// Incomplete beta function
|
||||||
|
struct box incbeta(double a, double b, double x)
|
||||||
|
{
|
||||||
|
// Descended from <https://github.com/codeplea/incbeta/blob/master/incbeta.c>,
|
||||||
|
// <https://codeplea.com/incomplete-beta-function-c>
|
||||||
|
// but modified to return a box struct and doubles instead of doubles.
|
||||||
|
// [ ] to do: add attribution in README
|
||||||
|
// Original code under this license:
|
||||||
|
/*
|
||||||
|
* zlib License
|
||||||
|
*
|
||||||
|
* Regularized Incomplete Beta Function
|
||||||
|
*
|
||||||
|
* Copyright (c) 2016, 2017 Lewis Van Winkle
|
||||||
|
* http://CodePlea.com
|
||||||
|
*
|
||||||
|
* This software is provided 'as-is', without any express or implied
|
||||||
|
* warranty. In no event will the authors be held liable for any damages
|
||||||
|
* arising from the use of this software.
|
||||||
|
*
|
||||||
|
* Permission is granted to anyone to use this software for any purpose,
|
||||||
|
* including commercial applications, and to alter it and redistribute it
|
||||||
|
* freely, subject to the following restrictions:
|
||||||
|
*
|
||||||
|
* 1. The origin of this software must not be misrepresented; you must not
|
||||||
|
* claim that you wrote the original software. If you use this software
|
||||||
|
* in a product, an acknowledgement in the product documentation would be
|
||||||
|
* appreciated but is not required.
|
||||||
|
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||||
|
* misrepresented as being the original software.
|
||||||
|
* 3. This notice may not be removed or altered from any source distribution.
|
||||||
|
*/
|
||||||
|
if (x < 0.0 || x > 1.0) {
|
||||||
|
return PROCESS_ERROR("x out of bounds [0, 1], in function incbeta");
|
||||||
|
}
|
||||||
|
|
||||||
|
/*The continued fraction converges nicely for x < (a+1)/(a+b+2)*/
|
||||||
|
if (x > (a + 1.0) / (a + b + 2.0)) {
|
||||||
|
struct box symmetric_incbeta = incbeta(b, a, 1.0 - x);
|
||||||
|
if (symmetric_incbeta.empty) {
|
||||||
|
return symmetric_incbeta; // propagate error
|
||||||
|
} else {
|
||||||
|
struct box result = {
|
||||||
|
.empty = 0,
|
||||||
|
.content = 1 - symmetric_incbeta.content
|
||||||
|
};
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*Find the first part before the continued fraction.*/
|
||||||
|
const double lbeta_ab = lgamma(a) + lgamma(b) - lgamma(a + b);
|
||||||
|
const double front = exp(log(x) * a + log(1.0 - x) * b - lbeta_ab) / a;
|
||||||
|
|
||||||
|
/*Use Lentz's algorithm to evaluate the continued fraction.*/
|
||||||
|
double f = 1.0, c = 1.0, d = 0.0;
|
||||||
|
|
||||||
|
int i, m;
|
||||||
|
for (i = 0; i <= 200; ++i) {
|
||||||
|
m = i / 2;
|
||||||
|
|
||||||
|
double numerator;
|
||||||
|
if (i == 0) {
|
||||||
|
numerator = 1.0; /*First numerator is 1.0.*/
|
||||||
|
} else if (i % 2 == 0) {
|
||||||
|
numerator = (m * (b - m) * x) / ((a + 2.0 * m - 1.0) * (a + 2.0 * m)); /*Even term.*/
|
||||||
|
} else {
|
||||||
|
numerator = -((a + m) * (a + b + m) * x) / ((a + 2.0 * m) * (a + 2.0 * m + 1)); /*Odd term.*/
|
||||||
|
}
|
||||||
|
|
||||||
|
/*Do an iteration of Lentz's algorithm.*/
|
||||||
|
d = 1.0 + numerator * d;
|
||||||
|
if (fabs(d) < TINY_BETA)
|
||||||
|
d = TINY_BETA;
|
||||||
|
d = 1.0 / d;
|
||||||
|
|
||||||
|
c = 1.0 + numerator / c;
|
||||||
|
if (fabs(c) < TINY_BETA)
|
||||||
|
c = TINY_BETA;
|
||||||
|
|
||||||
|
const double cd = c * d;
|
||||||
|
f *= cd;
|
||||||
|
|
||||||
|
/*Check for stop.*/
|
||||||
|
if (fabs(1.0 - cd) < STOP_BETA) {
|
||||||
|
struct box result = {
|
||||||
|
.empty = 0,
|
||||||
|
.content = front * (f - 1.0)
|
||||||
|
};
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return PROCESS_ERROR("More loops needed, did not converge, in function incbeta");
|
||||||
|
}
|
||||||
|
|
||||||
|
struct box cdf_beta(double x)
|
||||||
|
{
|
||||||
|
if (x < 0) {
|
||||||
|
struct box result = { .empty = 0, .content = 0 };
|
||||||
|
return result;
|
||||||
|
} else if (x > 1) {
|
||||||
|
struct box result = { .empty = 0, .content = 1 };
|
||||||
|
return result;
|
||||||
|
} else {
|
||||||
|
double successes = 1, failures = (2023 - 1945);
|
||||||
|
return incbeta(successes, failures, x);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Some testers
|
||||||
|
void test_inverse_cdf_box(char* cdf_name, struct box cdf_box(double))
|
||||||
|
{
|
||||||
|
struct box result = inverse_cdf_box(cdf_box, 0.5);
|
||||||
|
if (result.empty) {
|
||||||
|
printf("Inverse for %s not calculated\n", cdf_name);
|
||||||
|
exit(1);
|
||||||
|
} else {
|
||||||
|
printf("Inverse of %s at %f is: %f\n", cdf_name, 0.5, result.content);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_and_time_sampler_box(char* cdf_name, struct box cdf_box(double), uint64_t* seed)
|
||||||
|
{
|
||||||
|
printf("\nGetting some samples from %s:\n", cdf_name);
|
||||||
|
clock_t begin = clock();
|
||||||
|
for (int i = 0; i < NUM_SAMPLES; i++) {
|
||||||
|
struct box sample = sampler_cdf_box(cdf_box, seed);
|
||||||
|
if (sample.empty) {
|
||||||
|
printf("Error in sampler function for %s", cdf_name);
|
||||||
|
} else {
|
||||||
|
// printf("%f\n", sample.content);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
clock_t end = clock();
|
||||||
|
double time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
|
||||||
|
printf("Time spent: %f\n", time_spent);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
// Test inverse cdf box
|
||||||
|
test_inverse_cdf_box("cdf_beta", cdf_beta);
|
||||||
|
|
||||||
|
// Test box sampler
|
||||||
|
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
|
*seed = 1000; // xorshift can't start with 0
|
||||||
|
test_and_time_sampler_box("cdf_beta", cdf_beta, seed);
|
||||||
|
// Ok, this is slower than python!!
|
||||||
|
// Partly this is because I am using a more general algorithm,
|
||||||
|
// which applies to any cdf
|
||||||
|
// But I am also using absurdly precise convergence conditions.
|
||||||
|
// This could be optimized.
|
||||||
|
|
||||||
|
free(seed);
|
||||||
|
return 0;
|
||||||
|
}
|
BIN
squiggle.c/squiggle_c/examples/more/03_ci_beta/example
Executable file
BIN
squiggle.c/squiggle_c/examples/more/03_ci_beta/example
Executable file
Binary file not shown.
23
squiggle.c/squiggle_c/examples/more/03_ci_beta/example.c
Normal file
23
squiggle.c/squiggle_c/examples/more/03_ci_beta/example.c
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
#include "../../../squiggle.h"
|
||||||
|
#include "../../../squiggle_more.h"
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
// Estimate functions
|
||||||
|
double beta_1_2_sampler(uint64_t* seed)
|
||||||
|
{
|
||||||
|
return sample_beta(1, 2.0, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
// set randomness seed
|
||||||
|
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
|
*seed = 1000; // xorshift can't start with 0
|
||||||
|
|
||||||
|
ci beta_1_2_ci_90 = get_90_confidence_interval(beta_1_2_sampler, seed);
|
||||||
|
printf("90%% confidence interval of beta(1,2) is [%f, %f]\n", beta_1_2_ci_90.low, beta_1_2_ci_90.high);
|
||||||
|
|
||||||
|
free(seed);
|
||||||
|
}
|
BIN
squiggle.c/squiggle_c/examples/more/04_nuclear_war/example
Executable file
BIN
squiggle.c/squiggle_c/examples/more/04_nuclear_war/example
Executable file
Binary file not shown.
69
squiggle.c/squiggle_c/examples/more/04_nuclear_war/example.c
Normal file
69
squiggle.c/squiggle_c/examples/more/04_nuclear_war/example.c
Normal file
|
@ -0,0 +1,69 @@
|
||||||
|
#include "../../../squiggle.h"
|
||||||
|
#include "../../../squiggle_more.h"
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
double probability_of_dying_nuno(uint64_t* seed)
|
||||||
|
{
|
||||||
|
double first_year_russian_nuclear_weapons = 1953;
|
||||||
|
double current_year = 2022;
|
||||||
|
double laplace_probability_nuclear_exchange_year = sample_beta(1, current_year - first_year_russian_nuclear_weapons + 1, seed);
|
||||||
|
double laplace_probability_nuclear_exchange_month = 1 - pow(1 - laplace_probability_nuclear_exchange_year, (1.0 / 12.0));
|
||||||
|
|
||||||
|
double london_hit_conditional_on_russia_nuclear_weapon_usage = sample_beta(7.67, 69.65, seed);
|
||||||
|
// I.e., a beta distribution with a range of 0.05 to 0.16 into: https://nunosempere.com/blog/2023/03/15/fit-beta/
|
||||||
|
// 0.05 were my estimate and Samotsvety's estimate in March 2022, respectively:
|
||||||
|
// https://forum.effectivealtruism.org/posts/KRFXjCqqfGQAYirm5/samotsvety-nuclear-risk-forecasts-march-2022#Nu_o_Sempere
|
||||||
|
double informed_actor_not_able_to_escape = sample_beta(3.26212166586967, 3.26228162008564, seed);
|
||||||
|
// 0.2 to 0.8, i.e., 20% to 80%, again using the previous tool
|
||||||
|
double proportion_which_die_if_bomb_drops_in_london = sample_beta(10.00, 2.45, seed); // 60% to 95%
|
||||||
|
|
||||||
|
double probability_of_dying = laplace_probability_nuclear_exchange_month * london_hit_conditional_on_russia_nuclear_weapon_usage * informed_actor_not_able_to_escape * proportion_which_die_if_bomb_drops_in_london;
|
||||||
|
return probability_of_dying;
|
||||||
|
}
|
||||||
|
|
||||||
|
double probability_of_dying_eli(uint64_t* seed)
|
||||||
|
{
|
||||||
|
double russia_nato_nuclear_exchange_in_next_month = sample_beta(1.30, 1182.99, seed); // .0001 to .003
|
||||||
|
double london_hit_conditional = sample_beta(3.47, 8.97, seed); // 0.1 to 0.5
|
||||||
|
double informed_actors_not_able_to_escape = sample_beta(2.73, 5.67, seed); // .1 to .6
|
||||||
|
double proportion_which_die_if_bomb_drops_in_london = sample_beta(3.00, 1.46, seed); // 0.3 to 0.95;
|
||||||
|
|
||||||
|
double probability_of_dying = russia_nato_nuclear_exchange_in_next_month * london_hit_conditional * informed_actors_not_able_to_escape * proportion_which_die_if_bomb_drops_in_london;
|
||||||
|
return probability_of_dying;
|
||||||
|
}
|
||||||
|
|
||||||
|
double mixture(uint64_t* seed)
|
||||||
|
{
|
||||||
|
double (*samplers[])(uint64_t*) = { probability_of_dying_nuno, probability_of_dying_eli };
|
||||||
|
double weights[] = { 0.5, 0.5 };
|
||||||
|
return sample_mixture(samplers, weights, 2, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
// set randomness seed
|
||||||
|
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
|
*seed = 1000; // xorshift can't start with 0
|
||||||
|
|
||||||
|
int n = 1000 * 1000;
|
||||||
|
|
||||||
|
double* mixture_result = malloc(sizeof(double) * n);
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
mixture_result[i] = mixture(seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("mixture_result: [ ");
|
||||||
|
for (int i = 0; i < 9; i++) {
|
||||||
|
printf("%.6f, ", mixture_result[i]);
|
||||||
|
}
|
||||||
|
printf("... ]\n");
|
||||||
|
|
||||||
|
ci ci_90 = get_90_confidence_interval(mixture, seed);
|
||||||
|
printf("mean: %f\n", array_mean(mixture_result, n));
|
||||||
|
printf("90%% confidence interval: [%f, %f]\n", ci_90.low, ci_90.high);
|
||||||
|
|
||||||
|
free(seed);
|
||||||
|
}
|
BIN
squiggle.c/squiggle_c/examples/more/04_nuclear_war/scratchpad/example
Executable file
BIN
squiggle.c/squiggle_c/examples/more/04_nuclear_war/scratchpad/example
Executable file
Binary file not shown.
|
@ -0,0 +1,20 @@
|
||||||
|
#include "../../../squiggle.h"
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
// set randomness seed
|
||||||
|
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
|
*seed = 1000; // xorshift can't start with 0
|
||||||
|
double firstYearRussianNuclearWeapons = 1953;
|
||||||
|
double currentYear = 2023;
|
||||||
|
|
||||||
|
for(int i=0; i<10; i++){
|
||||||
|
double laplace_beta = sample_beta(currentYear-firstYearRussianNuclearWeapons + 1, 1, seed);
|
||||||
|
printf("%f\n", laplace_beta);
|
||||||
|
}
|
||||||
|
|
||||||
|
free(seed);
|
||||||
|
}
|
|
@ -0,0 +1,53 @@
|
||||||
|
# Interface:
|
||||||
|
# make
|
||||||
|
# make build
|
||||||
|
# make format
|
||||||
|
# make run
|
||||||
|
|
||||||
|
# Compiler
|
||||||
|
CC=gcc
|
||||||
|
# CC=tcc # <= faster compilation
|
||||||
|
|
||||||
|
# Main file
|
||||||
|
SRC=example.c ../../../squiggle.c
|
||||||
|
OUTPUT=example
|
||||||
|
|
||||||
|
## Dependencies
|
||||||
|
MATH=-lm
|
||||||
|
|
||||||
|
## Flags
|
||||||
|
DEBUG= #'-g'
|
||||||
|
STANDARD=-std=c99
|
||||||
|
WARNINGS=-Wall
|
||||||
|
OPTIMIZED=-O3 #-Ofast
|
||||||
|
# OPENMP=-fopenmp
|
||||||
|
|
||||||
|
## Formatter
|
||||||
|
STYLE_BLUEPRINT=webkit
|
||||||
|
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
|
||||||
|
|
||||||
|
## make build
|
||||||
|
build: $(SRC)
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)
|
||||||
|
|
||||||
|
format: $(SRC)
|
||||||
|
$(FORMATTER) $(SRC)
|
||||||
|
|
||||||
|
run: $(SRC) $(OUTPUT)
|
||||||
|
OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
|
||||||
|
|
||||||
|
time-linux:
|
||||||
|
@echo "Requires /bin/time, found on GNU/Linux systems" && echo
|
||||||
|
|
||||||
|
@echo "Running 100x and taking avg time $(OUTPUT)"
|
||||||
|
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
|
||||||
|
|
||||||
|
## Profiling
|
||||||
|
|
||||||
|
profile-linux:
|
||||||
|
echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
|
||||||
|
echo "Must be run as sudo"
|
||||||
|
$(CC) $(SRC) $(MATH) -o $(OUTPUT)
|
||||||
|
sudo perf record ./$(OUTPUT)
|
||||||
|
sudo perf report
|
||||||
|
rm perf.data
|
BIN
squiggle.c/squiggle_c/examples/more/05_burn_10kg_fat/example
Executable file
BIN
squiggle.c/squiggle_c/examples/more/05_burn_10kg_fat/example
Executable file
Binary file not shown.
|
@ -0,0 +1,49 @@
|
||||||
|
#include "../../../squiggle.h"
|
||||||
|
#include "../../../squiggle_more.h"
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
double sample_minutes_per_day_jumping_rope_needed_to_burn_10kg(uint64_t* seed)
|
||||||
|
{
|
||||||
|
double kcal_jumping_rope_minute = sample_to(15, 20, seed);
|
||||||
|
double kcal_jumping_rope_hour = kcal_jumping_rope_minute * 60;
|
||||||
|
|
||||||
|
double kcal_in_kg_of_fat = 7700;
|
||||||
|
double num_kg_of_fat_to_lose = 10;
|
||||||
|
|
||||||
|
double hours_jumping_rope_needed = kcal_in_kg_of_fat * num_kg_of_fat_to_lose / kcal_jumping_rope_hour;
|
||||||
|
|
||||||
|
double days_until_end_of_year = 152; // as of 2023-08-01
|
||||||
|
double hours_per_day = hours_jumping_rope_needed / days_until_end_of_year;
|
||||||
|
double minutes_per_day = hours_per_day * 60;
|
||||||
|
return minutes_per_day;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
// set randomness seed
|
||||||
|
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
|
*seed = 1000; // xorshift can't start with 0
|
||||||
|
|
||||||
|
int n = 1000 * 1000;
|
||||||
|
|
||||||
|
double* result = malloc(sizeof(double) * n);
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
result[i] = sample_minutes_per_day_jumping_rope_needed_to_burn_10kg(seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("## How many minutes per day do I have to jump rope to lose 10kg of fat by the end of the year?\n");
|
||||||
|
printf("Mean: %f\n", array_mean(result, n));
|
||||||
|
printf("A few samples: [ ");
|
||||||
|
for (int i = 0; i < 9; i++) {
|
||||||
|
printf("%.6f, ", result[i]);
|
||||||
|
}
|
||||||
|
printf("... ]\n");
|
||||||
|
|
||||||
|
ci ci_90 = get_90_confidence_interval(sample_minutes_per_day_jumping_rope_needed_to_burn_10kg, seed);
|
||||||
|
printf("90%% confidence interval: [%f, %f]\n", ci_90.low, ci_90.high);
|
||||||
|
|
||||||
|
free(seed);
|
||||||
|
}
|
BIN
squiggle.c/squiggle_c/examples/more/06_nuclear_recovery/example
Executable file
BIN
squiggle.c/squiggle_c/examples/more/06_nuclear_recovery/example
Executable file
Binary file not shown.
|
@ -0,0 +1,86 @@
|
||||||
|
#include "../../../squiggle.h"
|
||||||
|
#include "../../../squiggle_more.h"
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
double yearly_probability_nuclear_collapse(double year, uint64_t* seed)
|
||||||
|
{
|
||||||
|
double successes = 0;
|
||||||
|
double failures = (year - 1960);
|
||||||
|
return sample_laplace(successes, failures, seed);
|
||||||
|
// ^ can change to (successes + 1)/(trials + 2)
|
||||||
|
// to get a probability,
|
||||||
|
// rather than sampling from a distribution over probabilities.
|
||||||
|
}
|
||||||
|
double yearly_probability_nuclear_collapse_2023(uint64_t* seed)
|
||||||
|
{
|
||||||
|
return yearly_probability_nuclear_collapse(2023, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
double yearly_probability_nuclear_collapse_after_recovery(double year, double rebuilding_period_length_years, uint64_t* seed)
|
||||||
|
{
|
||||||
|
// assumption: nuclear
|
||||||
|
double successes = 1.0;
|
||||||
|
double failures = (year - rebuilding_period_length_years - 1960 - 1);
|
||||||
|
return sample_laplace(successes, failures, seed);
|
||||||
|
}
|
||||||
|
double yearly_probability_nuclear_collapse_after_recovery_example(uint64_t* seed)
|
||||||
|
{
|
||||||
|
double year = 2070;
|
||||||
|
double rebuilding_period_length_years = 30;
|
||||||
|
// So, there was a nuclear collapse in 2040,
|
||||||
|
// then a recovery period of 30 years
|
||||||
|
// and it's now 2070
|
||||||
|
return yearly_probability_nuclear_collapse_after_recovery(year, rebuilding_period_length_years, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
double yearly_probability_nuclear_collapse_after_recovery_antiinductive(uint64_t* seed)
|
||||||
|
{
|
||||||
|
return yearly_probability_nuclear_collapse(2023, seed) / 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
// set randomness seed
|
||||||
|
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
|
*seed = 1000; // xorshift can't start with 0
|
||||||
|
|
||||||
|
int num_samples = 1000000;
|
||||||
|
|
||||||
|
// Before a first nuclear collapse
|
||||||
|
printf("## Before the first nuclear collapse\n");
|
||||||
|
ci ci_90_2023 = get_90_confidence_interval(yearly_probability_nuclear_collapse_2023, seed);
|
||||||
|
printf("90%% confidence interval: [%f, %f]\n", ci_90_2023.low, ci_90_2023.high);
|
||||||
|
|
||||||
|
double* yearly_probability_nuclear_collapse_2023_samples = malloc(sizeof(double) * num_samples);
|
||||||
|
for (int i = 0; i < num_samples; i++) {
|
||||||
|
yearly_probability_nuclear_collapse_2023_samples[i] = yearly_probability_nuclear_collapse_2023(seed);
|
||||||
|
}
|
||||||
|
printf("mean: %f\n", array_mean(yearly_probability_nuclear_collapse_2023_samples, num_samples));
|
||||||
|
|
||||||
|
// After the first nuclear collapse
|
||||||
|
printf("\n## After the first nuclear collapse\n");
|
||||||
|
ci ci_90_2070 = get_90_confidence_interval(yearly_probability_nuclear_collapse_after_recovery_example, seed);
|
||||||
|
printf("90%% confidence interval: [%f, %f]\n", ci_90_2070.low, ci_90_2070.high);
|
||||||
|
|
||||||
|
double* yearly_probability_nuclear_collapse_after_recovery_samples = malloc(sizeof(double) * num_samples);
|
||||||
|
for (int i = 0; i < num_samples; i++) {
|
||||||
|
yearly_probability_nuclear_collapse_after_recovery_samples[i] = yearly_probability_nuclear_collapse_after_recovery_example(seed);
|
||||||
|
}
|
||||||
|
printf("mean: %f\n", array_mean(yearly_probability_nuclear_collapse_after_recovery_samples, num_samples));
|
||||||
|
|
||||||
|
// After the first nuclear collapse (antiinductive)
|
||||||
|
printf("\n## After the first nuclear collapse (antiinductive)\n");
|
||||||
|
ci ci_90_antiinductive = get_90_confidence_interval(yearly_probability_nuclear_collapse_after_recovery_antiinductive, seed);
|
||||||
|
printf("90%% confidence interval: [%f, %f]\n", ci_90_antiinductive.low, ci_90_antiinductive.high);
|
||||||
|
|
||||||
|
double* yearly_probability_nuclear_collapse_after_recovery_antiinductive_samples = malloc(sizeof(double) * num_samples);
|
||||||
|
for (int i = 0; i < num_samples; i++) {
|
||||||
|
yearly_probability_nuclear_collapse_after_recovery_antiinductive_samples[i] = yearly_probability_nuclear_collapse_after_recovery_antiinductive(seed);
|
||||||
|
}
|
||||||
|
printf("mean: %f\n", array_mean(yearly_probability_nuclear_collapse_after_recovery_antiinductive_samples, num_samples));
|
||||||
|
|
||||||
|
free(seed);
|
||||||
|
}
|
BIN
squiggle.c/squiggle_c/examples/more/07_algebra/example
Executable file
BIN
squiggle.c/squiggle_c/examples/more/07_algebra/example
Executable file
Binary file not shown.
27
squiggle.c/squiggle_c/examples/more/07_algebra/example.c
Normal file
27
squiggle.c/squiggle_c/examples/more/07_algebra/example.c
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
#include "../../../squiggle.h"
|
||||||
|
#include "../../../squiggle_more.h"
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
// set randomness seed
|
||||||
|
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
|
*seed = 1000; // xorshift can't start with 0
|
||||||
|
|
||||||
|
normal_params n1 = { .mean = 1.0, .std = 3.0 };
|
||||||
|
normal_params n2 = { .mean = 2.0, .std = 4.0 };
|
||||||
|
normal_params sn = algebra_sum_normals(n1, n2);
|
||||||
|
printf("The sum of Normal(%f, %f) and Normal(%f, %f) is Normal(%f, %f)\n",
|
||||||
|
n1.mean, n1.std, n2.mean, n2.std, sn.mean, sn.std);
|
||||||
|
|
||||||
|
lognormal_params ln1 = { .logmean = 1.0, .logstd = 3.0 };
|
||||||
|
lognormal_params ln2 = { .logmean = 2.0, .logstd = 4.0 };
|
||||||
|
lognormal_params sln = algebra_product_lognormals(ln1, ln2);
|
||||||
|
printf("The product of Lognormal(%f, %f) and Lognormal(%f, %f) is Lognormal(%f, %f)\n",
|
||||||
|
ln1.logmean, ln1.logstd, ln2.logmean, ln2.logstd, sln.logmean, sln.logstd);
|
||||||
|
|
||||||
|
free(seed);
|
||||||
|
}
|
BIN
squiggle.c/squiggle_c/examples/more/08_algebra_and_conversion/example
Executable file
BIN
squiggle.c/squiggle_c/examples/more/08_algebra_and_conversion/example
Executable file
Binary file not shown.
|
@ -0,0 +1,34 @@
|
||||||
|
#include "../../../squiggle.h"
|
||||||
|
#include "../../../squiggle_more.h"
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
// set randomness seed
|
||||||
|
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
|
*seed = 1000; // xorshift can't start with 0
|
||||||
|
|
||||||
|
// Convert to 90% confidence interval form and back
|
||||||
|
lognormal_params ln1 = { .logmean = 1.0, .logstd = 3.0 };
|
||||||
|
ci ln1_ci = convert_lognormal_params_to_ci(ln1);
|
||||||
|
printf("The 90%% confidence interval of Lognormal(%f, %f) is [%f, %f]\n",
|
||||||
|
ln1.logmean, ln1.logstd,
|
||||||
|
ln1_ci.low, ln1_ci.high);
|
||||||
|
lognormal_params ln1_params2 = convert_ci_to_lognormal_params(ln1_ci);
|
||||||
|
printf("The lognormal which has 90%% confidence interval [%f, %f] is Lognormal(%f, %f)\n",
|
||||||
|
ln1_ci.low, ln1_ci.high,
|
||||||
|
ln1_params2.logmean, ln1_params2.logstd);
|
||||||
|
|
||||||
|
lognormal_params ln2 = convert_ci_to_lognormal_params((ci) { .low = 1, .high = 10 });
|
||||||
|
lognormal_params ln3 = convert_ci_to_lognormal_params((ci) { .low = 5, .high = 50 });
|
||||||
|
|
||||||
|
lognormal_params sln = algebra_product_lognormals(ln2, ln3);
|
||||||
|
ci sln_ci = convert_lognormal_params_to_ci(sln);
|
||||||
|
|
||||||
|
printf("Result of some lognormal products: to(%f, %f)\n", sln_ci.low, sln_ci.high);
|
||||||
|
|
||||||
|
free(seed);
|
||||||
|
}
|
BIN
squiggle.c/squiggle_c/examples/more/09_ergonomic_algebra/example
Executable file
BIN
squiggle.c/squiggle_c/examples/more/09_ergonomic_algebra/example
Executable file
Binary file not shown.
|
@ -0,0 +1,27 @@
|
||||||
|
#include "../../../squiggle.h"
|
||||||
|
#include "../../../squiggle_more.h"
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#define ln lognormal_params
|
||||||
|
#define to(...) convert_ci_to_lognormal_params((ci)__VA_ARGS__)
|
||||||
|
#define from(...) convert_lognormal_params_to_ci((ln)__VA_ARGS__)
|
||||||
|
#define times(a, b) algebra_product_lognormals(a, b)
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
// set randomness seed
|
||||||
|
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
|
*seed = 1000; // xorshift can't start with 0
|
||||||
|
|
||||||
|
ln a = to({ .low = 1, .high = 10 });
|
||||||
|
ln b = to({ .low = 5, .high = 500 });
|
||||||
|
ln c = times(a, b);
|
||||||
|
|
||||||
|
printf("Result: to(%f, %f)\n", from(c).low, from(c).high);
|
||||||
|
printf("One sample from it is: %f\n", sample_lognormal(c.logmean, c.logstd, seed));
|
||||||
|
|
||||||
|
free(seed);
|
||||||
|
}
|
BIN
squiggle.c/squiggle_c/examples/more/10_twitter_thread_example/example
Executable file
BIN
squiggle.c/squiggle_c/examples/more/10_twitter_thread_example/example
Executable file
Binary file not shown.
|
@ -0,0 +1,48 @@
|
||||||
|
#include "../../../squiggle.h"
|
||||||
|
#include "../../../squiggle_more.h"
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
double sample_0(uint64_t* seed)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
double sample_1(uint64_t* seed)
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
double sample_normal_mean_1_std_2(uint64_t* seed)
|
||||||
|
{
|
||||||
|
return sample_normal(1, 2, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
double sample_1_to_3(uint64_t* seed)
|
||||||
|
{
|
||||||
|
return sample_to(1, 3, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
// set randomness seed
|
||||||
|
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
|
*seed = 1000; // xorshift can't start with 0
|
||||||
|
|
||||||
|
int n_dists = 4;
|
||||||
|
double weights[] = { 1, 2, 3, 4 };
|
||||||
|
double (*samplers[])(uint64_t*) = {
|
||||||
|
sample_0,
|
||||||
|
sample_1,
|
||||||
|
sample_normal_mean_1_std_2,
|
||||||
|
sample_1_to_3
|
||||||
|
};
|
||||||
|
|
||||||
|
int n_samples = 10;
|
||||||
|
for (int i = 0; i < n_samples; i++) {
|
||||||
|
printf("Sample #%d: %f\n", i, sample_mixture(samplers, weights, n_dists, seed));
|
||||||
|
}
|
||||||
|
|
||||||
|
free(seed);
|
||||||
|
}
|
BIN
squiggle.c/squiggle_c/examples/more/11_billion_lognormals_paralell/example
Executable file
BIN
squiggle.c/squiggle_c/examples/more/11_billion_lognormals_paralell/example
Executable file
Binary file not shown.
|
@ -0,0 +1,30 @@
|
||||||
|
#include "../../../squiggle.h"
|
||||||
|
#include "../../../squiggle_more.h"
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
// Estimate functions
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
// set randomness seed
|
||||||
|
// uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
|
// *seed = 1000; // xorshift can't start with 0
|
||||||
|
// ^ not necessary, because parallel_sampler takes care of the seed.
|
||||||
|
|
||||||
|
int n_samples = 1000 * 1000 * 1000;
|
||||||
|
int n_threads = 16;
|
||||||
|
double sampler(uint64_t* seed){
|
||||||
|
return sample_lognormal(0, 10, seed);
|
||||||
|
}
|
||||||
|
double* results = malloc(n_samples * sizeof(double));
|
||||||
|
|
||||||
|
parallel_sampler(sampler, results, n_threads, n_samples);
|
||||||
|
double avg = array_sum(results, n_samples)/n_samples;
|
||||||
|
printf("Average of 1B lognormal(0,10): %f", avg);
|
||||||
|
|
||||||
|
free(results);
|
||||||
|
|
||||||
|
// free(seed);
|
||||||
|
// ^ not necessary, because parallel_sampler takes care of the seed.
|
||||||
|
}
|
BIN
squiggle.c/squiggle_c/examples/more/12_time_to_botec_parallel/example
Executable file
BIN
squiggle.c/squiggle_c/examples/more/12_time_to_botec_parallel/example
Executable file
Binary file not shown.
|
@ -0,0 +1,29 @@
|
||||||
|
#include "../../../squiggle.h"
|
||||||
|
#include "../../../squiggle_more.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
double p_a = 0.8;
|
||||||
|
double p_b = 0.5;
|
||||||
|
double p_c = p_a * p_b;
|
||||||
|
|
||||||
|
double sample_0(uint64_t* seed){ return 0; }
|
||||||
|
double sample_1(uint64_t* seed) { return 1; }
|
||||||
|
double sample_few(uint64_t* seed) { return sample_to(1, 3, seed); }
|
||||||
|
double sample_many(uint64_t* seed) { return sample_to(2, 10, seed); }
|
||||||
|
|
||||||
|
int n_dists = 4;
|
||||||
|
double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
|
||||||
|
double (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many };
|
||||||
|
double sampler_result(uint64_t* seed) {
|
||||||
|
return sample_mixture(samplers, weights, n_dists, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
int n_samples = 1000 * 1000, n_threads = 16;
|
||||||
|
double* results = malloc(n_samples * sizeof(double));
|
||||||
|
parallel_sampler(sampler_result, results, n_threads, n_samples);
|
||||||
|
printf("Avg: %f\n", array_sum(results, n_samples)/n_samples);
|
||||||
|
free(results);
|
||||||
|
}
|
108
squiggle.c/squiggle_c/examples/more/makefile
Normal file
108
squiggle.c/squiggle_c/examples/more/makefile
Normal file
|
@ -0,0 +1,108 @@
|
||||||
|
# Interface:
|
||||||
|
# make all
|
||||||
|
# make format-all
|
||||||
|
# make run-all
|
||||||
|
# make one DIR=06_nuclear_recovery
|
||||||
|
# make format-one DIR=06_nuclear_recovery
|
||||||
|
# make run-one DIR=06_nuclear_recovery
|
||||||
|
# make time-linux-one DIR=06_nuclear_recovery
|
||||||
|
# make profile-one DIR=06_nuclear_recovery
|
||||||
|
|
||||||
|
# Compiler
|
||||||
|
CC=gcc
|
||||||
|
# CC=tcc # <= faster compilation
|
||||||
|
|
||||||
|
# Main file
|
||||||
|
SRC=example.c
|
||||||
|
OUTPUT=example
|
||||||
|
|
||||||
|
## Dependencies
|
||||||
|
SQUIGGLE=../../squiggle.c
|
||||||
|
SQUIGGLE_MORE=../../squiggle_more.c
|
||||||
|
MATH=-lm
|
||||||
|
OPENMP=-fopenmp
|
||||||
|
DEPS=$(SQUIGGLE) $(SQUIGGLE_MORE) $(MATH) $(OPENMP)
|
||||||
|
|
||||||
|
## Flags
|
||||||
|
DEBUG= #'-g'
|
||||||
|
STANDARD=-std=c99
|
||||||
|
WARNINGS=-Wall
|
||||||
|
OPTIMIZED=-O3 #-Ofast
|
||||||
|
|
||||||
|
## Formatter
|
||||||
|
STYLE_BLUEPRINT=webkit
|
||||||
|
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
|
||||||
|
|
||||||
|
## make all
|
||||||
|
all:
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) 00_example_template/$(SRC) $(DEPS) -o 00_example_template/$(OUTPUT)
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) 01_sample_from_cdf/$(SRC) $(DEPS) -o 01_sample_from_cdf/$(OUTPUT)
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) 02_sample_from_cdf_beta/$(SRC) $(DEPS) -o 02_sample_from_cdf_beta/$(OUTPUT)
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) 03_ci_beta/$(SRC) $(DEPS) -o 03_ci_beta/$(OUTPUT)
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) 04_nuclear_war/$(SRC) $(DEPS) -o 04_nuclear_war/$(OUTPUT)
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) 05_burn_10kg_fat/$(SRC) $(DEPS) -o 05_burn_10kg_fat/$(OUTPUT)
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) 06_nuclear_recovery/$(SRC) $(DEPS) -o 06_nuclear_recovery/$(OUTPUT)
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) 07_algebra/$(SRC) $(DEPS) -o 07_algebra/$(OUTPUT)
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) 08_algebra_and_conversion/$(SRC) $(DEPS) -o 08_algebra_and_conversion/$(OUTPUT)
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) 09_ergonomic_algebra/$(SRC) $(DEPS) -o 09_ergonomic_algebra/$(OUTPUT)
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) 10_twitter_thread_example/$(SRC) $(DEPS) -o 10_twitter_thread_example/$(OUTPUT)
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) 11_billion_lognormals_paralell/$(SRC) $(DEPS) -o 11_billion_lognormals_paralell/$(OUTPUT)
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) 12_time_to_botec_parallel/$(SRC) $(DEPS) -o 12_time_to_botec_parallel/$(OUTPUT)
|
||||||
|
|
||||||
|
format-all:
|
||||||
|
$(FORMATTER) 00_example_template/$(SRC)
|
||||||
|
$(FORMATTER) 01_sample_from_cdf/$(SRC)
|
||||||
|
$(FORMATTER) 02_sample_from_cdf_beta/$(SRC)
|
||||||
|
$(FORMATTER) 03_ci_beta/$(SRC)
|
||||||
|
$(FORMATTER) 04_nuclear_war/$(SRC)
|
||||||
|
$(FORMATTER) 05_burn_10kg_fat/$(SRC)
|
||||||
|
$(FORMATTER) 06_nuclear_recovery/$(SRC)
|
||||||
|
$(FORMATTER) 07_algebra/$(SRC)
|
||||||
|
$(FORMATTER) 08_algebra_and_conversion/$(SRC)
|
||||||
|
$(FORMATTER) 09_ergonomic_algebra/$(SRC)
|
||||||
|
$(FORMATTER) 10_twitter_thread_example/$(SRC)
|
||||||
|
$(FORMATTER) 11_billion_lognormals_paralell/$(SRC)
|
||||||
|
$(FORMATTER) 12_time_to_botec_parallel/$(SRC)
|
||||||
|
|
||||||
|
run-all:
|
||||||
|
00_example_template/$(OUTPUT)
|
||||||
|
01_sample_from_cdf/$(OUTPUT)
|
||||||
|
02_sample_from_cdf_beta/$(OUTPUT)
|
||||||
|
03_ci_beta/$(OUTPUT)
|
||||||
|
04_nuclear_war/$(OUTPUT)
|
||||||
|
05_burn_10kg_fat/$(OUTPUT)
|
||||||
|
06_nuclear_recovery/$(OUTPUT)
|
||||||
|
07_algebra/$(OUTPUT)
|
||||||
|
08_algebra_and_conversion/$(OUTPUT)
|
||||||
|
09_ergonomic_algebra/$(OUTPUT)
|
||||||
|
10_twitter_thread_example/$(OUTPUT)
|
||||||
|
11_billion_lognormals_paralell/$(OUTPUT)
|
||||||
|
12_time_to_botec_parallel/$(OUTPUT)
|
||||||
|
|
||||||
|
## make one DIR=06_nuclear_recovery
|
||||||
|
one: $(DIR)/$(SRC)
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) $(DIR)/$(SRC) $(DEPS) -o $(DIR)/$(OUTPUT)
|
||||||
|
|
||||||
|
## make format-one DIR=06_nuclear_recovery
|
||||||
|
format-one: $(DIR)/$(SRC)
|
||||||
|
$(FORMATTER) $(DIR)/$(SRC)
|
||||||
|
|
||||||
|
## make run-one DIR=06_nuclear_recovery
|
||||||
|
run-one: $(DIR)/$(OUTPUT)
|
||||||
|
$(DIR)/$(OUTPUT) && echo
|
||||||
|
|
||||||
|
## make time-linux-one DIR=06_nuclear_recovery
|
||||||
|
time-linux-one: $(DIR)/$(OUTPUT)
|
||||||
|
@echo "Requires /bin/time, found on GNU/Linux systems" && echo
|
||||||
|
@echo "Running 100x and taking avg time $(DIR)/$(OUTPUT)"
|
||||||
|
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do $(DIR)/$(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time: |" | sed 's|$$|ms|' && echo
|
||||||
|
|
||||||
|
## e.g., make profile-linux-one DIR=06_nuclear_recovery
|
||||||
|
profile-linux-one:
|
||||||
|
echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
|
||||||
|
echo "Must be run as sudo"
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) $(DIR)/$(SRC) $(DEPS) -o $(DIR)/$(OUTPUT)
|
||||||
|
# $(CC) $(SRC) $(DEPS) -o $(OUTPUT)
|
||||||
|
sudo perf record $(DIR)/$(OUTPUT)
|
||||||
|
sudo perf report
|
||||||
|
rm perf.data
|
21
squiggle.c/squiggle_c/makefile
Normal file
21
squiggle.c/squiggle_c/makefile
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
MAKEFLAGS += --no-print-directory
|
||||||
|
|
||||||
|
## Formatter
|
||||||
|
STYLE_BLUEPRINT=webkit
|
||||||
|
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
|
||||||
|
|
||||||
|
build-examples:
|
||||||
|
cd examples/core && make all
|
||||||
|
cd examples/more && make all
|
||||||
|
|
||||||
|
format-examples:
|
||||||
|
cd examples/core && make format-all
|
||||||
|
cd examples/more && make format-all
|
||||||
|
|
||||||
|
format: squiggle.c squiggle.h
|
||||||
|
$(FORMATTER) squiggle.c
|
||||||
|
$(FORMATTER) squiggle.h
|
||||||
|
|
||||||
|
lint:
|
||||||
|
clang-tidy squiggle.c -- -lm
|
||||||
|
clang-tidy extra.c -- -lm
|
BIN
squiggle.c/squiggle_c/references/marsaglia-gamma.pdf
Normal file
BIN
squiggle.c/squiggle_c/references/marsaglia-gamma.pdf
Normal file
Binary file not shown.
27
squiggle.c/squiggle_c/scratchpad/core.c
Normal file
27
squiggle.c/squiggle_c/scratchpad/core.c
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
|
||||||
|
uint64_t xorshift64(uint64_t* seed)
|
||||||
|
{
|
||||||
|
// Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
|
||||||
|
// <https://en.wikipedia.org/wiki/Xorshift>
|
||||||
|
uint64_t x = *seed;
|
||||||
|
x ^= x << 13;
|
||||||
|
x ^= x >> 7;
|
||||||
|
x ^= x << 17;
|
||||||
|
return *seed = x;
|
||||||
|
}
|
||||||
|
|
||||||
|
double sample_unit_uniform(uint64_t* seed)
|
||||||
|
{
|
||||||
|
// samples uniform from [0,1] interval.
|
||||||
|
return ((double)xorshift64(seed)) / ((double)UINT64_MAX);
|
||||||
|
}
|
||||||
|
|
||||||
|
double sample_unit_normal(uint64_t* seed)
|
||||||
|
{
|
||||||
|
// // See: <https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform>
|
||||||
|
double u1 = sample_unit_uniform(seed);
|
||||||
|
double u2 = sample_unit_uniform(seed);
|
||||||
|
double z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2);
|
||||||
|
return z;
|
||||||
|
}
|
||||||
|
|
56
squiggle.c/squiggle_c/scratchpad/makefile
Normal file
56
squiggle.c/squiggle_c/scratchpad/makefile
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
# Interface:
|
||||||
|
# make
|
||||||
|
# make build
|
||||||
|
# make format
|
||||||
|
# make run
|
||||||
|
|
||||||
|
# Compiler
|
||||||
|
CC=gcc
|
||||||
|
# CC=tcc # <= faster compilation
|
||||||
|
|
||||||
|
# Main file
|
||||||
|
SRC=scratchpad.c ../squiggle.c
|
||||||
|
OUTPUT=scratchpad
|
||||||
|
|
||||||
|
## Dependencies
|
||||||
|
MATH=-lm
|
||||||
|
|
||||||
|
## Flags
|
||||||
|
DEBUG= #'-g'
|
||||||
|
STANDARD=-std=c99
|
||||||
|
WARNINGS=-Wall
|
||||||
|
OPTIMIZED=-O3 #-Ofast
|
||||||
|
# OPENMP=-fopenmp
|
||||||
|
|
||||||
|
## Formatter
|
||||||
|
STYLE_BLUEPRINT=webkit
|
||||||
|
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
|
||||||
|
|
||||||
|
## make build
|
||||||
|
build: $(SRC)
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)
|
||||||
|
|
||||||
|
format: $(SRC)
|
||||||
|
$(FORMATTER) $(SRC)
|
||||||
|
|
||||||
|
run: $(SRC) $(OUTPUT)
|
||||||
|
./$(OUTPUT)
|
||||||
|
|
||||||
|
verify: $(SRC) $(OUTPUT)
|
||||||
|
./$(OUTPUT) | grep "NOT passed" -A 2 --group-separator='' || true
|
||||||
|
|
||||||
|
time-linux:
|
||||||
|
@echo "Requires /bin/time, found on GNU/Linux systems" && echo
|
||||||
|
|
||||||
|
@echo "Running 100x and taking avg time $(OUTPUT)"
|
||||||
|
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
|
||||||
|
|
||||||
|
## Profiling
|
||||||
|
|
||||||
|
profile-linux:
|
||||||
|
echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
|
||||||
|
echo "Must be run as sudo"
|
||||||
|
$(CC) $(SRC) $(MATH) -o $(OUTPUT)
|
||||||
|
sudo perf record ./$(OUTPUT)
|
||||||
|
sudo perf report
|
||||||
|
rm perf.data
|
7
squiggle.c/squiggle_c/scratchpad/plotting/.Rhistory
Normal file
7
squiggle.c/squiggle_c/scratchpad/plotting/.Rhistory
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
library(ggplot2)
|
||||||
|
data <- read.csv("samples.txt", header = FALSE)
|
||||||
|
data <- as.data.frame(data)
|
||||||
|
ggplot(data = data, aes(x = V1)) +
|
||||||
|
geom_bar()
|
||||||
|
ggplot(data = data, aes(x = V1)) +
|
||||||
|
geom_freqpoly()
|
3
squiggle.c/squiggle_c/scratchpad/plotting/makefile
Normal file
3
squiggle.c/squiggle_c/scratchpad/plotting/makefile
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
build:
|
||||||
|
|
||||||
|
format:
|
12
squiggle.c/squiggle_c/scratchpad/plotting/notes.md
Normal file
12
squiggle.c/squiggle_c/scratchpad/plotting/notes.md
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
options:
|
||||||
|
- use gnuplot
|
||||||
|
- requires aggregation
|
||||||
|
- do aggregation in its own gnuplot language
|
||||||
|
- use something like awk
|
||||||
|
- use some other language, like python or R, specifically to do the aggregation
|
||||||
|
- do the aggregation in C
|
||||||
|
- use ggplot
|
||||||
|
- requires setting up bins correctly
|
||||||
|
- also can't pipe to an r script directly, sadly enough.
|
||||||
|
- use python for plotting?
|
||||||
|
- just present the confidence intervals in C?
|
7
squiggle.c/squiggle_c/scratchpad/plotting/r/plot.R
Normal file
7
squiggle.c/squiggle_c/scratchpad/plotting/r/plot.R
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
library(ggplot2)
|
||||||
|
|
||||||
|
data <- read.csv("samples.txt", header = FALSE)
|
||||||
|
data <- as.data.frame(data)
|
||||||
|
|
||||||
|
ggplot(data = data, aes(x = V1)) +
|
||||||
|
geom_freqpoly()
|
1000000
squiggle.c/squiggle_c/scratchpad/plotting/r/samples.txt
Normal file
1000000
squiggle.c/squiggle_c/scratchpad/plotting/r/samples.txt
Normal file
File diff suppressed because it is too large
Load Diff
BIN
squiggle.c/squiggle_c/scratchpad/plotting/src/example
Executable file
BIN
squiggle.c/squiggle_c/scratchpad/plotting/src/example
Executable file
Binary file not shown.
50
squiggle.c/squiggle_c/scratchpad/plotting/src/example.c
Normal file
50
squiggle.c/squiggle_c/scratchpad/plotting/src/example.c
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
#include "../../squiggle.h"
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
// Estimate functions
|
||||||
|
double sample_0(uint64_t* seed)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
double sample_1(uint64_t* seed)
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
double sample_few(uint64_t* seed)
|
||||||
|
{
|
||||||
|
return sample_to(1, 3, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
double sample_many(uint64_t* seed)
|
||||||
|
{
|
||||||
|
return sample_to(2, 10, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
// set randomness seed
|
||||||
|
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
|
*seed = 1000; // xorshift can't start with 0
|
||||||
|
|
||||||
|
double p_a = 0.8;
|
||||||
|
double p_b = 0.5;
|
||||||
|
double p_c = p_a * p_b;
|
||||||
|
|
||||||
|
int n_dists = 4;
|
||||||
|
double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
|
||||||
|
double (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many };
|
||||||
|
|
||||||
|
int n_samples = 1000000;
|
||||||
|
double* result_many = (double*)malloc(n_samples * sizeof(double));
|
||||||
|
for (int i = 0; i < n_samples; i++) {
|
||||||
|
result_many[i] = sample_mixture(samplers, weights, n_dists, seed);
|
||||||
|
printf("%f\n", result_many[i]);
|
||||||
|
}
|
||||||
|
// printf("Mean: %f\n", array_mean(result_many, n_samples));
|
||||||
|
|
||||||
|
free(seed);
|
||||||
|
}
|
53
squiggle.c/squiggle_c/scratchpad/plotting/src/makefile
Normal file
53
squiggle.c/squiggle_c/scratchpad/plotting/src/makefile
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
# Interface:
|
||||||
|
# make
|
||||||
|
# make build
|
||||||
|
# make format
|
||||||
|
# make run
|
||||||
|
|
||||||
|
# Compiler
|
||||||
|
CC=gcc
|
||||||
|
# CC=tcc # <= faster compilation
|
||||||
|
|
||||||
|
# Main file
|
||||||
|
SRC=example.c ../../squiggle.c
|
||||||
|
OUTPUT=example
|
||||||
|
|
||||||
|
## Dependencies
|
||||||
|
MATH=-lm
|
||||||
|
|
||||||
|
## Flags
|
||||||
|
DEBUG= #'-g'
|
||||||
|
STANDARD=-std=c99
|
||||||
|
WARNINGS=-Wall
|
||||||
|
OPTIMIZED=-O3 #-Ofast
|
||||||
|
# OPENMP=-fopenmp
|
||||||
|
|
||||||
|
## Formatter
|
||||||
|
STYLE_BLUEPRINT=webkit
|
||||||
|
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
|
||||||
|
|
||||||
|
## make build
|
||||||
|
build: $(SRC)
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)
|
||||||
|
|
||||||
|
format: $(SRC)
|
||||||
|
$(FORMATTER) $(SRC)
|
||||||
|
|
||||||
|
run: $(SRC) $(OUTPUT)
|
||||||
|
./$(OUTPUT) && echo
|
||||||
|
|
||||||
|
time-linux:
|
||||||
|
@echo "Requires /bin/time, found on GNU/Linux systems" && echo
|
||||||
|
|
||||||
|
@echo "Running 100x and taking avg time $(OUTPUT)"
|
||||||
|
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do ./$(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
|
||||||
|
|
||||||
|
## Profiling
|
||||||
|
|
||||||
|
profile-linux:
|
||||||
|
echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
|
||||||
|
echo "Must be run as sudo"
|
||||||
|
$(CC) $(SRC) $(MATH) -o $(OUTPUT)
|
||||||
|
sudo perf record ./$(OUTPUT)
|
||||||
|
sudo perf report
|
||||||
|
rm perf.data
|
BIN
squiggle.c/squiggle_c/scratchpad/scratchpad
Executable file
BIN
squiggle.c/squiggle_c/scratchpad/scratchpad
Executable file
Binary file not shown.
22
squiggle.c/squiggle_c/scratchpad/scratchpad.c
Normal file
22
squiggle.c/squiggle_c/scratchpad/scratchpad.c
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
#include "../squiggle.h"
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
// set randomness seed
|
||||||
|
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
|
*seed = 1000; // xorshift can't start with a seed of 0
|
||||||
|
/*
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
double draw = sample_unit_uniform(seed);
|
||||||
|
printf("%f\n", draw);
|
||||||
|
|
||||||
|
}*/
|
||||||
|
// Test division
|
||||||
|
printf("\n%d\n", 10 % 3);
|
||||||
|
|
||||||
|
free(seed);
|
||||||
|
}
|
225
squiggle.c/squiggle_c/squiggle.c
Normal file
225
squiggle.c/squiggle_c/squiggle.c
Normal file
|
@ -0,0 +1,225 @@
|
||||||
|
#include <limits.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
// math constants
|
||||||
|
#define PI 3.14159265358979323846 // M_PI in gcc gnu99
|
||||||
|
#define NORMAL90CONFIDENCE 1.6448536269514727
|
||||||
|
|
||||||
|
// Pseudo Random number generator
|
||||||
|
uint64_t xorshift32(uint32_t* seed)
|
||||||
|
{
|
||||||
|
// Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
|
||||||
|
// See:
|
||||||
|
// <https://en.wikipedia.org/wiki/Xorshift>
|
||||||
|
// <https://stackoverflow.com/questions/53886131/how-does-xorshift32-works>,
|
||||||
|
// Also some drama:
|
||||||
|
// <https://www.pcg-random.org/posts/on-vignas-pcg-critique.html>,
|
||||||
|
// <https://prng.di.unimi.it/>
|
||||||
|
uint64_t x = *seed;
|
||||||
|
x ^= x << 13;
|
||||||
|
x ^= x >> 17;
|
||||||
|
x ^= x << 5;
|
||||||
|
return *seed = x;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t xorshift64(uint64_t* seed)
|
||||||
|
{
|
||||||
|
// same as above, but for generating doubles instead of floats
|
||||||
|
uint64_t x = *seed;
|
||||||
|
x ^= x << 13;
|
||||||
|
x ^= x >> 7;
|
||||||
|
x ^= x << 17;
|
||||||
|
return *seed = x;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Distribution & sampling functions
|
||||||
|
// Unit distributions
|
||||||
|
double sample_unit_uniform(uint64_t* seed)
|
||||||
|
{
|
||||||
|
// samples uniform from [0,1] interval.
|
||||||
|
return ((double)xorshift64(seed)) / ((double)UINT64_MAX);
|
||||||
|
}
|
||||||
|
|
||||||
|
double sample_unit_normal(uint64_t* seed)
|
||||||
|
{
|
||||||
|
// // See: <https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform>
|
||||||
|
double u1 = sample_unit_uniform(seed);
|
||||||
|
double u2 = sample_unit_uniform(seed);
|
||||||
|
double z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2);
|
||||||
|
return z;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Composite distributions
|
||||||
|
double sample_uniform(double start, double end, uint64_t* seed)
|
||||||
|
{
|
||||||
|
return sample_unit_uniform(seed) * (end - start) + start;
|
||||||
|
}
|
||||||
|
|
||||||
|
double sample_normal(double mean, double sigma, uint64_t* seed)
|
||||||
|
{
|
||||||
|
return (mean + sigma * sample_unit_normal(seed));
|
||||||
|
}
|
||||||
|
|
||||||
|
double sample_lognormal(double logmean, double logstd, uint64_t* seed)
|
||||||
|
{
|
||||||
|
return exp(sample_normal(logmean, logstd, seed));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline double sample_normal_from_90_confidence_interval(double low, double high, uint64_t* seed)
|
||||||
|
{
|
||||||
|
// Explanation of key idea:
|
||||||
|
// 1. We know that the 90% confidence interval of the unit normal is
|
||||||
|
// [-1.6448536269514722, 1.6448536269514722]
|
||||||
|
// see e.g.: https://stackoverflow.com/questions/20626994/how-to-calculate-the-inverse-of-the-normal-cumulative-distribution-function-in-p
|
||||||
|
// or https://www.wolframalpha.com/input?i=N%5BInverseCDF%28normal%280%2C1%29%2C+0.05%29%2C%7B%E2%88%9E%2C100%7D%5D
|
||||||
|
// 2. So if we take a unit normal and multiply it by
|
||||||
|
// L / 1.6448536269514722, its new 90% confidence interval will be
|
||||||
|
// [-L, L], i.e., length 2 * L
|
||||||
|
// 3. Instead, if we want to get a confidence interval of length L,
|
||||||
|
// we should multiply the unit normal by
|
||||||
|
// L / (2 * 1.6448536269514722)
|
||||||
|
// Meaning that its standard deviation should be multiplied by that amount
|
||||||
|
// see: https://en.wikipedia.org/wiki/Normal_distribution?lang=en#Operations_on_a_single_normal_variable
|
||||||
|
// 4. So we have learnt that Normal(0, L / (2 * 1.6448536269514722))
|
||||||
|
// has a 90% confidence interval of length L
|
||||||
|
// 5. If we want a 90% confidence interval from high to low,
|
||||||
|
// we can set mean = (high + low)/2; the midpoint, and L = high-low,
|
||||||
|
// Normal([high + low]/2, [high - low]/(2 * 1.6448536269514722))
|
||||||
|
double mean = (high + low) / 2.0;
|
||||||
|
double std = (high - low) / (2.0 * NORMAL90CONFIDENCE);
|
||||||
|
return sample_normal(mean, std, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
double sample_to(double low, double high, uint64_t* seed)
|
||||||
|
{
|
||||||
|
// Given a (positive) 90% confidence interval,
|
||||||
|
// returns a sample from a lognorma with a matching 90% c.i.
|
||||||
|
// Key idea: If we want a lognormal with 90% confidence interval [a, b]
|
||||||
|
// we need but get a normal with 90% confidence interval [log(a), log(b)].
|
||||||
|
// Then see code for sample_normal_from_90_confidence_interval
|
||||||
|
double loglow = logf(low);
|
||||||
|
double loghigh = logf(high);
|
||||||
|
return exp(sample_normal_from_90_confidence_interval(loglow, loghigh, seed));
|
||||||
|
}
|
||||||
|
|
||||||
|
double sample_gamma(double alpha, uint64_t* seed)
|
||||||
|
{
|
||||||
|
|
||||||
|
// A Simple Method for Generating Gamma Variables, Marsaglia and Wan Tsang, 2001
|
||||||
|
// https://dl.acm.org/doi/pdf/10.1145/358407.358414
|
||||||
|
// see also the references/ folder
|
||||||
|
// Note that the Wikipedia page for the gamma distribution includes a scaling parameter
|
||||||
|
// k or beta
|
||||||
|
// https://en.wikipedia.org/wiki/Gamma_distribution
|
||||||
|
// such that gamma_k(alpha, k) = k * gamma(alpha)
|
||||||
|
// or gamma_beta(alpha, beta) = gamma(alpha) / beta
|
||||||
|
// So far I have not needed to use this, and thus the second parameter is by default 1.
|
||||||
|
if (alpha >= 1) {
|
||||||
|
double d, c, x, v, u;
|
||||||
|
d = alpha - 1.0 / 3.0;
|
||||||
|
c = 1.0 / sqrt(9.0 * d);
|
||||||
|
while (1) {
|
||||||
|
|
||||||
|
do {
|
||||||
|
x = sample_unit_normal(seed);
|
||||||
|
v = 1.0 + c * x;
|
||||||
|
} while (v <= 0.0);
|
||||||
|
|
||||||
|
v = v * v * v;
|
||||||
|
u = sample_unit_uniform(seed);
|
||||||
|
if (u < 1.0 - 0.0331 * (x * x * x * x)) { // Condition 1
|
||||||
|
// the 0.0331 doesn't inspire much confidence
|
||||||
|
// however, this isn't the whole story
|
||||||
|
// by knowing that Condition 1 implies condition 2
|
||||||
|
// we realize that this is just a way of making the algorithm faster
|
||||||
|
// i.e., of not using the logarithms
|
||||||
|
return d * v;
|
||||||
|
}
|
||||||
|
if (log(u) < 0.5 * (x * x) + d * (1.0 - v + log(v))) { // Condition 2
|
||||||
|
return d * v;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return sample_gamma(1 + alpha, seed) * pow(sample_unit_uniform(seed), 1 / alpha);
|
||||||
|
// see note in p. 371 of https://dl.acm.org/doi/pdf/10.1145/358407.358414
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
double sample_beta(double a, double b, uint64_t* seed)
|
||||||
|
{
|
||||||
|
// See: https://en.wikipedia.org/wiki/Gamma_distribution#Related_distributions
|
||||||
|
double gamma_a = sample_gamma(a, seed);
|
||||||
|
double gamma_b = sample_gamma(b, seed);
|
||||||
|
return gamma_a / (gamma_a + gamma_b);
|
||||||
|
}
|
||||||
|
|
||||||
|
double sample_laplace(double successes, double failures, uint64_t* seed)
|
||||||
|
{
|
||||||
|
// see <https://en.wikipedia.org/wiki/Beta_distribution?lang=en#Rule_of_succession>
|
||||||
|
return sample_beta(successes + 1, failures + 1, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Array helpers
|
||||||
|
double array_sum(double* array, int length)
|
||||||
|
{
|
||||||
|
double sum = 0.0;
|
||||||
|
for (int i = 0; i < length; i++) {
|
||||||
|
sum += array[i];
|
||||||
|
}
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
void array_cumsum(double* array_to_sum, double* array_cumsummed, int length)
|
||||||
|
{
|
||||||
|
array_cumsummed[0] = array_to_sum[0];
|
||||||
|
for (int i = 1; i < length; i++) {
|
||||||
|
array_cumsummed[i] = array_cumsummed[i - 1] + array_to_sum[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
double array_mean(double* array, int length)
|
||||||
|
{
|
||||||
|
double sum = array_sum(array, length);
|
||||||
|
return sum / length;
|
||||||
|
}
|
||||||
|
|
||||||
|
double array_std(double* array, int length)
|
||||||
|
{
|
||||||
|
double mean = array_mean(array, length);
|
||||||
|
double std = 0.0;
|
||||||
|
for (int i = 0; i < length; i++) {
|
||||||
|
std += (array[i] - mean) * (array[i] - mean);
|
||||||
|
}
|
||||||
|
std = sqrt(std / length);
|
||||||
|
return std;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mixture function
|
||||||
|
double sample_mixture(double (*samplers[])(uint64_t*), double* weights, int n_dists, uint64_t* seed)
|
||||||
|
{
|
||||||
|
// Sample from samples with frequency proportional to their weights.
|
||||||
|
double sum_weights = array_sum(weights, n_dists);
|
||||||
|
double* cumsummed_normalized_weights = (double*)malloc(n_dists * sizeof(double));
|
||||||
|
cumsummed_normalized_weights[0] = weights[0] / sum_weights;
|
||||||
|
for (int i = 1; i < n_dists; i++) {
|
||||||
|
cumsummed_normalized_weights[i] = cumsummed_normalized_weights[i - 1] + weights[i] / sum_weights;
|
||||||
|
}
|
||||||
|
|
||||||
|
double result;
|
||||||
|
int result_set_flag = 0;
|
||||||
|
double p = sample_uniform(0, 1, seed);
|
||||||
|
for (int k = 0; k < n_dists; k++) {
|
||||||
|
if (p < cumsummed_normalized_weights[k]) {
|
||||||
|
result = samplers[k](seed);
|
||||||
|
result_set_flag = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (result_set_flag == 0)
|
||||||
|
result = samplers[n_dists - 1](seed);
|
||||||
|
|
||||||
|
free(cumsummed_normalized_weights);
|
||||||
|
return result;
|
||||||
|
}
|
33
squiggle.c/squiggle_c/squiggle.h
Normal file
33
squiggle.c/squiggle_c/squiggle.h
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
#ifndef SQUIGGLE_C_CORE
|
||||||
|
#define SQUIGGLE_C_CORE
|
||||||
|
|
||||||
|
// uint64_t header
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
// Pseudo Random number generator
|
||||||
|
uint64_t xorshift64(uint64_t* seed);
|
||||||
|
|
||||||
|
// Basic distribution sampling functions
|
||||||
|
double sample_unit_uniform(uint64_t* seed);
|
||||||
|
double sample_unit_normal(uint64_t* seed);
|
||||||
|
|
||||||
|
// Composite distribution sampling functions
|
||||||
|
double sample_uniform(double start, double end, uint64_t* seed);
|
||||||
|
double sample_normal(double mean, double sigma, uint64_t* seed);
|
||||||
|
double sample_lognormal(double logmean, double logsigma, uint64_t* seed);
|
||||||
|
double sample_to(double low, double high, uint64_t* seed);
|
||||||
|
|
||||||
|
double sample_gamma(double alpha, uint64_t* seed);
|
||||||
|
double sample_beta(double a, double b, uint64_t* seed);
|
||||||
|
double sample_laplace(double successes, double failures, uint64_t* seed);
|
||||||
|
|
||||||
|
// Array helpers
|
||||||
|
double array_sum(double* array, int length);
|
||||||
|
void array_cumsum(double* array_to_sum, double* array_cumsummed, int length);
|
||||||
|
double array_mean(double* array, int length);
|
||||||
|
double array_std(double* array, int length);
|
||||||
|
|
||||||
|
// Mixture function
|
||||||
|
double sample_mixture(double (*samplers[])(uint64_t*), double* weights, int n_dists, uint64_t* seed);
|
||||||
|
|
||||||
|
#endif
|
337
squiggle.c/squiggle_c/squiggle_more.c
Normal file
337
squiggle.c/squiggle_c/squiggle_more.c
Normal file
|
@ -0,0 +1,337 @@
|
||||||
|
#include <float.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <omp.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "squiggle.h"
|
||||||
|
|
||||||
|
/* Math constants */
|
||||||
|
#define PI 3.14159265358979323846 // M_PI in gcc gnu99
|
||||||
|
#define NORMAL90CONFIDENCE 1.6448536269514727
|
||||||
|
|
||||||
|
/* Some error niceties */
|
||||||
|
// These won't be used until later
|
||||||
|
#define MAX_ERROR_LENGTH 500
|
||||||
|
#define EXIT_ON_ERROR 0
|
||||||
|
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
|
||||||
|
|
||||||
|
/* Get confidence intervals, given a sampler */
|
||||||
|
// Not in core yet because I'm not sure how much I like the struct
|
||||||
|
// and the built-in 100k samples
|
||||||
|
// to do: add n to function parameters and document
|
||||||
|
typedef struct ci_t {
|
||||||
|
float low;
|
||||||
|
float high;
|
||||||
|
} ci;
|
||||||
|
int compare_doubles(const void* p, const void* q)
|
||||||
|
{
|
||||||
|
// https://wikiless.esmailelbob.xyz/wiki/Qsort?lang=en
|
||||||
|
double x = *(const double*)p;
|
||||||
|
double y = *(const double*)q;
|
||||||
|
|
||||||
|
/* Avoid returning x - y, which can cause undefined behaviour
|
||||||
|
because of signed integer overflow. */
|
||||||
|
if (x < y)
|
||||||
|
return -1; // Return -1 if you want ascending, 1 if you want descending order.
|
||||||
|
else if (x > y)
|
||||||
|
return 1; // Return 1 if you want ascending, -1 if you want descending order.
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
ci get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed)
|
||||||
|
{
|
||||||
|
int n = 100 * 1000;
|
||||||
|
double* samples_array = malloc(n * sizeof(double));
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
samples_array[i] = sampler(seed);
|
||||||
|
}
|
||||||
|
qsort(samples_array, n, sizeof(double), compare_doubles);
|
||||||
|
|
||||||
|
ci result = {
|
||||||
|
.low = samples_array[5000],
|
||||||
|
.high = samples_array[94999],
|
||||||
|
};
|
||||||
|
free(samples_array);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Scaffolding to handle errors */
|
||||||
|
// We are building towards sample from an arbitrary cdf
|
||||||
|
// and that operation might fail
|
||||||
|
// so we build some scaffolding here
|
||||||
|
struct box {
|
||||||
|
int empty;
|
||||||
|
double content;
|
||||||
|
char* error_msg;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct box process_error(const char* error_msg, int should_exit, char* file, int line)
|
||||||
|
{
|
||||||
|
if (should_exit) {
|
||||||
|
printf("@, in %s (%d)", file, line);
|
||||||
|
exit(1);
|
||||||
|
} else {
|
||||||
|
char error_msg[MAX_ERROR_LENGTH];
|
||||||
|
snprintf(error_msg, MAX_ERROR_LENGTH, "@, in %s (%d)", file, line); // NOLINT: We are being carefull here by considering MAX_ERROR_LENGTH explicitly.
|
||||||
|
struct box error = { .empty = 1, .error_msg = error_msg };
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Invert an arbitrary cdf at a point */
|
||||||
|
// Version #1:
|
||||||
|
// - input: (cdf: double => double, p)
|
||||||
|
// - output: Box(number|error)
|
||||||
|
struct box inverse_cdf_double(double cdf(double), double p)
|
||||||
|
{
|
||||||
|
// given a cdf: [-Inf, Inf] => [0,1]
|
||||||
|
// returns a box with either
|
||||||
|
// x such that cdf(x) = p
|
||||||
|
// or an error
|
||||||
|
// if EXIT_ON_ERROR is set to 1, it exits instead of providing an error
|
||||||
|
|
||||||
|
double low = -1.0;
|
||||||
|
double high = 1.0;
|
||||||
|
|
||||||
|
// 1. Make sure that cdf(low) < p < cdf(high)
|
||||||
|
int interval_found = 0;
|
||||||
|
while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) {
|
||||||
|
// ^ Using FLT_MIN and FLT_MAX is overkill
|
||||||
|
// but it's also the *correct* thing to do.
|
||||||
|
|
||||||
|
int low_condition = (cdf(low) < p);
|
||||||
|
int high_condition = (p < cdf(high));
|
||||||
|
if (low_condition && high_condition) {
|
||||||
|
interval_found = 1;
|
||||||
|
} else if (!low_condition) {
|
||||||
|
low = low * 2;
|
||||||
|
} else if (!high_condition) {
|
||||||
|
high = high * 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!interval_found) {
|
||||||
|
return PROCESS_ERROR("Interval containing the target value not found, in function inverse_cdf");
|
||||||
|
} else {
|
||||||
|
|
||||||
|
int convergence_condition = 0;
|
||||||
|
int count = 0;
|
||||||
|
while (!convergence_condition && (count < (INT_MAX / 2))) {
|
||||||
|
double mid = (high + low) / 2;
|
||||||
|
int mid_not_new = (mid == low) || (mid == high);
|
||||||
|
// double width = high - low;
|
||||||
|
// if ((width < 1e-8) || mid_not_new){
|
||||||
|
if (mid_not_new) {
|
||||||
|
convergence_condition = 1;
|
||||||
|
} else {
|
||||||
|
double mid_sign = cdf(mid) - p;
|
||||||
|
if (mid_sign < 0) {
|
||||||
|
low = mid;
|
||||||
|
} else if (mid_sign > 0) {
|
||||||
|
high = mid;
|
||||||
|
} else if (mid_sign == 0) {
|
||||||
|
low = mid;
|
||||||
|
high = mid;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (convergence_condition) {
|
||||||
|
struct box result = { .empty = 0, .content = low };
|
||||||
|
return result;
|
||||||
|
} else {
|
||||||
|
return PROCESS_ERROR("Search process did not converge, in function inverse_cdf");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Version #2:
|
||||||
|
// - input: (cdf: double => Box(number|error), p)
|
||||||
|
// - output: Box(number|error)
|
||||||
|
struct box inverse_cdf_box(struct box cdf_box(double), double p)
|
||||||
|
{
|
||||||
|
// given a cdf: [-Inf, Inf] => Box([0,1])
|
||||||
|
// returns a box with either
|
||||||
|
// x such that cdf(x) = p
|
||||||
|
// or an error
|
||||||
|
// if EXIT_ON_ERROR is set to 1, it exits instead of providing an error
|
||||||
|
|
||||||
|
double low = -1.0;
|
||||||
|
double high = 1.0;
|
||||||
|
|
||||||
|
// 1. Make sure that cdf(low) < p < cdf(high)
|
||||||
|
int interval_found = 0;
|
||||||
|
while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) {
|
||||||
|
// ^ Using FLT_MIN and FLT_MAX is overkill
|
||||||
|
// but it's also the *correct* thing to do.
|
||||||
|
struct box cdf_low = cdf_box(low);
|
||||||
|
if (cdf_low.empty) {
|
||||||
|
return PROCESS_ERROR(cdf_low.error_msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct box cdf_high = cdf_box(high);
|
||||||
|
if (cdf_high.empty) {
|
||||||
|
return PROCESS_ERROR(cdf_low.error_msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
int low_condition = (cdf_low.content < p);
|
||||||
|
int high_condition = (p < cdf_high.content);
|
||||||
|
if (low_condition && high_condition) {
|
||||||
|
interval_found = 1;
|
||||||
|
} else if (!low_condition) {
|
||||||
|
low = low * 2;
|
||||||
|
} else if (!high_condition) {
|
||||||
|
high = high * 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!interval_found) {
|
||||||
|
return PROCESS_ERROR("Interval containing the target value not found, in function inverse_cdf");
|
||||||
|
} else {
|
||||||
|
|
||||||
|
int convergence_condition = 0;
|
||||||
|
int count = 0;
|
||||||
|
while (!convergence_condition && (count < (INT_MAX / 2))) {
|
||||||
|
double mid = (high + low) / 2;
|
||||||
|
int mid_not_new = (mid == low) || (mid == high);
|
||||||
|
// double width = high - low;
|
||||||
|
if (mid_not_new) {
|
||||||
|
// if ((width < 1e-8) || mid_not_new){
|
||||||
|
convergence_condition = 1;
|
||||||
|
} else {
|
||||||
|
struct box cdf_mid = cdf_box(mid);
|
||||||
|
if (cdf_mid.empty) {
|
||||||
|
return PROCESS_ERROR(cdf_mid.error_msg);
|
||||||
|
}
|
||||||
|
double mid_sign = cdf_mid.content - p;
|
||||||
|
if (mid_sign < 0) {
|
||||||
|
low = mid;
|
||||||
|
} else if (mid_sign > 0) {
|
||||||
|
high = mid;
|
||||||
|
} else if (mid_sign == 0) {
|
||||||
|
low = mid;
|
||||||
|
high = mid;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (convergence_condition) {
|
||||||
|
struct box result = { .empty = 0, .content = low };
|
||||||
|
return result;
|
||||||
|
} else {
|
||||||
|
return PROCESS_ERROR("Search process did not converge, in function inverse_cdf");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Sample from an arbitrary cdf */
|
||||||
|
// Before: invert an arbitrary cdf at a point
|
||||||
|
// Now: from an arbitrary cdf, get a sample
|
||||||
|
struct box sampler_cdf_box(struct box cdf(double), uint64_t* seed)
|
||||||
|
{
|
||||||
|
double p = sample_unit_uniform(seed);
|
||||||
|
struct box result = inverse_cdf_box(cdf, p);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
struct box sampler_cdf_double(double cdf(double), uint64_t* seed)
|
||||||
|
{
|
||||||
|
double p = sample_unit_uniform(seed);
|
||||||
|
struct box result = inverse_cdf_double(cdf, p);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
double sampler_cdf_danger(struct box cdf(double), uint64_t* seed)
|
||||||
|
{
|
||||||
|
double p = sample_unit_uniform(seed);
|
||||||
|
struct box result = inverse_cdf_box(cdf, p);
|
||||||
|
if(result.empty){
|
||||||
|
exit(1);
|
||||||
|
}else{
|
||||||
|
return result.content;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Algebra manipulations */
|
||||||
|
// here I discover named structs,
|
||||||
|
// which mean that I don't have to be typing
|
||||||
|
// struct blah all the time.
|
||||||
|
typedef struct normal_params_t {
|
||||||
|
double mean;
|
||||||
|
double std;
|
||||||
|
} normal_params;
|
||||||
|
|
||||||
|
normal_params algebra_sum_normals(normal_params a, normal_params b)
|
||||||
|
{
|
||||||
|
normal_params result = {
|
||||||
|
.mean = a.mean + b.mean,
|
||||||
|
.std = sqrt((a.std * a.std) + (b.std * b.std)),
|
||||||
|
};
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef struct lognormal_params_t {
|
||||||
|
double logmean;
|
||||||
|
double logstd;
|
||||||
|
} lognormal_params;
|
||||||
|
|
||||||
|
lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params b)
|
||||||
|
{
|
||||||
|
lognormal_params result = {
|
||||||
|
.logmean = a.logmean + b.logmean,
|
||||||
|
.logstd = sqrt((a.logstd * a.logstd) + (b.logstd * b.logstd)),
|
||||||
|
};
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
lognormal_params convert_ci_to_lognormal_params(ci x)
|
||||||
|
{
|
||||||
|
double loghigh = logf(x.high);
|
||||||
|
double loglow = logf(x.low);
|
||||||
|
double logmean = (loghigh + loglow) / 2.0;
|
||||||
|
double logstd = (loghigh - loglow) / (2.0 * NORMAL90CONFIDENCE);
|
||||||
|
lognormal_params result = { .logmean = logmean, .logstd = logstd };
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
ci convert_lognormal_params_to_ci(lognormal_params y)
|
||||||
|
{
|
||||||
|
double h = y.logstd * NORMAL90CONFIDENCE;
|
||||||
|
double loghigh = y.logmean + h;
|
||||||
|
double loglow = y.logmean - h;
|
||||||
|
ci result = { .low = exp(loglow), .high = exp(loghigh) };
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parallel sampler */
|
||||||
|
void parallel_sampler(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples){
|
||||||
|
if((n_samples % n_threads) != 0){
|
||||||
|
fprintf(stderr, "Number of samples isn't divisible by number of threads, aborting\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
uint64_t** seeds = malloc(n_threads * sizeof(uint64_t*));
|
||||||
|
for (uint64_t i = 0; i < n_threads; i++) {
|
||||||
|
seeds[i] = malloc(sizeof(uint64_t));
|
||||||
|
*seeds[i] = i + 1; // xorshift can't start with 0
|
||||||
|
}
|
||||||
|
|
||||||
|
int i;
|
||||||
|
#pragma omp parallel private(i)
|
||||||
|
{
|
||||||
|
#pragma omp for
|
||||||
|
for (i = 0; i < n_threads; i++) {
|
||||||
|
int lower_bound = i * (n_samples / n_threads);
|
||||||
|
int upper_bound = ((i+1) * (n_samples / n_threads)) - 1;
|
||||||
|
// printf("Lower bound: %d, upper bound: %d\n", lower_bound, upper_bound);
|
||||||
|
for (int j = lower_bound; j < upper_bound; j++) {
|
||||||
|
results[j] = sampler(seeds[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint64_t i = 0; i < n_threads; i++) {
|
||||||
|
free(seeds[i]);
|
||||||
|
}
|
||||||
|
free(seeds);
|
||||||
|
}
|
51
squiggle.c/squiggle_c/squiggle_more.h
Normal file
51
squiggle.c/squiggle_c/squiggle_more.h
Normal file
|
@ -0,0 +1,51 @@
|
||||||
|
#ifndef SQUIGGLE_C_EXTRA
|
||||||
|
#define SQUIGGLE_C_EXTRA
|
||||||
|
|
||||||
|
// Box
|
||||||
|
struct box {
|
||||||
|
int empty;
|
||||||
|
double content;
|
||||||
|
char* error_msg;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Macros to handle errors
|
||||||
|
#define MAX_ERROR_LENGTH 500
|
||||||
|
#define EXIT_ON_ERROR 0
|
||||||
|
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
|
||||||
|
struct box process_error(const char* error_msg, int should_exit, char* file, int line);
|
||||||
|
|
||||||
|
// Inverse cdf
|
||||||
|
struct box inverse_cdf_double(double cdf(double), double p);
|
||||||
|
struct box inverse_cdf_box(struct box cdf_box(double), double p);
|
||||||
|
|
||||||
|
// Samplers from cdf
|
||||||
|
struct box sampler_cdf_double(double cdf(double), uint64_t* seed);
|
||||||
|
struct box sampler_cdf_box(struct box cdf(double), uint64_t* seed);
|
||||||
|
|
||||||
|
// Get 90% confidence interval
|
||||||
|
typedef struct ci_t {
|
||||||
|
float low;
|
||||||
|
float high;
|
||||||
|
} ci;
|
||||||
|
ci get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed);
|
||||||
|
|
||||||
|
// small algebra manipulations
|
||||||
|
|
||||||
|
typedef struct normal_params_t {
|
||||||
|
double mean;
|
||||||
|
double std;
|
||||||
|
} normal_params;
|
||||||
|
normal_params algebra_sum_normals(normal_params a, normal_params b);
|
||||||
|
|
||||||
|
typedef struct lognormal_params_t {
|
||||||
|
double logmean;
|
||||||
|
double logstd;
|
||||||
|
} lognormal_params;
|
||||||
|
lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params b);
|
||||||
|
|
||||||
|
lognormal_params convert_ci_to_lognormal_params(ci x);
|
||||||
|
ci convert_lognormal_params_to_ci(lognormal_params y);
|
||||||
|
|
||||||
|
void parallel_sampler(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples);
|
||||||
|
|
||||||
|
#endif
|
56
squiggle.c/squiggle_c/test/makefile
Normal file
56
squiggle.c/squiggle_c/test/makefile
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
# Interface:
|
||||||
|
# make
|
||||||
|
# make build
|
||||||
|
# make format
|
||||||
|
# make run
|
||||||
|
|
||||||
|
# Compiler
|
||||||
|
CC=gcc
|
||||||
|
# CC=tcc # <= faster compilation
|
||||||
|
|
||||||
|
# Main file
|
||||||
|
SRC=test.c ../squiggle.c
|
||||||
|
OUTPUT=test
|
||||||
|
|
||||||
|
## Dependencies
|
||||||
|
MATH=-lm
|
||||||
|
|
||||||
|
## Flags
|
||||||
|
DEBUG= #'-g'
|
||||||
|
STANDARD=-std=c99
|
||||||
|
WARNINGS=-Wall
|
||||||
|
OPTIMIZED=-O3 #-Ofast
|
||||||
|
# OPENMP=-fopenmp
|
||||||
|
|
||||||
|
## Formatter
|
||||||
|
STYLE_BLUEPRINT=webkit
|
||||||
|
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
|
||||||
|
|
||||||
|
## make build
|
||||||
|
build: $(SRC)
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)
|
||||||
|
|
||||||
|
format: $(SRC)
|
||||||
|
$(FORMATTER) $(SRC)
|
||||||
|
|
||||||
|
run: $(SRC) $(OUTPUT)
|
||||||
|
./$(OUTPUT)
|
||||||
|
|
||||||
|
verify: $(SRC) $(OUTPUT)
|
||||||
|
./$(OUTPUT) | grep "NOT passed" -A 2 --group-separator='' || true
|
||||||
|
|
||||||
|
time-linux:
|
||||||
|
@echo "Requires /bin/time, found on GNU/Linux systems" && echo
|
||||||
|
|
||||||
|
@echo "Running 100x and taking avg time $(OUTPUT)"
|
||||||
|
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
|
||||||
|
|
||||||
|
## Profiling
|
||||||
|
|
||||||
|
profile-linux:
|
||||||
|
echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
|
||||||
|
echo "Must be run as sudo"
|
||||||
|
$(CC) $(SRC) $(MATH) -o $(OUTPUT)
|
||||||
|
sudo perf record ./$(OUTPUT)
|
||||||
|
sudo perf report
|
||||||
|
rm perf.data
|
BIN
squiggle.c/squiggle_c/test/test
Executable file
BIN
squiggle.c/squiggle_c/test/test
Executable file
Binary file not shown.
328
squiggle.c/squiggle_c/test/test.c
Normal file
328
squiggle.c/squiggle_c/test/test.c
Normal file
|
@ -0,0 +1,328 @@
|
||||||
|
#include "../squiggle.h"
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#define TOLERANCE 5.0 / 1000.0
|
||||||
|
#define MAX_NAME_LENGTH 500
|
||||||
|
|
||||||
|
// Structs
|
||||||
|
|
||||||
|
struct array_expectations {
|
||||||
|
double* array;
|
||||||
|
int n;
|
||||||
|
char* name;
|
||||||
|
double expected_mean;
|
||||||
|
double expected_std;
|
||||||
|
double tolerance;
|
||||||
|
};
|
||||||
|
|
||||||
|
void test_array_expectations(struct array_expectations e)
|
||||||
|
{
|
||||||
|
double mean = array_mean(e.array, e.n);
|
||||||
|
double delta_mean = mean - e.expected_mean;
|
||||||
|
|
||||||
|
double std = array_std(e.array, e.n);
|
||||||
|
double delta_std = std - e.expected_std;
|
||||||
|
|
||||||
|
if ((fabs(delta_mean) / fabs(mean) > e.tolerance) && (fabs(delta_mean) > e.tolerance)) {
|
||||||
|
printf("[-] Mean test for %s NOT passed.\n", e.name);
|
||||||
|
printf("Mean of %s: %f, vs expected mean: %f\n", e.name, mean, e.expected_mean);
|
||||||
|
printf("delta: %f, relative delta: %f\n", delta_mean, delta_mean / fabs(mean));
|
||||||
|
} else {
|
||||||
|
printf("[x] Mean test for %s PASSED\n", e.name);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((fabs(delta_std) / fabs(std) > e.tolerance) && (fabs(delta_std) > e.tolerance)) {
|
||||||
|
printf("[-] Std test for %s NOT passed.\n", e.name);
|
||||||
|
printf("Std of %s: %f, vs expected std: %f\n", e.name, std, e.expected_std);
|
||||||
|
printf("delta: %f, relative delta: %f\n", delta_std, delta_std / fabs(std));
|
||||||
|
} else {
|
||||||
|
printf("[x] Std test for %s PASSED\n", e.name);
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test unit uniform
|
||||||
|
void test_unit_uniform(uint64_t* seed)
|
||||||
|
{
|
||||||
|
int n = 1000 * 1000;
|
||||||
|
double* unit_uniform_array = malloc(sizeof(double) * n);
|
||||||
|
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
unit_uniform_array[i] = sample_unit_uniform(seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct array_expectations expectations = {
|
||||||
|
.array = unit_uniform_array,
|
||||||
|
.n = n,
|
||||||
|
.name = "unit uniform",
|
||||||
|
.expected_mean = 0.5,
|
||||||
|
.expected_std = sqrt(1.0 / 12.0),
|
||||||
|
.tolerance = TOLERANCE,
|
||||||
|
};
|
||||||
|
|
||||||
|
test_array_expectations(expectations);
|
||||||
|
free(unit_uniform_array);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test uniforms
|
||||||
|
void test_uniform(double start, double end, uint64_t* seed)
|
||||||
|
{
|
||||||
|
int n = 1000 * 1000;
|
||||||
|
double* uniform_array = malloc(sizeof(double) * n);
|
||||||
|
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
uniform_array[i] = sample_uniform(start, end, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
char* name = malloc(MAX_NAME_LENGTH * sizeof(char));
|
||||||
|
snprintf(name, MAX_NAME_LENGTH, "[%f, %f] uniform", start, end);
|
||||||
|
struct array_expectations expectations = {
|
||||||
|
.array = uniform_array,
|
||||||
|
.n = n,
|
||||||
|
.name = name,
|
||||||
|
.expected_mean = (start + end) / 2,
|
||||||
|
.expected_std = sqrt(1.0 / 12.0) * fabs(end - start),
|
||||||
|
.tolerance = fabs(end - start) * TOLERANCE,
|
||||||
|
};
|
||||||
|
|
||||||
|
test_array_expectations(expectations);
|
||||||
|
free(name);
|
||||||
|
free(uniform_array);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test unit normal
|
||||||
|
void test_unit_normal(uint64_t* seed)
|
||||||
|
{
|
||||||
|
int n = 1000 * 1000;
|
||||||
|
double* unit_normal_array = malloc(sizeof(double) * n);
|
||||||
|
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
unit_normal_array[i] = sample_unit_normal(seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct array_expectations expectations = {
|
||||||
|
.array = unit_normal_array,
|
||||||
|
.n = n,
|
||||||
|
.name = "unit normal",
|
||||||
|
.expected_mean = 0,
|
||||||
|
.expected_std = 1,
|
||||||
|
.tolerance = TOLERANCE,
|
||||||
|
};
|
||||||
|
|
||||||
|
test_array_expectations(expectations);
|
||||||
|
free(unit_normal_array);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test normal
|
||||||
|
void test_normal(double mean, double std, uint64_t* seed)
|
||||||
|
{
|
||||||
|
int n = 10 * 1000 * 1000;
|
||||||
|
double* normal_array = malloc(sizeof(double) * n);
|
||||||
|
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
normal_array[i] = sample_normal(mean, std, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
char* name = malloc(MAX_NAME_LENGTH * sizeof(char));
|
||||||
|
snprintf(name, MAX_NAME_LENGTH, "normal(%f, %f)", mean, std);
|
||||||
|
struct array_expectations expectations = {
|
||||||
|
.array = normal_array,
|
||||||
|
.n = n,
|
||||||
|
.name = name,
|
||||||
|
.expected_mean = mean,
|
||||||
|
.expected_std = std,
|
||||||
|
.tolerance = TOLERANCE,
|
||||||
|
};
|
||||||
|
|
||||||
|
test_array_expectations(expectations);
|
||||||
|
free(name);
|
||||||
|
free(normal_array);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test lognormal
|
||||||
|
void test_lognormal(double logmean, double logstd, uint64_t* seed)
|
||||||
|
{
|
||||||
|
int n = 10 * 1000 * 1000;
|
||||||
|
double* lognormal_array = malloc(sizeof(double) * n);
|
||||||
|
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
lognormal_array[i] = sample_lognormal(logmean, logstd, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
char* name = malloc(MAX_NAME_LENGTH * sizeof(char));
|
||||||
|
snprintf(name, MAX_NAME_LENGTH, "lognormal(%f, %f)", logmean, logstd);
|
||||||
|
struct array_expectations expectations = {
|
||||||
|
.array = lognormal_array,
|
||||||
|
.n = n,
|
||||||
|
.name = name,
|
||||||
|
.expected_mean = exp(logmean + pow(logstd, 2) / 2),
|
||||||
|
.expected_std = sqrt((exp(pow(logstd, 2)) - 1) * exp(2 * logmean + pow(logstd, 2))),
|
||||||
|
.tolerance = TOLERANCE,
|
||||||
|
};
|
||||||
|
|
||||||
|
test_array_expectations(expectations);
|
||||||
|
free(name);
|
||||||
|
free(lognormal_array);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test lognormal to
|
||||||
|
void test_to(double low, double high, uint64_t* seed)
|
||||||
|
{
|
||||||
|
int n = 10 * 1000 * 1000;
|
||||||
|
double* lognormal_array = malloc(sizeof(double) * n);
|
||||||
|
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
lognormal_array[i] = sample_to(low, high, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
char* name = malloc(MAX_NAME_LENGTH * sizeof(char));
|
||||||
|
snprintf(name, MAX_NAME_LENGTH, "to(%f, %f)", low, high);
|
||||||
|
|
||||||
|
const double NORMAL95CONFIDENCE = 1.6448536269514722;
|
||||||
|
double loglow = logf(low);
|
||||||
|
double loghigh = logf(high);
|
||||||
|
double logmean = (loglow + loghigh) / 2;
|
||||||
|
double logstd = (loghigh - loglow) / (2.0 * NORMAL95CONFIDENCE);
|
||||||
|
|
||||||
|
struct array_expectations expectations = {
|
||||||
|
.array = lognormal_array,
|
||||||
|
.n = n,
|
||||||
|
.name = name,
|
||||||
|
.expected_mean = exp(logmean + pow(logstd, 2) / 2),
|
||||||
|
.expected_std = sqrt((exp(pow(logstd, 2)) - 1) * exp(2 * logmean + pow(logstd, 2))),
|
||||||
|
.tolerance = TOLERANCE,
|
||||||
|
};
|
||||||
|
|
||||||
|
test_array_expectations(expectations);
|
||||||
|
free(name);
|
||||||
|
free(lognormal_array);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test beta
|
||||||
|
|
||||||
|
void test_beta(double a, double b, uint64_t* seed)
|
||||||
|
{
|
||||||
|
int n = 10 * 1000 * 1000;
|
||||||
|
double* beta_array = malloc(sizeof(double) * n);
|
||||||
|
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
beta_array[i] = sample_beta(a, b, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
char* name = malloc(MAX_NAME_LENGTH * sizeof(char));
|
||||||
|
snprintf(name, MAX_NAME_LENGTH, "beta(%f, %f)", a, b);
|
||||||
|
struct array_expectations expectations = {
|
||||||
|
.array = beta_array,
|
||||||
|
.n = n,
|
||||||
|
.name = name,
|
||||||
|
.expected_mean = a / (a + b),
|
||||||
|
.expected_std = sqrt((a * b) / (pow(a + b, 2) * (a + b + 1))),
|
||||||
|
.tolerance = TOLERANCE,
|
||||||
|
};
|
||||||
|
|
||||||
|
test_array_expectations(expectations);
|
||||||
|
free(name);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
// set randomness seed
|
||||||
|
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
|
*seed = 1000; // xorshift can't start with a seed of 0
|
||||||
|
|
||||||
|
printf("Testing unit uniform\n");
|
||||||
|
test_unit_uniform(seed);
|
||||||
|
|
||||||
|
printf("Testing small uniforms\n");
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
double start = sample_uniform(-10, 10, seed);
|
||||||
|
double end = sample_uniform(-10, 10, seed);
|
||||||
|
if (end > start) {
|
||||||
|
test_uniform(start, end, seed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Testing wide uniforms\n");
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
double start = sample_uniform(-1000 * 1000, 1000 * 1000, seed);
|
||||||
|
double end = sample_uniform(-1000 * 1000, 1000 * 1000, seed);
|
||||||
|
if (end > start) {
|
||||||
|
test_uniform(start, end, seed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Testing unit normal\n");
|
||||||
|
test_unit_normal(seed);
|
||||||
|
|
||||||
|
printf("Testing small normals\n");
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
double mean = sample_uniform(-10, 10, seed);
|
||||||
|
double std = sample_uniform(0, 10, seed);
|
||||||
|
if (std > 0) {
|
||||||
|
test_normal(mean, std, seed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Testing larger normals\n");
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
double mean = sample_uniform(-1000 * 1000, 1000 * 1000, seed);
|
||||||
|
double std = sample_uniform(0, 1000 * 1000, seed);
|
||||||
|
if (std > 0) {
|
||||||
|
test_normal(mean, std, seed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Testing smaller lognormals\n");
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
double mean = sample_uniform(-1, 1, seed);
|
||||||
|
double std = sample_uniform(0, 1, seed);
|
||||||
|
if (std > 0) {
|
||||||
|
test_lognormal(mean, std, seed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Testing larger lognormals\n");
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
double mean = sample_uniform(-1, 5, seed);
|
||||||
|
double std = sample_uniform(0, 5, seed);
|
||||||
|
if (std > 0) {
|
||||||
|
test_lognormal(mean, std, seed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Testing lognormals — sample_to(low, high) syntax\n");
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
double low = sample_uniform(0, 1000 * 1000, seed);
|
||||||
|
double high = sample_uniform(0, 1000 * 1000, seed);
|
||||||
|
if (low < high) {
|
||||||
|
test_to(low, high, seed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Bonus example
|
||||||
|
test_to(10, 10 * 1000, seed);
|
||||||
|
|
||||||
|
printf("Testing beta distribution\n");
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
double a = sample_uniform(0, 1000, seed);
|
||||||
|
double b = sample_uniform(0, 1000, seed);
|
||||||
|
if ((a > 0) && (b > 0)) {
|
||||||
|
test_beta(a, b, seed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Testing larger beta distributions\n");
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
double a = sample_uniform(0, 1000 * 1000, seed);
|
||||||
|
double b = sample_uniform(0, 1000 * 1000, seed);
|
||||||
|
if ((a > 0) && (b > 0)) {
|
||||||
|
test_beta(a, b, seed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
free(seed);
|
||||||
|
}
|
19
squiggle/makefile
Normal file
19
squiggle/makefile
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
SHELL := /bin/bash ## <= required to use time
|
||||||
|
|
||||||
|
time-linux:
|
||||||
|
@echo "Requires /bin/time, found on GNU/Linux systems" && echo
|
||||||
|
@echo "Running 100x and taking avg time of: $(OUTPUT)"
|
||||||
|
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do bun src/samples.js; done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time: |" | sed 's|$$|ms|' && echo
|
||||||
|
|
||||||
|
time-bun:
|
||||||
|
time bun src/samples.js
|
||||||
|
|
||||||
|
run-bun:
|
||||||
|
bun src/samples.js
|
||||||
|
|
||||||
|
patch:
|
||||||
|
sed -i 's/defaultSampleCount: 1000/defaultSampleCount: 1000000/g' node_modules/@quri/squiggle-lang/src/magicNumbers.ts node_modules/@quri/squiggle-lang/dist/magicNumbers.js
|
||||||
|
|
||||||
|
run-node:
|
||||||
|
node src/samples.js
|
||||||
|
|
64
squiggle/node_modules/.yarn-integrity
generated
vendored
64
squiggle/node_modules/.yarn-integrity
generated
vendored
|
@ -1,64 +0,0 @@
|
||||||
{
|
|
||||||
"systemParams": "linux-x64-93",
|
|
||||||
"modulesFolders": [
|
|
||||||
"node_modules"
|
|
||||||
],
|
|
||||||
"flags": [],
|
|
||||||
"linkedModules": [],
|
|
||||||
"topLevelPatterns": [
|
|
||||||
"@quri/squiggle-lang@^0.5.1"
|
|
||||||
],
|
|
||||||
"lockfileEntries": {
|
|
||||||
"@commander-js/extra-typings@^9.4.1": "https://registry.yarnpkg.com/@commander-js/extra-typings/-/extra-typings-9.4.1.tgz#acb75021594519799f509bde8f9210b114f1a0f1",
|
|
||||||
"@quri/squiggle-lang@^0.5.1": "https://registry.yarnpkg.com/@quri/squiggle-lang/-/squiggle-lang-0.5.1.tgz#e9708079a301833bc96c986c704a0ecd47b17358",
|
|
||||||
"@rescript/std@^10.0.0": "https://registry.yarnpkg.com/@rescript/std/-/std-10.0.1.tgz#2ffa229f3345fea3341fed4bbc9b15949cf182ba",
|
|
||||||
"@stdlib/array@^0.0.x": "https://registry.yarnpkg.com/@stdlib/array/-/array-0.0.12.tgz#12f40ab95bb36d424cdad991f29fc3cb491ee29e",
|
|
||||||
"@stdlib/assert@^0.0.x": "https://registry.yarnpkg.com/@stdlib/assert/-/assert-0.0.12.tgz#1648c9016e5041291f55a6464abcc4069c5103ce",
|
|
||||||
"@stdlib/bigint@^0.0.x": "https://registry.yarnpkg.com/@stdlib/bigint/-/bigint-0.0.11.tgz#c416a1d727001c55f4897e6424124199d638f2fd",
|
|
||||||
"@stdlib/blas@^0.0.x": "https://registry.yarnpkg.com/@stdlib/blas/-/blas-0.0.12.tgz#7e93e42b4621fc6903bf63264f045047333536c2",
|
|
||||||
"@stdlib/buffer@^0.0.x": "https://registry.yarnpkg.com/@stdlib/buffer/-/buffer-0.0.11.tgz#6137b00845e6c905181cc7ebfae9f7e47c01b0ce",
|
|
||||||
"@stdlib/cli@^0.0.x": "https://registry.yarnpkg.com/@stdlib/cli/-/cli-0.0.10.tgz#28e2fbe6865d7f5cd15b7dc5846c99bd3b91674f",
|
|
||||||
"@stdlib/complex@^0.0.x": "https://registry.yarnpkg.com/@stdlib/complex/-/complex-0.0.12.tgz#3afbc190cd0a9b37fc7c6e508c3aa9fda9106944",
|
|
||||||
"@stdlib/constants@^0.0.x": "https://registry.yarnpkg.com/@stdlib/constants/-/constants-0.0.11.tgz#78cd56d6c2982b30264843c3d75bde7125e90cd2",
|
|
||||||
"@stdlib/fs@^0.0.x": "https://registry.yarnpkg.com/@stdlib/fs/-/fs-0.0.12.tgz#662365fd5846a51f075724b4f2888ae88441b70d",
|
|
||||||
"@stdlib/math@^0.0.x": "https://registry.yarnpkg.com/@stdlib/math/-/math-0.0.11.tgz#eb6638bc03a20fbd6727dd5b977ee0170bda4649",
|
|
||||||
"@stdlib/ndarray@^0.0.x": "https://registry.yarnpkg.com/@stdlib/ndarray/-/ndarray-0.0.13.tgz#2e8fc645e10f56a645a0ab81598808c0e8f43b82",
|
|
||||||
"@stdlib/nlp@^0.0.x": "https://registry.yarnpkg.com/@stdlib/nlp/-/nlp-0.0.11.tgz#532ec0f7267b8d639e4c20c6de864e8de8a09054",
|
|
||||||
"@stdlib/number@^0.0.x": "https://registry.yarnpkg.com/@stdlib/number/-/number-0.0.10.tgz#4030ad8fc3fac19a9afb415c443cee6deea0e65c",
|
|
||||||
"@stdlib/os@^0.0.x": "https://registry.yarnpkg.com/@stdlib/os/-/os-0.0.12.tgz#08bbf013c62a7153099fa9cbac086ca1349a4677",
|
|
||||||
"@stdlib/process@^0.0.x": "https://registry.yarnpkg.com/@stdlib/process/-/process-0.0.12.tgz#123325079d89a32f4212f72fb694f8fe3614cf18",
|
|
||||||
"@stdlib/random@^0.0.x": "https://registry.yarnpkg.com/@stdlib/random/-/random-0.0.12.tgz#e819c3abd602ed5559ba800dba751e49c633ff85",
|
|
||||||
"@stdlib/regexp@^0.0.x": "https://registry.yarnpkg.com/@stdlib/regexp/-/regexp-0.0.13.tgz#80b98361dc7a441b47bc3fa964bb0c826759e971",
|
|
||||||
"@stdlib/stats@^0.0.13": "https://registry.yarnpkg.com/@stdlib/stats/-/stats-0.0.13.tgz#87c973f385379d794707c7b5196a173dba8b07e1",
|
|
||||||
"@stdlib/stats@^0.0.x": "https://registry.yarnpkg.com/@stdlib/stats/-/stats-0.0.13.tgz#87c973f385379d794707c7b5196a173dba8b07e1",
|
|
||||||
"@stdlib/streams@^0.0.x": "https://registry.yarnpkg.com/@stdlib/streams/-/streams-0.0.12.tgz#07f5ceae5852590afad8e1cb7ce94174becc8739",
|
|
||||||
"@stdlib/strided@^0.0.x": "https://registry.yarnpkg.com/@stdlib/strided/-/strided-0.0.12.tgz#86ac48e660cb7f64a45cf07e80cbbfe58be21ae1",
|
|
||||||
"@stdlib/string@^0.0.x": "https://registry.yarnpkg.com/@stdlib/string/-/string-0.0.14.tgz#4feea4f9089ab72428eebb65fe7b93d90a7f34f4",
|
|
||||||
"@stdlib/symbol@^0.0.x": "https://registry.yarnpkg.com/@stdlib/symbol/-/symbol-0.0.12.tgz#b9f396b0bf269c2985bb7fe99810a8e26d7288c3",
|
|
||||||
"@stdlib/time@^0.0.x": "https://registry.yarnpkg.com/@stdlib/time/-/time-0.0.14.tgz#ea6daa438b1d3b019b99f5091117ee4bcef55d60",
|
|
||||||
"@stdlib/types@^0.0.x": "https://registry.yarnpkg.com/@stdlib/types/-/types-0.0.14.tgz#02d3aab7a9bfaeb86e34ab749772ea22f7b2f7e0",
|
|
||||||
"@stdlib/utils@^0.0.x": "https://registry.yarnpkg.com/@stdlib/utils/-/utils-0.0.12.tgz#670de5a7b253f04f11a4cba38f790e82393bcb46",
|
|
||||||
"commander@^9.4.1": "https://registry.yarnpkg.com/commander/-/commander-9.4.1.tgz#d1dd8f2ce6faf93147295c0df13c7c21141cfbdd",
|
|
||||||
"core-util-is@~1.0.0": "https://registry.yarnpkg.com/core-util-is/-/core-util-is-1.0.3.tgz#a6042d3634c2b27e9328f837b965fac83808db85",
|
|
||||||
"debug@^2.6.9": "https://registry.yarnpkg.com/debug/-/debug-2.6.9.tgz#5d128515df134ff327e90a4c93f4e077a536341f",
|
|
||||||
"define-lazy-prop@^2.0.0": "https://registry.yarnpkg.com/define-lazy-prop/-/define-lazy-prop-2.0.0.tgz#3f7ae421129bcaaac9bc74905c98a0009ec9ee7f",
|
|
||||||
"inherits@~2.0.3": "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c",
|
|
||||||
"is-docker@^2.0.0": "https://registry.yarnpkg.com/is-docker/-/is-docker-2.2.1.tgz#33eeabe23cfe86f14bde4408a02c0cfb853acdaa",
|
|
||||||
"is-docker@^2.1.1": "https://registry.yarnpkg.com/is-docker/-/is-docker-2.2.1.tgz#33eeabe23cfe86f14bde4408a02c0cfb853acdaa",
|
|
||||||
"is-wsl@^2.2.0": "https://registry.yarnpkg.com/is-wsl/-/is-wsl-2.2.0.tgz#74a4c76e77ca9fd3f932f290c17ea326cd157271",
|
|
||||||
"isarray@~1.0.0": "https://registry.yarnpkg.com/isarray/-/isarray-1.0.0.tgz#bb935d48582cba168c06834957a54a3e07124f11",
|
|
||||||
"jstat@^1.9.5": "https://registry.yarnpkg.com/jstat/-/jstat-1.9.6.tgz#60e801b0d4c26e37aab0f375d1859fe9d60e10c0",
|
|
||||||
"lodash@^4.17.21": "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c",
|
|
||||||
"minimist@^1.2.0": "https://registry.yarnpkg.com/minimist/-/minimist-1.2.7.tgz#daa1c4d91f507390437c6a8bc01078e7000c4d18",
|
|
||||||
"ms@2.0.0": "https://registry.yarnpkg.com/ms/-/ms-2.0.0.tgz#5608aeadfc00be6c2901df5f9861788de0d597c8",
|
|
||||||
"open@^8.4.0": "https://registry.yarnpkg.com/open/-/open-8.4.0.tgz#345321ae18f8138f82565a910fdc6b39e8c244f8",
|
|
||||||
"process-nextick-args@~2.0.0": "https://registry.yarnpkg.com/process-nextick-args/-/process-nextick-args-2.0.1.tgz#7820d9b16120cc55ca9ae7792680ae7dba6d7fe2",
|
|
||||||
"readable-stream@^2.1.4": "https://registry.yarnpkg.com/readable-stream/-/readable-stream-2.3.7.tgz#1eca1cf711aef814c04f62252a36a62f6cb23b57",
|
|
||||||
"safe-buffer@~5.1.0": "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.1.2.tgz#991ec69d296e0313747d59bdfd2b745c35f8828d",
|
|
||||||
"safe-buffer@~5.1.1": "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.1.2.tgz#991ec69d296e0313747d59bdfd2b745c35f8828d",
|
|
||||||
"string_decoder@~1.1.1": "https://registry.yarnpkg.com/string_decoder/-/string_decoder-1.1.1.tgz#9cf1611ba62685d7030ae9e4ba34149c3af03fc8",
|
|
||||||
"util-deprecate@~1.0.1": "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf"
|
|
||||||
},
|
|
||||||
"files": [],
|
|
||||||
"artifacts": {}
|
|
||||||
}
|
|
64
squiggle/node_modules/@commander-js/extra-typings/README.md
generated
vendored
64
squiggle/node_modules/@commander-js/extra-typings/README.md
generated
vendored
|
@ -1,64 +0,0 @@
|
||||||
# extra-typings for commander
|
|
||||||
|
|
||||||
[![NPM Version](http://img.shields.io/npm/v/@commander-js/extra-typings.svg?style=flat)](https://www.npmjs.org/package/@commander-js/extra-typings)
|
|
||||||
[![NPM Downloads](https://img.shields.io/npm/dm/@commander-js/extra-typings.svg?style=flat)](https://npmcharts.com/compare/@commander-js/extra-typings?minimal=true)
|
|
||||||
|
|
||||||
This package offers experimental TypeScript typings for `commander` which infer strong types for:
|
|
||||||
|
|
||||||
- all the parameters of the action handler, including the options
|
|
||||||
- options returned by `.opts()`
|
|
||||||
|
|
||||||
The runtime is supplied by commander. This package is all about the typings.
|
|
||||||
|
|
||||||
Usage
|
|
||||||
|
|
||||||
- install `@commander-js/extra-typings` using your preferred package manager
|
|
||||||
- install `commander`, if not already installed (peer dependency)
|
|
||||||
- in code import from `@commander-js/extra-typings` instead of `commander`
|
|
||||||
|
|
||||||
The installed version of this package should match the major and minor version numbers of the installed commander package, but the patch version number is independent (following pattern used by [Definitely Typed](https://github.com/DefinitelyTyped/DefinitelyTyped#how-do-definitely-typed-package-versions-relate-to-versions-of-the-corresponding-library)).
|
|
||||||
|
|
||||||
Credit: this builds on work by @PaperStrike in <https://github.com/tj/commander.js/pull/1758>
|
|
||||||
|
|
||||||
## Limitations
|
|
||||||
|
|
||||||
- the generics lead to some noisy types visible in editor and errors
|
|
||||||
- some minor code changes required for subclasses of `Command`, `Argument`, or `Option` (see [subclass.test-d.ts](./tests/subclass.test-d.ts))
|
|
||||||
- chaining methods which do type inference return base class rather than `this`
|
|
||||||
- subclass of `Command` returns base class not subclass from `.command(name)`
|
|
||||||
- type parameter needed for class declaration of subclass of `Option` and `Argument`
|
|
||||||
|
|
||||||
## Usage tips
|
|
||||||
|
|
||||||
The types are built up as the options and arguments are defined. The usage pattern for action handlers is easy. Just chain the action handler after the options and arguments.
|
|
||||||
|
|
||||||
```js
|
|
||||||
import { program } from '@commander-js/extra-typings';
|
|
||||||
|
|
||||||
program.command('print')
|
|
||||||
.argument('<file>')
|
|
||||||
.option('--double-sided')
|
|
||||||
.action((targetFile, options) => {
|
|
||||||
// command-arguments and options are fully typed
|
|
||||||
});
|
|
||||||
```
|
|
||||||
|
|
||||||
For working with a single command without an action handler, the configuration need to be done at the same time as the variable is declared.
|
|
||||||
|
|
||||||
```js
|
|
||||||
import { Command } from '@commander-js/extra-typings';
|
|
||||||
|
|
||||||
// broken pattern
|
|
||||||
const program = new Command(); // program type does not include options or arguments
|
|
||||||
program.option('-d, --debug'); // adding option does not change type of program
|
|
||||||
const options = program.opts(); // dumb type
|
|
||||||
```
|
|
||||||
|
|
||||||
```js
|
|
||||||
import { Command } from '@commander-js/extra-typings';
|
|
||||||
|
|
||||||
// working pattern
|
|
||||||
const program = new Command()
|
|
||||||
.option('-d, --debug'); // program type includes chained options and arguments
|
|
||||||
const options = program.opts(); // smart type
|
|
||||||
```
|
|
1063
squiggle/node_modules/@commander-js/extra-typings/index.d.ts
generated
vendored
1063
squiggle/node_modules/@commander-js/extra-typings/index.d.ts
generated
vendored
File diff suppressed because it is too large
Load Diff
25
squiggle/node_modules/@commander-js/extra-typings/index.js
generated
vendored
25
squiggle/node_modules/@commander-js/extra-typings/index.js
generated
vendored
|
@ -1,25 +0,0 @@
|
||||||
const commander = require('commander');
|
|
||||||
|
|
||||||
// @ts-check
|
|
||||||
|
|
||||||
exports = module.exports = {};
|
|
||||||
|
|
||||||
// Return a different global program than commander,
|
|
||||||
// and don't also return it as default export.
|
|
||||||
exports.program = new commander.Command();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Expose classes. The FooT versions are just types, so return Commander original implementations!
|
|
||||||
*/
|
|
||||||
|
|
||||||
exports.Argument = commander.Argument;
|
|
||||||
exports.Command = commander.Command;
|
|
||||||
exports.CommanderError = commander.CommanderError;
|
|
||||||
exports.Help = commander.Help;
|
|
||||||
exports.InvalidArgumentError = commander.InvalidArgumentError;
|
|
||||||
exports.InvalidOptionArgumentError = commander.InvalidArgumentError; // Deprecated
|
|
||||||
exports.Option = commander.Option;
|
|
||||||
|
|
||||||
exports.createArgument = () => new commander.Argument();
|
|
||||||
exports.createCommand = () => new commander.Command();
|
|
||||||
exports.createOption = () => new commander.Option();
|
|
47
squiggle/node_modules/@commander-js/extra-typings/package.json
generated
vendored
47
squiggle/node_modules/@commander-js/extra-typings/package.json
generated
vendored
|
@ -1,47 +0,0 @@
|
||||||
{
|
|
||||||
"name": "@commander-js/extra-typings",
|
|
||||||
"version": "9.4.1",
|
|
||||||
"description": "Infer strong typings for commander options and action handlers",
|
|
||||||
"main": "index.js",
|
|
||||||
"scripts": {
|
|
||||||
"test": "tsd",
|
|
||||||
"prepublish": "tsd"
|
|
||||||
},
|
|
||||||
"repository": {
|
|
||||||
"type": "git",
|
|
||||||
"url": "git+https://github.com/commander-js/extra-typings.git"
|
|
||||||
},
|
|
||||||
"files": [
|
|
||||||
"esm.mjs",
|
|
||||||
"index.js",
|
|
||||||
"index.d.ts"
|
|
||||||
],
|
|
||||||
"type": "commonjs",
|
|
||||||
"exports": {
|
|
||||||
".": {
|
|
||||||
"types": "./index.d.ts",
|
|
||||||
"require": "./index.js",
|
|
||||||
"import": "./esm.mjs"
|
|
||||||
},
|
|
||||||
"./esm.mjs": "./esm.mjs"
|
|
||||||
},
|
|
||||||
"types": "index.d.ts",
|
|
||||||
"tsd": {
|
|
||||||
"directory": "tests"
|
|
||||||
},
|
|
||||||
"keywords": [],
|
|
||||||
"author": "",
|
|
||||||
"license": "MIT",
|
|
||||||
"bugs": {
|
|
||||||
"url": "https://github.com/commander-js/extra-typings/issues"
|
|
||||||
},
|
|
||||||
"homepage": "https://github.com/commander-js/extra-typings#readme",
|
|
||||||
"peerDependencies": {
|
|
||||||
"commander": "9.4.x"
|
|
||||||
},
|
|
||||||
"devDependencies": {
|
|
||||||
"@types/node": "^18.7.16",
|
|
||||||
"tsd": "^0.22.0",
|
|
||||||
"typescript": "^4.7.4"
|
|
||||||
}
|
|
||||||
}
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user