Compare commits

..

1 Commits

Author SHA1 Message Date
de33917f67 tweaks 2024-09-13 17:41:17 -04:00
134 changed files with 87 additions and 2448 deletions

View File

@ -1,34 +0,0 @@
#include "../../../squiggle.h"
#include "../../../squiggle_more.h"
#include <stdio.h>
#include <stdlib.h>
double cumsum_p0 = 0.6;
double cumsum_p1 = 0.8;
double cumsum_p2 = 0.9;
double cumsum_p3 = 1.0;
double sampler_result(uint64_t * seed)
{
double p = sample_uniform(0, 1, seed);
if(p< cumsum_p0){
return 0;
} else if (p < cumsum_p1){
return 1;
} else if (p < cumsum_p2){
return sample_to(1,3, seed);
} else {
return sample_to(2, 10, seed);
}
}
int main()
{
int n_samples = 1000 * 1000, n_threads = 16;
double* results = malloc((size_t)n_samples * sizeof(double));
sampler_parallel(sampler_result, results, n_threads, n_samples);
printf("Avg: %f\n", array_sum(results, n_samples) / n_samples);
free(results);
}

View File

@ -1,228 +0,0 @@
#include <limits.h>
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
// Defs
#define PI 3.14159265358979323846 // M_PI in gcc gnu99
#define NORMAL90CONFIDENCE 1.6448536269514727
#define UNUSED(x) (void)(x)
// ^ https://stackoverflow.com/questions/3599160/how-can-i-suppress-unused-parameter-warnings-in-c
// Pseudo Random number generators
static uint64_t xorshift64(uint64_t* seed)
{
// Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
// See:
// <https://en.wikipedia.org/wiki/Xorshift>
// <https://stackoverflow.com/questions/53886131/how-does-xorshift32-works>,
// Also some drama:
// <https://www.pcg-random.org/posts/on-vignas-pcg-critique.html>,
// <https://prng.di.unimi.it/>
uint64_t x = *seed;
x ^= x << 13;
x ^= x >> 7;
x ^= x << 17;
return *seed = x;
/*
// if one wanted to generate 32 bit ints,
// from which to generate floats,
// one could do the following:
uint32_t x = *seed;
x ^= x << 13;
x ^= x >> 17;
x ^= x << 5;
return *seed = x;
*/
}
// Distribution & sampling functions
// Unit distributions
double sample_unit_uniform(uint64_t* seed)
{
// samples uniform from [0,1] interval.
return ((double)xorshift64(seed)) / ((double)UINT64_MAX);
}
double sample_unit_normal(uint64_t* seed)
{
// // See: <https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform>
double u1 = sample_unit_uniform(seed);
double u2 = sample_unit_uniform(seed);
double z = sqrt(-2.0 * log(u1)) * sin(2.0 * PI * u2);
return z;
}
// Composite distributions
double sample_uniform(double start, double end, uint64_t* seed)
{
return sample_unit_uniform(seed) * (end - start) + start;
}
double sample_normal(double mean, double sigma, uint64_t* seed)
{
return (mean + sigma * sample_unit_normal(seed));
}
double sample_lognormal(double logmean, double logstd, uint64_t* seed)
{
return exp(sample_normal(logmean, logstd, seed));
}
double sample_normal_from_90_ci(double low, double high, uint64_t* seed)
{
// Explanation of key idea:
// 1. We know that the 90% confidence interval of the unit normal is
// [-1.6448536269514722, 1.6448536269514722]
// see e.g.: https://stackoverflow.com/questions/20626994/how-to-calculate-the-inverse-of-the-normal-cumulative-distribution-function-in-p
// or https://www.wolframalpha.com/input?i=N%5BInverseCDF%28normal%280%2C1%29%2C+0.05%29%2C%7B%E2%88%9E%2C100%7D%5D
// 2. So if we take a unit normal and multiply it by
// L / 1.6448536269514722, its new 90% confidence interval will be
// [-L, L], i.e., length 2 * L
// 3. Instead, if we want to get a confidence interval of length L,
// we should multiply the unit normal by
// L / (2 * 1.6448536269514722)
// Meaning that its standard deviation should be multiplied by that amount
// see: https://en.wikipedia.org/wiki/Normal_distribution?lang=en#Operations_on_a_single_normal_variable
// 4. So we have learnt that Normal(0, L / (2 * 1.6448536269514722))
// has a 90% confidence interval of length L
// 5. If we want a 90% confidence interval from high to low,
// we can set mean = (high + low)/2; the midpoint, and L = high-low,
// Normal([high + low]/2, [high - low]/(2 * 1.6448536269514722))
double mean = (high + low) * 0.5;
double std = (high - low) / (2.0 * NORMAL90CONFIDENCE);
return sample_normal(mean, std, seed);
}
double sample_to(double low, double high, uint64_t* seed)
{
// Given a (positive) 90% confidence interval,
// returns a sample from a lognorma with a matching 90% c.i.
// Key idea: If we want a lognormal with 90% confidence interval [a, b]
// we need but get a normal with 90% confidence interval [log(a), log(b)].
// Then see code for sample_normal_from_90_ci
double loglow = log(low);
double loghigh = log(high);
return exp(sample_normal_from_90_ci(loglow, loghigh, seed));
}
double sample_gamma(double alpha, uint64_t* seed)
{
// A Simple Method for Generating Gamma Variables, Marsaglia and Wan Tsang, 2001
// https://dl.acm.org/doi/pdf/10.1145/358407.358414
// see also the references/ folder
// Note that the Wikipedia page for the gamma distribution includes a scaling parameter
// k or beta
// https://en.wikipedia.org/wiki/Gamma_distribution
// such that gamma_k(alpha, k) = k * gamma(alpha)
// or gamma_beta(alpha, beta) = gamma(alpha) / beta
// So far I have not needed to use this, and thus the second parameter is by default 1.
if (alpha >= 1) {
double d, c, x, v, u;
d = alpha - 1.0 / 3.0;
c = 1.0 / sqrt(9.0 * d);
while (1) {
do {
x = sample_unit_normal(seed);
v = 1.0 + c * x;
} while (v <= 0.0);
v = v * v * v;
u = sample_unit_uniform(seed);
if (u < 1.0 - 0.0331 * (x * x * x * x)) { // Condition 1
// the 0.0331 doesn't inspire much confidence
// however, this isn't the whole story
// by knowing that Condition 1 implies condition 2
// we realize that this is just a way of making the algorithm faster
// i.e., of not using the logarithms
return d * v;
}
if (log(u) < 0.5 * (x * x) + d * (1.0 - v + log(v))) { // Condition 2
return d * v;
}
}
} else {
return sample_gamma(1 + alpha, seed) * pow(sample_unit_uniform(seed), 1 / alpha);
// see note in p. 371 of https://dl.acm.org/doi/pdf/10.1145/358407.358414
}
}
double sample_beta(double a, double b, uint64_t* seed)
{
// See: https://en.wikipedia.org/wiki/Gamma_distribution#Related_distributions
double gamma_a = sample_gamma(a, seed);
double gamma_b = sample_gamma(b, seed);
return gamma_a / (gamma_a + gamma_b);
}
double sample_laplace(double successes, double failures, uint64_t* seed)
{
// see <https://en.wikipedia.org/wiki/Beta_distribution?lang=en#Rule_of_succession>
return sample_beta(successes + 1, failures + 1, seed);
}
// Array helpers
double array_sum(double* array, int length)
{
double sum = 0.0;
for (int i = 0; i < length; i++) {
sum += array[i];
}
return sum;
}
void array_cumsum(double* array_to_sum, double* array_cumsummed, int length)
{
array_cumsummed[0] = array_to_sum[0];
for (int i = 1; i < length; i++) {
array_cumsummed[i] = array_cumsummed[i - 1] + array_to_sum[i];
}
}
double array_mean(double* array, int length)
{
double sum = array_sum(array, length);
return sum / length;
}
double array_std(double* array, int length)
{
double mean = array_mean(array, length);
double std = 0.0;
for (int i = 0; i < length; i++) {
std += (array[i] - mean) * (array[i] - mean);
}
std = sqrt(std / length);
return std;
}
// Mixture function
double sample_mixture(double (*samplers[])(uint64_t*), double* weights, int n_dists, uint64_t* seed)
{
// Sample from samples with frequency proportional to their weights.
double sum_weights = array_sum(weights, n_dists);
double* cumsummed_normalized_weights = (double*)malloc((size_t)n_dists * sizeof(double));
cumsummed_normalized_weights[0] = weights[0] / sum_weights;
for (int i = 1; i < n_dists; i++) {
cumsummed_normalized_weights[i] = cumsummed_normalized_weights[i - 1] + weights[i] / sum_weights;
}
double result;
int result_set_flag = 0;
double p = sample_uniform(0, 1, seed);
for (int k = 0; k < n_dists; k++) {
if (p < cumsummed_normalized_weights[k]) {
result = samplers[k](seed);
result_set_flag = 1;
break;
}
}
if (result_set_flag == 0)
result = samplers[n_dists - 1](seed);
free(cumsummed_normalized_weights);
return result;
}

View File

@ -1,14 +0,0 @@
#include "../../../squiggle.h"
#include <stdio.h>
#include <stdlib.h>
int main()
{
// set randomness seed
uint64_t* seed = malloc(sizeof(uint64_t));
*seed = 1000; // xorshift can't start with 0
// ...
free(seed);
}

View File

@ -1,34 +0,0 @@
#include "../../../squiggle.h"
#include <stdio.h>
#include <stdlib.h>
// Estimate functions
double sample_0(uint64_t* seed) { UNUSED(seed); return 0; }
double sample_1(uint64_t* seed) { UNUSED(seed); return 1; }
double sample_few(uint64_t* seed) { return sample_to(1, 3, seed); }
double sample_many(uint64_t* seed) { return sample_to(2, 10, seed); }
double sample_model(uint64_t* seed){
double p_a = 0.8;
double p_b = 0.5;
double p_c = p_a * p_b;
int n_dists = 4;
double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
double (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many };
double result = sample_mixture(samplers, weights, n_dists, seed);
return result;
}
int main()
{
// set randomness seed
uint64_t* seed = malloc(sizeof(uint64_t));
*seed = 1000; // xorshift can't start with 0
printf("result_one: %f\n", sample_model(seed));
free(seed);
}

View File

@ -1,38 +0,0 @@
#include "../../../squiggle.h"
#include <stdio.h>
#include <stdlib.h>
double sample_0(uint64_t* seed) { UNUSED(seed); return 0; }
double sample_1(uint64_t* seed) { UNUSED(seed); return 1; }
double sample_few(uint64_t* seed) { return sample_to(1, 3, seed); }
double sample_many(uint64_t* seed) { return sample_to(2, 10, seed); }
double sample_model(uint64_t* seed){
double p_a = 0.8;
double p_b = 0.5;
double p_c = p_a * p_b;
int n_dists = 4;
double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
double (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many };
double result = sample_mixture(samplers, weights, n_dists, seed);
return result;
}
int main()
{
// set randomness seed
uint64_t* seed = malloc(sizeof(uint64_t));
*seed = 1000; // xorshift can't start with 0
int n_samples = 1000000;
double* result_many = (double*)malloc((size_t)n_samples * sizeof(double));
for (int i = 0; i < n_samples; i++) {
result_many[i] = sample_model(seed);
}
printf("Mean: %f\n", array_mean(result_many, n_samples));
free(seed);
}

View File

@ -1,44 +0,0 @@
#include "../../../squiggle.h"
#include <stdio.h>
#include <stdlib.h>
double sample_model(uint64_t* seed){
double sample_0(uint64_t* seed) { UNUSED(seed); return 0; }
// Using a gcc extension, you can define a function inside another function
double sample_1(uint64_t* seed) { UNUSED(seed); return 1; }
double sample_few(uint64_t* seed) { return sample_to(1, 3, seed); }
double sample_many(uint64_t* seed) { return sample_to(2, 10, seed); }
double p_a = 0.8;
double p_b = 0.5;
double p_c = p_a * p_b;
int n_dists = 4;
double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
double (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many };
double result = sample_mixture(samplers, weights, n_dists, seed);
return result;
}
int main()
{
// set randomness seed
uint64_t* seed = malloc(sizeof(uint64_t));
*seed = 1000; // xorshift can't start with 0
int n_samples = 1000000;
double* result_many = (double*)malloc((size_t)n_samples * sizeof(double));
for (int i = 0; i < n_samples; i++) {
result_many[i] = sample_model(seed);
}
printf("result_many: [");
for (int i = 0; i < 100; i++) {
printf("%.2f, ", result_many[i]);
}
printf("]\n");
free(seed);
}

View File

@ -1,29 +0,0 @@
#include "../../../squiggle.h"
#include <stdio.h>
#include <stdlib.h>
int main()
{
// set randomness seed
uint64_t* seed = malloc(sizeof(uint64_t));
*seed = 1000; // xorshift can't start with 0
int n = 1000 * 1000;
double* gamma_array = malloc(sizeof(double) * (size_t)n);
for (int i = 0; i < n; i++) {
gamma_array[i] = sample_gamma(1.0, seed);
}
printf("gamma(1) summary statistics = mean: %f, std: %f\n", array_mean(gamma_array, n), array_std(gamma_array, n));
printf("\n");
double* beta_array = malloc(sizeof(double) * (size_t)n);
for (int i = 0; i < n; i++) {
beta_array[i] = sample_beta(1, 2.0, seed);
}
printf("beta(1,2) summary statistics: mean: %f, std: %f\n", array_mean(beta_array, n), array_std(beta_array, n));
printf("\n");
free(gamma_array);
free(beta_array);
free(seed);
}

View File

@ -1,17 +0,0 @@
#include "../../../squiggle.h"
#include <stdio.h>
#include <stdlib.h>
// Estimate functions
int main()
{
// set randomness seed
uint64_t* seed = malloc(sizeof(uint64_t));
*seed = 1000; // xorshift can't start with 0
for (int i = 0; i < 100; i++) {
double sample = sample_lognormal(0, 10, seed);
printf("%f\n", sample);
}
free(seed);
}

View File

@ -1 +0,0 @@
./example | sort -h

View File

@ -1,99 +0,0 @@
#include "../../../squiggle.h"
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
double sample_fermi_logspace(uint64_t * seed)
{
// Replicate <https://arxiv.org/pdf/1806.02404.pdf>, and in particular the red line in page 11.
// You can see a simple version of this function in naive.c in this same folder
double log_rate_of_star_formation = sample_uniform(log(1), log(100), seed);
double log_fraction_of_stars_with_planets = sample_uniform(log(0.1), log(1), seed);
double log_number_of_habitable_planets_per_star_system = sample_uniform(log(0.1), log(1), seed);
double log_rate_of_life_formation_in_habitable_planets = sample_normal(1, 50, seed);
double log_fraction_of_habitable_planets_in_which_any_life_appears;
/*
Consider:
a = underlying normal
b = rate_of_life_formation_in_habitable_planets = exp(underlying normal) = exp(a)
c = 1 - exp(-b) = fraction_of_habitable_planets_in_which_any_life_appears
d = log(c)
Looking at the Taylor expansion for c = 1 - exp(-b), it's
b - b^2/2 + b^3/6 - x^b/24, etc.
<https://www.wolframalpha.com/input?i=1-exp%28-x%29>
When b ~ 0 (as is often the case), this is close to b.
But now, if b ~ 0, c ~ b
and d = log(c) ~ log(b) = log(exp(a)) = a
Now, we could play around with estimating errors,
and indeed if we want b^2/2 = exp(a)^2/2 < 10^(-n), i.e., to have n decimal digits of precision,
we could compute this as e.g., a < (nlog(10) + log(2))/2
so for example if we want ten digits of precision, that's a < -11
Empirically, the two numbers as calculated in C do become really close around 11 or so,
and at 38 that calculation results in a -inf (so probably a floating point error or similar.)
So we should be using that formula for somewhere between -38 << a < -11
I chose -16 as a happy medium after playing around with
double invert(double x){
return log(1-exp(-exp(-x)));
}
for(int i=0; i<64; i++){
double j = i;
printf("for %lf, log(1-exp(-exp(-x))) is calculated as... %lf\n", j, invert(j));
}
and <https://www.wolframalpha.com/input?i=log%281-exp%28-exp%28-16%29%29%29>
*/
if (log_rate_of_life_formation_in_habitable_planets < -16) {
log_fraction_of_habitable_planets_in_which_any_life_appears = log_rate_of_life_formation_in_habitable_planets;
} else {
double rate_of_life_formation_in_habitable_planets = exp(log_rate_of_life_formation_in_habitable_planets);
double fraction_of_habitable_planets_in_which_any_life_appears = -expm1(-rate_of_life_formation_in_habitable_planets);
log_fraction_of_habitable_planets_in_which_any_life_appears = log(fraction_of_habitable_planets_in_which_any_life_appears);
}
double log_fraction_of_planets_with_life_in_which_intelligent_life_appears = sample_uniform(log(0.001), log(1), seed);
double log_fraction_of_intelligent_planets_which_are_detectable_as_such = sample_uniform(log(0.01), log(1), seed);
double log_longevity_of_detectable_civilizations = sample_uniform(log(100), log(10000000000), seed);
double log_n =
log_rate_of_star_formation +
log_fraction_of_stars_with_planets +
log_number_of_habitable_planets_per_star_system +
log_fraction_of_habitable_planets_in_which_any_life_appears +
log_fraction_of_planets_with_life_in_which_intelligent_life_appears +
log_fraction_of_intelligent_planets_which_are_detectable_as_such +
log_longevity_of_detectable_civilizations;
return log_n;
}
double sample_are_we_alone_logspace(uint64_t * seed)
{
double log_n = sample_fermi_logspace(seed);
return ((log_n > 0) ? 1 : 0);
// log_n > 0 => n > 1
}
int main()
{
// set randomness seed
uint64_t* seed = malloc(sizeof(uint64_t));
*seed = 1001; // xorshift can't start with a seed of 0
double logspace_fermi_proportion = 0;
int n_samples = 1000 * 1000;
for (int i = 0; i < n_samples; i++) {
double result = sample_are_we_alone_logspace(seed);
logspace_fermi_proportion += result;
}
double p_not_alone = logspace_fermi_proportion / n_samples;
printf("Probability that we are not alone: %lf (%.lf%%)\n", p_not_alone, p_not_alone * 100);
free(seed);
}

View File

@ -1,79 +0,0 @@
#include "../../../squiggle.h"
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#define VERBOSE 0
double sample_loguniform(double a, double b, uint64_t* seed)
{
return exp(sample_uniform(log(a), log(b), seed));
}
int main()
{
// Replicate <https://arxiv.org/pdf/1806.02404.pdf>, and in particular the red line in page 11.
// Could also be interesting to just produce and save many samples.
// set randomness seed
uint64_t* seed = malloc(sizeof(uint64_t));
*seed = UINT64_MAX / 64; // xorshift can't start with a seed of 0
// Do this naïvely, without worrying that much about numerical precision
double sample_fermi_naive(uint64_t * seed)
{
double rate_of_star_formation = sample_loguniform(1, 100, seed);
double fraction_of_stars_with_planets = sample_loguniform(0.1, 1, seed);
double number_of_habitable_planets_per_star_system = sample_loguniform(0.1, 1, seed);
double rate_of_life_formation_in_habitable_planets = sample_lognormal(1, 50, seed);
double fraction_of_habitable_planets_in_which_any_life_appears = -expm1(-rate_of_life_formation_in_habitable_planets);
// double fraction_of_habitable_planets_in_which_any_life_appears = 1-exp(-rate_of_life_formation_in_habitable_planets);
// but with more precision
double fraction_of_planets_with_life_in_which_intelligent_life_appears = sample_loguniform(0.001, 1, seed);
double fraction_of_intelligent_planets_which_are_detectable_as_such = sample_loguniform(0.01, 1, seed);
double longevity_of_detectable_civilizations = sample_loguniform(100, 10000000000, seed);
if(VERBOSE) printf(" rate_of_star_formation = %lf\n", rate_of_star_formation);
if(VERBOSE) printf(" fraction_of_stars_with_planets = %lf\n", fraction_of_stars_with_planets);
if(VERBOSE) printf(" number_of_habitable_planets_per_star_system = %lf\n", number_of_habitable_planets_per_star_system);
if(VERBOSE) printf(" rate_of_life_formation_in_habitable_planets = %.16lf\n", rate_of_life_formation_in_habitable_planets);
if(VERBOSE) printf(" fraction_of_habitable_planets_in_which_any_life_appears = %lf\n", fraction_of_habitable_planets_in_which_any_life_appears);
if(VERBOSE) printf(" fraction_of_planets_with_life_in_which_intelligent_life_appears = %lf\n", fraction_of_planets_with_life_in_which_intelligent_life_appears);
if(VERBOSE) printf(" fraction_of_intelligent_planets_which_are_detectable_as_such = %lf\n", fraction_of_intelligent_planets_which_are_detectable_as_such);
if(VERBOSE) printf(" longevity_of_detectable_civilizations = %lf\n", longevity_of_detectable_civilizations);
// Expected number of civilizations in the Milky way;
// see footnote 3 (p. 5)
double n = rate_of_star_formation * fraction_of_stars_with_planets * number_of_habitable_planets_per_star_system * fraction_of_habitable_planets_in_which_any_life_appears * fraction_of_planets_with_life_in_which_intelligent_life_appears * fraction_of_intelligent_planets_which_are_detectable_as_such * longevity_of_detectable_civilizations;
return n;
}
double sample_are_we_alone_naive(uint64_t * seed)
{
double n = sample_fermi_naive(seed);
return ((n > 1) ? 1 : 0);
}
double n = 1000000;
double naive_fermi_proportion = 0;
for (int i = 0; i < n; i++) {
double result = sample_are_we_alone_naive(seed);
if(VERBOSE) printf("result: %lf\n", result);
naive_fermi_proportion += result;
}
printf("Naïve %% that we are not alone: %lf\n", naive_fermi_proportion / n);
free(seed);
/*
double invert(double x){
return log(1-exp(-exp(-x)));
}
for(int i=0; i<64; i++){
double j = i;
printf("for %lf, log(1-exp(-exp(-x))) is calculated as... %lf\n", j, invert(j));
}
*/
}

View File

@ -1,91 +0,0 @@
# Interface:
# make all
# make format-all
# make run-all
# make one DIR=01_one_sample
# make format-one DIR=01_one_sample
# make run-one DIR=01_one_sample
# make time-linux-one DIR=01_one_sample
# make profile-one DIR=01_one_sample
# Compiler
CC=gcc
# CC=tcc # <= faster compilation
# Main file
SRC=example.c
OUTPUT=example
## Dependencies
SQUIGGLE=../../squiggle.c
MATH=-lm
DEPS=$(SQUIGGLE) $(MATH)
## Flags
# DEBUG=-fsanitize=address,undefined -fanalyzer
# DEBUG=-g
# DEBUG=
WARN=-Wall -Wextra -Wdouble-promotion -Wconversion
STANDARD=-std=c99
OPTIMIZED=-O3 #-Ofast
## Formatter
STYLE_BLUEPRINT=webkit
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
## make all
all:
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 00_example_template/$(SRC) $(DEPS) -o 00_example_template/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 01_one_sample/$(SRC) $(DEPS) -o 01_one_sample/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 02_time_to_botec/$(SRC) $(DEPS) -o 02_time_to_botec/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 03_gcc_nested_function/$(SRC) $(DEPS) -o 03_gcc_nested_function/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 04_gamma_beta/$(SRC) $(DEPS) -o 04_gamma_beta/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 05_hundred_lognormals/$(SRC) $(DEPS) -o 05_hundred_lognormals/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 06_dissolving_fermi_paradox/$(SRC) $(DEPS) -o 06_dissolving_fermi_paradox/$(OUTPUT)
format-all:
$(FORMATTER) 00_example_template/$(SRC)
$(FORMATTER) 01_one_sample/$(SRC)
$(FORMATTER) 02_time_to_botec/$(SRC)
$(FORMATTER) 03_gcc_nested_function/$(SRC)
$(FORMATTER) 04_gamma_beta/$(SRC)
$(FORMATTER) 05_hundred_lognormals/$(SRC)
$(FORMATTER) 06_dissolving_fermi_paradox/$(SRC)
run-all:
00_example_template/$(OUTPUT)
01_one_sample/$(OUTPUT)
02_time_to_botec/$(OUTPUT)
03_gcc_nested_function/$(OUTPUT)
04_gamma_beta/$(OUTPUT)
05_hundred_lognormals/$(OUTPUT)
06_dissolving_fermi_paradox/$(OUTPUT)
## make one DIR=01_one_sample
one: $(DIR)/$(SRC)
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) $(DIR)/$(SRC) $(DEPS) -o $(DIR)/$(OUTPUT)
## make format-one DIR=01_one_sample
format-one: $(DIR)/$(SRC)
$(FORMATTER) $(DIR)/$(SRC)
## make run-one DIR=01_one_sample
run-one: $(DIR)/$(OUTPUT)
$(DIR)/$(OUTPUT) && echo
## make time-linux-one DIR=01_one_sample
time-linux-one: $(DIR)/$(OUTPUT)
@echo "Requires /bin/time, found on GNU/Linux systems" && echo
@echo "Running 100x and taking avg time $(DIR)/$(OUTPUT)"
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do $(DIR)/$(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
## e.g., make profile-linux-one DIR=01_one_sample
profile-linux-one:
echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
echo "Must be run as sudo"
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) $(DIR)/$(SRC) $(DEPS) -o $(DIR)/$(OUTPUT)
# $(CC) $(SRC) $(DEPS) -o $(OUTPUT)
sudo perf record $(DIR)/$(OUTPUT)
sudo perf report
rm perf.data

View File

@ -1,19 +0,0 @@
#include "../../../squiggle.h"
#include "../../../squiggle_more.h"
#include <stdio.h>
#include <stdlib.h>
double sample_model(uint64_t* seed){
return sample_to(1, 10, seed);
}
int main()
{
// set randomness seed
uint64_t* seed = malloc(sizeof(uint64_t));
*seed = 1000; // xorshift can't start with 0
// ...
free(seed);
}

View File

@ -1,30 +0,0 @@
#include "../../../squiggle.h"
#include "../../../squiggle_more.h"
#include <stdio.h>
#include <stdlib.h>
// Estimate functions
double sample_beta_3_2(uint64_t* seed)
{
return sample_beta(3.0, 2.0, seed);
}
int main()
{
// set randomness seed
uint64_t* seed = malloc(sizeof(uint64_t));
*seed = 1000; // xorshift can't start with 0
int n_samples = 1 * MILLION;
double* xs = malloc(sizeof(double) * (size_t)n_samples);
for (int i = 0; i < n_samples; i++) {
xs[i] = sample_beta_3_2(seed);
}
printf("\n# Stats\n");
array_print_stats(xs, n_samples);
printf("\n# Histogram\n");
array_print_histogram(xs, n_samples, 23);
free(seed);
}

View File

@ -1,28 +0,0 @@
#include "../../../squiggle.h"
#include "../../../squiggle_more.h"
#include <stdio.h>
#include <stdlib.h>
// Estimate functions
double sample_beta_3_2(uint64_t* seed)
{
return sample_beta(3.0, 2.0, seed);
}
int main()
{
// set randomness seed
uint64_t* seed = malloc(sizeof(uint64_t));
*seed = 1000; // xorshift can't start with 0
int n_samples = 1 * MILLION;
double* xs = malloc(sizeof(double) * (size_t)n_samples);
sampler_parallel(sample_beta_3_2, xs, 16, n_samples);
printf("\n# Stats\n");
array_print_stats(xs, n_samples);
printf("\n# Histogram\n");
array_print_histogram(xs, n_samples, 23);
free(seed);
}

View File

@ -1,63 +0,0 @@
#include "../../../squiggle.h"
#include "../../../squiggle_more.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
double probability_of_dying_nuno(uint64_t* seed)
{
double first_year_russian_nuclear_weapons = 1953;
double current_year = 2022;
double laplace_probability_nuclear_exchange_year = sample_beta(1, current_year - first_year_russian_nuclear_weapons + 1, seed);
double laplace_probability_nuclear_exchange_month = 1 - pow(1 - laplace_probability_nuclear_exchange_year, (1.0 / 12.0));
double london_hit_conditional_on_russia_nuclear_weapon_usage = sample_beta(7.67, 69.65, seed);
// I.e., a beta distribution with a range of 0.05 to 0.16 into: https://nunosempere.com/blog/2023/03/15/fit-beta/
// 0.05 were my estimate and Samotsvety's estimate in March 2022, respectively:
// https://forum.effectivealtruism.org/posts/KRFXjCqqfGQAYirm5/samotsvety-nuclear-risk-forecasts-march-2022#Nu_o_Sempere
double informed_actor_not_able_to_escape = sample_beta(3.26212166586967, 3.26228162008564, seed);
// 0.2 to 0.8, i.e., 20% to 80%, again using the previous tool
double proportion_which_die_if_bomb_drops_in_london = sample_beta(10.00, 2.45, seed); // 60% to 95%
double probability_of_dying = laplace_probability_nuclear_exchange_month * london_hit_conditional_on_russia_nuclear_weapon_usage * informed_actor_not_able_to_escape * proportion_which_die_if_bomb_drops_in_london;
return probability_of_dying;
}
double probability_of_dying_eli(uint64_t* seed)
{
double russia_nato_nuclear_exchange_in_next_month = sample_beta(1.30, 1182.99, seed); // .0001 to .003
double london_hit_conditional = sample_beta(3.47, 8.97, seed); // 0.1 to 0.5
double informed_actors_not_able_to_escape = sample_beta(2.73, 5.67, seed); // .1 to .6
double proportion_which_die_if_bomb_drops_in_london = sample_beta(3.00, 1.46, seed); // 0.3 to 0.95;
double probability_of_dying = russia_nato_nuclear_exchange_in_next_month * london_hit_conditional * informed_actors_not_able_to_escape * proportion_which_die_if_bomb_drops_in_london;
return probability_of_dying;
}
double sample_nuclear_model(uint64_t* seed)
{
double (*samplers[])(uint64_t*) = { probability_of_dying_nuno, probability_of_dying_eli };
double weights[] = { 0.5, 0.5 };
return sample_mixture(samplers, weights, 2, seed);
}
int main()
{
// set randomness seed
uint64_t* seed = malloc(sizeof(uint64_t));
*seed = 1000; // xorshift can't start with 0
int n = 1 * MILLION;
double* xs = malloc(sizeof(double) * (size_t)n);
for (int i = 0; i < n; i++) {
xs[i] = sample_nuclear_model(seed);
}
printf("\n# Stats\n");
array_print_stats(xs, n);
printf("\n# Histogram\n");
array_print_90_ci_histogram(xs, n, 20);
free(xs);
free(seed);
}

View File

@ -1,20 +0,0 @@
#include "../../../squiggle.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
int main()
{
// set randomness seed
uint64_t* seed = malloc(sizeof(uint64_t));
*seed = 1000; // xorshift can't start with 0
double firstYearRussianNuclearWeapons = 1953;
double currentYear = 2023;
for(int i=0; i<10; i++){
double laplace_beta = sample_beta(currentYear-firstYearRussianNuclearWeapons + 1, 1, seed);
printf("%f\n", laplace_beta);
}
free(seed);
}

View File

@ -1,53 +0,0 @@
# Interface:
# make
# make build
# make format
# make run
# Compiler
CC=gcc
# CC=tcc # <= faster compilation
# Main file
SRC=example.c ../../../squiggle.c
OUTPUT=example
## Dependencies
MATH=-lm
## Flags
DEBUG= #'-g'
STANDARD=-std=c99
WARNINGS=-Wall
OPTIMIZED=-O3 #-Ofast
# OPENMP=-fopenmp
## Formatter
STYLE_BLUEPRINT=webkit
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
## make build
build: $(SRC)
$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)
format: $(SRC)
$(FORMATTER) $(SRC)
run: $(SRC) $(OUTPUT)
OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
time-linux:
@echo "Requires /bin/time, found on GNU/Linux systems" && echo
@echo "Running 100x and taking avg time $(OUTPUT)"
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
## Profiling
profile-linux:
echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
echo "Must be run as sudo"
$(CC) $(SRC) $(MATH) -o $(OUTPUT)
sudo perf record ./$(OUTPUT)
sudo perf report
rm perf.data

View File

@ -1,43 +0,0 @@
#include "../../../squiggle.h"
#include "../../../squiggle_more.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
double sample_minutes_per_day_jumping_rope_needed_to_burn_10kg(uint64_t* seed)
{
double kcal_jumping_rope_minute = sample_to(15, 20, seed);
double kcal_jumping_rope_hour = kcal_jumping_rope_minute * 60;
double kcal_in_kg_of_fat = 7700;
double num_kg_of_fat_to_lose = 10;
double hours_jumping_rope_needed = kcal_in_kg_of_fat * num_kg_of_fat_to_lose / kcal_jumping_rope_hour;
double days_until_end_of_year = 152; // as of 2023-08-01
double hours_per_day = hours_jumping_rope_needed / days_until_end_of_year;
double minutes_per_day = hours_per_day * 60;
return minutes_per_day;
}
int main()
{
// set randomness seed
uint64_t* seed = malloc(sizeof(uint64_t));
*seed = 1000; // xorshift can't start with 0
int n = 1000 * 1000;
double* xs = malloc(sizeof(double) * (size_t)n);
for (int i = 0; i < n; i++) {
xs[i] = sample_minutes_per_day_jumping_rope_needed_to_burn_10kg(seed);
}
printf("## How many minutes per day do I have to jump rope to lose 10kg of fat by the end of the year?\n");
printf("\n# Stats\n");
array_print_stats(xs, n);
printf("\n# Histogram\n");
array_print_histogram(xs, n, 23);
free(seed);
}

View File

@ -1,84 +0,0 @@
#include "../../../squiggle.h"
#include "../../../squiggle_more.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
double yearly_probability_nuclear_collapse(double year, uint64_t* seed)
{
double successes = 0;
double failures = (year - 1960);
return sample_laplace(successes, failures, seed);
// ^ can change to (successes + 1)/(trials + 2)
// to get a probability,
// rather than sampling from a distribution over probabilities.
}
double yearly_probability_nuclear_collapse_2023(uint64_t* seed)
{
return yearly_probability_nuclear_collapse(2023, seed);
}
double yearly_probability_nuclear_collapse_after_recovery(double year, double rebuilding_period_length_years, uint64_t* seed)
{
// assumption: nuclear
double successes = 1.0;
double failures = (year - rebuilding_period_length_years - 1960 - 1);
return sample_laplace(successes, failures, seed);
}
double yearly_probability_nuclear_collapse_after_recovery_example(uint64_t* seed)
{
double year = 2070;
double rebuilding_period_length_years = 30;
// So, there was a nuclear collapse in 2040,
// then a recovery period of 30 years
// and it's now 2070
return yearly_probability_nuclear_collapse_after_recovery(year, rebuilding_period_length_years, seed);
}
double yearly_probability_nuclear_collapse_after_recovery_antiinductive(uint64_t* seed)
{
return yearly_probability_nuclear_collapse(2023, seed) / 2;
}
int main()
{
// set randomness seed
uint64_t* seed = malloc(sizeof(uint64_t));
*seed = 1000; // xorshift can't start with 0
int n_samples = 1000000;
// Before a first nuclear collapse
printf("## Before the first nuclear collapse\n");
double* yearly_probability_nuclear_collapse_2023_samples = malloc(sizeof(double) * (size_t)n_samples);
for (int i = 0; i < n_samples; i++) {
yearly_probability_nuclear_collapse_2023_samples[i] = yearly_probability_nuclear_collapse_2023(seed);
}
ci ci_90_2023 = array_get_90_ci(yearly_probability_nuclear_collapse_2023_samples, n_samples);
printf("90%% confidence interval: [%f, %f]\n", ci_90_2023.low, ci_90_2023.high);
// After the first nuclear collapse
printf("\n## After the first nuclear collapse\n");
double* yearly_probability_nuclear_collapse_after_recovery_samples = malloc(sizeof(double) * (size_t)n_samples);
for (int i = 0; i < n_samples; i++) {
yearly_probability_nuclear_collapse_after_recovery_samples[i] = yearly_probability_nuclear_collapse_after_recovery_example(seed);
}
ci ci_90_2070 = array_get_90_ci(yearly_probability_nuclear_collapse_after_recovery_samples, 1000000);
printf("90%% confidence interval: [%f, %f]\n", ci_90_2070.low, ci_90_2070.high);
// After the first nuclear collapse (antiinductive)
printf("\n## After the first nuclear collapse (antiinductive)\n");
double* yearly_probability_nuclear_collapse_after_recovery_antiinductive_samples = malloc(sizeof(double) * (size_t)n_samples);
for (int i = 0; i < n_samples; i++) {
yearly_probability_nuclear_collapse_after_recovery_antiinductive_samples[i] = yearly_probability_nuclear_collapse_after_recovery_antiinductive(seed);
}
ci ci_90_antiinductive = array_get_90_ci(yearly_probability_nuclear_collapse_after_recovery_antiinductive_samples, 1000000);
printf("90%% confidence interval: [%f, %f]\n", ci_90_antiinductive.low, ci_90_antiinductive.high);
// free seeds
free(yearly_probability_nuclear_collapse_2023_samples);
free(yearly_probability_nuclear_collapse_after_recovery_samples);
free(yearly_probability_nuclear_collapse_after_recovery_antiinductive_samples);
free(seed);
}

View File

@ -1,26 +0,0 @@
#include "../../../squiggle.h"
#include "../../../squiggle_more.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
int main()
{
// set randomness seed
uint64_t* seed = malloc(sizeof(uint64_t));
*seed = 1000; // xorshift can't start with 0
normal_params n1 = { .mean = 1.0, .std = 3.0 };
normal_params n2 = { .mean = 2.0, .std = 4.0 };
normal_params sn = algebra_sum_normals(n1, n2);
printf("The sum of Normal(%f, %f) and Normal(%f, %f) is Normal(%f, %f)\n",
n1.mean, n1.std, n2.mean, n2.std, sn.mean, sn.std);
lognormal_params ln1 = { .logmean = 1.0, .logstd = 3.0 };
lognormal_params ln2 = { .logmean = 2.0, .logstd = 4.0 };
lognormal_params sln = algebra_product_lognormals(ln1, ln2);
printf("The product of Lognormal(%f, %f) and Lognormal(%f, %f) is Lognormal(%f, %f)\n",
ln1.logmean, ln1.logstd, ln2.logmean, ln2.logstd, sln.logmean, sln.logstd);
free(seed);
}

View File

@ -1,33 +0,0 @@
#include "../../../squiggle.h"
#include "../../../squiggle_more.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
int main()
{
// set randomness seed
uint64_t* seed = malloc(sizeof(uint64_t));
*seed = 1000; // xorshift can't start with 0
// Convert to 90% confidence interval form and back
lognormal_params ln1 = { .logmean = 1.0, .logstd = 3.0 };
ci ln1_ci = convert_lognormal_params_to_ci(ln1);
printf("The 90%% confidence interval of Lognormal(%f, %f) is [%f, %f]\n",
ln1.logmean, ln1.logstd,
ln1_ci.low, ln1_ci.high);
lognormal_params ln1_params2 = convert_ci_to_lognormal_params(ln1_ci);
printf("The lognormal which has 90%% confidence interval [%f, %f] is Lognormal(%f, %f)\n",
ln1_ci.low, ln1_ci.high,
ln1_params2.logmean, ln1_params2.logstd);
lognormal_params ln2 = convert_ci_to_lognormal_params((ci) { .low = 1, .high = 10 });
lognormal_params ln3 = convert_ci_to_lognormal_params((ci) { .low = 5, .high = 50 });
lognormal_params sln = algebra_product_lognormals(ln2, ln3);
ci sln_ci = convert_lognormal_params_to_ci(sln);
printf("Result of some lognormal products: to(%f, %f)\n", sln_ci.low, sln_ci.high);
free(seed);
}

View File

@ -1,26 +0,0 @@
#include "../../../squiggle.h"
#include "../../../squiggle_more.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#define ln lognormal_params
#define to(...) convert_ci_to_lognormal_params((ci)__VA_ARGS__)
#define from(...) convert_lognormal_params_to_ci((ln)__VA_ARGS__)
#define times(a, b) algebra_product_lognormals(a, b)
int main()
{
// set randomness seed
uint64_t* seed = malloc(sizeof(uint64_t));
*seed = 1000; // xorshift can't start with 0
ln a = to({ .low = 1, .high = 10 });
ln b = to({ .low = 5, .high = 500 });
ln c = times(a, b);
printf("Result: to(%f, %f)\n", from(c).low, from(c).high);
printf("One sample from it is: %f\n", sample_lognormal(c.logmean, c.logstd, seed));
free(seed);
}

View File

@ -1,49 +0,0 @@
#include "../../../squiggle.h"
#include "../../../squiggle_more.h"
#include <stdio.h>
#include <stdlib.h>
double sample_0(uint64_t* seed)
{
UNUSED(seed);
return 0;
}
double sample_1(uint64_t* seed)
{
UNUSED(seed);
return 1;
}
double sample_normal_mean_1_std_2(uint64_t* seed)
{
return sample_normal(1, 2, seed);
}
double sample_1_to_3(uint64_t* seed)
{
return sample_to(1, 3, seed);
}
int main()
{
// set randomness seed
uint64_t* seed = malloc(sizeof(uint64_t));
*seed = 1000; // xorshift can't start with 0
int n_dists = 4;
double weights[] = { 1, 2, 3, 4 };
double (*samplers[])(uint64_t*) = {
sample_0,
sample_1,
sample_normal_mean_1_std_2,
sample_1_to_3
};
int n_samples = 10;
for (int i = 0; i < n_samples; i++) {
printf("Sample #%d: %f\n", i, sample_mixture(samplers, weights, n_dists, seed));
}
free(seed);
}

View File

@ -1,30 +0,0 @@
#include "../../../squiggle.h"
#include "../../../squiggle_more.h"
#include <stdio.h>
#include <stdlib.h>
double sample_model(uint64_t * seed)
{
return sample_lognormal(0, 10, seed);
}
// Estimate functions
int main()
{
// set randomness seed
// uint64_t* seed = malloc(sizeof(uint64_t));
// *seed = 1000; // xorshift can't start with 0
// ^ not necessary, because sampler_parallel takes care of the seed.
int n_samples = 1000 * 1000 * 1000;
int n_threads = 16;
double* results = malloc((size_t)n_samples * sizeof(double));
sampler_parallel(sample_model, results, n_threads, n_samples);
double avg = array_sum(results, n_samples) / n_samples;
printf("Average of 1B lognormal(0,10): %f\n", avg);
free(results);
// free(seed);
// ^ not necessary, because sampler_parallel takes care of the seed.
}

View File

@ -1,31 +0,0 @@
#include "../../../squiggle.h"
#include "../../../squiggle_more.h"
#include <stdio.h>
#include <stdlib.h>
double sampler_result(uint64_t * seed)
{
double p_a = 0.8;
double p_b = 0.5;
double p_c = p_a * p_b;
double sample_0(uint64_t * seed) { UNUSED(seed); return 0; }
double sample_1(uint64_t * seed) { UNUSED(seed); return 1; }
double sample_few(uint64_t * seed) { return sample_to(1, 3, seed); }
double sample_many(uint64_t * seed) { return sample_to(2, 10, seed); }
int n_dists = 4;
double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
double (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many };
return sample_mixture(samplers, weights, n_dists, seed);
}
int main()
{
int n_samples = 1000 * 1000, n_threads = 16;
double* results = malloc((size_t)n_samples * sizeof(double));
sampler_parallel(sampler_result, results, n_threads, n_samples);
printf("Avg: %f\n", array_sum(results, n_samples) / n_samples);
free(results);
}

View File

@ -1,70 +0,0 @@
#include "../../../squiggle.h"
#include "../../../squiggle_more.h"
#include <stdio.h>
#include <stdlib.h>
int main()
{
/* Question: can we parallelize this?
A = normal(5,2)
B = min(A)
B * 20
*/
/* Option 1: parallelize taking from n samples */
// Question being asked: what is the distribution of sampling 1000 times and taking the min?
double sample_min_of_n(uint64_t * seed, int n)
{
double min = sample_normal(5, 2, seed);
for (int i = 0; i < (n - 2); i++) {
double sample = sample_normal(5, 2, seed);
if (sample < min) {
min = sample;
}
}
return min;
}
double sample_min_of_1000(uint64_t * seed)
{
return sample_min_of_n(seed, 1000);
}
int n_samples = 1000000, n_threads = 16;
double* results = malloc((size_t)n_samples * sizeof(double));
sampler_parallel(sample_min_of_1000, results, n_threads, n_samples);
printf("Mean of the distribution of (taking the min of 1000 samples of a normal(5,2)): %f\n", array_mean(results, n_samples));
free(results);
/* Option 2: take the min from n samples cleverly using parallelism */
// Question being asked: can we take the min of n samples cleverly?
double sample_n_parallel(int n)
{
int n_threads = 16;
int quotient = n / 16;
int remainder = n % 16;
uint64_t seed = 1000;
double result_remainder = sample_min_of_n(&seed, remainder);
double sample_min_of_quotient(uint64_t * seed)
{
return sample_min_of_n(seed, quotient);
}
double* results_quotient = malloc((size_t)quotient * sizeof(double));
sampler_parallel(sample_min_of_quotient, results_quotient, n_threads, quotient);
double min = results_quotient[0];
for (int i = 1; i < quotient; i++) {
if (min > results_quotient[i]) {
min = results_quotient[i];
}
}
if (min > result_remainder) {
min = result_remainder;
}
free(results_quotient);
return min;
}
printf("Minimum of 1M samples of normal(5,2): %f\n", sample_n_parallel(1000000));
}

View File

@ -1,22 +0,0 @@
#include "../../../squiggle.h"
#include "../../../squiggle_more.h"
#include <stdio.h>
#include <stdlib.h>
int main()
{
// set randomness seed
uint64_t* seed = malloc(sizeof(uint64_t));
*seed = 1000; // xorshift can't start with a seed of 0
int n = 1000000;
double* xs = malloc(sizeof(double) * (size_t)n);
for (int i = 0; i < n; i++) {
xs[i] = sample_to(10, 100, seed);
}
ci ci_90 = array_get_90_ci(xs, n);
printf("Recovering confidence interval of sample_to(10, 100):\n low: %f, high: %f\n", ci_90.low, ci_90.high);
free(xs);
free(seed);
}

View File

@ -1,34 +0,0 @@
#include "../../../squiggle.h"
#include "../../../squiggle_more.h"
#include <stdio.h>
#include <stdlib.h>
double cumsum_p0 = 0.6;
double cumsum_p1 = 0.8;
double cumsum_p2 = 0.9;
double cumsum_p3 = 1.0;
double sampler_result(uint64_t * seed)
{
double p = sample_uniform(0, 1, seed);
if(p< cumsum_p0){
return 0;
} else if (p < cumsum_p1){
return 1;
} else if (p < cumsum_p2){
return sample_to(1,3, seed);
} else {
return sample_to(2, 10, seed);
}
}
int main()
{
int n_samples = 1000 * 1000, n_threads = 16;
double* results = malloc((size_t)n_samples * sizeof(double));
sampler_parallel(sampler_result, results, n_threads, n_samples);
printf("Avg: %f\n", array_sum(results, n_samples) / n_samples);
free(results);
}

View File

@ -1,117 +0,0 @@
# Interface:
# make all
# make format-all
# make run-all
# make one DIR=06_nuclear_recovery
# make format-one DIR=06_nuclear_recovery
# make run-one DIR=06_nuclear_recovery
# make time-linux-one DIR=06_nuclear_recovery
# make profile-one DIR=06_nuclear_recovery
# Compiler
CC=gcc
# CC=tcc # <= faster compilation
# Main file
SRC=example.c
OUTPUT=example
## Dependencies
SQUIGGLE=../../squiggle.c
SQUIGGLE_MORE=../../squiggle_more.c
MATH=-lm
OPENMP=-fopenmp
DEPS=$(SQUIGGLE) $(SQUIGGLE_MORE) $(MATH) $(OPENMP)
## Flags
# DEBUG=-fsanitize=address,undefined
# DEBUG=-g
DEBUG=
WARN=-Wall -Wextra -Wdouble-promotion -Wconversion
STANDARD=-std=c99
WARNINGS=-Wall
OPTIMIZED=-O3 #-Ofast
## Formatter
STYLE_BLUEPRINT=webkit
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
## make all
all:
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 00_example_template/$(SRC) $(DEPS) -o 00_example_template/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 02_ci_beta/$(SRC) $(DEPS) -o 02_ci_beta/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 03_ci_beta_parallel/$(SRC) $(DEPS) -o 03_ci_beta_parallel/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 04_nuclear_war/$(SRC) $(DEPS) -o 04_nuclear_war/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 05_burn_10kg_fat/$(SRC) $(DEPS) -o 05_burn_10kg_fat/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 06_nuclear_recovery/$(SRC) $(DEPS) -o 06_nuclear_recovery/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 07_algebra/$(SRC) $(DEPS) -o 07_algebra/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 08_algebra_and_conversion/$(SRC) $(DEPS) -o 08_algebra_and_conversion/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 09_ergonomic_algebra/$(SRC) $(DEPS) -o 09_ergonomic_algebra/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 10_twitter_thread_example/$(SRC) $(DEPS) -o 10_twitter_thread_example/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 11_billion_lognormals_paralell/$(SRC) $(DEPS) -o 11_billion_lognormals_paralell/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 12_time_to_botec_parallel/$(SRC) $(DEPS) -o 12_time_to_botec_parallel/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 13_parallelize_min/$(SRC) $(DEPS) -o 13_parallelize_min/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 14_check_confidence_interval/$(SRC) $(DEPS) -o 14_check_confidence_interval/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 15_time_to_botec_custom_mixture/$(SRC) $(DEPS) -o 15_time_to_botec_custom_mixture/$(OUTPUT)
format-all:
$(FORMATTER) 00_example_template/$(SRC)
$(FORMATTER) 01_sample_from_cdf/$(SRC)
$(FORMATTER) 02_ci_beta/$(SRC)
$(FORMATTER) 03_ci_beta_parallel/$(SRC)
$(FORMATTER) 04_nuclear_war/$(SRC)
$(FORMATTER) 05_burn_10kg_fat/$(SRC)
$(FORMATTER) 06_nuclear_recovery/$(SRC)
$(FORMATTER) 07_algebra/$(SRC)
$(FORMATTER) 08_algebra_and_conversion/$(SRC)
$(FORMATTER) 09_ergonomic_algebra/$(SRC)
$(FORMATTER) 10_twitter_thread_example/$(SRC)
$(FORMATTER) 11_billion_lognormals_paralell/$(SRC)
$(FORMATTER) 12_time_to_botec_parallel/$(SRC)
$(FORMATTER) 13_parallelize_min/$(SRC)
$(FORMATTER) 14_check_confidence_interval/$(SRC)
run-all:
00_example_template/$(OUTPUT)
01_sample_from_cdf/$(OUTPUT)
02_ci_beta/$(OUTPUT)
03_ci_beta_parallel/$(OUTPUT)
04_nuclear_war/$(OUTPUT)
05_burn_10kg_fat/$(OUTPUT)
06_nuclear_recovery/$(OUTPUT)
07_algebra/$(OUTPUT)
08_algebra_and_conversion/$(OUTPUT)
09_ergonomic_algebra/$(OUTPUT)
10_twitter_thread_example/$(OUTPUT)
11_billion_lognormals_paralell/$(OUTPUT)
12_time_to_botec_parallel/$(OUTPUT)
13_parallelize_min/$(OUTPUT)
14_check_confidence_interval/$(OUTPUT)
## make one DIR=06_nuclear_recovery
one: $(DIR)/$(SRC)
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) $(DIR)/$(SRC) $(DEPS) -o $(DIR)/$(OUTPUT)
## make format-one DIR=06_nuclear_recovery
format-one: $(DIR)/$(SRC)
$(FORMATTER) $(DIR)/$(SRC)
## make run-one DIR=06_nuclear_recovery
run-one: $(DIR)/$(OUTPUT)
$(DIR)/$(OUTPUT) && echo
## make time-linux-one DIR=06_nuclear_recovery
time-linux-one: $(DIR)/$(OUTPUT)
@echo "Requires /bin/time, found on GNU/Linux systems" && echo
@echo "Running 100x and taking avg time $(DIR)/$(OUTPUT)"
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do $(DIR)/$(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time: |" | sed 's|$$|ms|' && echo
## e.g., make profile-linux-one DIR=06_nuclear_recovery
profile-linux-one:
echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
echo "Must be run as sudo"
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) $(DIR)/$(SRC) $(DEPS) -o $(DIR)/$(OUTPUT)
# $(CC) $(SRC) $(DEPS) -o $(OUTPUT)
sudo perf record $(DIR)/$(OUTPUT)
sudo perf report
rm perf.data

View File

@ -1,35 +0,0 @@
MAKEFLAGS += --no-print-directory
## Formatter
STYLE_BLUEPRINT=webkit
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
## Time to botec
TTB=./examples/more/12_time_to_botec_parallel/example
build-examples:
cd examples/core && make all
cd examples/more && make all
format-examples:
cd examples/core && make format-all
cd examples/more && make format-all
format: squiggle.c squiggle.h
$(FORMATTER) squiggle.c squiggle.h
$(FORMATTER) squiggle_more.c squiggle_more.h
lint:
clang-tidy squiggle.c -- -lm
clang-tidy squiggle_more.c -- -lm
profile:
sudo perf record -g ./examples/more/12_time_to_botec_parallel/example
sudo perf report
rm perf.data
sudo perf stat ./examples/more/12_time_to_botec_parallel/example
time-linux:
gcc -O3 -Wall -Wextra -Wdouble-promotion -Wconversion examples/more/12_time_to_botec_parallel/example.c squiggle.c squiggle_more.c -lm -fopenmp -o examples/more/12_time_to_botec_parallel/example
@echo "Running 100x and taking avg time: $(TTB)"
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do OMP_PROC_BIND=TRUE $(TTB); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 16 threads: |" | sed 's|$$|ms|' && echo

View File

@ -1,37 +0,0 @@
#ifndef SQUIGGLE_C_CORE
#define SQUIGGLE_C_CORE
// uint64_t header
#include <stdint.h>
// Pseudo Random number generator
uint64_t xorshift64(uint64_t* seed);
// Basic distribution sampling functions
double sample_unit_uniform(uint64_t* seed);
double sample_unit_normal(uint64_t* seed);
// Composite distribution sampling functions
double sample_uniform(double start, double end, uint64_t* seed);
double sample_normal(double mean, double sigma, uint64_t* seed);
double sample_lognormal(double logmean, double logsigma, uint64_t* seed);
double sample_normal_from_90_ci(double low, double high, uint64_t* seed);
double sample_to(double low, double high, uint64_t* seed);
double sample_gamma(double alpha, uint64_t* seed);
double sample_beta(double a, double b, uint64_t* seed);
double sample_laplace(double successes, double failures, uint64_t* seed);
// Array helpers
double array_sum(double* array, int length);
void array_cumsum(double* array_to_sum, double* array_cumsummed, int length);
double array_mean(double* array, int length);
double array_std(double* array, int length);
// Mixture function
double sample_mixture(double (*samplers[])(uint64_t*), double* weights, int n_dists, uint64_t* seed);
// Macro to mute "unused variable" warning when -Wall -Wextra is enabled. Useful for nested functions
#define UNUSED(x) (void)(x)
#endif

View File

@ -1,459 +0,0 @@
#include "squiggle.h"
#include <float.h>
#include <limits.h>
#include <math.h>
#include <omp.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h> // memcpy
/* Cache optimizations */
#define CACHE_LINE_SIZE 64
// getconf LEVEL1_DCACHE_LINESIZE
// <https://stackoverflow.com/questions/794632/programmatically-get-the-cache-line-size>
typedef struct seed_cache_box_t {
uint64_t seed;
char padding[CACHE_LINE_SIZE - sizeof(uint64_t)];
// Cache line size is 64 *bytes*, uint64_t is 64 *bits* (8 bytes). Different units!
} seed_cache_box;
// This avoids "false sharing", i.e., different threads competing for the same cache line
// Dealing with this shaves 4ms from a 12ms process, or a third of runtime
// <http://www.nic.uoregon.edu/~khuck/ts/acumem-report/manual_html/ch06s07.html>
/* Parallel sampler */
void sampler_parallel(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples)
{
// Terms of the division:
// a = b * quotient + reminder
// a = b * (a/b) + (a%b)
// dividend: a
// divisor: b
// quotient = a/b
// reminder = a%b
// "divisor's multiple" := b*(a/b)
// now, we have n_samples and n_threads
// to make our life easy, each thread will have a number of samples of: a/b (quotient)
// and we'll compute the remainder of samples separately
// to possibly do by Jorge: improve so that the remainder is included in the threads
int quotient = n_samples / n_threads;
int divisor_multiple = quotient * n_threads;
// uint64_t** seeds = malloc((size_t)n_threads * sizeof(uint64_t*));
seed_cache_box* cache_box = (seed_cache_box*)malloc(sizeof(seed_cache_box) * (size_t)n_threads);
// seed_cache_box cache_box[n_threads]; // we could use the C stack. On normal linux machines, it's 8MB ($ ulimit -s). However, it doesn't quite feel right.
srand(1);
for (int i = 0; i < n_threads; i++) {
// Constraints:
// - xorshift can't start with 0
// - the seeds should be reasonably separated and not correlated
cache_box[i].seed = (uint64_t)rand() * (UINT64_MAX / RAND_MAX);
// Other initializations tried:
// *seeds[i] = 1 + i;
// *seeds[i] = (i + 0.5)*(UINT64_MAX/n_threads);
// *seeds[i] = (i + 0.5)*(UINT64_MAX/n_threads) + constant * i;
}
int i;
#pragma omp parallel private(i)
{
#pragma omp for
for (i = 0; i < n_threads; i++) {
// It's possible I don't need the for, and could instead call omp
// in some different way and get the thread number with omp_get_thread_num()
int lower_bound_inclusive = i * quotient;
int upper_bound_not_inclusive = ((i + 1) * quotient); // note the < in the for loop below,
for (int j = lower_bound_inclusive; j < upper_bound_not_inclusive; j++) {
results[j] = sampler(&(cache_box[i].seed));
/*
t starts at 0 and ends at T
at t=0,
thread i accesses: results[i*quotient +0],
thread i+1 acccesses: results[(i+1)*quotient +0]
at t=T
thread i accesses: results[(i+1)*quotient -1]
thread i+1 acccesses: results[(i+2)*quotient -1]
The results[j] that are directly adjacent are
results[(i+1)*quotient -1] (accessed by thread i at time T)
results[(i+1)*quotient +0] (accessed by thread i+1 at time 0)
and these are themselves adjacent to
results[(i+1)*quotient -2] (accessed by thread i at time T-1)
results[(i+1)*quotient +1] (accessed by thread i+1 at time 2)
If T is large enough, which it is, two threads won't access the same
cache line at the same time.
Pictorially:
at t=0 ....i.........I.........
at t=T .............i.........I
and the two never overlap
Note that results[j] is a double, a double has 8 bytes (64 bits)
8 doubles fill a cache line of 64 bytes.
So we specifically won't get problems as long as n_samples/n_threads > 8
n_threads is normally 16, so n_samples > 128
Note also that this is only a problem in terms of speed, if n_samples<128
the results are still computed, it'll just be slower
*/
}
}
}
for (int j = divisor_multiple; j < n_samples; j++) {
results[j] = sampler(&(cache_box[0].seed));
// we can just reuse a seed,
// this isn't problematic because we;ve now stopped doing multithreading
}
free(cache_box);
}
/* Get confidence intervals, given a sampler */
typedef struct ci_t {
double low;
double high;
} ci;
inline static void swp(int i, int j, double xs[])
{
double tmp = xs[i];
xs[i] = xs[j];
xs[j] = tmp;
}
static int partition(int low, int high, double xs[], int length)
{
if (low > high || high >= length) {
printf("Invariant violated for function partition in %s (%d)", __FILE__, __LINE__);
exit(1);
}
// Note: the scratchpad/ folder in commit 578bfa27 has printfs sprinkled throughout
int pivot = low + (int)floor((high - low) / 2);
double pivot_value = xs[pivot];
swp(pivot, high, xs);
int gt = low; /* This pointer will iterate until finding an element which is greater than the pivot. Then it will move elements that are smaller before it--more specifically, it will move elements to its position and then increment. As a result all elements between gt and i will be greater than the pivot. */
for (int i = low; i < high; i++) {
if (xs[i] < pivot_value) {
swp(gt, i, xs);
gt++;
}
}
swp(high, gt, xs);
return gt;
}
static double quickselect(int k, double xs[], int n)
{
// https://en.wikipedia.org/wiki/Quickselect
double* ys = malloc((size_t)n * sizeof(double));
memcpy(ys, xs, (size_t)n * sizeof(double));
// ^: don't rearrange item order in the original array
int low = 0;
int high = n - 1;
for (;;) {
if (low == high) {
double result = ys[low];
free(ys);
return result;
}
int pivot = partition(low, high, ys, n);
if (pivot == k) {
double result = ys[pivot];
free(ys);
return result;
} else if (k < pivot) {
high = pivot - 1;
} else {
low = pivot + 1;
}
}
}
ci array_get_ci(ci interval, double* xs, int n)
{
int low_k = (int)floor(interval.low * n);
int high_k = (int)ceil(interval.high * n);
ci result = {
.low = quickselect(low_k, xs, n),
.high = quickselect(high_k, xs, n),
};
return result;
}
ci array_get_90_ci(double xs[], int n)
{
return array_get_ci((ci) { .low = 0.05, .high = 0.95 }, xs, n);
}
double array_get_median(double xs[], int n)
{
int median_k = (int)floor(0.5 * n);
return quickselect(median_k, xs, n);
}
/* array print: potentially useful for debugging */
void array_print(double xs[], int n)
{
printf("[");
for (int i = 0; i < n - 1; i++) {
printf("%f, ", xs[i]);
}
printf("%f", xs[n - 1]);
printf("]\n");
}
void array_print_stats(double xs[], int n)
{
ci ci_90 = array_get_ci((ci) { .low = 0.05, .high = 0.95 }, xs, n);
ci ci_80 = array_get_ci((ci) { .low = 0.1, .high = 0.9 }, xs, n);
ci ci_50 = array_get_ci((ci) { .low = 0.25, .high = 0.75 }, xs, n);
double median = array_get_median(xs, n);
double mean = array_mean(xs, n);
double std = array_std(xs, n);
printf("| Statistic | Value |\n"
"| --- | --- |\n"
"| Mean | %lf |\n"
"| Median | %lf |\n"
"| Std | %lf |\n"
"| 90%% confidence interval | %lf to %lf |\n"
"| 80%% confidence interval | %lf to %lf |\n"
"| 50%% confidence interval | %lf to %lf |\n",
mean, median, std, ci_90.low, ci_90.high, ci_80.low, ci_80.high, ci_50.low, ci_50.high);
}
void array_print_histogram(double* xs, int n_samples, int n_bins)
{
// Interface inspired by <https://github.com/red-data-tools/YouPlot>
if (n_bins <= 1) {
fprintf(stderr, "Number of bins must be greater than 1.\n");
return;
} else if (n_samples <= 1) {
fprintf(stderr, "Number of samples must be higher than 1.\n");
return;
}
int* bins = (int*)calloc((size_t)n_bins, sizeof(int));
if (bins == NULL) {
fprintf(stderr, "Memory allocation for bins failed.\n");
return;
}
// Find the minimum and maximum values from the samples
double min_value = xs[0], max_value = xs[0];
for (int i = 0; i < n_samples; i++) {
if (xs[i] < min_value) {
min_value = xs[i];
}
if (xs[i] > max_value) {
max_value = xs[i];
}
}
// Avoid division by zero for a single unique value
if (min_value == max_value) {
max_value++;
}
// Calculate bin width
double bin_width = (max_value - min_value) / n_bins;
// Fill the bins with sample counts
for (int i = 0; i < n_samples; i++) {
int bin_index = (int)((xs[i] - min_value) / bin_width);
if (bin_index == n_bins) {
bin_index--; // Last bin includes max_value
}
bins[bin_index]++;
}
// Calculate the scaling factor based on the maximum bin count
int max_bin_count = 0;
for (int i = 0; i < n_bins; i++) {
if (bins[i] > max_bin_count) {
max_bin_count = bins[i];
}
}
const int MAX_WIDTH = 50; // Adjust this to your terminal width
double scale = max_bin_count > MAX_WIDTH ? (double)MAX_WIDTH / max_bin_count : 1.0;
// Print the histogram
for (int i = 0; i < n_bins; i++) {
double bin_start = min_value + i * bin_width;
double bin_end = bin_start + bin_width;
int decimalPlaces = 1;
if ((0 < bin_width) && (bin_width < 1)) {
int magnitude = (int)floor(log10(bin_width));
decimalPlaces = -magnitude;
decimalPlaces = decimalPlaces > 10 ? 10 : decimalPlaces;
}
printf("[%*.*f, %*.*f", 4 + decimalPlaces, decimalPlaces, bin_start, 4 + decimalPlaces, decimalPlaces, bin_end);
char interval_delimiter = ')';
if (i == (n_bins - 1)) {
interval_delimiter = ']'; // last bucket is inclusive
}
printf("%c: ", interval_delimiter);
int marks = (int)(bins[i] * scale);
for (int j = 0; j < marks; j++) {
printf("");
}
printf(" %d\n", bins[i]);
}
// Free the allocated memory for bins
free(bins);
}
void array_print_90_ci_histogram(double* xs, int n_samples, int n_bins)
{
// Code duplicated from previous function
// I'll consider simplifying it at some future point
// Possible ideas:
// - having only one function that takes any confidence interval?
// - having a utility function that is called by both functions?
ci ci_90 = array_get_90_ci(xs, n_samples);
if (n_bins <= 1) {
fprintf(stderr, "Number of bins must be greater than 1.\n");
return;
} else if (n_samples <= 10) {
fprintf(stderr, "Number of samples must be higher than 10.\n");
return;
}
int* bins = (int*)calloc((size_t)n_bins, sizeof(int));
if (bins == NULL) {
fprintf(stderr, "Memory allocation for bins failed.\n");
return;
}
double min_value = ci_90.low, max_value = ci_90.high;
// Avoid division by zero for a single unique value
if (min_value == max_value) {
max_value++;
}
double bin_width = (max_value - min_value) / n_bins;
// Fill the bins with sample counts
int below_min = 0, above_max = 0;
for (int i = 0; i < n_samples; i++) {
if (xs[i] < min_value) {
below_min++;
} else if (xs[i] > max_value) {
above_max++;
} else {
int bin_index = (int)((xs[i] - min_value) / bin_width);
if (bin_index == n_bins) {
bin_index--; // Last bin includes max_value
}
bins[bin_index]++;
}
}
// Calculate the scaling factor based on the maximum bin count
int max_bin_count = 0;
for (int i = 0; i < n_bins; i++) {
if (bins[i] > max_bin_count) {
max_bin_count = bins[i];
}
}
const int MAX_WIDTH = 40; // Adjust this to your terminal width
double scale = max_bin_count > MAX_WIDTH ? (double)MAX_WIDTH / max_bin_count : 1.0;
// Print the histogram
int decimalPlaces = 1;
if ((0 < bin_width) && (bin_width < 1)) {
int magnitude = (int)floor(log10(bin_width));
decimalPlaces = -magnitude;
decimalPlaces = decimalPlaces > 10 ? 10 : decimalPlaces;
}
printf("(%*s, %*.*f): ", 6 + decimalPlaces, "-∞", 4 + decimalPlaces, decimalPlaces, min_value);
int marks_below_min = (int)(below_min * scale);
for (int j = 0; j < marks_below_min; j++) {
printf("");
}
printf(" %d\n", below_min);
for (int i = 0; i < n_bins; i++) {
double bin_start = min_value + i * bin_width;
double bin_end = bin_start + bin_width;
printf("[%*.*f, %*.*f", 4 + decimalPlaces, decimalPlaces, bin_start, 4 + decimalPlaces, decimalPlaces, bin_end);
char interval_delimiter = ')';
if (i == (n_bins - 1)) {
interval_delimiter = ']'; // last bucket is inclusive
}
printf("%c: ", interval_delimiter);
int marks = (int)(bins[i] * scale);
for (int j = 0; j < marks; j++) {
printf("");
}
printf(" %d\n", bins[i]);
}
printf("(%*.*f, %*s): ", 4 + decimalPlaces, decimalPlaces, max_value, 6 + decimalPlaces, "+∞");
int marks_above_max = (int)(above_max * scale);
for (int j = 0; j < marks_above_max; j++) {
printf("");
}
printf(" %d\n", above_max);
// Free the allocated memory for bins
free(bins);
}
/* Algebra manipulations */
#define NORMAL90CONFIDENCE 1.6448536269514727
typedef struct normal_params_t {
double mean;
double std;
} normal_params;
normal_params algebra_sum_normals(normal_params a, normal_params b)
{
normal_params result = {
.mean = a.mean + b.mean,
.std = sqrt((a.std * a.std) + (b.std * b.std)),
};
return result;
}
typedef struct lognormal_params_t {
double logmean;
double logstd;
} lognormal_params;
lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params b)
{
lognormal_params result = {
.logmean = a.logmean + b.logmean,
.logstd = sqrt((a.logstd * a.logstd) + (b.logstd * b.logstd)),
};
return result;
}
lognormal_params convert_ci_to_lognormal_params(ci x)
{
double loghigh = log(x.high);
double loglow = log(x.low);
double logmean = (loghigh + loglow) / 2.0;
double logstd = (loghigh - loglow) / (2.0 * NORMAL90CONFIDENCE);
lognormal_params result = { .logmean = logmean, .logstd = logstd };
return result;
}
ci convert_lognormal_params_to_ci(lognormal_params y)
{
double h = y.logstd * NORMAL90CONFIDENCE;
double loghigh = y.logmean + h;
double loglow = y.logmean - h;
ci result = { .low = exp(loglow), .high = exp(loghigh) };
return result;
}

View File

@ -1,42 +0,0 @@
#ifndef SQUIGGLE_C_EXTRA
#define SQUIGGLE_C_EXTRA
/* Parallel sampling */
void sampler_parallel(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples);
/* Stats */
double array_get_median(double xs[], int n);
typedef struct ci_t {
double low;
double high;
} ci;
ci array_get_ci(ci interval, double* xs, int n);
ci array_get_90_ci(double xs[], int n);
void array_print_stats(double xs[], int n);
void array_print_histogram(double* xs, int n_samples, int n_bins);
void array_print_90_ci_histogram(double* xs, int n, int n_bins);
/* Algebra manipulations */
typedef struct normal_params_t {
double mean;
double std;
} normal_params;
normal_params algebra_sum_normals(normal_params a, normal_params b);
typedef struct lognormal_params_t {
double logmean;
double logstd;
} lognormal_params;
lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params b);
lognormal_params convert_ci_to_lognormal_params(ci x);
ci convert_lognormal_params_to_ci(lognormal_params y);
/* Utilities */
#define THOUSAND 1000
#define MILLION 1000000
#endif

View File

@ -1,56 +0,0 @@
# Interface:
# make
# make build
# make format
# make run
# Compiler
CC=gcc
# CC=tcc # <= faster compilation
# Main file
SRC=test.c ../squiggle.c
OUTPUT=test
## Dependencies
MATH=-lm
## Flags
DEBUG= #'-g'
STANDARD=-std=c99
WARNINGS=-Wall
OPTIMIZED=-O3 #-Ofast
# OPENMP=-fopenmp
## Formatter
STYLE_BLUEPRINT=webkit
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
## make build
build: $(SRC)
$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)
format: $(SRC)
$(FORMATTER) $(SRC)
run: $(SRC) $(OUTPUT)
./$(OUTPUT)
verify: $(SRC) $(OUTPUT)
./$(OUTPUT) | grep "NOT passed" -A 2 --group-separator='' || true
time-linux:
@echo "Requires /bin/time, found on GNU/Linux systems" && echo
@echo "Running 100x and taking avg time $(OUTPUT)"
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
## Profiling
profile-linux:
echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
echo "Must be run as sudo"
$(CC) $(SRC) $(MATH) -o $(OUTPUT)
sudo perf record ./$(OUTPUT)
sudo perf report
rm perf.data

Binary file not shown.

View File

@ -1,328 +0,0 @@
#include "../squiggle.h"
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#define TOLERANCE 5.0 / 1000.0
#define MAX_NAME_LENGTH 500
// Structs
struct array_expectations {
double* array;
int n;
char* name;
double expected_mean;
double expected_std;
double tolerance;
};
void test_array_expectations(struct array_expectations e)
{
double mean = array_mean(e.array, e.n);
double delta_mean = mean - e.expected_mean;
double std = array_std(e.array, e.n);
double delta_std = std - e.expected_std;
if ((fabs(delta_mean) / fabs(mean) > e.tolerance) && (fabs(delta_mean) > e.tolerance)) {
printf("[-] Mean test for %s NOT passed.\n", e.name);
printf("Mean of %s: %f, vs expected mean: %f\n", e.name, mean, e.expected_mean);
printf("delta: %f, relative delta: %f\n", delta_mean, delta_mean / fabs(mean));
} else {
printf("[x] Mean test for %s PASSED\n", e.name);
}
if ((fabs(delta_std) / fabs(std) > e.tolerance) && (fabs(delta_std) > e.tolerance)) {
printf("[-] Std test for %s NOT passed.\n", e.name);
printf("Std of %s: %f, vs expected std: %f\n", e.name, std, e.expected_std);
printf("delta: %f, relative delta: %f\n", delta_std, delta_std / fabs(std));
} else {
printf("[x] Std test for %s PASSED\n", e.name);
}
printf("\n");
}
// Test unit uniform
void test_unit_uniform(uint64_t* seed)
{
int n = 1000 * 1000;
double* unit_uniform_array = malloc(sizeof(double) * n);
for (int i = 0; i < n; i++) {
unit_uniform_array[i] = sample_unit_uniform(seed);
}
struct array_expectations expectations = {
.array = unit_uniform_array,
.n = n,
.name = "unit uniform",
.expected_mean = 0.5,
.expected_std = sqrt(1.0 / 12.0),
.tolerance = TOLERANCE,
};
test_array_expectations(expectations);
free(unit_uniform_array);
}
// Test uniforms
void test_uniform(double start, double end, uint64_t* seed)
{
int n = 1000 * 1000;
double* uniform_array = malloc(sizeof(double) * n);
for (int i = 0; i < n; i++) {
uniform_array[i] = sample_uniform(start, end, seed);
}
char* name = malloc(MAX_NAME_LENGTH * sizeof(char));
snprintf(name, MAX_NAME_LENGTH, "[%f, %f] uniform", start, end);
struct array_expectations expectations = {
.array = uniform_array,
.n = n,
.name = name,
.expected_mean = (start + end) / 2,
.expected_std = sqrt(1.0 / 12.0) * fabs(end - start),
.tolerance = fabs(end - start) * TOLERANCE,
};
test_array_expectations(expectations);
free(name);
free(uniform_array);
}
// Test unit normal
void test_unit_normal(uint64_t* seed)
{
int n = 1000 * 1000;
double* unit_normal_array = malloc(sizeof(double) * n);
for (int i = 0; i < n; i++) {
unit_normal_array[i] = sample_unit_normal(seed);
}
struct array_expectations expectations = {
.array = unit_normal_array,
.n = n,
.name = "unit normal",
.expected_mean = 0,
.expected_std = 1,
.tolerance = TOLERANCE,
};
test_array_expectations(expectations);
free(unit_normal_array);
}
// Test normal
void test_normal(double mean, double std, uint64_t* seed)
{
int n = 10 * 1000 * 1000;
double* normal_array = malloc(sizeof(double) * n);
for (int i = 0; i < n; i++) {
normal_array[i] = sample_normal(mean, std, seed);
}
char* name = malloc(MAX_NAME_LENGTH * sizeof(char));
snprintf(name, MAX_NAME_LENGTH, "normal(%f, %f)", mean, std);
struct array_expectations expectations = {
.array = normal_array,
.n = n,
.name = name,
.expected_mean = mean,
.expected_std = std,
.tolerance = TOLERANCE,
};
test_array_expectations(expectations);
free(name);
free(normal_array);
}
// Test lognormal
void test_lognormal(double logmean, double logstd, uint64_t* seed)
{
int n = 10 * 1000 * 1000;
double* lognormal_array = malloc(sizeof(double) * n);
for (int i = 0; i < n; i++) {
lognormal_array[i] = sample_lognormal(logmean, logstd, seed);
}
char* name = malloc(MAX_NAME_LENGTH * sizeof(char));
snprintf(name, MAX_NAME_LENGTH, "lognormal(%f, %f)", logmean, logstd);
struct array_expectations expectations = {
.array = lognormal_array,
.n = n,
.name = name,
.expected_mean = exp(logmean + pow(logstd, 2) / 2),
.expected_std = sqrt((exp(pow(logstd, 2)) - 1) * exp(2 * logmean + pow(logstd, 2))),
.tolerance = TOLERANCE,
};
test_array_expectations(expectations);
free(name);
free(lognormal_array);
}
// Test lognormal to
void test_to(double low, double high, uint64_t* seed)
{
int n = 10 * 1000 * 1000;
double* lognormal_array = malloc(sizeof(double) * n);
for (int i = 0; i < n; i++) {
lognormal_array[i] = sample_to(low, high, seed);
}
char* name = malloc(MAX_NAME_LENGTH * sizeof(char));
snprintf(name, MAX_NAME_LENGTH, "to(%f, %f)", low, high);
const double NORMAL95CONFIDENCE = 1.6448536269514722;
double loglow = logf(low);
double loghigh = logf(high);
double logmean = (loglow + loghigh) / 2;
double logstd = (loghigh - loglow) / (2.0 * NORMAL95CONFIDENCE);
struct array_expectations expectations = {
.array = lognormal_array,
.n = n,
.name = name,
.expected_mean = exp(logmean + pow(logstd, 2) / 2),
.expected_std = sqrt((exp(pow(logstd, 2)) - 1) * exp(2 * logmean + pow(logstd, 2))),
.tolerance = TOLERANCE,
};
test_array_expectations(expectations);
free(name);
free(lognormal_array);
}
// Test beta
void test_beta(double a, double b, uint64_t* seed)
{
int n = 10 * 1000 * 1000;
double* beta_array = malloc(sizeof(double) * n);
for (int i = 0; i < n; i++) {
beta_array[i] = sample_beta(a, b, seed);
}
char* name = malloc(MAX_NAME_LENGTH * sizeof(char));
snprintf(name, MAX_NAME_LENGTH, "beta(%f, %f)", a, b);
struct array_expectations expectations = {
.array = beta_array,
.n = n,
.name = name,
.expected_mean = a / (a + b),
.expected_std = sqrt((a * b) / (pow(a + b, 2) * (a + b + 1))),
.tolerance = TOLERANCE,
};
test_array_expectations(expectations);
free(name);
}
int main()
{
// set randomness seed
uint64_t* seed = malloc(sizeof(uint64_t));
*seed = 1000; // xorshift can't start with a seed of 0
printf("Testing unit uniform\n");
test_unit_uniform(seed);
printf("Testing small uniforms\n");
for (int i = 0; i < 100; i++) {
double start = sample_uniform(-10, 10, seed);
double end = sample_uniform(-10, 10, seed);
if (end > start) {
test_uniform(start, end, seed);
}
}
printf("Testing wide uniforms\n");
for (int i = 0; i < 100; i++) {
double start = sample_uniform(-1000 * 1000, 1000 * 1000, seed);
double end = sample_uniform(-1000 * 1000, 1000 * 1000, seed);
if (end > start) {
test_uniform(start, end, seed);
}
}
printf("Testing unit normal\n");
test_unit_normal(seed);
printf("Testing small normals\n");
for (int i = 0; i < 100; i++) {
double mean = sample_uniform(-10, 10, seed);
double std = sample_uniform(0, 10, seed);
if (std > 0) {
test_normal(mean, std, seed);
}
}
printf("Testing larger normals\n");
for (int i = 0; i < 100; i++) {
double mean = sample_uniform(-1000 * 1000, 1000 * 1000, seed);
double std = sample_uniform(0, 1000 * 1000, seed);
if (std > 0) {
test_normal(mean, std, seed);
}
}
printf("Testing smaller lognormals\n");
for (int i = 0; i < 100; i++) {
double mean = sample_uniform(-1, 1, seed);
double std = sample_uniform(0, 1, seed);
if (std > 0) {
test_lognormal(mean, std, seed);
}
}
printf("Testing larger lognormals\n");
for (int i = 0; i < 100; i++) {
double mean = sample_uniform(-1, 5, seed);
double std = sample_uniform(0, 5, seed);
if (std > 0) {
test_lognormal(mean, std, seed);
}
}
printf("Testing lognormals — sample_to(low, high) syntax\n");
for (int i = 0; i < 100; i++) {
double low = sample_uniform(0, 1000 * 1000, seed);
double high = sample_uniform(0, 1000 * 1000, seed);
if (low < high) {
test_to(low, high, seed);
}
}
// Bonus example
test_to(10, 10 * 1000, seed);
printf("Testing beta distribution\n");
for (int i = 0; i < 100; i++) {
double a = sample_uniform(0, 1000, seed);
double b = sample_uniform(0, 1000, seed);
if ((a > 0) && (b > 0)) {
test_beta(a, b, seed);
}
}
printf("Testing larger beta distributions\n");
for (int i = 0; i < 100; i++) {
double a = sample_uniform(0, 1000 * 1000, seed);
double b = sample_uniform(0, 1000 * 1000, seed);
if ((a > 0) && (b > 0)) {
test_beta(a, b, seed);
}
}
free(seed);
}

View File

@ -7,10 +7,9 @@
- [x] Make README.md less messy
- [x] Give examples of new functions
- [x] Reference commit with cdf functions, even though deleted
- [ ] Figure out fixed point libraries <https://github.com/PetteriAimonen/libfixmath/>, and overflow guards for operations
- [ ] Post on suckless subreddit
- [ ] Look into <https://lite.duckduckgo.com/html/> instead?
- [ ] Drive in a few more real-life applications
- [ ] US election modelling?
- [ ] Look into using size_t instead of int for sample numbers
- [ ] Reorganize code a little bit to reduce usage of gcc's nested functions
- [ ] Rename examples

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More