Compare commits

..

No commits in common. "58a329bcc349866ac02093ab525f1dfa1cdc6b62" and "ffd6e5dcbb9e213b8adf8f3988dbea99a9f6837d" have entirely different histories.

40 changed files with 274 additions and 294 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -15,9 +15,8 @@ int main()
uint64_t* seed = malloc(sizeof(uint64_t)); uint64_t* seed = malloc(sizeof(uint64_t));
*seed = 1000; // xorshift can't start with 0 *seed = 1000; // xorshift can't start with 0
ci beta_1_2_ci_90 = sampler_get_90_ci(beta_1_2_sampler, 1000000, seed); ci beta_1_2_ci_90 = get_90_confidence_interval(beta_1_2_sampler, seed);
printf("90%% confidence interval of beta(1,2) is [%f, %f]\n", beta_1_2_ci_90.low, beta_1_2_ci_90.high); printf("90%% confidence interval of beta(1,2) is [%f, %f]\n", beta_1_2_ci_90.low, beta_1_2_ci_90.high);
printf("You can check this in <https://nunosempere.com/blog/2023/03/15/fit-beta/>\n");
free(seed); free(seed);
} }

Binary file not shown.

View File

@ -60,7 +60,7 @@ int main()
} }
printf("... ]\n"); printf("... ]\n");
ci ci_90 = sampler_get_90_ci(mixture, 1000000, seed); ci ci_90 = get_90_confidence_interval(mixture, seed);
printf("mean: %f\n", array_mean(mixture_result, n)); printf("mean: %f\n", array_mean(mixture_result, n));
printf("90%% confidence interval: [%f, %f]\n", ci_90.low, ci_90.high); printf("90%% confidence interval: [%f, %f]\n", ci_90.low, ci_90.high);

View File

@ -41,7 +41,7 @@ int main()
} }
printf("... ]\n"); printf("... ]\n");
ci ci_90 = sampler_get_90_ci(sample_minutes_per_day_jumping_rope_needed_to_burn_10kg, 1000000, seed); ci ci_90 = get_90_confidence_interval(sample_minutes_per_day_jumping_rope_needed_to_burn_10kg, seed);
printf("90%% confidence interval: [%f, %f]\n", ci_90.low, ci_90.high); printf("90%% confidence interval: [%f, %f]\n", ci_90.low, ci_90.high);
free(seed); free(seed);

View File

@ -50,7 +50,7 @@ int main()
// Before a first nuclear collapse // Before a first nuclear collapse
printf("## Before the first nuclear collapse\n"); printf("## Before the first nuclear collapse\n");
ci ci_90_2023 = sampler_get_90_ci(yearly_probability_nuclear_collapse_2023, 1000000, seed); ci ci_90_2023 = get_90_confidence_interval(yearly_probability_nuclear_collapse_2023, seed);
printf("90%% confidence interval: [%f, %f]\n", ci_90_2023.low, ci_90_2023.high); printf("90%% confidence interval: [%f, %f]\n", ci_90_2023.low, ci_90_2023.high);
double* yearly_probability_nuclear_collapse_2023_samples = malloc(sizeof(double) * num_samples); double* yearly_probability_nuclear_collapse_2023_samples = malloc(sizeof(double) * num_samples);
@ -61,7 +61,7 @@ int main()
// After the first nuclear collapse // After the first nuclear collapse
printf("\n## After the first nuclear collapse\n"); printf("\n## After the first nuclear collapse\n");
ci ci_90_2070 = sampler_get_90_ci(yearly_probability_nuclear_collapse_after_recovery_example, 1000000, seed); ci ci_90_2070 = get_90_confidence_interval(yearly_probability_nuclear_collapse_after_recovery_example, seed);
printf("90%% confidence interval: [%f, %f]\n", ci_90_2070.low, ci_90_2070.high); printf("90%% confidence interval: [%f, %f]\n", ci_90_2070.low, ci_90_2070.high);
double* yearly_probability_nuclear_collapse_after_recovery_samples = malloc(sizeof(double) * num_samples); double* yearly_probability_nuclear_collapse_after_recovery_samples = malloc(sizeof(double) * num_samples);
@ -72,7 +72,7 @@ int main()
// After the first nuclear collapse (antiinductive) // After the first nuclear collapse (antiinductive)
printf("\n## After the first nuclear collapse (antiinductive)\n"); printf("\n## After the first nuclear collapse (antiinductive)\n");
ci ci_90_antiinductive = sampler_get_90_ci(yearly_probability_nuclear_collapse_after_recovery_antiinductive, 1000000, seed); ci ci_90_antiinductive = get_90_confidence_interval(yearly_probability_nuclear_collapse_after_recovery_antiinductive, seed);
printf("90%% confidence interval: [%f, %f]\n", ci_90_antiinductive.low, ci_90_antiinductive.high); printf("90%% confidence interval: [%f, %f]\n", ci_90_antiinductive.low, ci_90_antiinductive.high);
double* yearly_probability_nuclear_collapse_after_recovery_antiinductive_samples = malloc(sizeof(double) * num_samples); double* yearly_probability_nuclear_collapse_after_recovery_antiinductive_samples = malloc(sizeof(double) * num_samples);

Binary file not shown.

View File

@ -9,22 +9,21 @@ int main()
// set randomness seed // set randomness seed
// uint64_t* seed = malloc(sizeof(uint64_t)); // uint64_t* seed = malloc(sizeof(uint64_t));
// *seed = 1000; // xorshift can't start with 0 // *seed = 1000; // xorshift can't start with 0
// ^ not necessary, because sampler_parallel takes care of the seed. // ^ not necessary, because parallel_sampler takes care of the seed.
int n_samples = 1000 * 1000 * 1000; int n_samples = 1000 * 1000 * 1000;
int n_threads = 16; int n_threads = 16;
double sampler(uint64_t * seed) double sampler(uint64_t* seed){
{
return sample_lognormal(0, 10, seed); return sample_lognormal(0, 10, seed);
} }
double* results = malloc(n_samples * sizeof(double)); double* results = malloc(n_samples * sizeof(double));
sampler_parallel(sampler, results, n_threads, n_samples); parallel_sampler(sampler, results, n_threads, n_samples);
double avg = array_sum(results, n_samples)/n_samples; double avg = array_sum(results, n_samples)/n_samples;
printf("Average of 1B lognormal(0,10): %f", avg); printf("Average of 1B lognormal(0,10): %f", avg);
free(results); free(results);
// free(seed); // free(seed);
// ^ not necessary, because sampler_parallel takes care of the seed. // ^ not necessary, because parallel_sampler takes care of the seed.
} }

View File

@ -17,14 +17,13 @@ int main()
int n_dists = 4; int n_dists = 4;
double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 }; double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
double (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many }; double (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many };
double sampler_result(uint64_t * seed) double sampler_result(uint64_t* seed) {
{
return sample_mixture(samplers, weights, n_dists, seed); return sample_mixture(samplers, weights, n_dists, seed);
} }
int n_samples = 1000 * 1000, n_threads = 16; int n_samples = 1000 * 1000, n_threads = 16;
double* results = malloc(n_samples * sizeof(double)); double* results = malloc(n_samples * sizeof(double));
sampler_parallel(sampler_result, results, n_threads, n_samples); parallel_sampler(sampler_result, results, n_threads, n_samples);
printf("Avg: %f\n", array_sum(results, n_samples)/n_samples); printf("Avg: %f\n", array_sum(results, n_samples)/n_samples);
free(results); free(results);
} }

View File

@ -13,10 +13,9 @@ int main()
/* Option 1: parallelize taking from n samples */ /* Option 1: parallelize taking from n samples */
// Question being asked: what is the distribution of sampling 1000 times and taking the min? // Question being asked: what is the distribution of sampling 1000 times and taking the min?
double sample_min_of_n(uint64_t * seed, int n) double sample_min_of_n(uint64_t* seed, int n){
{
double min = sample_normal(5, 2, seed); double min = sample_normal(5, 2, seed);
for (int i = 0; i < (n - 2); i++) { for(int i=0; i<(n-1); i++){
double sample = sample_normal(5, 2, seed); double sample = sample_normal(5, 2, seed);
if(sample < min){ if(sample < min){
min = sample; min = sample;
@ -24,47 +23,47 @@ int main()
} }
return min; return min;
} }
double sample_min_of_1000(uint64_t * seed) double sampler_min_of_1000(uint64_t* seed) {
{
return sample_min_of_n(seed, 1000); return sample_min_of_n(seed, 1000);
} }
int n_samples = 1000000, n_threads = 16; int n_samples = 10000, n_threads = 16;
double* results = malloc(n_samples * sizeof(double)); double* results = malloc(n_samples * sizeof(double));
sampler_parallel(sample_min_of_1000, results, n_threads, n_samples); parallel_sampler(sampler_min_of_1000, results, n_threads, n_samples);
printf("Mean of the distribution of (taking the min of 1000 samples of a normal(5,2)): %f\n", array_mean(results, n_samples)); printf("Mean of the distribution of (taking the min of 1000 samples of a normal(5,2)): %f\n", array_mean(results, n_samples));
free(results); free(results);
/* Option 2: take the min from n samples cleverly using parallelism */ /* Option 2: take the min from n samples cleverly using parallelism */
// Question being asked: can we take the min of n samples cleverly? // Question being asked: can we take the min of n samples cleverly?
double sample_n_parallel(int n) double sample_n_parallel(int n){
{
int n_threads = 16; int n_threads = 16;
int quotient = n / 16; int quotient = n / 16;
int remainder = n % 16; int remainder = n % 16;
uint64_t seed = 1000; uint64_t seed = 100;
double result_remainder = sample_min_of_n(&seed, remainder); double result_remainder = sample_min_of_n(&seed, remainder);
double sample_min_of_quotient(uint64_t * seed) double sample_min_of_quotient(uint64_t* seed) {
{ double result = sample_min_of_n(seed, quotient);
return sample_min_of_n(seed, quotient); // printf("Result: %f\n", result);
return result;
} }
double* results_quotient = malloc(quotient * sizeof(double)); double* results = malloc(n_threads * sizeof(double));
sampler_parallel(sample_min_of_quotient, results_quotient, n_threads, quotient); parallel_sampler(sample_min_of_quotient, results, n_threads, n_threads);
double min = results_quotient[0]; double min = results[0];
for (int i = 1; i < quotient; i++) { for(int i=1; i<n_threads; i++){
if (min > results_quotient[i]) { if(min > results[i]){
min = results_quotient[i]; min = results[i];
} }
} }
if(min > result_remainder){ if(min > result_remainder){
min = result_remainder; min = result_remainder;
} }
free(results_quotient); free(results);
return min; return min;
} }
printf("Minimum of 1M samples of normal(5,2): %f\n", sample_n_parallel(1000000)); printf("Minimum of 10M samples of normal(5,2): %f\n", sample_n_parallel(1000 * 1000));
} }

View File

@ -1,21 +0,0 @@
#include "../../../squiggle.h"
#include "../../../squiggle_more.h"
#include <stdio.h>
#include <stdlib.h>
int main()
{
// set randomness seed
uint64_t* seed = malloc(sizeof(uint64_t));
*seed = 1000; // xorshift can't start with a seed of 0
int n = 1000000;
double* xs = malloc(sizeof(double) * n);
for (int i = 0; i < n; i++) {
xs[i] = sample_to(10, 100, seed);
}
ci ci_90 = array_get_90_ci(xs, n);
printf("Recovering confidence interval of sample_to(10, 100):\n low: %f, high: %f\n", ci_90.low, ci_90.high);
free(seed);
}

View File

@ -49,7 +49,6 @@ all:
$(CC) $(OPTIMIZED) $(DEBUG) 11_billion_lognormals_paralell/$(SRC) $(DEPS) -o 11_billion_lognormals_paralell/$(OUTPUT) $(CC) $(OPTIMIZED) $(DEBUG) 11_billion_lognormals_paralell/$(SRC) $(DEPS) -o 11_billion_lognormals_paralell/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) 12_time_to_botec_parallel/$(SRC) $(DEPS) -o 12_time_to_botec_parallel/$(OUTPUT) $(CC) $(OPTIMIZED) $(DEBUG) 12_time_to_botec_parallel/$(SRC) $(DEPS) -o 12_time_to_botec_parallel/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) 13_parallelize_min/$(SRC) $(DEPS) -o 13_parallelize_min/$(OUTPUT) $(CC) $(OPTIMIZED) $(DEBUG) 13_parallelize_min/$(SRC) $(DEPS) -o 13_parallelize_min/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) 14_check_confidence_interval/$(SRC) $(DEPS) -o 14_check_confidence_interval/$(OUTPUT)
format-all: format-all:
$(FORMATTER) 00_example_template/$(SRC) $(FORMATTER) 00_example_template/$(SRC)
@ -66,7 +65,6 @@ format-all:
$(FORMATTER) 11_billion_lognormals_paralell/$(SRC) $(FORMATTER) 11_billion_lognormals_paralell/$(SRC)
$(FORMATTER) 12_time_to_botec_parallel/$(SRC) $(FORMATTER) 12_time_to_botec_parallel/$(SRC)
$(FORMATTER) 13_parallelize_min/$(SRC) $(FORMATTER) 13_parallelize_min/$(SRC)
$(FORMATTER) 14_check_confidence_interval/$(SRC)
run-all: run-all:
00_example_template/$(OUTPUT) 00_example_template/$(OUTPUT)
@ -83,7 +81,6 @@ run-all:
11_billion_lognormals_paralell/$(OUTPUT) 11_billion_lognormals_paralell/$(OUTPUT)
12_time_to_botec_parallel/$(OUTPUT) 12_time_to_botec_parallel/$(OUTPUT)
13_parallelize_min/$(OUTPUT) 13_parallelize_min/$(OUTPUT)
14_check_confidence_interval/$(OUTPUT)
## make one DIR=06_nuclear_recovery ## make one DIR=06_nuclear_recovery
one: $(DIR)/$(SRC) one: $(DIR)/$(SRC)

View File

@ -13,8 +13,8 @@ format-examples:
cd examples/more && make format-all cd examples/more && make format-all
format: squiggle.c squiggle.h format: squiggle.c squiggle.h
$(FORMATTER) squiggle.c squiggle.h $(FORMATTER) squiggle.c
$(FORMATTER) squiggle_more.c squiggle_more.h $(FORMATTER) squiggle.h
lint: lint:
clang-tidy squiggle.c -- -lm clang-tidy squiggle.c -- -lm

27
scratchpad/core.c Normal file
View File

@ -0,0 +1,27 @@
uint64_t xorshift64(uint64_t* seed)
{
// Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
// <https://en.wikipedia.org/wiki/Xorshift>
uint64_t x = *seed;
x ^= x << 13;
x ^= x >> 7;
x ^= x << 17;
return *seed = x;
}
double sample_unit_uniform(uint64_t* seed)
{
// samples uniform from [0,1] interval.
return ((double)xorshift64(seed)) / ((double)UINT64_MAX);
}
double sample_unit_normal(uint64_t* seed)
{
// // See: <https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform>
double u1 = sample_unit_uniform(seed);
double u2 = sample_unit_uniform(seed);
double z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2);
return z;
}

Binary file not shown.

Binary file not shown.

View File

@ -10,14 +10,25 @@ int main()
// set randomness seed // set randomness seed
uint64_t* seed = malloc(sizeof(uint64_t)); uint64_t* seed = malloc(sizeof(uint64_t));
*seed = 1000; // xorshift can't start with a seed of 0 *seed = 1000; // xorshift can't start with a seed of 0
/*
for (int i = 0; i < 100; i++) {
double draw = sample_unit_uniform(seed);
printf("%f\n", draw);
int n = 1000000; }*/
double* xs = malloc(sizeof(double) * n); // Test division
for (int i = 0; i < n; i++) { // printf("\n%d\n", 10 % 3);
xs[i] = sample_to(10, 100, seed); //
int n_samples = 100, n_threads = 16;
double* results = malloc(n_samples * sizeof(double));
double sampler_scratchpad(uint64_t* seed){
return 1;
}
parallel_sampler(sampler_scratchpad, results, n_threads, n_samples);
for(int i=0; i<n_samples; i++){
printf("Sample %d: %f\n", i, results[i]);
} }
ci ci_90 = array_get_90_ci(xs, n);
printf("Recovering confidence interval of sample_to(10, 100):\n low: %f, high: %f\n", ci_90.low, ci_90.high);
free(seed); free(seed);
} }

View File

@ -8,7 +8,7 @@
#define NORMAL90CONFIDENCE 1.6448536269514727 #define NORMAL90CONFIDENCE 1.6448536269514727
// Pseudo Random number generator // Pseudo Random number generator
static uint64_t xorshift32(uint32_t* seed) uint64_t xorshift32(uint32_t* seed)
{ {
// Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs" // Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
// See: // See:
@ -24,7 +24,7 @@ static uint64_t xorshift32(uint32_t* seed)
return *seed = x; return *seed = x;
} }
static uint64_t xorshift64(uint64_t* seed) uint64_t xorshift64(uint64_t* seed)
{ {
// same as above, but for generating doubles instead of floats // same as above, but for generating doubles instead of floats
uint64_t x = *seed; uint64_t x = *seed;

View File

@ -1,195 +1,67 @@
#include "squiggle.h"
#include <float.h> #include <float.h>
#include <limits.h>
#include <math.h> #include <math.h>
#include <limits.h>
#include <omp.h> #include <omp.h>
#include <stdint.h> #include <stdint.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include "squiggle.h"
/* Parallel sampler */ /* Math constants */
void sampler_parallel(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples) #define PI 3.14159265358979323846 // M_PI in gcc gnu99
{ #define NORMAL90CONFIDENCE 1.6448536269514727
if ((n_samples % n_threads) != 0) {
fprintf(stderr, "Number of samples isn't divisible by number of threads, aborting\n");
exit(1);
}
uint64_t** seeds = malloc(n_threads * sizeof(uint64_t*));
for (uint64_t i = 0; i < n_threads; i++) {
seeds[i] = malloc(sizeof(uint64_t));
*seeds[i] = i + 1; // xorshift can't start with 0
}
int i; /* Some error niceties */
#pragma omp parallel private(i) // These won't be used until later
{ #define MAX_ERROR_LENGTH 500
#pragma omp for #define EXIT_ON_ERROR 0
for (i = 0; i < n_threads; i++) { #define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
int lower_bound = i * (n_samples / n_threads);
int upper_bound = ((i + 1) * (n_samples / n_threads)) - 1;
// printf("Lower bound: %d, upper bound: %d\n", lower_bound, upper_bound);
for (int j = lower_bound; j < upper_bound; j++) {
results[j] = sampler(seeds[i]);
}
}
}
for (uint64_t i = 0; i < n_threads; i++) {
free(seeds[i]);
}
free(seeds);
}
/* Get confidence intervals, given a sampler */ /* Get confidence intervals, given a sampler */
// Not in core yet because I'm not sure how much I like the struct // Not in core yet because I'm not sure how much I like the struct
// and the built-in 100k samples // and the built-in 100k samples
// to do: add n to function parameters and document // to do: add n to function parameters and document
typedef struct ci_t { typedef struct ci_t {
double low; float low;
double high; float high;
} ci; } ci;
int compare_doubles(const void* p, const void* q)
static void swp(int i, int j, double xs[])
{ {
double tmp = xs[i]; // https://wikiless.esmailelbob.xyz/wiki/Qsort?lang=en
xs[i] = xs[j]; double x = *(const double*)p;
xs[j] = tmp; double y = *(const double*)q;
}
static int partition(int low, int high, double xs[], int length) /* Avoid returning x - y, which can cause undefined behaviour
{ because of signed integer overflow. */
// To understand this function: if (x < y)
// - see the note after gt variable definition return -1; // Return -1 if you want ascending, 1 if you want descending order.
// - go to commit 578bfa27 and the scratchpad/ folder in it, which has printfs sprinkled throughout else if (x > y)
int pivot = low + floor((high - low) / 2); return 1; // Return 1 if you want ascending, -1 if you want descending order.
double pivot_value = xs[pivot];
swp(pivot, high, xs);
int gt = low; /* This pointer will iterate until finding an element which is greater than the pivot. Then it will move elements that are smaller before it--more specifically, it will move elements to its position and then increment. As a result all elements between gt and i will be greater than the pivot. */
for (int i = low; i < high; i++) {
if (xs[i] < pivot_value) {
swp(gt, i, xs);
gt++;
}
}
swp(high, gt, xs);
return gt;
}
static double quickselect(int k, double xs[], int n) return 0;
}
ci get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed)
{ {
// https://en.wikipedia.org/wiki/Quickselect int n = 100 * 1000;
int low = 0; double* samples_array = malloc(n * sizeof(double));
int high = n - 1;
for (;;) {
if (low == high) {
return xs[low];
}
int pivot = partition(low, high, xs, n);
if (pivot == k) {
return xs[pivot];
} else if (k < pivot) {
high = pivot - 1;
} else {
low = pivot + 1;
}
}
}
ci array_get_ci(ci interval, double* xs, int n)
{
int low_k = floor(interval.low * n);
int high_k = ceil(interval.high * n);
ci result = {
.low = quickselect(low_k, xs, n),
.high = quickselect(high_k, xs, n),
};
return result;
}
ci array_get_90_ci(double xs[], int n)
{
return array_get_ci((ci) { .low = 0.05, .high = 0.95 }, xs, n);
}
ci sampler_get_ci(ci interval, double (*sampler)(uint64_t*), int n, uint64_t* seed)
{
double* xs = malloc(n * sizeof(double));
for (int i = 0; i < n; i++) { for (int i = 0; i < n; i++) {
xs[i] = sampler(seed); samples_array[i] = sampler(seed);
}
ci result = array_get_ci(interval, xs, n);
free(xs);
return result;
}
ci sampler_get_90_ci(double (*sampler)(uint64_t*), int n, uint64_t* seed)
{
return sampler_get_ci((ci) { .low = 0.05, .high = 0.95 }, sampler, n, seed);
} }
qsort(samples_array, n, sizeof(double), compare_doubles);
/* Algebra manipulations */ ci result = {
// here I discover named structs, .low = samples_array[5000],
// which mean that I don't have to be typing .high = samples_array[94999],
// struct blah all the time.
#define NORMAL90CONFIDENCE 1.6448536269514727
typedef struct normal_params_t {
double mean;
double std;
} normal_params;
normal_params algebra_sum_normals(normal_params a, normal_params b)
{
normal_params result = {
.mean = a.mean + b.mean,
.std = sqrt((a.std * a.std) + (b.std * b.std)),
}; };
return result; free(samples_array);
}
typedef struct lognormal_params_t {
double logmean;
double logstd;
} lognormal_params;
lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params b)
{
lognormal_params result = {
.logmean = a.logmean + b.logmean,
.logstd = sqrt((a.logstd * a.logstd) + (b.logstd * b.logstd)),
};
return result;
}
lognormal_params convert_ci_to_lognormal_params(ci x)
{
double loghigh = logf(x.high);
double loglow = logf(x.low);
double logmean = (loghigh + loglow) / 2.0;
double logstd = (loghigh - loglow) / (2.0 * NORMAL90CONFIDENCE);
lognormal_params result = { .logmean = logmean, .logstd = logstd };
return result;
}
ci convert_lognormal_params_to_ci(lognormal_params y)
{
double h = y.logstd * NORMAL90CONFIDENCE;
double loghigh = y.logmean + h;
double loglow = y.logmean - h;
ci result = { .low = exp(loglow), .high = exp(loghigh) };
return result; return result;
} }
/* Scaffolding to handle errors */ /* Scaffolding to handle errors */
// We will sample from an arbitrary cdf // We are building towards sample from an arbitrary cdf
// and that operation might fail // and that operation might fail
// so we build some scaffolding here // so we build some scaffolding here
#define MAX_ERROR_LENGTH 500
#define EXIT_ON_ERROR 0
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
struct box { struct box {
int empty; int empty;
double content; double content;
@ -381,14 +253,115 @@ double sampler_cdf_danger(struct box cdf(double), uint64_t* seed)
} }
} }
/* array print: potentially useful for debugging */ /* Algebra manipulations */
// here I discover named structs,
// which mean that I don't have to be typing
// struct blah all the time.
typedef struct normal_params_t {
double mean;
double std;
} normal_params;
void array_print(double xs[], int n) normal_params algebra_sum_normals(normal_params a, normal_params b)
{ {
printf("["); normal_params result = {
for (int i = 0; i < n - 1; i++) { .mean = a.mean + b.mean,
printf("%f, ", xs[i]); .std = sqrt((a.std * a.std) + (b.std * b.std)),
};
return result;
} }
printf("%f", xs[n - 1]);
printf("]\n"); typedef struct lognormal_params_t {
double logmean;
double logstd;
} lognormal_params;
lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params b)
{
lognormal_params result = {
.logmean = a.logmean + b.logmean,
.logstd = sqrt((a.logstd * a.logstd) + (b.logstd * b.logstd)),
};
return result;
}
lognormal_params convert_ci_to_lognormal_params(ci x)
{
double loghigh = logf(x.high);
double loglow = logf(x.low);
double logmean = (loghigh + loglow) / 2.0;
double logstd = (loghigh - loglow) / (2.0 * NORMAL90CONFIDENCE);
lognormal_params result = { .logmean = logmean, .logstd = logstd };
return result;
}
ci convert_lognormal_params_to_ci(lognormal_params y)
{
double h = y.logstd * NORMAL90CONFIDENCE;
double loghigh = y.logmean + h;
double loglow = y.logmean - h;
ci result = { .low = exp(loglow), .high = exp(loghigh) };
return result;
}
/* Parallel sampler */
void parallel_sampler(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples){
// Division terminology:
// a = b * quotient + reminder
// a = (a/b)*b + (a%b)
// dividend: a
// divisor: b
// quotient = a / b
// reminder = a % b
// "divisor's multiple" := (a/b)*b
// now, we have n_samples and n_threads
// to make our life easy, each thread will have a number of samples of: a/b (quotient)
// and we'll compute the remainder of samples separately
// to possibly do by Jorge: improve so that the remainder is included in the threads
int quotient = n_samples / n_threads;
int remainder = n_samples % n_threads;
int divisor_multiple = quotient * n_threads;
uint64_t** seeds = malloc(n_threads * sizeof(uint64_t*));
// printf("UINT64_MAX: %lu\n", UINT64_MAX);
srand(1);
for (uint64_t i = 0; i < n_threads; i++) {
seeds[i] = malloc(sizeof(uint64_t));
// Constraints:
// - xorshift can't start with 0
// - the seeds should be reasonably separated and not correlated
*seeds[i] = (uint64_t) rand() * (UINT64_MAX / RAND_MAX);
// printf("#%ld: %lu\n",i, *seeds[i]);
// Other initializations tried:
// *seeds[i] = 1 + i;
// *seeds[i] = (i + 0.5)*(UINT64_MAX/n_threads);
// *seeds[i] = (i + 0.5)*(UINT64_MAX/n_threads) + constant * i;
}
int i;
#pragma omp parallel private(i)
{
#pragma omp for
for (i = 0; i < n_threads; i++) {
int lower_bound_inclusive = i * quotient;
int upper_bound_not_inclusive = ((i+1) * quotient); // note the < in the for loop below,
// printf("Lower bound: %d, upper bound: %d\n", lower_bound, upper_bound);
for (int j = lower_bound_inclusive; j < upper_bound_not_inclusive; j++) {
results[j] = sampler(seeds[i]);
}
}
}
for(int j=divisor_multiple; j<n_samples; j++){
results[j] = sampler(seeds[0]);
// we can just reuse a seed, this isn't problematic because we are not doing multithreading
}
for (uint64_t i = 0; i < n_threads; i++) {
free(seeds[i]);
}
free(seeds);
} }

View File

@ -1,20 +1,35 @@
#ifndef SQUIGGLE_C_EXTRA #ifndef SQUIGGLE_C_EXTRA
#define SQUIGGLE_C_EXTRA #define SQUIGGLE_C_EXTRA
/* Parallel sampling */ // Box
void sampler_parallel(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples); struct box {
int empty;
double content;
char* error_msg;
};
/* Get 90% confidence interval */ // Macros to handle errors
#define MAX_ERROR_LENGTH 500
#define EXIT_ON_ERROR 0
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
struct box process_error(const char* error_msg, int should_exit, char* file, int line);
// Inverse cdf
struct box inverse_cdf_double(double cdf(double), double p);
struct box inverse_cdf_box(struct box cdf_box(double), double p);
// Samplers from cdf
struct box sampler_cdf_double(double cdf(double), uint64_t* seed);
struct box sampler_cdf_box(struct box cdf(double), uint64_t* seed);
// Get 90% confidence interval
typedef struct ci_t { typedef struct ci_t {
double low; float low;
double high; float high;
} ci; } ci;
ci array_get_ci(ci interval, double* xs, int n); ci get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed);
ci array_get_90_ci(double xs[], int n);
ci sampler_get_ci(ci interval, double (*sampler)(uint64_t*), int n, uint64_t* seed);
ci sampler_get_90_ci(double (*sampler)(uint64_t*), int n, uint64_t* seed);
/* Algebra manipulations */ // small algebra manipulations
typedef struct normal_params_t { typedef struct normal_params_t {
double mean; double mean;
@ -31,24 +46,6 @@ lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params
lognormal_params convert_ci_to_lognormal_params(ci x); lognormal_params convert_ci_to_lognormal_params(ci x);
ci convert_lognormal_params_to_ci(lognormal_params y); ci convert_lognormal_params_to_ci(lognormal_params y);
/* Error handling */ void parallel_sampler(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples);
struct box {
int empty;
double content;
char* error_msg;
};
#define MAX_ERROR_LENGTH 500
#define EXIT_ON_ERROR 0
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
struct box process_error(const char* error_msg, int should_exit, char* file, int line);
void array_print(double* array, int length);
/* Inverse cdf */
struct box inverse_cdf_double(double cdf(double), double p);
struct box inverse_cdf_box(struct box cdf_box(double), double p);
/* Samplers from cdf */
struct box sampler_cdf_double(double cdf(double), uint64_t* seed);
struct box sampler_cdf_box(struct box cdf(double), uint64_t* seed);
#endif #endif