forked from personal/squiggle.c
Revert "Revert "Merge branch 'master' into quickselect""
This reverts commit 4d218468cf
.
This commit is contained in:
parent
fb123dd14c
commit
58a329bcc3
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -15,8 +15,9 @@ int main()
|
|||
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||
*seed = 1000; // xorshift can't start with 0
|
||||
|
||||
ci beta_1_2_ci_90 = get_90_confidence_interval(beta_1_2_sampler, seed);
|
||||
ci beta_1_2_ci_90 = sampler_get_90_ci(beta_1_2_sampler, 1000000, seed);
|
||||
printf("90%% confidence interval of beta(1,2) is [%f, %f]\n", beta_1_2_ci_90.low, beta_1_2_ci_90.high);
|
||||
printf("You can check this in <https://nunosempere.com/blog/2023/03/15/fit-beta/>\n");
|
||||
|
||||
free(seed);
|
||||
}
|
||||
|
|
Binary file not shown.
|
@ -60,7 +60,7 @@ int main()
|
|||
}
|
||||
printf("... ]\n");
|
||||
|
||||
ci ci_90 = get_90_confidence_interval(mixture, seed);
|
||||
ci ci_90 = sampler_get_90_ci(mixture, 1000000, seed);
|
||||
printf("mean: %f\n", array_mean(mixture_result, n));
|
||||
printf("90%% confidence interval: [%f, %f]\n", ci_90.low, ci_90.high);
|
||||
|
||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -41,7 +41,7 @@ int main()
|
|||
}
|
||||
printf("... ]\n");
|
||||
|
||||
ci ci_90 = get_90_confidence_interval(sample_minutes_per_day_jumping_rope_needed_to_burn_10kg, seed);
|
||||
ci ci_90 = sampler_get_90_ci(sample_minutes_per_day_jumping_rope_needed_to_burn_10kg, 1000000, seed);
|
||||
printf("90%% confidence interval: [%f, %f]\n", ci_90.low, ci_90.high);
|
||||
|
||||
free(seed);
|
||||
|
|
Binary file not shown.
|
@ -50,7 +50,7 @@ int main()
|
|||
|
||||
// Before a first nuclear collapse
|
||||
printf("## Before the first nuclear collapse\n");
|
||||
ci ci_90_2023 = get_90_confidence_interval(yearly_probability_nuclear_collapse_2023, seed);
|
||||
ci ci_90_2023 = sampler_get_90_ci(yearly_probability_nuclear_collapse_2023, 1000000, seed);
|
||||
printf("90%% confidence interval: [%f, %f]\n", ci_90_2023.low, ci_90_2023.high);
|
||||
|
||||
double* yearly_probability_nuclear_collapse_2023_samples = malloc(sizeof(double) * num_samples);
|
||||
|
@ -61,7 +61,7 @@ int main()
|
|||
|
||||
// After the first nuclear collapse
|
||||
printf("\n## After the first nuclear collapse\n");
|
||||
ci ci_90_2070 = get_90_confidence_interval(yearly_probability_nuclear_collapse_after_recovery_example, seed);
|
||||
ci ci_90_2070 = sampler_get_90_ci(yearly_probability_nuclear_collapse_after_recovery_example, 1000000, seed);
|
||||
printf("90%% confidence interval: [%f, %f]\n", ci_90_2070.low, ci_90_2070.high);
|
||||
|
||||
double* yearly_probability_nuclear_collapse_after_recovery_samples = malloc(sizeof(double) * num_samples);
|
||||
|
@ -72,7 +72,7 @@ int main()
|
|||
|
||||
// After the first nuclear collapse (antiinductive)
|
||||
printf("\n## After the first nuclear collapse (antiinductive)\n");
|
||||
ci ci_90_antiinductive = get_90_confidence_interval(yearly_probability_nuclear_collapse_after_recovery_antiinductive, seed);
|
||||
ci ci_90_antiinductive = sampler_get_90_ci(yearly_probability_nuclear_collapse_after_recovery_antiinductive, 1000000, seed);
|
||||
printf("90%% confidence interval: [%f, %f]\n", ci_90_antiinductive.low, ci_90_antiinductive.high);
|
||||
|
||||
double* yearly_probability_nuclear_collapse_after_recovery_antiinductive_samples = malloc(sizeof(double) * num_samples);
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -9,21 +9,22 @@ int main()
|
|||
// set randomness seed
|
||||
// uint64_t* seed = malloc(sizeof(uint64_t));
|
||||
// *seed = 1000; // xorshift can't start with 0
|
||||
// ^ not necessary, because parallel_sampler takes care of the seed.
|
||||
// ^ not necessary, because sampler_parallel takes care of the seed.
|
||||
|
||||
int n_samples = 1000 * 1000 * 1000;
|
||||
int n_threads = 16;
|
||||
double sampler(uint64_t* seed){
|
||||
double sampler(uint64_t * seed)
|
||||
{
|
||||
return sample_lognormal(0, 10, seed);
|
||||
}
|
||||
double* results = malloc(n_samples * sizeof(double));
|
||||
|
||||
parallel_sampler(sampler, results, n_threads, n_samples);
|
||||
sampler_parallel(sampler, results, n_threads, n_samples);
|
||||
double avg = array_sum(results, n_samples) / n_samples;
|
||||
printf("Average of 1B lognormal(0,10): %f", avg);
|
||||
|
||||
free(results);
|
||||
|
||||
// free(seed);
|
||||
// ^ not necessary, because parallel_sampler takes care of the seed.
|
||||
// ^ not necessary, because sampler_parallel takes care of the seed.
|
||||
}
|
||||
|
|
Binary file not shown.
|
@ -17,13 +17,14 @@ int main()
|
|||
int n_dists = 4;
|
||||
double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
|
||||
double (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many };
|
||||
double sampler_result(uint64_t* seed) {
|
||||
double sampler_result(uint64_t * seed)
|
||||
{
|
||||
return sample_mixture(samplers, weights, n_dists, seed);
|
||||
}
|
||||
|
||||
int n_samples = 1000 * 1000, n_threads = 16;
|
||||
double* results = malloc(n_samples * sizeof(double));
|
||||
parallel_sampler(sampler_result, results, n_threads, n_samples);
|
||||
sampler_parallel(sampler_result, results, n_threads, n_samples);
|
||||
printf("Avg: %f\n", array_sum(results, n_samples) / n_samples);
|
||||
free(results);
|
||||
}
|
||||
|
|
Binary file not shown.
|
@ -13,9 +13,10 @@ int main()
|
|||
|
||||
/* Option 1: parallelize taking from n samples */
|
||||
// Question being asked: what is the distribution of sampling 1000 times and taking the min?
|
||||
double sample_min_of_n(uint64_t* seed, int n){
|
||||
double sample_min_of_n(uint64_t * seed, int n)
|
||||
{
|
||||
double min = sample_normal(5, 2, seed);
|
||||
for(int i=0; i<(n-1); i++){
|
||||
for (int i = 0; i < (n - 2); i++) {
|
||||
double sample = sample_normal(5, 2, seed);
|
||||
if (sample < min) {
|
||||
min = sample;
|
||||
|
@ -23,47 +24,47 @@ int main()
|
|||
}
|
||||
return min;
|
||||
}
|
||||
double sampler_min_of_1000(uint64_t* seed) {
|
||||
double sample_min_of_1000(uint64_t * seed)
|
||||
{
|
||||
return sample_min_of_n(seed, 1000);
|
||||
}
|
||||
|
||||
int n_samples = 10000, n_threads = 16;
|
||||
int n_samples = 1000000, n_threads = 16;
|
||||
double* results = malloc(n_samples * sizeof(double));
|
||||
parallel_sampler(sampler_min_of_1000, results, n_threads, n_samples);
|
||||
sampler_parallel(sample_min_of_1000, results, n_threads, n_samples);
|
||||
printf("Mean of the distribution of (taking the min of 1000 samples of a normal(5,2)): %f\n", array_mean(results, n_samples));
|
||||
free(results);
|
||||
|
||||
/* Option 2: take the min from n samples cleverly using parallelism */
|
||||
// Question being asked: can we take the min of n samples cleverly?
|
||||
double sample_n_parallel(int n){
|
||||
double sample_n_parallel(int n)
|
||||
{
|
||||
|
||||
int n_threads = 16;
|
||||
int quotient = n / 16;
|
||||
int remainder = n % 16;
|
||||
|
||||
uint64_t seed = 100;
|
||||
uint64_t seed = 1000;
|
||||
double result_remainder = sample_min_of_n(&seed, remainder);
|
||||
|
||||
double sample_min_of_quotient(uint64_t* seed) {
|
||||
double result = sample_min_of_n(seed, quotient);
|
||||
// printf("Result: %f\n", result);
|
||||
return result;
|
||||
double sample_min_of_quotient(uint64_t * seed)
|
||||
{
|
||||
return sample_min_of_n(seed, quotient);
|
||||
}
|
||||
double* results = malloc(n_threads * sizeof(double));
|
||||
parallel_sampler(sample_min_of_quotient, results, n_threads, n_threads);
|
||||
double* results_quotient = malloc(quotient * sizeof(double));
|
||||
sampler_parallel(sample_min_of_quotient, results_quotient, n_threads, quotient);
|
||||
|
||||
double min = results[0];
|
||||
for(int i=1; i<n_threads; i++){
|
||||
if(min > results[i]){
|
||||
min = results[i];
|
||||
double min = results_quotient[0];
|
||||
for (int i = 1; i < quotient; i++) {
|
||||
if (min > results_quotient[i]) {
|
||||
min = results_quotient[i];
|
||||
}
|
||||
}
|
||||
if (min > result_remainder) {
|
||||
min = result_remainder;
|
||||
}
|
||||
free(results);
|
||||
free(results_quotient);
|
||||
return min;
|
||||
}
|
||||
printf("Minimum of 10M samples of normal(5,2): %f\n", sample_n_parallel(1000 * 1000));
|
||||
|
||||
printf("Minimum of 1M samples of normal(5,2): %f\n", sample_n_parallel(1000000));
|
||||
}
|
||||
|
|
BIN
examples/more/14_check_confidence_interval/example
Executable file
BIN
examples/more/14_check_confidence_interval/example
Executable file
Binary file not shown.
21
examples/more/14_check_confidence_interval/example.c
Normal file
21
examples/more/14_check_confidence_interval/example.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
#include "../../../squiggle.h"
|
||||
#include "../../../squiggle_more.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int main()
|
||||
{
|
||||
// set randomness seed
|
||||
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||
*seed = 1000; // xorshift can't start with a seed of 0
|
||||
|
||||
int n = 1000000;
|
||||
double* xs = malloc(sizeof(double) * n);
|
||||
for (int i = 0; i < n; i++) {
|
||||
xs[i] = sample_to(10, 100, seed);
|
||||
}
|
||||
ci ci_90 = array_get_90_ci(xs, n);
|
||||
printf("Recovering confidence interval of sample_to(10, 100):\n low: %f, high: %f\n", ci_90.low, ci_90.high);
|
||||
|
||||
free(seed);
|
||||
}
|
|
@ -49,6 +49,7 @@ all:
|
|||
$(CC) $(OPTIMIZED) $(DEBUG) 11_billion_lognormals_paralell/$(SRC) $(DEPS) -o 11_billion_lognormals_paralell/$(OUTPUT)
|
||||
$(CC) $(OPTIMIZED) $(DEBUG) 12_time_to_botec_parallel/$(SRC) $(DEPS) -o 12_time_to_botec_parallel/$(OUTPUT)
|
||||
$(CC) $(OPTIMIZED) $(DEBUG) 13_parallelize_min/$(SRC) $(DEPS) -o 13_parallelize_min/$(OUTPUT)
|
||||
$(CC) $(OPTIMIZED) $(DEBUG) 14_check_confidence_interval/$(SRC) $(DEPS) -o 14_check_confidence_interval/$(OUTPUT)
|
||||
|
||||
format-all:
|
||||
$(FORMATTER) 00_example_template/$(SRC)
|
||||
|
@ -65,6 +66,7 @@ format-all:
|
|||
$(FORMATTER) 11_billion_lognormals_paralell/$(SRC)
|
||||
$(FORMATTER) 12_time_to_botec_parallel/$(SRC)
|
||||
$(FORMATTER) 13_parallelize_min/$(SRC)
|
||||
$(FORMATTER) 14_check_confidence_interval/$(SRC)
|
||||
|
||||
run-all:
|
||||
00_example_template/$(OUTPUT)
|
||||
|
@ -81,6 +83,7 @@ run-all:
|
|||
11_billion_lognormals_paralell/$(OUTPUT)
|
||||
12_time_to_botec_parallel/$(OUTPUT)
|
||||
13_parallelize_min/$(OUTPUT)
|
||||
14_check_confidence_interval/$(OUTPUT)
|
||||
|
||||
## make one DIR=06_nuclear_recovery
|
||||
one: $(DIR)/$(SRC)
|
||||
|
|
4
makefile
4
makefile
|
@ -13,8 +13,8 @@ format-examples:
|
|||
cd examples/more && make format-all
|
||||
|
||||
format: squiggle.c squiggle.h
|
||||
$(FORMATTER) squiggle.c
|
||||
$(FORMATTER) squiggle.h
|
||||
$(FORMATTER) squiggle.c squiggle.h
|
||||
$(FORMATTER) squiggle_more.c squiggle_more.h
|
||||
|
||||
lint:
|
||||
clang-tidy squiggle.c -- -lm
|
||||
|
|
|
@ -1,27 +0,0 @@
|
|||
|
||||
uint64_t xorshift64(uint64_t* seed)
|
||||
{
|
||||
// Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
|
||||
// <https://en.wikipedia.org/wiki/Xorshift>
|
||||
uint64_t x = *seed;
|
||||
x ^= x << 13;
|
||||
x ^= x >> 7;
|
||||
x ^= x << 17;
|
||||
return *seed = x;
|
||||
}
|
||||
|
||||
double sample_unit_uniform(uint64_t* seed)
|
||||
{
|
||||
// samples uniform from [0,1] interval.
|
||||
return ((double)xorshift64(seed)) / ((double)UINT64_MAX);
|
||||
}
|
||||
|
||||
double sample_unit_normal(uint64_t* seed)
|
||||
{
|
||||
// // See: <https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform>
|
||||
double u1 = sample_unit_uniform(seed);
|
||||
double u2 = sample_unit_uniform(seed);
|
||||
double z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2);
|
||||
return z;
|
||||
}
|
||||
|
Binary file not shown.
Binary file not shown.
|
@ -10,25 +10,14 @@ int main()
|
|||
// set randomness seed
|
||||
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||
*seed = 1000; // xorshift can't start with a seed of 0
|
||||
/*
|
||||
for (int i = 0; i < 100; i++) {
|
||||
double draw = sample_unit_uniform(seed);
|
||||
printf("%f\n", draw);
|
||||
|
||||
}*/
|
||||
// Test division
|
||||
// printf("\n%d\n", 10 % 3);
|
||||
//
|
||||
|
||||
int n_samples = 100, n_threads = 16;
|
||||
double* results = malloc(n_samples * sizeof(double));
|
||||
double sampler_scratchpad(uint64_t* seed){
|
||||
return 1;
|
||||
}
|
||||
parallel_sampler(sampler_scratchpad, results, n_threads, n_samples);
|
||||
for(int i=0; i<n_samples; i++){
|
||||
printf("Sample %d: %f\n", i, results[i]);
|
||||
int n = 1000000;
|
||||
double* xs = malloc(sizeof(double) * n);
|
||||
for (int i = 0; i < n; i++) {
|
||||
xs[i] = sample_to(10, 100, seed);
|
||||
}
|
||||
ci ci_90 = array_get_90_ci(xs, n);
|
||||
printf("Recovering confidence interval of sample_to(10, 100):\n low: %f, high: %f\n", ci_90.low, ci_90.high);
|
||||
|
||||
free(seed);
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
#define NORMAL90CONFIDENCE 1.6448536269514727
|
||||
|
||||
// Pseudo Random number generator
|
||||
uint64_t xorshift32(uint32_t* seed)
|
||||
static uint64_t xorshift32(uint32_t* seed)
|
||||
{
|
||||
// Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
|
||||
// See:
|
||||
|
@ -24,7 +24,7 @@ uint64_t xorshift32(uint32_t* seed)
|
|||
return *seed = x;
|
||||
}
|
||||
|
||||
uint64_t xorshift64(uint64_t* seed)
|
||||
static uint64_t xorshift64(uint64_t* seed)
|
||||
{
|
||||
// same as above, but for generating doubles instead of floats
|
||||
uint64_t x = *seed;
|
||||
|
|
313
squiggle_more.c
313
squiggle_more.c
|
@ -1,67 +1,195 @@
|
|||
#include "squiggle.h"
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
#include <limits.h>
|
||||
#include <math.h>
|
||||
#include <omp.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "squiggle.h"
|
||||
|
||||
/* Math constants */
|
||||
#define PI 3.14159265358979323846 // M_PI in gcc gnu99
|
||||
#define NORMAL90CONFIDENCE 1.6448536269514727
|
||||
/* Parallel sampler */
|
||||
void sampler_parallel(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples)
|
||||
{
|
||||
if ((n_samples % n_threads) != 0) {
|
||||
fprintf(stderr, "Number of samples isn't divisible by number of threads, aborting\n");
|
||||
exit(1);
|
||||
}
|
||||
uint64_t** seeds = malloc(n_threads * sizeof(uint64_t*));
|
||||
for (uint64_t i = 0; i < n_threads; i++) {
|
||||
seeds[i] = malloc(sizeof(uint64_t));
|
||||
*seeds[i] = i + 1; // xorshift can't start with 0
|
||||
}
|
||||
|
||||
/* Some error niceties */
|
||||
// These won't be used until later
|
||||
#define MAX_ERROR_LENGTH 500
|
||||
#define EXIT_ON_ERROR 0
|
||||
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
|
||||
int i;
|
||||
#pragma omp parallel private(i)
|
||||
{
|
||||
#pragma omp for
|
||||
for (i = 0; i < n_threads; i++) {
|
||||
int lower_bound = i * (n_samples / n_threads);
|
||||
int upper_bound = ((i + 1) * (n_samples / n_threads)) - 1;
|
||||
// printf("Lower bound: %d, upper bound: %d\n", lower_bound, upper_bound);
|
||||
for (int j = lower_bound; j < upper_bound; j++) {
|
||||
results[j] = sampler(seeds[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (uint64_t i = 0; i < n_threads; i++) {
|
||||
free(seeds[i]);
|
||||
}
|
||||
free(seeds);
|
||||
}
|
||||
|
||||
/* Get confidence intervals, given a sampler */
|
||||
// Not in core yet because I'm not sure how much I like the struct
|
||||
// and the built-in 100k samples
|
||||
// to do: add n to function parameters and document
|
||||
|
||||
typedef struct ci_t {
|
||||
float low;
|
||||
float high;
|
||||
double low;
|
||||
double high;
|
||||
} ci;
|
||||
int compare_doubles(const void* p, const void* q)
|
||||
|
||||
static void swp(int i, int j, double xs[])
|
||||
{
|
||||
// https://wikiless.esmailelbob.xyz/wiki/Qsort?lang=en
|
||||
double x = *(const double*)p;
|
||||
double y = *(const double*)q;
|
||||
|
||||
/* Avoid returning x - y, which can cause undefined behaviour
|
||||
because of signed integer overflow. */
|
||||
if (x < y)
|
||||
return -1; // Return -1 if you want ascending, 1 if you want descending order.
|
||||
else if (x > y)
|
||||
return 1; // Return 1 if you want ascending, -1 if you want descending order.
|
||||
|
||||
return 0;
|
||||
double tmp = xs[i];
|
||||
xs[i] = xs[j];
|
||||
xs[j] = tmp;
|
||||
}
|
||||
ci get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed)
|
||||
|
||||
static int partition(int low, int high, double xs[], int length)
|
||||
{
|
||||
int n = 100 * 1000;
|
||||
double* samples_array = malloc(n * sizeof(double));
|
||||
for (int i = 0; i < n; i++) {
|
||||
samples_array[i] = sampler(seed);
|
||||
// To understand this function:
|
||||
// - see the note after gt variable definition
|
||||
// - go to commit 578bfa27 and the scratchpad/ folder in it, which has printfs sprinkled throughout
|
||||
int pivot = low + floor((high - low) / 2);
|
||||
double pivot_value = xs[pivot];
|
||||
swp(pivot, high, xs);
|
||||
int gt = low; /* This pointer will iterate until finding an element which is greater than the pivot. Then it will move elements that are smaller before it--more specifically, it will move elements to its position and then increment. As a result all elements between gt and i will be greater than the pivot. */
|
||||
for (int i = low; i < high; i++) {
|
||||
if (xs[i] < pivot_value) {
|
||||
swp(gt, i, xs);
|
||||
gt++;
|
||||
}
|
||||
}
|
||||
swp(high, gt, xs);
|
||||
return gt;
|
||||
}
|
||||
qsort(samples_array, n, sizeof(double), compare_doubles);
|
||||
|
||||
static double quickselect(int k, double xs[], int n)
|
||||
{
|
||||
// https://en.wikipedia.org/wiki/Quickselect
|
||||
int low = 0;
|
||||
int high = n - 1;
|
||||
for (;;) {
|
||||
if (low == high) {
|
||||
return xs[low];
|
||||
}
|
||||
int pivot = partition(low, high, xs, n);
|
||||
if (pivot == k) {
|
||||
return xs[pivot];
|
||||
} else if (k < pivot) {
|
||||
high = pivot - 1;
|
||||
} else {
|
||||
low = pivot + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ci array_get_ci(ci interval, double* xs, int n)
|
||||
{
|
||||
|
||||
int low_k = floor(interval.low * n);
|
||||
int high_k = ceil(interval.high * n);
|
||||
ci result = {
|
||||
.low = samples_array[5000],
|
||||
.high = samples_array[94999],
|
||||
.low = quickselect(low_k, xs, n),
|
||||
.high = quickselect(high_k, xs, n),
|
||||
};
|
||||
free(samples_array);
|
||||
return result;
|
||||
}
|
||||
ci array_get_90_ci(double xs[], int n)
|
||||
{
|
||||
return array_get_ci((ci) { .low = 0.05, .high = 0.95 }, xs, n);
|
||||
}
|
||||
|
||||
ci sampler_get_ci(ci interval, double (*sampler)(uint64_t*), int n, uint64_t* seed)
|
||||
{
|
||||
double* xs = malloc(n * sizeof(double));
|
||||
for (int i = 0; i < n; i++) {
|
||||
xs[i] = sampler(seed);
|
||||
}
|
||||
ci result = array_get_ci(interval, xs, n);
|
||||
free(xs);
|
||||
return result;
|
||||
}
|
||||
ci sampler_get_90_ci(double (*sampler)(uint64_t*), int n, uint64_t* seed)
|
||||
{
|
||||
return sampler_get_ci((ci) { .low = 0.05, .high = 0.95 }, sampler, n, seed);
|
||||
}
|
||||
|
||||
/* Algebra manipulations */
|
||||
// here I discover named structs,
|
||||
// which mean that I don't have to be typing
|
||||
// struct blah all the time.
|
||||
|
||||
#define NORMAL90CONFIDENCE 1.6448536269514727
|
||||
|
||||
typedef struct normal_params_t {
|
||||
double mean;
|
||||
double std;
|
||||
} normal_params;
|
||||
|
||||
normal_params algebra_sum_normals(normal_params a, normal_params b)
|
||||
{
|
||||
normal_params result = {
|
||||
.mean = a.mean + b.mean,
|
||||
.std = sqrt((a.std * a.std) + (b.std * b.std)),
|
||||
};
|
||||
return result;
|
||||
}
|
||||
|
||||
typedef struct lognormal_params_t {
|
||||
double logmean;
|
||||
double logstd;
|
||||
} lognormal_params;
|
||||
|
||||
lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params b)
|
||||
{
|
||||
lognormal_params result = {
|
||||
.logmean = a.logmean + b.logmean,
|
||||
.logstd = sqrt((a.logstd * a.logstd) + (b.logstd * b.logstd)),
|
||||
};
|
||||
return result;
|
||||
}
|
||||
|
||||
lognormal_params convert_ci_to_lognormal_params(ci x)
|
||||
{
|
||||
double loghigh = logf(x.high);
|
||||
double loglow = logf(x.low);
|
||||
double logmean = (loghigh + loglow) / 2.0;
|
||||
double logstd = (loghigh - loglow) / (2.0 * NORMAL90CONFIDENCE);
|
||||
lognormal_params result = { .logmean = logmean, .logstd = logstd };
|
||||
return result;
|
||||
}
|
||||
|
||||
ci convert_lognormal_params_to_ci(lognormal_params y)
|
||||
{
|
||||
double h = y.logstd * NORMAL90CONFIDENCE;
|
||||
double loghigh = y.logmean + h;
|
||||
double loglow = y.logmean - h;
|
||||
ci result = { .low = exp(loglow), .high = exp(loghigh) };
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Scaffolding to handle errors */
|
||||
// We are building towards sample from an arbitrary cdf
|
||||
// We will sample from an arbitrary cdf
|
||||
// and that operation might fail
|
||||
// so we build some scaffolding here
|
||||
|
||||
#define MAX_ERROR_LENGTH 500
|
||||
#define EXIT_ON_ERROR 0
|
||||
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
|
||||
|
||||
struct box {
|
||||
int empty;
|
||||
double content;
|
||||
|
@ -253,115 +381,14 @@ double sampler_cdf_danger(struct box cdf(double), uint64_t* seed)
|
|||
}
|
||||
}
|
||||
|
||||
/* Algebra manipulations */
|
||||
// here I discover named structs,
|
||||
// which mean that I don't have to be typing
|
||||
// struct blah all the time.
|
||||
typedef struct normal_params_t {
|
||||
double mean;
|
||||
double std;
|
||||
} normal_params;
|
||||
/* array print: potentially useful for debugging */
|
||||
|
||||
normal_params algebra_sum_normals(normal_params a, normal_params b)
|
||||
void array_print(double xs[], int n)
|
||||
{
|
||||
normal_params result = {
|
||||
.mean = a.mean + b.mean,
|
||||
.std = sqrt((a.std * a.std) + (b.std * b.std)),
|
||||
};
|
||||
return result;
|
||||
printf("[");
|
||||
for (int i = 0; i < n - 1; i++) {
|
||||
printf("%f, ", xs[i]);
|
||||
}
|
||||
|
||||
typedef struct lognormal_params_t {
|
||||
double logmean;
|
||||
double logstd;
|
||||
} lognormal_params;
|
||||
|
||||
lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params b)
|
||||
{
|
||||
lognormal_params result = {
|
||||
.logmean = a.logmean + b.logmean,
|
||||
.logstd = sqrt((a.logstd * a.logstd) + (b.logstd * b.logstd)),
|
||||
};
|
||||
return result;
|
||||
}
|
||||
|
||||
lognormal_params convert_ci_to_lognormal_params(ci x)
|
||||
{
|
||||
double loghigh = logf(x.high);
|
||||
double loglow = logf(x.low);
|
||||
double logmean = (loghigh + loglow) / 2.0;
|
||||
double logstd = (loghigh - loglow) / (2.0 * NORMAL90CONFIDENCE);
|
||||
lognormal_params result = { .logmean = logmean, .logstd = logstd };
|
||||
return result;
|
||||
}
|
||||
|
||||
ci convert_lognormal_params_to_ci(lognormal_params y)
|
||||
{
|
||||
double h = y.logstd * NORMAL90CONFIDENCE;
|
||||
double loghigh = y.logmean + h;
|
||||
double loglow = y.logmean - h;
|
||||
ci result = { .low = exp(loglow), .high = exp(loghigh) };
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Parallel sampler */
|
||||
void parallel_sampler(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples){
|
||||
|
||||
// Division terminology:
|
||||
// a = b * quotient + reminder
|
||||
// a = (a/b)*b + (a%b)
|
||||
// dividend: a
|
||||
// divisor: b
|
||||
// quotient = a / b
|
||||
// reminder = a % b
|
||||
// "divisor's multiple" := (a/b)*b
|
||||
|
||||
// now, we have n_samples and n_threads
|
||||
// to make our life easy, each thread will have a number of samples of: a/b (quotient)
|
||||
// and we'll compute the remainder of samples separately
|
||||
// to possibly do by Jorge: improve so that the remainder is included in the threads
|
||||
|
||||
int quotient = n_samples / n_threads;
|
||||
int remainder = n_samples % n_threads;
|
||||
int divisor_multiple = quotient * n_threads;
|
||||
|
||||
uint64_t** seeds = malloc(n_threads * sizeof(uint64_t*));
|
||||
// printf("UINT64_MAX: %lu\n", UINT64_MAX);
|
||||
srand(1);
|
||||
for (uint64_t i = 0; i < n_threads; i++) {
|
||||
seeds[i] = malloc(sizeof(uint64_t));
|
||||
// Constraints:
|
||||
// - xorshift can't start with 0
|
||||
// - the seeds should be reasonably separated and not correlated
|
||||
*seeds[i] = (uint64_t) rand() * (UINT64_MAX / RAND_MAX);
|
||||
// printf("#%ld: %lu\n",i, *seeds[i]);
|
||||
|
||||
// Other initializations tried:
|
||||
// *seeds[i] = 1 + i;
|
||||
// *seeds[i] = (i + 0.5)*(UINT64_MAX/n_threads);
|
||||
// *seeds[i] = (i + 0.5)*(UINT64_MAX/n_threads) + constant * i;
|
||||
}
|
||||
|
||||
int i;
|
||||
#pragma omp parallel private(i)
|
||||
{
|
||||
#pragma omp for
|
||||
for (i = 0; i < n_threads; i++) {
|
||||
int lower_bound_inclusive = i * quotient;
|
||||
int upper_bound_not_inclusive = ((i+1) * quotient); // note the < in the for loop below,
|
||||
// printf("Lower bound: %d, upper bound: %d\n", lower_bound, upper_bound);
|
||||
for (int j = lower_bound_inclusive; j < upper_bound_not_inclusive; j++) {
|
||||
results[j] = sampler(seeds[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
for(int j=divisor_multiple; j<n_samples; j++){
|
||||
results[j] = sampler(seeds[0]);
|
||||
// we can just reuse a seed, this isn't problematic because we are not doing multithreading
|
||||
}
|
||||
|
||||
for (uint64_t i = 0; i < n_threads; i++) {
|
||||
free(seeds[i]);
|
||||
}
|
||||
free(seeds);
|
||||
printf("%f", xs[n - 1]);
|
||||
printf("]\n");
|
||||
}
|
||||
|
|
|
@ -1,35 +1,20 @@
|
|||
#ifndef SQUIGGLE_C_EXTRA
|
||||
#define SQUIGGLE_C_EXTRA
|
||||
|
||||
// Box
|
||||
struct box {
|
||||
int empty;
|
||||
double content;
|
||||
char* error_msg;
|
||||
};
|
||||
/* Parallel sampling */
|
||||
void sampler_parallel(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples);
|
||||
|
||||
// Macros to handle errors
|
||||
#define MAX_ERROR_LENGTH 500
|
||||
#define EXIT_ON_ERROR 0
|
||||
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
|
||||
struct box process_error(const char* error_msg, int should_exit, char* file, int line);
|
||||
|
||||
// Inverse cdf
|
||||
struct box inverse_cdf_double(double cdf(double), double p);
|
||||
struct box inverse_cdf_box(struct box cdf_box(double), double p);
|
||||
|
||||
// Samplers from cdf
|
||||
struct box sampler_cdf_double(double cdf(double), uint64_t* seed);
|
||||
struct box sampler_cdf_box(struct box cdf(double), uint64_t* seed);
|
||||
|
||||
// Get 90% confidence interval
|
||||
/* Get 90% confidence interval */
|
||||
typedef struct ci_t {
|
||||
float low;
|
||||
float high;
|
||||
double low;
|
||||
double high;
|
||||
} ci;
|
||||
ci get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed);
|
||||
ci array_get_ci(ci interval, double* xs, int n);
|
||||
ci array_get_90_ci(double xs[], int n);
|
||||
ci sampler_get_ci(ci interval, double (*sampler)(uint64_t*), int n, uint64_t* seed);
|
||||
ci sampler_get_90_ci(double (*sampler)(uint64_t*), int n, uint64_t* seed);
|
||||
|
||||
// small algebra manipulations
|
||||
/* Algebra manipulations */
|
||||
|
||||
typedef struct normal_params_t {
|
||||
double mean;
|
||||
|
@ -46,6 +31,24 @@ lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params
|
|||
lognormal_params convert_ci_to_lognormal_params(ci x);
|
||||
ci convert_lognormal_params_to_ci(lognormal_params y);
|
||||
|
||||
void parallel_sampler(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples);
|
||||
/* Error handling */
|
||||
struct box {
|
||||
int empty;
|
||||
double content;
|
||||
char* error_msg;
|
||||
};
|
||||
#define MAX_ERROR_LENGTH 500
|
||||
#define EXIT_ON_ERROR 0
|
||||
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
|
||||
struct box process_error(const char* error_msg, int should_exit, char* file, int line);
|
||||
void array_print(double* array, int length);
|
||||
|
||||
/* Inverse cdf */
|
||||
struct box inverse_cdf_double(double cdf(double), double p);
|
||||
struct box inverse_cdf_box(struct box cdf_box(double), double p);
|
||||
|
||||
/* Samplers from cdf */
|
||||
struct box sampler_cdf_double(double cdf(double), uint64_t* seed);
|
||||
struct box sampler_cdf_box(struct box cdf(double), uint64_t* seed);
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue
Block a user