forked from personal/squiggle.c
Revert "Merge branch 'master' into quickselect"
This reverts commitc77fa34410
, reversing changes made toffd6e5dcbb
.
This commit is contained in:
parent
c77fa34410
commit
4d218468cf
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -15,9 +15,8 @@ int main()
|
|||
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||
*seed = 1000; // xorshift can't start with 0
|
||||
|
||||
ci beta_1_2_ci_90 = sampler_get_90_ci(beta_1_2_sampler, 1000000, seed);
|
||||
ci beta_1_2_ci_90 = get_90_confidence_interval(beta_1_2_sampler, seed);
|
||||
printf("90%% confidence interval of beta(1,2) is [%f, %f]\n", beta_1_2_ci_90.low, beta_1_2_ci_90.high);
|
||||
printf("You can check this in <https://nunosempere.com/blog/2023/03/15/fit-beta/>\n");
|
||||
|
||||
free(seed);
|
||||
}
|
||||
|
|
Binary file not shown.
|
@ -60,7 +60,7 @@ int main()
|
|||
}
|
||||
printf("... ]\n");
|
||||
|
||||
ci ci_90 = sampler_get_90_ci(mixture, 1000000, seed);
|
||||
ci ci_90 = get_90_confidence_interval(mixture, seed);
|
||||
printf("mean: %f\n", array_mean(mixture_result, n));
|
||||
printf("90%% confidence interval: [%f, %f]\n", ci_90.low, ci_90.high);
|
||||
|
||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -41,7 +41,7 @@ int main()
|
|||
}
|
||||
printf("... ]\n");
|
||||
|
||||
ci ci_90 = sampler_get_90_ci(sample_minutes_per_day_jumping_rope_needed_to_burn_10kg, 1000000, seed);
|
||||
ci ci_90 = get_90_confidence_interval(sample_minutes_per_day_jumping_rope_needed_to_burn_10kg, seed);
|
||||
printf("90%% confidence interval: [%f, %f]\n", ci_90.low, ci_90.high);
|
||||
|
||||
free(seed);
|
||||
|
|
Binary file not shown.
|
@ -50,7 +50,7 @@ int main()
|
|||
|
||||
// Before a first nuclear collapse
|
||||
printf("## Before the first nuclear collapse\n");
|
||||
ci ci_90_2023 = sampler_get_90_ci(yearly_probability_nuclear_collapse_2023, 1000000, seed);
|
||||
ci ci_90_2023 = get_90_confidence_interval(yearly_probability_nuclear_collapse_2023, seed);
|
||||
printf("90%% confidence interval: [%f, %f]\n", ci_90_2023.low, ci_90_2023.high);
|
||||
|
||||
double* yearly_probability_nuclear_collapse_2023_samples = malloc(sizeof(double) * num_samples);
|
||||
|
@ -61,7 +61,7 @@ int main()
|
|||
|
||||
// After the first nuclear collapse
|
||||
printf("\n## After the first nuclear collapse\n");
|
||||
ci ci_90_2070 = sampler_get_90_ci(yearly_probability_nuclear_collapse_after_recovery_example, 1000000, seed);
|
||||
ci ci_90_2070 = get_90_confidence_interval(yearly_probability_nuclear_collapse_after_recovery_example, seed);
|
||||
printf("90%% confidence interval: [%f, %f]\n", ci_90_2070.low, ci_90_2070.high);
|
||||
|
||||
double* yearly_probability_nuclear_collapse_after_recovery_samples = malloc(sizeof(double) * num_samples);
|
||||
|
@ -72,7 +72,7 @@ int main()
|
|||
|
||||
// After the first nuclear collapse (antiinductive)
|
||||
printf("\n## After the first nuclear collapse (antiinductive)\n");
|
||||
ci ci_90_antiinductive = sampler_get_90_ci(yearly_probability_nuclear_collapse_after_recovery_antiinductive, 1000000, seed);
|
||||
ci ci_90_antiinductive = get_90_confidence_interval(yearly_probability_nuclear_collapse_after_recovery_antiinductive, seed);
|
||||
printf("90%% confidence interval: [%f, %f]\n", ci_90_antiinductive.low, ci_90_antiinductive.high);
|
||||
|
||||
double* yearly_probability_nuclear_collapse_after_recovery_antiinductive_samples = malloc(sizeof(double) * num_samples);
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -9,22 +9,21 @@ int main()
|
|||
// set randomness seed
|
||||
// uint64_t* seed = malloc(sizeof(uint64_t));
|
||||
// *seed = 1000; // xorshift can't start with 0
|
||||
// ^ not necessary, because sampler_parallel takes care of the seed.
|
||||
// ^ not necessary, because parallel_sampler takes care of the seed.
|
||||
|
||||
int n_samples = 1000 * 1000 * 1000;
|
||||
int n_threads = 16;
|
||||
double sampler(uint64_t * seed)
|
||||
{
|
||||
double sampler(uint64_t* seed){
|
||||
return sample_lognormal(0, 10, seed);
|
||||
}
|
||||
double* results = malloc(n_samples * sizeof(double));
|
||||
|
||||
sampler_parallel(sampler, results, n_threads, n_samples);
|
||||
parallel_sampler(sampler, results, n_threads, n_samples);
|
||||
double avg = array_sum(results, n_samples)/n_samples;
|
||||
printf("Average of 1B lognormal(0,10): %f", avg);
|
||||
|
||||
free(results);
|
||||
|
||||
// free(seed);
|
||||
// ^ not necessary, because sampler_parallel takes care of the seed.
|
||||
// ^ not necessary, because parallel_sampler takes care of the seed.
|
||||
}
|
||||
|
|
Binary file not shown.
|
@ -17,14 +17,13 @@ int main()
|
|||
int n_dists = 4;
|
||||
double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
|
||||
double (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many };
|
||||
double sampler_result(uint64_t * seed)
|
||||
{
|
||||
double sampler_result(uint64_t* seed) {
|
||||
return sample_mixture(samplers, weights, n_dists, seed);
|
||||
}
|
||||
|
||||
int n_samples = 1000 * 1000, n_threads = 16;
|
||||
double* results = malloc(n_samples * sizeof(double));
|
||||
sampler_parallel(sampler_result, results, n_threads, n_samples);
|
||||
parallel_sampler(sampler_result, results, n_threads, n_samples);
|
||||
printf("Avg: %f\n", array_sum(results, n_samples)/n_samples);
|
||||
free(results);
|
||||
}
|
||||
|
|
Binary file not shown.
|
@ -13,10 +13,9 @@ int main()
|
|||
|
||||
/* Option 1: parallelize taking from n samples */
|
||||
// Question being asked: what is the distribution of sampling 1000 times and taking the min?
|
||||
double sample_min_of_n(uint64_t * seed, int n)
|
||||
{
|
||||
double sample_min_of_n(uint64_t* seed, int n){
|
||||
double min = sample_normal(5, 2, seed);
|
||||
for (int i = 0; i < (n - 2); i++) {
|
||||
for(int i=0; i<(n-1); i++){
|
||||
double sample = sample_normal(5, 2, seed);
|
||||
if(sample < min){
|
||||
min = sample;
|
||||
|
@ -24,47 +23,47 @@ int main()
|
|||
}
|
||||
return min;
|
||||
}
|
||||
double sample_min_of_1000(uint64_t * seed)
|
||||
{
|
||||
double sampler_min_of_1000(uint64_t* seed) {
|
||||
return sample_min_of_n(seed, 1000);
|
||||
}
|
||||
|
||||
int n_samples = 1000000, n_threads = 16;
|
||||
int n_samples = 10000, n_threads = 16;
|
||||
double* results = malloc(n_samples * sizeof(double));
|
||||
sampler_parallel(sample_min_of_1000, results, n_threads, n_samples);
|
||||
parallel_sampler(sampler_min_of_1000, results, n_threads, n_samples);
|
||||
printf("Mean of the distribution of (taking the min of 1000 samples of a normal(5,2)): %f\n", array_mean(results, n_samples));
|
||||
free(results);
|
||||
|
||||
/* Option 2: take the min from n samples cleverly using parallelism */
|
||||
// Question being asked: can we take the min of n samples cleverly?
|
||||
double sample_n_parallel(int n)
|
||||
{
|
||||
double sample_n_parallel(int n){
|
||||
|
||||
int n_threads = 16;
|
||||
int quotient = n / 16;
|
||||
int remainder = n % 16;
|
||||
|
||||
uint64_t seed = 1000;
|
||||
uint64_t seed = 100;
|
||||
double result_remainder = sample_min_of_n(&seed, remainder);
|
||||
|
||||
double sample_min_of_quotient(uint64_t * seed)
|
||||
{
|
||||
return sample_min_of_n(seed, quotient);
|
||||
double sample_min_of_quotient(uint64_t* seed) {
|
||||
double result = sample_min_of_n(seed, quotient);
|
||||
// printf("Result: %f\n", result);
|
||||
return result;
|
||||
}
|
||||
double* results_quotient = malloc(quotient * sizeof(double));
|
||||
sampler_parallel(sample_min_of_quotient, results_quotient, n_threads, quotient);
|
||||
double* results = malloc(n_threads * sizeof(double));
|
||||
parallel_sampler(sample_min_of_quotient, results, n_threads, n_threads);
|
||||
|
||||
double min = results_quotient[0];
|
||||
for (int i = 1; i < quotient; i++) {
|
||||
if (min > results_quotient[i]) {
|
||||
min = results_quotient[i];
|
||||
double min = results[0];
|
||||
for(int i=1; i<n_threads; i++){
|
||||
if(min > results[i]){
|
||||
min = results[i];
|
||||
}
|
||||
}
|
||||
if(min > result_remainder){
|
||||
min = result_remainder;
|
||||
}
|
||||
free(results_quotient);
|
||||
free(results);
|
||||
return min;
|
||||
}
|
||||
printf("Minimum of 1M samples of normal(5,2): %f\n", sample_n_parallel(1000000));
|
||||
printf("Minimum of 10M samples of normal(5,2): %f\n", sample_n_parallel(1000 * 1000));
|
||||
|
||||
}
|
||||
|
|
Binary file not shown.
|
@ -1,21 +0,0 @@
|
|||
#include "../../../squiggle.h"
|
||||
#include "../../../squiggle_more.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int main()
|
||||
{
|
||||
// set randomness seed
|
||||
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||
*seed = 1000; // xorshift can't start with a seed of 0
|
||||
|
||||
int n = 1000000;
|
||||
double* xs = malloc(sizeof(double) * n);
|
||||
for (int i = 0; i < n; i++) {
|
||||
xs[i] = sample_to(10, 100, seed);
|
||||
}
|
||||
ci ci_90 = array_get_90_ci(xs, n);
|
||||
printf("Recovering confidence interval of sample_to(10, 100):\n low: %f, high: %f\n", ci_90.low, ci_90.high);
|
||||
|
||||
free(seed);
|
||||
}
|
|
@ -49,7 +49,6 @@ all:
|
|||
$(CC) $(OPTIMIZED) $(DEBUG) 11_billion_lognormals_paralell/$(SRC) $(DEPS) -o 11_billion_lognormals_paralell/$(OUTPUT)
|
||||
$(CC) $(OPTIMIZED) $(DEBUG) 12_time_to_botec_parallel/$(SRC) $(DEPS) -o 12_time_to_botec_parallel/$(OUTPUT)
|
||||
$(CC) $(OPTIMIZED) $(DEBUG) 13_parallelize_min/$(SRC) $(DEPS) -o 13_parallelize_min/$(OUTPUT)
|
||||
$(CC) $(OPTIMIZED) $(DEBUG) 14_check_confidence_interval/$(SRC) $(DEPS) -o 14_check_confidence_interval/$(OUTPUT)
|
||||
|
||||
format-all:
|
||||
$(FORMATTER) 00_example_template/$(SRC)
|
||||
|
@ -66,7 +65,6 @@ format-all:
|
|||
$(FORMATTER) 11_billion_lognormals_paralell/$(SRC)
|
||||
$(FORMATTER) 12_time_to_botec_parallel/$(SRC)
|
||||
$(FORMATTER) 13_parallelize_min/$(SRC)
|
||||
$(FORMATTER) 14_check_confidence_interval/$(SRC)
|
||||
|
||||
run-all:
|
||||
00_example_template/$(OUTPUT)
|
||||
|
@ -83,7 +81,6 @@ run-all:
|
|||
11_billion_lognormals_paralell/$(OUTPUT)
|
||||
12_time_to_botec_parallel/$(OUTPUT)
|
||||
13_parallelize_min/$(OUTPUT)
|
||||
14_check_confidence_interval/$(OUTPUT)
|
||||
|
||||
## make one DIR=06_nuclear_recovery
|
||||
one: $(DIR)/$(SRC)
|
||||
|
|
4
makefile
4
makefile
|
@ -13,8 +13,8 @@ format-examples:
|
|||
cd examples/more && make format-all
|
||||
|
||||
format: squiggle.c squiggle.h
|
||||
$(FORMATTER) squiggle.c squiggle.h
|
||||
$(FORMATTER) squiggle_more.c squiggle_more.h
|
||||
$(FORMATTER) squiggle.c
|
||||
$(FORMATTER) squiggle.h
|
||||
|
||||
lint:
|
||||
clang-tidy squiggle.c -- -lm
|
||||
|
|
27
scratchpad/core.c
Normal file
27
scratchpad/core.c
Normal file
|
@ -0,0 +1,27 @@
|
|||
|
||||
uint64_t xorshift64(uint64_t* seed)
|
||||
{
|
||||
// Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
|
||||
// <https://en.wikipedia.org/wiki/Xorshift>
|
||||
uint64_t x = *seed;
|
||||
x ^= x << 13;
|
||||
x ^= x >> 7;
|
||||
x ^= x << 17;
|
||||
return *seed = x;
|
||||
}
|
||||
|
||||
double sample_unit_uniform(uint64_t* seed)
|
||||
{
|
||||
// samples uniform from [0,1] interval.
|
||||
return ((double)xorshift64(seed)) / ((double)UINT64_MAX);
|
||||
}
|
||||
|
||||
double sample_unit_normal(uint64_t* seed)
|
||||
{
|
||||
// // See: <https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform>
|
||||
double u1 = sample_unit_uniform(seed);
|
||||
double u2 = sample_unit_uniform(seed);
|
||||
double z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2);
|
||||
return z;
|
||||
}
|
||||
|
Binary file not shown.
Binary file not shown.
|
@ -10,14 +10,25 @@ int main()
|
|||
// set randomness seed
|
||||
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||
*seed = 1000; // xorshift can't start with a seed of 0
|
||||
/*
|
||||
for (int i = 0; i < 100; i++) {
|
||||
double draw = sample_unit_uniform(seed);
|
||||
printf("%f\n", draw);
|
||||
|
||||
int n = 1000000;
|
||||
double* xs = malloc(sizeof(double) * n);
|
||||
for (int i = 0; i < n; i++) {
|
||||
xs[i] = sample_to(10, 100, seed);
|
||||
}*/
|
||||
// Test division
|
||||
// printf("\n%d\n", 10 % 3);
|
||||
//
|
||||
|
||||
int n_samples = 100, n_threads = 16;
|
||||
double* results = malloc(n_samples * sizeof(double));
|
||||
double sampler_scratchpad(uint64_t* seed){
|
||||
return 1;
|
||||
}
|
||||
parallel_sampler(sampler_scratchpad, results, n_threads, n_samples);
|
||||
for(int i=0; i<n_samples; i++){
|
||||
printf("Sample %d: %f\n", i, results[i]);
|
||||
}
|
||||
ci ci_90 = array_get_90_ci(xs, n);
|
||||
printf("Recovering confidence interval of sample_to(10, 100):\n low: %f, high: %f\n", ci_90.low, ci_90.high);
|
||||
|
||||
free(seed);
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
#define NORMAL90CONFIDENCE 1.6448536269514727
|
||||
|
||||
// Pseudo Random number generator
|
||||
static uint64_t xorshift32(uint32_t* seed)
|
||||
uint64_t xorshift32(uint32_t* seed)
|
||||
{
|
||||
// Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
|
||||
// See:
|
||||
|
@ -24,7 +24,7 @@ static uint64_t xorshift32(uint32_t* seed)
|
|||
return *seed = x;
|
||||
}
|
||||
|
||||
static uint64_t xorshift64(uint64_t* seed)
|
||||
uint64_t xorshift64(uint64_t* seed)
|
||||
{
|
||||
// same as above, but for generating doubles instead of floats
|
||||
uint64_t x = *seed;
|
||||
|
|
311
squiggle_more.c
311
squiggle_more.c
|
@ -1,195 +1,67 @@
|
|||
#include "squiggle.h"
|
||||
#include <float.h>
|
||||
#include <limits.h>
|
||||
#include <math.h>
|
||||
#include <limits.h>
|
||||
#include <omp.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "squiggle.h"
|
||||
|
||||
/* Parallel sampler */
|
||||
void sampler_parallel(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples)
|
||||
{
|
||||
if ((n_samples % n_threads) != 0) {
|
||||
fprintf(stderr, "Number of samples isn't divisible by number of threads, aborting\n");
|
||||
exit(1);
|
||||
}
|
||||
uint64_t** seeds = malloc(n_threads * sizeof(uint64_t*));
|
||||
for (uint64_t i = 0; i < n_threads; i++) {
|
||||
seeds[i] = malloc(sizeof(uint64_t));
|
||||
*seeds[i] = i + 1; // xorshift can't start with 0
|
||||
}
|
||||
/* Math constants */
|
||||
#define PI 3.14159265358979323846 // M_PI in gcc gnu99
|
||||
#define NORMAL90CONFIDENCE 1.6448536269514727
|
||||
|
||||
int i;
|
||||
#pragma omp parallel private(i)
|
||||
{
|
||||
#pragma omp for
|
||||
for (i = 0; i < n_threads; i++) {
|
||||
int lower_bound = i * (n_samples / n_threads);
|
||||
int upper_bound = ((i + 1) * (n_samples / n_threads)) - 1;
|
||||
// printf("Lower bound: %d, upper bound: %d\n", lower_bound, upper_bound);
|
||||
for (int j = lower_bound; j < upper_bound; j++) {
|
||||
results[j] = sampler(seeds[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (uint64_t i = 0; i < n_threads; i++) {
|
||||
free(seeds[i]);
|
||||
}
|
||||
free(seeds);
|
||||
}
|
||||
/* Some error niceties */
|
||||
// These won't be used until later
|
||||
#define MAX_ERROR_LENGTH 500
|
||||
#define EXIT_ON_ERROR 0
|
||||
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
|
||||
|
||||
/* Get confidence intervals, given a sampler */
|
||||
// Not in core yet because I'm not sure how much I like the struct
|
||||
// and the built-in 100k samples
|
||||
// to do: add n to function parameters and document
|
||||
|
||||
typedef struct ci_t {
|
||||
double low;
|
||||
double high;
|
||||
float low;
|
||||
float high;
|
||||
} ci;
|
||||
|
||||
static void swp(int i, int j, double xs[])
|
||||
int compare_doubles(const void* p, const void* q)
|
||||
{
|
||||
double tmp = xs[i];
|
||||
xs[i] = xs[j];
|
||||
xs[j] = tmp;
|
||||
}
|
||||
// https://wikiless.esmailelbob.xyz/wiki/Qsort?lang=en
|
||||
double x = *(const double*)p;
|
||||
double y = *(const double*)q;
|
||||
|
||||
static int partition(int low, int high, double xs[], int length)
|
||||
{
|
||||
// To understand this function:
|
||||
// - see the note after gt variable definition
|
||||
// - go to commit 578bfa27 and the scratchpad/ folder in it, which has printfs sprinkled throughout
|
||||
int pivot = low + floor((high - low) / 2);
|
||||
double pivot_value = xs[pivot];
|
||||
swp(pivot, high, xs);
|
||||
int gt = low; /* This pointer will iterate until finding an element which is greater than the pivot. Then it will move elements that are smaller before it--more specifically, it will move elements to its position and then increment. As a result all elements between gt and i will be greater than the pivot. */
|
||||
for (int i = low; i < high; i++) {
|
||||
if (xs[i] < pivot_value) {
|
||||
swp(gt, i, xs);
|
||||
gt++;
|
||||
}
|
||||
}
|
||||
swp(high, gt, xs);
|
||||
return gt;
|
||||
}
|
||||
/* Avoid returning x - y, which can cause undefined behaviour
|
||||
because of signed integer overflow. */
|
||||
if (x < y)
|
||||
return -1; // Return -1 if you want ascending, 1 if you want descending order.
|
||||
else if (x > y)
|
||||
return 1; // Return 1 if you want ascending, -1 if you want descending order.
|
||||
|
||||
static double quickselect(int k, double xs[], int n)
|
||||
return 0;
|
||||
}
|
||||
ci get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed)
|
||||
{
|
||||
// https://en.wikipedia.org/wiki/Quickselect
|
||||
int low = 0;
|
||||
int high = n - 1;
|
||||
for (;;) {
|
||||
if (low == high) {
|
||||
return xs[low];
|
||||
}
|
||||
int pivot = partition(low, high, xs, n);
|
||||
if (pivot == k) {
|
||||
return xs[pivot];
|
||||
} else if (k < pivot) {
|
||||
high = pivot - 1;
|
||||
} else {
|
||||
low = pivot + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ci array_get_ci(ci interval, double* xs, int n)
|
||||
{
|
||||
|
||||
int low_k = floor(interval.low * n);
|
||||
int high_k = ceil(interval.high * n);
|
||||
ci result = {
|
||||
.low = quickselect(low_k, xs, n),
|
||||
.high = quickselect(high_k, xs, n),
|
||||
};
|
||||
return result;
|
||||
}
|
||||
ci array_get_90_ci(double xs[], int n)
|
||||
{
|
||||
return array_get_ci((ci) { .low = 0.05, .high = 0.95 }, xs, n);
|
||||
}
|
||||
|
||||
ci sampler_get_ci(ci interval, double (*sampler)(uint64_t*), int n, uint64_t* seed)
|
||||
{
|
||||
double* xs = malloc(n * sizeof(double));
|
||||
int n = 100 * 1000;
|
||||
double* samples_array = malloc(n * sizeof(double));
|
||||
for (int i = 0; i < n; i++) {
|
||||
xs[i] = sampler(seed);
|
||||
}
|
||||
ci result = array_get_ci(interval, xs, n);
|
||||
free(xs);
|
||||
return result;
|
||||
}
|
||||
ci sampler_get_90_ci(double (*sampler)(uint64_t*), int n, uint64_t* seed)
|
||||
{
|
||||
return sampler_get_ci((ci) { .low = 0.05, .high = 0.95 }, sampler, n, seed);
|
||||
samples_array[i] = sampler(seed);
|
||||
}
|
||||
qsort(samples_array, n, sizeof(double), compare_doubles);
|
||||
|
||||
/* Algebra manipulations */
|
||||
// here I discover named structs,
|
||||
// which mean that I don't have to be typing
|
||||
// struct blah all the time.
|
||||
|
||||
#define NORMAL90CONFIDENCE 1.6448536269514727
|
||||
|
||||
typedef struct normal_params_t {
|
||||
double mean;
|
||||
double std;
|
||||
} normal_params;
|
||||
|
||||
normal_params algebra_sum_normals(normal_params a, normal_params b)
|
||||
{
|
||||
normal_params result = {
|
||||
.mean = a.mean + b.mean,
|
||||
.std = sqrt((a.std * a.std) + (b.std * b.std)),
|
||||
ci result = {
|
||||
.low = samples_array[5000],
|
||||
.high = samples_array[94999],
|
||||
};
|
||||
return result;
|
||||
}
|
||||
free(samples_array);
|
||||
|
||||
typedef struct lognormal_params_t {
|
||||
double logmean;
|
||||
double logstd;
|
||||
} lognormal_params;
|
||||
|
||||
lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params b)
|
||||
{
|
||||
lognormal_params result = {
|
||||
.logmean = a.logmean + b.logmean,
|
||||
.logstd = sqrt((a.logstd * a.logstd) + (b.logstd * b.logstd)),
|
||||
};
|
||||
return result;
|
||||
}
|
||||
|
||||
lognormal_params convert_ci_to_lognormal_params(ci x)
|
||||
{
|
||||
double loghigh = logf(x.high);
|
||||
double loglow = logf(x.low);
|
||||
double logmean = (loghigh + loglow) / 2.0;
|
||||
double logstd = (loghigh - loglow) / (2.0 * NORMAL90CONFIDENCE);
|
||||
lognormal_params result = { .logmean = logmean, .logstd = logstd };
|
||||
return result;
|
||||
}
|
||||
|
||||
ci convert_lognormal_params_to_ci(lognormal_params y)
|
||||
{
|
||||
double h = y.logstd * NORMAL90CONFIDENCE;
|
||||
double loghigh = y.logmean + h;
|
||||
double loglow = y.logmean - h;
|
||||
ci result = { .low = exp(loglow), .high = exp(loghigh) };
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Scaffolding to handle errors */
|
||||
// We will sample from an arbitrary cdf
|
||||
// We are building towards sample from an arbitrary cdf
|
||||
// and that operation might fail
|
||||
// so we build some scaffolding here
|
||||
|
||||
#define MAX_ERROR_LENGTH 500
|
||||
#define EXIT_ON_ERROR 0
|
||||
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
|
||||
|
||||
struct box {
|
||||
int empty;
|
||||
double content;
|
||||
|
@ -381,14 +253,115 @@ double sampler_cdf_danger(struct box cdf(double), uint64_t* seed)
|
|||
}
|
||||
}
|
||||
|
||||
/* array print: potentially useful for debugging */
|
||||
/* Algebra manipulations */
|
||||
// here I discover named structs,
|
||||
// which mean that I don't have to be typing
|
||||
// struct blah all the time.
|
||||
typedef struct normal_params_t {
|
||||
double mean;
|
||||
double std;
|
||||
} normal_params;
|
||||
|
||||
void array_print(double xs[], int n)
|
||||
normal_params algebra_sum_normals(normal_params a, normal_params b)
|
||||
{
|
||||
printf("[");
|
||||
for (int i = 0; i < n - 1; i++) {
|
||||
printf("%f, ", xs[i]);
|
||||
normal_params result = {
|
||||
.mean = a.mean + b.mean,
|
||||
.std = sqrt((a.std * a.std) + (b.std * b.std)),
|
||||
};
|
||||
return result;
|
||||
}
|
||||
printf("%f", xs[n - 1]);
|
||||
printf("]\n");
|
||||
|
||||
typedef struct lognormal_params_t {
|
||||
double logmean;
|
||||
double logstd;
|
||||
} lognormal_params;
|
||||
|
||||
lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params b)
|
||||
{
|
||||
lognormal_params result = {
|
||||
.logmean = a.logmean + b.logmean,
|
||||
.logstd = sqrt((a.logstd * a.logstd) + (b.logstd * b.logstd)),
|
||||
};
|
||||
return result;
|
||||
}
|
||||
|
||||
lognormal_params convert_ci_to_lognormal_params(ci x)
|
||||
{
|
||||
double loghigh = logf(x.high);
|
||||
double loglow = logf(x.low);
|
||||
double logmean = (loghigh + loglow) / 2.0;
|
||||
double logstd = (loghigh - loglow) / (2.0 * NORMAL90CONFIDENCE);
|
||||
lognormal_params result = { .logmean = logmean, .logstd = logstd };
|
||||
return result;
|
||||
}
|
||||
|
||||
ci convert_lognormal_params_to_ci(lognormal_params y)
|
||||
{
|
||||
double h = y.logstd * NORMAL90CONFIDENCE;
|
||||
double loghigh = y.logmean + h;
|
||||
double loglow = y.logmean - h;
|
||||
ci result = { .low = exp(loglow), .high = exp(loghigh) };
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Parallel sampler */
|
||||
void parallel_sampler(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples){
|
||||
|
||||
// Division terminology:
|
||||
// a = b * quotient + reminder
|
||||
// a = (a/b)*b + (a%b)
|
||||
// dividend: a
|
||||
// divisor: b
|
||||
// quotient = a / b
|
||||
// reminder = a % b
|
||||
// "divisor's multiple" := (a/b)*b
|
||||
|
||||
// now, we have n_samples and n_threads
|
||||
// to make our life easy, each thread will have a number of samples of: a/b (quotient)
|
||||
// and we'll compute the remainder of samples separately
|
||||
// to possibly do by Jorge: improve so that the remainder is included in the threads
|
||||
|
||||
int quotient = n_samples / n_threads;
|
||||
int remainder = n_samples % n_threads;
|
||||
int divisor_multiple = quotient * n_threads;
|
||||
|
||||
uint64_t** seeds = malloc(n_threads * sizeof(uint64_t*));
|
||||
// printf("UINT64_MAX: %lu\n", UINT64_MAX);
|
||||
srand(1);
|
||||
for (uint64_t i = 0; i < n_threads; i++) {
|
||||
seeds[i] = malloc(sizeof(uint64_t));
|
||||
// Constraints:
|
||||
// - xorshift can't start with 0
|
||||
// - the seeds should be reasonably separated and not correlated
|
||||
*seeds[i] = (uint64_t) rand() * (UINT64_MAX / RAND_MAX);
|
||||
// printf("#%ld: %lu\n",i, *seeds[i]);
|
||||
|
||||
// Other initializations tried:
|
||||
// *seeds[i] = 1 + i;
|
||||
// *seeds[i] = (i + 0.5)*(UINT64_MAX/n_threads);
|
||||
// *seeds[i] = (i + 0.5)*(UINT64_MAX/n_threads) + constant * i;
|
||||
}
|
||||
|
||||
int i;
|
||||
#pragma omp parallel private(i)
|
||||
{
|
||||
#pragma omp for
|
||||
for (i = 0; i < n_threads; i++) {
|
||||
int lower_bound_inclusive = i * quotient;
|
||||
int upper_bound_not_inclusive = ((i+1) * quotient); // note the < in the for loop below,
|
||||
// printf("Lower bound: %d, upper bound: %d\n", lower_bound, upper_bound);
|
||||
for (int j = lower_bound_inclusive; j < upper_bound_not_inclusive; j++) {
|
||||
results[j] = sampler(seeds[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
for(int j=divisor_multiple; j<n_samples; j++){
|
||||
results[j] = sampler(seeds[0]);
|
||||
// we can just reuse a seed, this isn't problematic because we are not doing multithreading
|
||||
}
|
||||
|
||||
for (uint64_t i = 0; i < n_threads; i++) {
|
||||
free(seeds[i]);
|
||||
}
|
||||
free(seeds);
|
||||
}
|
||||
|
|
|
@ -1,20 +1,35 @@
|
|||
#ifndef SQUIGGLE_C_EXTRA
|
||||
#define SQUIGGLE_C_EXTRA
|
||||
|
||||
/* Parallel sampling */
|
||||
void sampler_parallel(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples);
|
||||
// Box
|
||||
struct box {
|
||||
int empty;
|
||||
double content;
|
||||
char* error_msg;
|
||||
};
|
||||
|
||||
/* Get 90% confidence interval */
|
||||
// Macros to handle errors
|
||||
#define MAX_ERROR_LENGTH 500
|
||||
#define EXIT_ON_ERROR 0
|
||||
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
|
||||
struct box process_error(const char* error_msg, int should_exit, char* file, int line);
|
||||
|
||||
// Inverse cdf
|
||||
struct box inverse_cdf_double(double cdf(double), double p);
|
||||
struct box inverse_cdf_box(struct box cdf_box(double), double p);
|
||||
|
||||
// Samplers from cdf
|
||||
struct box sampler_cdf_double(double cdf(double), uint64_t* seed);
|
||||
struct box sampler_cdf_box(struct box cdf(double), uint64_t* seed);
|
||||
|
||||
// Get 90% confidence interval
|
||||
typedef struct ci_t {
|
||||
double low;
|
||||
double high;
|
||||
float low;
|
||||
float high;
|
||||
} ci;
|
||||
ci array_get_ci(ci interval, double* xs, int n);
|
||||
ci array_get_90_ci(double xs[], int n);
|
||||
ci sampler_get_ci(ci interval, double (*sampler)(uint64_t*), int n, uint64_t* seed);
|
||||
ci sampler_get_90_ci(double (*sampler)(uint64_t*), int n, uint64_t* seed);
|
||||
ci get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed);
|
||||
|
||||
/* Algebra manipulations */
|
||||
// small algebra manipulations
|
||||
|
||||
typedef struct normal_params_t {
|
||||
double mean;
|
||||
|
@ -31,24 +46,6 @@ lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params
|
|||
lognormal_params convert_ci_to_lognormal_params(ci x);
|
||||
ci convert_lognormal_params_to_ci(lognormal_params y);
|
||||
|
||||
/* Error handling */
|
||||
struct box {
|
||||
int empty;
|
||||
double content;
|
||||
char* error_msg;
|
||||
};
|
||||
#define MAX_ERROR_LENGTH 500
|
||||
#define EXIT_ON_ERROR 0
|
||||
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
|
||||
struct box process_error(const char* error_msg, int should_exit, char* file, int line);
|
||||
void array_print(double* array, int length);
|
||||
|
||||
/* Inverse cdf */
|
||||
struct box inverse_cdf_double(double cdf(double), double p);
|
||||
struct box inverse_cdf_box(struct box cdf_box(double), double p);
|
||||
|
||||
/* Samplers from cdf */
|
||||
struct box sampler_cdf_double(double cdf(double), uint64_t* seed);
|
||||
struct box sampler_cdf_box(struct box cdf(double), uint64_t* seed);
|
||||
void parallel_sampler(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples);
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue
Block a user