reorg, refactor, recompile

This commit is contained in:
NunoSempere 2023-11-29 22:24:42 +00:00
parent 3e4360f930
commit 023c9f28ac
22 changed files with 141 additions and 146 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -8,7 +8,7 @@
#define NORMAL90CONFIDENCE 1.6448536269514727 #define NORMAL90CONFIDENCE 1.6448536269514727
// Pseudo Random number generator // Pseudo Random number generator
uint64_t xorshift32(uint32_t* seed) static uint64_t xorshift32(uint32_t* seed)
{ {
// Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs" // Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
// See: // See:
@ -24,7 +24,7 @@ uint64_t xorshift32(uint32_t* seed)
return *seed = x; return *seed = x;
} }
uint64_t xorshift64(uint64_t* seed) static uint64_t xorshift64(uint64_t* seed)
{ {
// same as above, but for generating doubles instead of floats // same as above, but for generating doubles instead of floats
uint64_t x = *seed; uint64_t x = *seed;
@ -196,16 +196,6 @@ double array_std(double* array, int length)
return std; return std;
} }
void array_print(double xs[], int n)
{
printf("[");
for (int i = 0; i < n - 1; i++) {
printf("%f, ", xs[i]);
}
printf("%f", xs[n - 1]);
printf("]\n");
}
// Mixture function // Mixture function
double sample_mixture(double (*samplers[])(uint64_t*), double* weights, int n_dists, uint64_t* seed) double sample_mixture(double (*samplers[])(uint64_t*), double* weights, int n_dists, uint64_t* seed)
{ {

View File

@ -7,37 +7,56 @@
#include <stdlib.h> #include <stdlib.h>
#include "squiggle.h" #include "squiggle.h"
/* Math constants */ /* Parallel sampler */
#define PI 3.14159265358979323846 // M_PI in gcc gnu99 void parallel_sampler(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples){
#define NORMAL90CONFIDENCE 1.6448536269514727 if((n_samples % n_threads) != 0){
fprintf(stderr, "Number of samples isn't divisible by number of threads, aborting\n");
exit(1);
}
uint64_t** seeds = malloc(n_threads * sizeof(uint64_t*));
for (uint64_t i = 0; i < n_threads; i++) {
seeds[i] = malloc(sizeof(uint64_t));
*seeds[i] = i + 1; // xorshift can't start with 0
}
/* Some error niceties */ int i;
// These won't be used until later #pragma omp parallel private(i)
#define MAX_ERROR_LENGTH 500 {
#define EXIT_ON_ERROR 0 #pragma omp for
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__) for (i = 0; i < n_threads; i++) {
int lower_bound = i * (n_samples / n_threads);
int upper_bound = ((i+1) * (n_samples / n_threads)) - 1;
// printf("Lower bound: %d, upper bound: %d\n", lower_bound, upper_bound);
for (int j = lower_bound; j < upper_bound; j++) {
results[j] = sampler(seeds[i]);
}
}
}
for (uint64_t i = 0; i < n_threads; i++) {
free(seeds[i]);
}
free(seeds);
}
/* Get confidence intervals, given a sampler */ /* Get confidence intervals, given a sampler */
// Not in core yet because I'm not sure how much I like the struct // Not in core yet because I'm not sure how much I like the struct
// and the built-in 100k samples // and the built-in 100k samples
// to do: add n to function parameters and document // to do: add n to function parameters and document
typedef struct ci_t { typedef struct ci_t {
float low; float low;
float high; float high;
} ci; } ci;
typedef struct ci_searcher_t {
double num;
int remaining;
} ci_searcher;
void swp(int i, int j, double xs[]) static void swp(int i, int j, double xs[])
{ {
double tmp = xs[i]; double tmp = xs[i];
xs[i] = xs[j]; xs[i] = xs[j];
xs[j] = tmp; xs[j] = tmp;
} }
int partition(int low, int high, double xs[], int length) static int partition(int low, int high, double xs[], int length)
{ {
// To understand this function: // To understand this function:
// - see the note after gt variable definition // - see the note after gt variable definition
@ -56,8 +75,9 @@ int partition(int low, int high, double xs[], int length)
return gt; return gt;
} }
double quickselect(int k, double xs[], int length) static double quickselect(int k, double xs[], int length)
{ {
// https://en.wikipedia.org/wiki/Quickselect
int low = 0; int low = 0;
int high = length - 1; int high = length - 1;
for (;;) { for (;;) {
@ -75,37 +95,92 @@ double quickselect(int k, double xs[], int length)
} }
} }
ci get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed) ci sampler_get_ci(double (*sampler)(uint64_t*), ci interval, int n, uint64_t* seed){
{ double* xs = malloc(n * sizeof(double));
int n = 100 * 1000;
double* samples_array = malloc(n * sizeof(double));
for (int i = 0; i < n; i++) { for (int i = 0; i < n; i++) {
samples_array[i] = sampler(seed); xs[i] = sampler(seed);
} }
// 10% confidence interval: n/20, n - n/20
ci_searcher low = {.x = samples_array[0], .remaining = n/20) };
ci_searcher high = {.x = samples_array[0], .remaining = n-(n/20) };
// test with finding the lowest int low_k = floor(interval.low * n);
for(int j=1; i<n; j++){ int high_k = ceil(interval.high * n);
if(low.x > samples_array[i]){
low.x = samples_array[i];
}
}
ci result = { ci result = {
.low = samples_array[5000], .low = quickselect(low_k, xs, n),
.high = samples_array[94999], .high = quickselect(high_k, xs, n),
}; };
free(samples_array); free(xs);
return result;
}
ci get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed)
{
return sampler_get_ci(sampler, (ci) {.low = 0.05, .high = 0.95}, 1000000, seed);
}
/* Algebra manipulations */
// here I discover named structs,
// which mean that I don't have to be typing
// struct blah all the time.
#define NORMAL90CONFIDENCE 1.6448536269514727
typedef struct normal_params_t {
double mean;
double std;
} normal_params;
normal_params algebra_sum_normals(normal_params a, normal_params b)
{
normal_params result = {
.mean = a.mean + b.mean,
.std = sqrt((a.std * a.std) + (b.std * b.std)),
};
return result;
}
typedef struct lognormal_params_t {
double logmean;
double logstd;
} lognormal_params;
lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params b)
{
lognormal_params result = {
.logmean = a.logmean + b.logmean,
.logstd = sqrt((a.logstd * a.logstd) + (b.logstd * b.logstd)),
};
return result;
}
lognormal_params convert_ci_to_lognormal_params(ci x)
{
double loghigh = logf(x.high);
double loglow = logf(x.low);
double logmean = (loghigh + loglow) / 2.0;
double logstd = (loghigh - loglow) / (2.0 * NORMAL90CONFIDENCE);
lognormal_params result = { .logmean = logmean, .logstd = logstd };
return result;
}
ci convert_lognormal_params_to_ci(lognormal_params y)
{
double h = y.logstd * NORMAL90CONFIDENCE;
double loghigh = y.logmean + h;
double loglow = y.logmean - h;
ci result = { .low = exp(loglow), .high = exp(loghigh) };
return result; return result;
} }
/* Scaffolding to handle errors */ /* Scaffolding to handle errors */
// We are building towards sample from an arbitrary cdf // We will sample from an arbitrary cdf
// and that operation might fail // and that operation might fail
// so we build some scaffolding here // so we build some scaffolding here
#define MAX_ERROR_LENGTH 500
#define EXIT_ON_ERROR 0
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
struct box { struct box {
int empty; int empty;
double content; double content;
@ -297,85 +372,15 @@ double sampler_cdf_danger(struct box cdf(double), uint64_t* seed)
} }
} }
/* Algebra manipulations */ /* array print: potentially useful for debugging */
// here I discover named structs,
// which mean that I don't have to be typing
// struct blah all the time.
typedef struct normal_params_t {
double mean;
double std;
} normal_params;
normal_params algebra_sum_normals(normal_params a, normal_params b) void array_print(double xs[], int n)
{ {
normal_params result = { printf("[");
.mean = a.mean + b.mean, for (int i = 0; i < n - 1; i++) {
.std = sqrt((a.std * a.std) + (b.std * b.std)), printf("%f, ", xs[i]);
}; }
return result; printf("%f", xs[n - 1]);
printf("]\n");
} }
typedef struct lognormal_params_t {
double logmean;
double logstd;
} lognormal_params;
lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params b)
{
lognormal_params result = {
.logmean = a.logmean + b.logmean,
.logstd = sqrt((a.logstd * a.logstd) + (b.logstd * b.logstd)),
};
return result;
}
lognormal_params convert_ci_to_lognormal_params(ci x)
{
double loghigh = logf(x.high);
double loglow = logf(x.low);
double logmean = (loghigh + loglow) / 2.0;
double logstd = (loghigh - loglow) / (2.0 * NORMAL90CONFIDENCE);
lognormal_params result = { .logmean = logmean, .logstd = logstd };
return result;
}
ci convert_lognormal_params_to_ci(lognormal_params y)
{
double h = y.logstd * NORMAL90CONFIDENCE;
double loghigh = y.logmean + h;
double loglow = y.logmean - h;
ci result = { .low = exp(loglow), .high = exp(loghigh) };
return result;
}
/* Parallel sampler */
void parallel_sampler(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples){
if((n_samples % n_threads) != 0){
fprintf(stderr, "Number of samples isn't divisible by number of threads, aborting\n");
exit(1);
}
uint64_t** seeds = malloc(n_threads * sizeof(uint64_t*));
for (uint64_t i = 0; i < n_threads; i++) {
seeds[i] = malloc(sizeof(uint64_t));
*seeds[i] = i + 1; // xorshift can't start with 0
}
int i;
#pragma omp parallel private(i)
{
#pragma omp for
for (i = 0; i < n_threads; i++) {
int lower_bound = i * (n_samples / n_threads);
int upper_bound = ((i+1) * (n_samples / n_threads)) - 1;
// printf("Lower bound: %d, upper bound: %d\n", lower_bound, upper_bound);
for (int j = lower_bound; j < upper_bound; j++) {
results[j] = sampler(seeds[i]);
}
}
}
for (uint64_t i = 0; i < n_threads; i++) {
free(seeds[i]);
}
free(seeds);
}

View File

@ -1,35 +1,17 @@
#ifndef SQUIGGLE_C_EXTRA #ifndef SQUIGGLE_C_EXTRA
#define SQUIGGLE_C_EXTRA #define SQUIGGLE_C_EXTRA
// Box /* Parallel sampling */
struct box { void parallel_sampler(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples);
int empty;
double content;
char* error_msg;
};
// Macros to handle errors /* Get 90% confidence interval */
#define MAX_ERROR_LENGTH 500
#define EXIT_ON_ERROR 0
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
struct box process_error(const char* error_msg, int should_exit, char* file, int line);
// Inverse cdf
struct box inverse_cdf_double(double cdf(double), double p);
struct box inverse_cdf_box(struct box cdf_box(double), double p);
// Samplers from cdf
struct box sampler_cdf_double(double cdf(double), uint64_t* seed);
struct box sampler_cdf_box(struct box cdf(double), uint64_t* seed);
// Get 90% confidence interval
typedef struct ci_t { typedef struct ci_t {
float low; float low;
float high; float high;
} ci; } ci;
ci get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed); ci get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed);
// small algebra manipulations /* Algebra manipulations */
typedef struct normal_params_t { typedef struct normal_params_t {
double mean; double mean;
@ -46,6 +28,24 @@ lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params
lognormal_params convert_ci_to_lognormal_params(ci x); lognormal_params convert_ci_to_lognormal_params(ci x);
ci convert_lognormal_params_to_ci(lognormal_params y); ci convert_lognormal_params_to_ci(lognormal_params y);
void parallel_sampler(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples); /* Error handling */
struct box {
int empty;
double content;
char* error_msg;
};
#define MAX_ERROR_LENGTH 500
#define EXIT_ON_ERROR 0
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
struct box process_error(const char* error_msg, int should_exit, char* file, int line);
void array_print(double* array, int length);
/* Inverse cdf */
struct box inverse_cdf_double(double cdf(double), double p);
struct box inverse_cdf_box(struct box cdf_box(double), double p);
/* Samplers from cdf */
struct box sampler_cdf_double(double cdf(double), uint64_t* seed);
struct box sampler_cdf_box(struct box cdf(double), uint64_t* seed);
#endif #endif