reorg, refactor, recompile

This commit is contained in:
NunoSempere 2023-11-29 22:24:42 +00:00
parent 3e4360f930
commit 023c9f28ac
22 changed files with 141 additions and 146 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -8,7 +8,7 @@
#define NORMAL90CONFIDENCE 1.6448536269514727
// Pseudo Random number generator
uint64_t xorshift32(uint32_t* seed)
static uint64_t xorshift32(uint32_t* seed)
{
// Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
// See:
@ -24,7 +24,7 @@ uint64_t xorshift32(uint32_t* seed)
return *seed = x;
}
uint64_t xorshift64(uint64_t* seed)
static uint64_t xorshift64(uint64_t* seed)
{
// same as above, but for generating doubles instead of floats
uint64_t x = *seed;
@ -196,16 +196,6 @@ double array_std(double* array, int length)
return std;
}
// Print an array of doubles to stdout as "[x0, x1, ..., xk]" followed by a newline.
//   xs: elements to print
//   n:  number of elements in xs
// When n <= 0 nothing is read from xs and "[]" is printed.
void array_print(double xs[], int n)
{
    printf("[");
    for (int i = 0; i < n; i++) {
        // The separator goes in front of every element except the first,
        // so no element has to be special-cased after the loop.
        if (i > 0) {
            printf(", ");
        }
        printf("%f", xs[i]);
    }
    printf("]\n");
}
// Mixture function
double sample_mixture(double (*samplers[])(uint64_t*), double* weights, int n_dists, uint64_t* seed)
{

View File

@ -7,37 +7,56 @@
#include <stdlib.h>
#include "squiggle.h"
/* Math constants */
#define PI 3.14159265358979323846 // M_PI in gcc gnu99
#define NORMAL90CONFIDENCE 1.6448536269514727
/* Parallel sampler */
// Fill results[0 .. n_samples - 1] with draws from sampler, splitting the work
// into n_threads equal, contiguous chunks (one iteration of the OpenMP loop per chunk).
//   sampler:   draws one sample using the seed it is handed
//   results:   caller-owned output buffer with room for n_samples doubles
//   n_threads: number of chunks; must be positive and must divide n_samples
//   n_samples: total number of samples to draw
// Chunk i gets its own separately allocated seed, initialised to i + 1.
// On invalid arguments or allocation failure a message is written to stderr
// and the process exits with status 1.
void parallel_sampler(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples){
    if (n_threads <= 0) {
        // Also protects the modulo and the division below from a zero divisor.
        fprintf(stderr, "Number of threads must be positive, aborting\n");
        exit(1);
    }
    if((n_samples % n_threads) != 0){
        fprintf(stderr, "Number of samples isn't divisible by number of threads, aborting\n");
        exit(1);
    }
    uint64_t** seeds = malloc((size_t)n_threads * sizeof(uint64_t*));
    if (seeds == NULL) {
        fprintf(stderr, "Could not allocate memory for the seeds, aborting\n");
        exit(1);
    }
    for (int t = 0; t < n_threads; t++) {
        seeds[t] = malloc(sizeof(uint64_t));
        if (seeds[t] == NULL) {
            fprintf(stderr, "Could not allocate memory for the seeds, aborting\n");
            exit(1);
        }
        *seeds[t] = (uint64_t)t + 1; // xorshift can't start with 0
    }
/* Some error niceties */
// These won't be used until later
#define MAX_ERROR_LENGTH 500
#define EXIT_ON_ERROR 0
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
    int chunk_size = n_samples / n_threads;
    int i;
    #pragma omp parallel private(i)
    {
        #pragma omp for
        for (i = 0; i < n_threads; i++) {
            // Half-open range [lower_bound, upper_bound): every slot of the
            // chunk is written, including the last one.
            int lower_bound = i * chunk_size;
            int upper_bound = lower_bound + chunk_size;
            for (int j = lower_bound; j < upper_bound; j++) {
                results[j] = sampler(seeds[i]);
            }
        }
    }
    for (int t = 0; t < n_threads; t++) {
        free(seeds[t]);
    }
    free(seeds);
}
/* Get confidence intervals, given a sampler */
// Not in core yet because I'm not sure how much I like the struct
// and the built-in 100k samples
// to do: add n to function parameters and document
// An interval described by its two endpoints.
// Both endpoints are stored as float, so double values assigned to them are
// narrowed to single precision.
typedef struct ci_t {
float low; // lower endpoint
float high; // upper endpoint
} ci;
// Record pairing a double with an integer counter.
// NOTE(review): no complete use of this type is visible in this part of the
// file; presumably it tracks a candidate value and how many elements are
// still to be examined — confirm, or remove the type if it is unused.
typedef struct ci_searcher_t {
double num;
int remaining;
} ci_searcher;
void swp(int i, int j, double xs[])
// Exchange the elements at positions i and j of xs, in place.
static void swp(int i, int j, double xs[])
{
    const double held = xs[j];
    xs[j] = xs[i];
    xs[i] = held;
}
int partition(int low, int high, double xs[], int length)
static int partition(int low, int high, double xs[], int length)
{
// To understand this function:
// - see the note after gt variable definition
@ -56,8 +75,9 @@ int partition(int low, int high, double xs[], int length)
return gt;
}
double quickselect(int k, double xs[], int length)
static double quickselect(int k, double xs[], int length)
{
// https://en.wikipedia.org/wiki/Quickselect
int low = 0;
int high = length - 1;
for (;;) {
@ -75,37 +95,92 @@ double quickselect(int k, double xs[], int length)
}
}
ci get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed)
{
int n = 100 * 1000;
double* samples_array = malloc(n * sizeof(double));
ci sampler_get_ci(double (*sampler)(uint64_t*), ci interval, int n, uint64_t* seed){
double* xs = malloc(n * sizeof(double));
for (int i = 0; i < n; i++) {
samples_array[i] = sampler(seed);
xs[i] = sampler(seed);
}
// 10% confidence interval: n/20, n - n/20
ci_searcher low = {.x = samples_array[0], .remaining = n/20) };
ci_searcher high = {.x = samples_array[0], .remaining = n-(n/20) };
// test with finding the lowest
for(int j=1; i<n; j++){
if(low.x > samples_array[i]){
low.x = samples_array[i];
}
}
int low_k = floor(interval.low * n);
int high_k = ceil(interval.high * n);
ci result = {
.low = samples_array[5000],
.high = samples_array[94999],
.low = quickselect(low_k, xs, n),
.high = quickselect(high_k, xs, n),
};
free(samples_array);
free(xs);
return result;
}
ci get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed)
{
return sampler_get_ci(sampler, (ci) {.low = 0.05, .high = 0.95}, 1000000, seed);
}
/* Algebra manipulations */
// here I discover named structs,
// which mean that I don't have to be typing
// struct blah all the time.
#define NORMAL90CONFIDENCE 1.6448536269514727
// Parameters of a normal distribution.
typedef struct normal_params_t {
double mean; // mean of the distribution
double std; // standard deviation of the distribution
} normal_params;
// Parameters of the sum of two normals: the means are added, and the
// standard deviations are combined as the square root of the sum of their squares.
normal_params algebra_sum_normals(normal_params a, normal_params b)
{
    double variance_total = (a.std * a.std) + (b.std * b.std);
    normal_params sum;
    sum.mean = a.mean + b.mean;
    sum.std = sqrt(variance_total);
    return sum;
}
// Parameters of a lognormal distribution, expressed in log space.
typedef struct lognormal_params_t {
double logmean; // mean of the logarithm of the variable
double logstd; // standard deviation of the logarithm of the variable
} lognormal_params;
// Parameters of the product of two lognormals: the log-means are added, and
// the log-standard-deviations are combined as the square root of the sum of
// their squares.
lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params b)
{
    double log_variance_total = (a.logstd * a.logstd) + (b.logstd * b.logstd);
    lognormal_params product;
    product.logmean = a.logmean + b.logmean;
    product.logstd = sqrt(log_variance_total);
    return product;
}
// Convert an interval into lognormal parameters.
// The logarithms of the two bounds are taken to lie NORMAL90CONFIDENCE
// log-standard-deviations either side of the log-mean, so logmean is their
// midpoint and logstd is half their distance divided by NORMAL90CONFIDENCE.
//   x: interval to convert; x.low and x.high must be positive, otherwise the
//      logarithms are infinite or NaN
lognormal_params convert_ci_to_lognormal_params(ci x)
{
    // log rather than logf: the results are stored and combined as doubles,
    // so single-precision logarithms would only throw precision away.
    double loghigh = log(x.high);
    double loglow = log(x.low);
    double logmean = (loghigh + loglow) / 2.0;
    double logstd = (loghigh - loglow) / (2.0 * NORMAL90CONFIDENCE);
    lognormal_params result = { .logmean = logmean, .logstd = logstd };
    return result;
}
// Convert lognormal parameters into an interval: the bounds are the
// exponentials of the points NORMAL90CONFIDENCE log-standard-deviations below
// and above the log-mean.
ci convert_lognormal_params_to_ci(lognormal_params y)
{
    double half_width = NORMAL90CONFIDENCE * y.logstd;
    ci bounds;
    bounds.low = exp(y.logmean - half_width);
    bounds.high = exp(y.logmean + half_width);
    return bounds;
}
/* Scaffolding to handle errors */
// We are building towards sample from an arbitrary cdf
// We will sample from an arbitrary cdf
// and that operation might fail
// so we build some scaffolding here
#define MAX_ERROR_LENGTH 500
#define EXIT_ON_ERROR 0
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
struct box {
int empty;
double content;
@ -297,85 +372,15 @@ double sampler_cdf_danger(struct box cdf(double), uint64_t* seed)
}
}
/* Algebra manipulations */
// here I discover named structs,
// which mean that I don't have to be typing
// struct blah all the time.
// Parameters of a normal distribution.
typedef struct normal_params_t {
double mean; // mean of the distribution
double std; // standard deviation of the distribution
} normal_params;
/* array print: potentially useful for debugging */
normal_params algebra_sum_normals(normal_params a, normal_params b)
void array_print(double xs[], int n)
{
normal_params result = {
.mean = a.mean + b.mean,
.std = sqrt((a.std * a.std) + (b.std * b.std)),
};
return result;
}
// Parameters of a lognormal distribution, expressed in log space.
typedef struct lognormal_params_t {
double logmean; // mean of the logarithm of the variable
double logstd; // standard deviation of the logarithm of the variable
} lognormal_params;
// Parameters of the product of two lognormals: the log-means are added, and
// the log-standard-deviations are combined as the square root of the sum of
// their squares.
lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params b)
{
    double log_variance_total = (a.logstd * a.logstd) + (b.logstd * b.logstd);
    lognormal_params product;
    product.logmean = a.logmean + b.logmean;
    product.logstd = sqrt(log_variance_total);
    return product;
}
// Convert an interval into lognormal parameters.
// The logarithms of the two bounds are taken to lie NORMAL90CONFIDENCE
// log-standard-deviations either side of the log-mean, so logmean is their
// midpoint and logstd is half their distance divided by NORMAL90CONFIDENCE.
//   x: interval to convert; x.low and x.high must be positive, otherwise the
//      logarithms are infinite or NaN
lognormal_params convert_ci_to_lognormal_params(ci x)
{
    // log rather than logf: the results are stored and combined as doubles,
    // so single-precision logarithms would only throw precision away.
    double loghigh = log(x.high);
    double loglow = log(x.low);
    double logmean = (loghigh + loglow) / 2.0;
    double logstd = (loghigh - loglow) / (2.0 * NORMAL90CONFIDENCE);
    lognormal_params result = { .logmean = logmean, .logstd = logstd };
    return result;
}
// Convert lognormal parameters into an interval: the bounds are the
// exponentials of the points NORMAL90CONFIDENCE log-standard-deviations below
// and above the log-mean.
ci convert_lognormal_params_to_ci(lognormal_params y)
{
    double half_width = NORMAL90CONFIDENCE * y.logstd;
    ci bounds;
    bounds.low = exp(y.logmean - half_width);
    bounds.high = exp(y.logmean + half_width);
    return bounds;
}
/* Parallel sampler */
void parallel_sampler(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples){
if((n_samples % n_threads) != 0){
fprintf(stderr, "Number of samples isn't divisible by number of threads, aborting\n");
exit(1);
printf("[");
for (int i = 0; i < n - 1; i++) {
printf("%f, ", xs[i]);
}
uint64_t** seeds = malloc(n_threads * sizeof(uint64_t*));
for (uint64_t i = 0; i < n_threads; i++) {
seeds[i] = malloc(sizeof(uint64_t));
*seeds[i] = i + 1; // xorshift can't start with 0
}
int i;
#pragma omp parallel private(i)
{
#pragma omp for
for (i = 0; i < n_threads; i++) {
int lower_bound = i * (n_samples / n_threads);
int upper_bound = ((i+1) * (n_samples / n_threads)) - 1;
// printf("Lower bound: %d, upper bound: %d\n", lower_bound, upper_bound);
for (int j = lower_bound; j < upper_bound; j++) {
results[j] = sampler(seeds[i]);
}
}
}
for (uint64_t i = 0; i < n_threads; i++) {
free(seeds[i]);
}
free(seeds);
printf("%f", xs[n - 1]);
printf("]\n");
}

View File

@ -1,35 +1,17 @@
#ifndef SQUIGGLE_C_EXTRA
#define SQUIGGLE_C_EXTRA
// Box
// Carries either a double result or an error description.
// NOTE(review): presumably `empty` is nonzero when no value is available and
// `error_msg` then explains why — confirm against process_error and the
// inverse cdf functions that return this type.
struct box {
int empty;
double content;
char* error_msg;
};
/* Parallel sampling */
void parallel_sampler(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples);
// Macros to handle errors
#define MAX_ERROR_LENGTH 500
#define EXIT_ON_ERROR 0
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
struct box process_error(const char* error_msg, int should_exit, char* file, int line);
// Inverse cdf
struct box inverse_cdf_double(double cdf(double), double p);
struct box inverse_cdf_box(struct box cdf_box(double), double p);
// Samplers from cdf
struct box sampler_cdf_double(double cdf(double), uint64_t* seed);
struct box sampler_cdf_box(struct box cdf(double), uint64_t* seed);
// Get 90% confidence interval
/* Get 90% confidence interval */
// An interval described by its two endpoints.
// Both endpoints are stored as float, so double values assigned to them are
// narrowed to single precision.
typedef struct ci_t {
float low; // lower endpoint
float high; // upper endpoint
} ci;
ci get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed);
// small algebra manipulations
/* Algebra manipulations */
typedef struct normal_params_t {
double mean;
@ -46,6 +28,24 @@ lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params
lognormal_params convert_ci_to_lognormal_params(ci x);
ci convert_lognormal_params_to_ci(lognormal_params y);
void parallel_sampler(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples);
/* Error handling */
// Carries either a double result or an error description.
// NOTE(review): presumably `empty` is nonzero when no value is available and
// `error_msg` then explains why — confirm against process_error and the
// inverse cdf functions that return this type.
struct box {
int empty;
double content;
char* error_msg;
};
#define MAX_ERROR_LENGTH 500
#define EXIT_ON_ERROR 0
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
struct box process_error(const char* error_msg, int should_exit, char* file, int line);
void array_print(double* array, int length);
/* Inverse cdf */
struct box inverse_cdf_double(double cdf(double), double p);
struct box inverse_cdf_box(struct box cdf_box(double), double p);
/* Samplers from cdf */
struct box sampler_cdf_double(double cdf(double), uint64_t* seed);
struct box sampler_cdf_box(struct box cdf(double), uint64_t* seed);
#endif