forked from personal/squiggle.c
reorg, refactor, recompile
This commit is contained in:
parent
3e4360f930
commit
023c9f28ac
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
14
squiggle.c
14
squiggle.c
|
@ -8,7 +8,7 @@
|
|||
#define NORMAL90CONFIDENCE 1.6448536269514727
|
||||
|
||||
// Pseudo Random number generator
|
||||
uint64_t xorshift32(uint32_t* seed)
|
||||
static uint64_t xorshift32(uint32_t* seed)
|
||||
{
|
||||
// Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
|
||||
// See:
|
||||
|
@ -24,7 +24,7 @@ uint64_t xorshift32(uint32_t* seed)
|
|||
return *seed = x;
|
||||
}
|
||||
|
||||
uint64_t xorshift64(uint64_t* seed)
|
||||
static uint64_t xorshift64(uint64_t* seed)
|
||||
{
|
||||
// same as above, but for generating doubles instead of floats
|
||||
uint64_t x = *seed;
|
||||
|
@ -196,16 +196,6 @@ double array_std(double* array, int length)
|
|||
return std;
|
||||
}
|
||||
|
||||
void array_print(double xs[], int n)
|
||||
{
|
||||
printf("[");
|
||||
for (int i = 0; i < n - 1; i++) {
|
||||
printf("%f, ", xs[i]);
|
||||
}
|
||||
printf("%f", xs[n - 1]);
|
||||
printf("]\n");
|
||||
}
|
||||
|
||||
// Mixture function
|
||||
double sample_mixture(double (*samplers[])(uint64_t*), double* weights, int n_dists, uint64_t* seed)
|
||||
{
|
||||
|
|
227
squiggle_more.c
227
squiggle_more.c
|
@ -7,37 +7,56 @@
|
|||
#include <stdlib.h>
|
||||
#include "squiggle.h"
|
||||
|
||||
/* Math constants */
|
||||
#define PI 3.14159265358979323846 // M_PI in gcc gnu99
|
||||
#define NORMAL90CONFIDENCE 1.6448536269514727
|
||||
/* Parallel sampler */
|
||||
void parallel_sampler(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples){
|
||||
if((n_samples % n_threads) != 0){
|
||||
fprintf(stderr, "Number of samples isn't divisible by number of threads, aborting\n");
|
||||
exit(1);
|
||||
}
|
||||
uint64_t** seeds = malloc(n_threads * sizeof(uint64_t*));
|
||||
for (uint64_t i = 0; i < n_threads; i++) {
|
||||
seeds[i] = malloc(sizeof(uint64_t));
|
||||
*seeds[i] = i + 1; // xorshift can't start with 0
|
||||
}
|
||||
|
||||
/* Some error niceties */
|
||||
// These won't be used until later
|
||||
#define MAX_ERROR_LENGTH 500
|
||||
#define EXIT_ON_ERROR 0
|
||||
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
|
||||
int i;
|
||||
#pragma omp parallel private(i)
|
||||
{
|
||||
#pragma omp for
|
||||
for (i = 0; i < n_threads; i++) {
|
||||
int lower_bound = i * (n_samples / n_threads);
|
||||
int upper_bound = ((i+1) * (n_samples / n_threads)) - 1;
|
||||
// printf("Lower bound: %d, upper bound: %d\n", lower_bound, upper_bound);
|
||||
for (int j = lower_bound; j < upper_bound; j++) {
|
||||
results[j] = sampler(seeds[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (uint64_t i = 0; i < n_threads; i++) {
|
||||
free(seeds[i]);
|
||||
}
|
||||
free(seeds);
|
||||
}
|
||||
|
||||
/* Get confidence intervals, given a sampler */
|
||||
// Not in core yet because I'm not sure how much I like the struct
|
||||
// and the built-in 100k samples
|
||||
// to do: add n to function parameters and document
|
||||
|
||||
typedef struct ci_t {
|
||||
float low;
|
||||
float high;
|
||||
} ci;
|
||||
typedef struct ci_searcher_t {
|
||||
double num;
|
||||
int remaining;
|
||||
} ci_searcher;
|
||||
|
||||
void swp(int i, int j, double xs[])
|
||||
static void swp(int i, int j, double xs[])
|
||||
{
|
||||
double tmp = xs[i];
|
||||
xs[i] = xs[j];
|
||||
xs[j] = tmp;
|
||||
}
|
||||
|
||||
int partition(int low, int high, double xs[], int length)
|
||||
static int partition(int low, int high, double xs[], int length)
|
||||
{
|
||||
// To understand this function:
|
||||
// - see the note after gt variable definition
|
||||
|
@ -56,8 +75,9 @@ int partition(int low, int high, double xs[], int length)
|
|||
return gt;
|
||||
}
|
||||
|
||||
double quickselect(int k, double xs[], int length)
|
||||
static double quickselect(int k, double xs[], int length)
|
||||
{
|
||||
// https://en.wikipedia.org/wiki/Quickselect
|
||||
int low = 0;
|
||||
int high = length - 1;
|
||||
for (;;) {
|
||||
|
@ -75,37 +95,92 @@ double quickselect(int k, double xs[], int length)
|
|||
}
|
||||
}
|
||||
|
||||
ci get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed)
|
||||
{
|
||||
int n = 100 * 1000;
|
||||
double* samples_array = malloc(n * sizeof(double));
|
||||
ci sampler_get_ci(double (*sampler)(uint64_t*), ci interval, int n, uint64_t* seed){
|
||||
double* xs = malloc(n * sizeof(double));
|
||||
for (int i = 0; i < n; i++) {
|
||||
samples_array[i] = sampler(seed);
|
||||
xs[i] = sampler(seed);
|
||||
}
|
||||
// 10% confidence interval: n/20, n - n/20
|
||||
ci_searcher low = {.x = samples_array[0], .remaining = n/20) };
|
||||
ci_searcher high = {.x = samples_array[0], .remaining = n-(n/20) };
|
||||
|
||||
// test with finding the lowest
|
||||
for(int j=1; i<n; j++){
|
||||
if(low.x > samples_array[i]){
|
||||
low.x = samples_array[i];
|
||||
}
|
||||
}
|
||||
int low_k = floor(interval.low * n);
|
||||
int high_k = ceil(interval.high * n);
|
||||
|
||||
ci result = {
|
||||
.low = samples_array[5000],
|
||||
.high = samples_array[94999],
|
||||
.low = quickselect(low_k, xs, n),
|
||||
.high = quickselect(high_k, xs, n),
|
||||
};
|
||||
free(samples_array);
|
||||
free(xs);
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
ci get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed)
|
||||
{
|
||||
return sampler_get_ci(sampler, (ci) {.low = 0.05, .high = 0.95}, 1000000, seed);
|
||||
}
|
||||
|
||||
/* Algebra manipulations */
|
||||
// here I discover named structs,
|
||||
// which mean that I don't have to be typing
|
||||
// struct blah all the time.
|
||||
|
||||
#define NORMAL90CONFIDENCE 1.6448536269514727
|
||||
|
||||
typedef struct normal_params_t {
|
||||
double mean;
|
||||
double std;
|
||||
} normal_params;
|
||||
|
||||
normal_params algebra_sum_normals(normal_params a, normal_params b)
|
||||
{
|
||||
normal_params result = {
|
||||
.mean = a.mean + b.mean,
|
||||
.std = sqrt((a.std * a.std) + (b.std * b.std)),
|
||||
};
|
||||
return result;
|
||||
}
|
||||
|
||||
typedef struct lognormal_params_t {
|
||||
double logmean;
|
||||
double logstd;
|
||||
} lognormal_params;
|
||||
|
||||
lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params b)
|
||||
{
|
||||
lognormal_params result = {
|
||||
.logmean = a.logmean + b.logmean,
|
||||
.logstd = sqrt((a.logstd * a.logstd) + (b.logstd * b.logstd)),
|
||||
};
|
||||
return result;
|
||||
}
|
||||
|
||||
lognormal_params convert_ci_to_lognormal_params(ci x)
|
||||
{
|
||||
double loghigh = logf(x.high);
|
||||
double loglow = logf(x.low);
|
||||
double logmean = (loghigh + loglow) / 2.0;
|
||||
double logstd = (loghigh - loglow) / (2.0 * NORMAL90CONFIDENCE);
|
||||
lognormal_params result = { .logmean = logmean, .logstd = logstd };
|
||||
return result;
|
||||
}
|
||||
|
||||
ci convert_lognormal_params_to_ci(lognormal_params y)
|
||||
{
|
||||
double h = y.logstd * NORMAL90CONFIDENCE;
|
||||
double loghigh = y.logmean + h;
|
||||
double loglow = y.logmean - h;
|
||||
ci result = { .low = exp(loglow), .high = exp(loghigh) };
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Scaffolding to handle errors */
|
||||
// We are building towards sample from an arbitrary cdf
|
||||
// We will sample from an arbitrary cdf
|
||||
// and that operation might fail
|
||||
// so we build some scaffolding here
|
||||
|
||||
#define MAX_ERROR_LENGTH 500
|
||||
#define EXIT_ON_ERROR 0
|
||||
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
|
||||
|
||||
struct box {
|
||||
int empty;
|
||||
double content;
|
||||
|
@ -297,85 +372,15 @@ double sampler_cdf_danger(struct box cdf(double), uint64_t* seed)
|
|||
}
|
||||
}
|
||||
|
||||
/* Algebra manipulations */
|
||||
// here I discover named structs,
|
||||
// which mean that I don't have to be typing
|
||||
// struct blah all the time.
|
||||
typedef struct normal_params_t {
|
||||
double mean;
|
||||
double std;
|
||||
} normal_params;
|
||||
/* array print: potentially useful for debugging */
|
||||
|
||||
normal_params algebra_sum_normals(normal_params a, normal_params b)
|
||||
void array_print(double xs[], int n)
|
||||
{
|
||||
normal_params result = {
|
||||
.mean = a.mean + b.mean,
|
||||
.std = sqrt((a.std * a.std) + (b.std * b.std)),
|
||||
};
|
||||
return result;
|
||||
}
|
||||
|
||||
typedef struct lognormal_params_t {
|
||||
double logmean;
|
||||
double logstd;
|
||||
} lognormal_params;
|
||||
|
||||
lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params b)
|
||||
{
|
||||
lognormal_params result = {
|
||||
.logmean = a.logmean + b.logmean,
|
||||
.logstd = sqrt((a.logstd * a.logstd) + (b.logstd * b.logstd)),
|
||||
};
|
||||
return result;
|
||||
}
|
||||
|
||||
lognormal_params convert_ci_to_lognormal_params(ci x)
|
||||
{
|
||||
double loghigh = logf(x.high);
|
||||
double loglow = logf(x.low);
|
||||
double logmean = (loghigh + loglow) / 2.0;
|
||||
double logstd = (loghigh - loglow) / (2.0 * NORMAL90CONFIDENCE);
|
||||
lognormal_params result = { .logmean = logmean, .logstd = logstd };
|
||||
return result;
|
||||
}
|
||||
|
||||
ci convert_lognormal_params_to_ci(lognormal_params y)
|
||||
{
|
||||
double h = y.logstd * NORMAL90CONFIDENCE;
|
||||
double loghigh = y.logmean + h;
|
||||
double loglow = y.logmean - h;
|
||||
ci result = { .low = exp(loglow), .high = exp(loghigh) };
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Parallel sampler */
|
||||
void parallel_sampler(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples){
|
||||
if((n_samples % n_threads) != 0){
|
||||
fprintf(stderr, "Number of samples isn't divisible by number of threads, aborting\n");
|
||||
exit(1);
|
||||
printf("[");
|
||||
for (int i = 0; i < n - 1; i++) {
|
||||
printf("%f, ", xs[i]);
|
||||
}
|
||||
uint64_t** seeds = malloc(n_threads * sizeof(uint64_t*));
|
||||
for (uint64_t i = 0; i < n_threads; i++) {
|
||||
seeds[i] = malloc(sizeof(uint64_t));
|
||||
*seeds[i] = i + 1; // xorshift can't start with 0
|
||||
}
|
||||
|
||||
int i;
|
||||
#pragma omp parallel private(i)
|
||||
{
|
||||
#pragma omp for
|
||||
for (i = 0; i < n_threads; i++) {
|
||||
int lower_bound = i * (n_samples / n_threads);
|
||||
int upper_bound = ((i+1) * (n_samples / n_threads)) - 1;
|
||||
// printf("Lower bound: %d, upper bound: %d\n", lower_bound, upper_bound);
|
||||
for (int j = lower_bound; j < upper_bound; j++) {
|
||||
results[j] = sampler(seeds[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (uint64_t i = 0; i < n_threads; i++) {
|
||||
free(seeds[i]);
|
||||
}
|
||||
free(seeds);
|
||||
printf("%f", xs[n - 1]);
|
||||
printf("]\n");
|
||||
}
|
||||
|
||||
|
|
|
@ -1,35 +1,17 @@
|
|||
#ifndef SQUIGGLE_C_EXTRA
|
||||
#define SQUIGGLE_C_EXTRA
|
||||
|
||||
// Box
|
||||
struct box {
|
||||
int empty;
|
||||
double content;
|
||||
char* error_msg;
|
||||
};
|
||||
/* Parallel sampling */
|
||||
void parallel_sampler(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples);
|
||||
|
||||
// Macros to handle errors
|
||||
#define MAX_ERROR_LENGTH 500
|
||||
#define EXIT_ON_ERROR 0
|
||||
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
|
||||
struct box process_error(const char* error_msg, int should_exit, char* file, int line);
|
||||
|
||||
// Inverse cdf
|
||||
struct box inverse_cdf_double(double cdf(double), double p);
|
||||
struct box inverse_cdf_box(struct box cdf_box(double), double p);
|
||||
|
||||
// Samplers from cdf
|
||||
struct box sampler_cdf_double(double cdf(double), uint64_t* seed);
|
||||
struct box sampler_cdf_box(struct box cdf(double), uint64_t* seed);
|
||||
|
||||
// Get 90% confidence interval
|
||||
/* Get 90% confidence interval */
|
||||
typedef struct ci_t {
|
||||
float low;
|
||||
float high;
|
||||
} ci;
|
||||
ci get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed);
|
||||
|
||||
// small algebra manipulations
|
||||
/* Algebra manipulations */
|
||||
|
||||
typedef struct normal_params_t {
|
||||
double mean;
|
||||
|
@ -46,6 +28,24 @@ lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params
|
|||
lognormal_params convert_ci_to_lognormal_params(ci x);
|
||||
ci convert_lognormal_params_to_ci(lognormal_params y);
|
||||
|
||||
void parallel_sampler(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples);
|
||||
/* Error handling */
|
||||
struct box {
|
||||
int empty;
|
||||
double content;
|
||||
char* error_msg;
|
||||
};
|
||||
#define MAX_ERROR_LENGTH 500
|
||||
#define EXIT_ON_ERROR 0
|
||||
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
|
||||
struct box process_error(const char* error_msg, int should_exit, char* file, int line);
|
||||
void array_print(double* array, int length);
|
||||
|
||||
/* Inverse cdf */
|
||||
struct box inverse_cdf_double(double cdf(double), double p);
|
||||
struct box inverse_cdf_box(struct box cdf_box(double), double p);
|
||||
|
||||
/* Samplers from cdf */
|
||||
struct box sampler_cdf_double(double cdf(double), uint64_t* seed);
|
||||
struct box sampler_cdf_box(struct box cdf(double), uint64_t* seed);
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue
Block a user