diff --git a/scratchpad/makefile b/scratchpad/makefile index cf6d6b5..fb1f8b2 100644 --- a/scratchpad/makefile +++ b/scratchpad/makefile @@ -9,7 +9,7 @@ CC=gcc # required for nested functions # CC=tcc # <= faster compilation # Main file -SRC=scratchpad.c +SRC=scratchpad.c ../squiggle.c OUTPUT=./scratchpad ## Dependencies diff --git a/scratchpad/scratchpad b/scratchpad/scratchpad index 51d92ed..a689f5e 100755 Binary files a/scratchpad/scratchpad and b/scratchpad/scratchpad differ diff --git a/scratchpad/scratchpad.c b/scratchpad/scratchpad.c index 3a91fa6..a7a6506 100644 --- a/scratchpad/scratchpad.c +++ b/scratchpad/scratchpad.c @@ -1,33 +1,13 @@ -#include // FLT_MAX, FLT_MIN -#include // INT_MAX #include // erf, sqrt #include #include #include -// #include #include +#include "../squiggle.h" -#define EXIT_ON_ERROR 0 -#define MAX_ERROR_LENGTH 500 -#define PROCESS_ERROR(...) \ - do { \ - if (EXIT_ON_ERROR) { \ - printf("@, in %s (%d)", __FILE__, __LINE__); \ - exit(1); \ - } else { \ - char error_msg[MAX_ERROR_LENGTH]; \ - snprintf(error_msg, MAX_ERROR_LENGTH, "@, in %s (%d)", __FILE__, __LINE__); \ - struct box error = { .empty = 1, .error_msg = error_msg }; \ - return error; \ - } \ - } while (0) #define NUM_SAMPLES 1000000 - -struct box { - int empty; - float content; - char* error_msg; -}; +#define STOP_BETA 1.0e-8 +#define TINY_BETA 1.0e-30 // Example cdf float cdf_uniform_0_1(float x) @@ -59,16 +39,10 @@ float cdf_normal_0_1(float x) return 0.5 * (1 + erf((x - mean) / (std * sqrt(2)))); // erf from math.h } -// [x] to do: add beta. -// [x] for the cdf, use this incomplete beta function implementation, based on continuous fractions: -// -// - -#define STOP_BETA 1.0e-8 -#define TINY_BETA 1.0e-30 struct box incbeta(float a, float b, float x) { // Descended from , + // // but modified to return a box struct and floats instead of doubles. // [ ] to do: add attribution in README // Original code under this license: @@ -174,200 +148,6 @@ struct box cdf_beta(float x) } } -// Inverse cdf at point -// Two versions of this function: -// - raw, dealing with cdfs that return floats -// - box, dealing with cdfs that return a box. - -// Inverse cdf -struct box inverse_cdf_float(float cdf(float), float p) -{ - // given a cdf: [-Inf, Inf] => [0,1] - // returns a box with either - // x such that cdf(x) = p - // or an error - // if EXIT_ON_ERROR is set to 1, it exits instead of providing an error - - float low = -1.0; - float high = 1.0; - - // 1. Make sure that cdf(low) < p < cdf(high) - int interval_found = 0; - while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) { - // ^ Using FLT_MIN and FLT_MAX is overkill - // but it's also the *correct* thing to do. - - int low_condition = (cdf(low) < p); - int high_condition = (p < cdf(high)); - if (low_condition && high_condition) { - interval_found = 1; - } else if (!low_condition) { - low = low * 2; - } else if (!high_condition) { - high = high * 2; - } - } - - if (!interval_found) { - PROCESS_ERROR("Interval containing the target value not found, in function inverse_cdf"); - } else { - - int convergence_condition = 0; - int count = 0; - while (!convergence_condition && (count < (INT_MAX / 2))) { - float mid = (high + low) / 2; - int mid_not_new = (mid == low) || (mid == high); - // float width = high - low; - // if ((width < 1e-8) || mid_not_new){ - if (mid_not_new) { - convergence_condition = 1; - } else { - float mid_sign = cdf(mid) - p; - if (mid_sign < 0) { - low = mid; - } else if (mid_sign > 0) { - high = mid; - } else if (mid_sign == 0) { - low = mid; - high = mid; - } - } - } - - if (convergence_condition) { - struct box result = { .empty = 0, .content = low }; - return result; - } else { - PROCESS_ERROR("Search process did not converge, in function inverse_cdf"); - } - } -} - -struct box inverse_cdf_box(struct box cdf_box(float), float p) -{ - // given a cdf: [-Inf, Inf] => Box([0,1]) - // returns a box with either - // x such that cdf(x) = p - // or an error - // if EXIT_ON_ERROR is set to 1, it exits instead of providing an error - - float low = -1.0; - float high = 1.0; - - // 1. Make sure that cdf(low) < p < cdf(high) - int interval_found = 0; - while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) { - // ^ Using FLT_MIN and FLT_MAX is overkill - // but it's also the *correct* thing to do. - struct box cdf_low = cdf_box(low); - if (cdf_low.empty) { - PROCESS_ERROR(cdf_low.error_msg); - } - - struct box cdf_high = cdf_box(high); - if (cdf_high.empty) { - PROCESS_ERROR(cdf_low.error_msg); - } - - int low_condition = (cdf_low.content < p); - int high_condition = (p < cdf_high.content); - if (low_condition && high_condition) { - interval_found = 1; - } else if (!low_condition) { - low = low * 2; - } else if (!high_condition) { - high = high * 2; - } - } - - if (!interval_found) { - PROCESS_ERROR("Interval containing the target value not found, in function inverse_cdf"); - } else { - - int convergence_condition = 0; - int count = 0; - while (!convergence_condition && (count < (INT_MAX / 2))) { - float mid = (high + low) / 2; - int mid_not_new = (mid == low) || (mid == high); - // float width = high - low; - if (mid_not_new) { - // if ((width < 1e-8) || mid_not_new){ - convergence_condition = 1; - } else { - struct box cdf_mid = cdf_box(mid); - if (cdf_mid.empty) { - PROCESS_ERROR(cdf_mid.error_msg); - } - float mid_sign = cdf_mid.content - p; - if (mid_sign < 0) { - low = mid; - } else if (mid_sign > 0) { - high = mid; - } else if (mid_sign == 0) { - low = mid; - high = mid; - } - } - } - - if (convergence_condition) { - struct box result = { .empty = 0, .content = low }; - return result; - } else { - PROCESS_ERROR("Search process did not converge, in function inverse_cdf"); - } - } -} - -// Some randomness functions for: -// - Sampling from a cdf -// - Benchmarking against a previous approach, which will be faster, but less general - -// Get random number between 0 and 1 -uint32_t xorshift32(uint32_t* seed) -{ - // Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs" - // See - // https://en.wikipedia.org/wiki/Xorshift - // Also some drama: , - - uint32_t x = *seed; - x ^= x << 13; - x ^= x >> 17; - x ^= x << 5; - return *seed = x; -} - -// Distribution & sampling functions -float rand_0_to_1(uint32_t* seed) -{ - return ((float)xorshift32(seed)) / ((float)UINT32_MAX); -} - -// Sampler based on inverse cdf and randomness function -struct box sampler_box_cdf(struct box cdf(float), uint32_t* seed) -{ - float p = rand_0_to_1(seed); - struct box result = inverse_cdf_box(cdf, p); - return result; -} -struct box sampler_float_cdf(float cdf(float), uint32_t* seed) -{ - float p = rand_0_to_1(seed); - struct box result = inverse_cdf_float(cdf, p); - return result; -} - -// Comparison point with raw normal sampler -const float PI = 3.14159265358979323846; -float sampler_normal_0_1(uint32_t* seed) -{ - float u1 = rand_0_to_1(seed); - float u2 = rand_0_to_1(seed); - float z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2); - return z; -} - // Some testers void test_inverse_cdf_float(char* cdf_name, float cdf_float(float)) { @@ -445,12 +225,12 @@ int main() test_and_time_sampler_float("cdf_normal_0_1", cdf_normal_0_1, seed); // Get some normal samples using a previous approach - printf("\nGetting some samples from sampler_normal_0_1\n"); + printf("\nGetting some samples from unit_normal\n"); clock_t begin_2 = clock(); for (int i = 0; i < NUM_SAMPLES; i++) { - float normal_sample = sampler_normal_0_1(seed); + float normal_sample = unit_normal(seed); // printf("%f\n", normal_sample); } @@ -460,11 +240,11 @@ int main() // Test box sampler test_and_time_sampler_box("cdf_beta", cdf_beta, seed); - // Ok, this is slower than python!! - // Partly this is because I am using a more general algorithm, - // which applies to any cdf - // But I am also using really anal convergence conditions. - // This could be optimized. + // Ok, this is slower than python!! + // Partly this is because I am using a more general algorithm, + // which applies to any cdf + // But I am also using really anal convergence conditions. + // This could be optimized. free(seed); return 0; diff --git a/squiggle.c b/squiggle.c index 0c6d638..4d38779 100644 --- a/squiggle.c +++ b/squiggle.c @@ -1,6 +1,25 @@ #include #include +#include #include +#include +#include +#include +// #include +#define EXIT_ON_ERROR 0 +#define MAX_ERROR_LENGTH 500 +#define PROCESS_ERROR(...) \ + do { \ + if (EXIT_ON_ERROR) { \ + printf("@, in %s (%d)", __FILE__, __LINE__); \ + exit(1); \ + } else { \ + char error_msg[MAX_ERROR_LENGTH]; \ + snprintf(error_msg, MAX_ERROR_LENGTH, "@, in %s (%d)", __FILE__, __LINE__); \ + struct box error = { .empty = 1, .error_msg = error_msg }; \ + return error; \ + } \ + } while (0) // PI constant const float PI = M_PI; // 3.14159265358979323846; @@ -112,3 +131,171 @@ float mixture(float (*samplers[])(uint32_t*), float* weights, int n_dists, uint3 free(cumsummed_normalized_weights); return result; } + +// Sample from an arbitrary cdf +struct box { + int empty; + float content; + char* error_msg; +}; + +// Inverse cdf at point +// Two versions of this function: +// - raw, dealing with cdfs that return floats +// - input: cdf: float => float, p +// - output: Box(number|error) +// - box, dealing with cdfs that return a box. +// - input: cdf: float => Box(number|error), p +// - output: Box(number|error) +struct box inverse_cdf_float(float cdf(float), float p) +{ + // given a cdf: [-Inf, Inf] => [0,1] + // returns a box with either + // x such that cdf(x) = p + // or an error + // if EXIT_ON_ERROR is set to 1, it exits instead of providing an error + + float low = -1.0; + float high = 1.0; + + // 1. Make sure that cdf(low) < p < cdf(high) + int interval_found = 0; + while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) { + // ^ Using FLT_MIN and FLT_MAX is overkill + // but it's also the *correct* thing to do. + + int low_condition = (cdf(low) < p); + int high_condition = (p < cdf(high)); + if (low_condition && high_condition) { + interval_found = 1; + } else if (!low_condition) { + low = low * 2; + } else if (!high_condition) { + high = high * 2; + } + } + + if (!interval_found) { + PROCESS_ERROR("Interval containing the target value not found, in function inverse_cdf"); + } else { + + int convergence_condition = 0; + int count = 0; + while (!convergence_condition && (count < (INT_MAX / 2))) { + float mid = (high + low) / 2; + int mid_not_new = (mid == low) || (mid == high); + // float width = high - low; + // if ((width < 1e-8) || mid_not_new){ + if (mid_not_new) { + convergence_condition = 1; + } else { + float mid_sign = cdf(mid) - p; + if (mid_sign < 0) { + low = mid; + } else if (mid_sign > 0) { + high = mid; + } else if (mid_sign == 0) { + low = mid; + high = mid; + } + } + } + + if (convergence_condition) { + struct box result = { .empty = 0, .content = low }; + return result; + } else { + PROCESS_ERROR("Search process did not converge, in function inverse_cdf"); + } + } +} + +struct box inverse_cdf_box(struct box cdf_box(float), float p) +{ + // given a cdf: [-Inf, Inf] => Box([0,1]) + // returns a box with either + // x such that cdf(x) = p + // or an error + // if EXIT_ON_ERROR is set to 1, it exits instead of providing an error + + float low = -1.0; + float high = 1.0; + + // 1. Make sure that cdf(low) < p < cdf(high) + int interval_found = 0; + while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) { + // ^ Using FLT_MIN and FLT_MAX is overkill + // but it's also the *correct* thing to do. + struct box cdf_low = cdf_box(low); + if (cdf_low.empty) { + PROCESS_ERROR(cdf_low.error_msg); + } + + struct box cdf_high = cdf_box(high); + if (cdf_high.empty) { + PROCESS_ERROR(cdf_low.error_msg); + } + + int low_condition = (cdf_low.content < p); + int high_condition = (p < cdf_high.content); + if (low_condition && high_condition) { + interval_found = 1; + } else if (!low_condition) { + low = low * 2; + } else if (!high_condition) { + high = high * 2; + } + } + + if (!interval_found) { + PROCESS_ERROR("Interval containing the target value not found, in function inverse_cdf"); + } else { + + int convergence_condition = 0; + int count = 0; + while (!convergence_condition && (count < (INT_MAX / 2))) { + float mid = (high + low) / 2; + int mid_not_new = (mid == low) || (mid == high); + // float width = high - low; + if (mid_not_new) { + // if ((width < 1e-8) || mid_not_new){ + convergence_condition = 1; + } else { + struct box cdf_mid = cdf_box(mid); + if (cdf_mid.empty) { + PROCESS_ERROR(cdf_mid.error_msg); + } + float mid_sign = cdf_mid.content - p; + if (mid_sign < 0) { + low = mid; + } else if (mid_sign > 0) { + high = mid; + } else if (mid_sign == 0) { + low = mid; + high = mid; + } + } + } + + if (convergence_condition) { + struct box result = { .empty = 0, .content = low }; + return result; + } else { + PROCESS_ERROR("Search process did not converge, in function inverse_cdf"); + } + } +} + +// Sampler based on inverse cdf and randomness function +struct box sampler_box_cdf(struct box cdf(float), uint32_t* seed) +{ + float p = rand_0_to_1(seed); + struct box result = inverse_cdf_box(cdf, p); + return result; +} +struct box sampler_float_cdf(float cdf(float), uint32_t* seed) +{ + float p = rand_0_to_1(seed); + struct box result = inverse_cdf_float(cdf, p); + return result; +} diff --git a/squiggle.h b/squiggle.h index 96e70c1..4adc838 100644 --- a/squiggle.h +++ b/squiggle.h @@ -4,13 +4,29 @@ // uint32_t header #include +// Macros +#define EXIT_ON_ERROR 0 +#define MAX_ERROR_LENGTH 500 +#define PROCESS_ERROR(...) \ + do { \ + if (EXIT_ON_ERROR) { \ + printf("@, in %s (%d)", __FILE__, __LINE__); \ + exit(1); \ + } else { \ + char error_msg[MAX_ERROR_LENGTH]; \ + snprintf(error_msg, MAX_ERROR_LENGTH, "@, in %s (%d)", __FILE__, __LINE__); \ + struct box error = { .empty = 1, .error_msg = error_msg }; \ + return error; \ + } \ + } while (0) + // Pseudo Random number generator uint32_t xorshift32(uint32_t* seed); // Distribution & sampling functions float rand_0_to_1(uint32_t* seed); float rand_float(float max, uint32_t* seed); -float ur_normal(uint32_t* seed); +float unit_normal(uint32_t* seed); float random_uniform(float from, float to, uint32_t* seed); float random_normal(float mean, float sigma, uint32_t* seed); float random_lognormal(float logmean, float logsigma, uint32_t* seed); @@ -23,4 +39,20 @@ void array_cumsum(float* array_to_sum, float* array_cumsummed, int length); // Mixture function float mixture(float (*samplers[])(uint32_t*), float* weights, int n_dists, uint32_t* seed); +// Box +struct box { + int empty; + float content; + char* error_msg; +}; + +// Inverse cdf +struct box inverse_cdf_float(float cdf(float), float p); +struct box inverse_cdf_box(struct box cdf_box(float), float p); + +// Samplers from cdf +struct box sampler_box_cdf(struct box cdf(float), uint32_t* seed); +struct box sampler_float_cdf(float cdf(float), uint32_t* seed); + #endif +