#include <float.h> // FLT_MAX, FLT_MIN
#include <limits.h> // INT_MAX
#include <math.h> // erf, sqrt
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h> // NOTE(review): header names were lost in this copy of the file and were rebuilt from usage; this one is a guess, confirm
#include <time.h>

#define EXIT_ON_ERROR 0
#define MAX_ERROR_LENGTH 500

// PROCESS_ERROR(error_text): report a failure from inside a function that returns `struct box`.
// - If EXIT_ON_ERROR is nonzero, prints "<error_text>, in <file> (<line>)" to stderr and exits with status 1.
// - Otherwise returns a box with .empty = 1 and .error_msg pointing at the same formatted text.
// The text lives in a `static` buffer (one per expansion site), so the pointer stays valid after
// the function returns. The price is that each site's message is overwritten by the next error
// raised at that same site, and that the macro is not reentrant / thread-safe.
// error_text must be a non-NULL C string; it is evaluated once.
#define PROCESS_ERROR(error_text)                                                             \
    do {                                                                                      \
        if (EXIT_ON_ERROR) {                                                                  \
            fprintf(stderr, "%s, in %s (%d)\n", (error_text), __FILE__, __LINE__);            \
            exit(1);                                                                          \
        } else {                                                                              \
            static char process_error_buf[MAX_ERROR_LENGTH];                                  \
            snprintf(process_error_buf, MAX_ERROR_LENGTH, "%s, in %s (%d)", (error_text),     \
                __FILE__, __LINE__);                                                          \
            struct box error = { .empty = 1, .error_msg = process_error_buf };                \
            return error;                                                                     \
        }                                                                                     \
    } while (0)

#define NUM_SAMPLES 10

// Result-or-error wrapper used by every fallible function in this file.
struct box {
    int empty; // nonzero: no result, see error_msg. zero: content is valid.
    float content; // the result; only meaningful when empty == 0
    char* error_msg; // description of the failure; only set by PROCESS_ERROR (when empty != 0)
};

// Example cdfs

// cdf of the uniform distribution on [0, 1].
float cdf_uniform_0_1(float x)
{
    if (x < 0) {
        return 0;
    } else if (x > 1) {
        return 1;
    } else {
        return x;
    }
}

// cdf F(x) = x^2 on [0, 1], clamped to 0 below and 1 above.
float cdf_squared_0_1(float x)
{
    if (x < 0) {
        return 0;
    } else if (x > 1) {
        return 1;
    } else {
        return x * x;
    }
}

// cdf of the standard normal distribution (mean 0, standard deviation 1).
float cdf_normal_0_1(float x)
{
    float mean = 0;
    float std = 1;
    return 0.5 * (1 + erf((x - mean) / (std * sqrt(2)))); // erf from math.h
}

// [x] to do: add beta.
// [x] for the cdf, use this incomplete beta function implementation, based on continued fractions.
//     (The links that followed this note were lost in this copy of the file.)

#define STOP_BETA 1.0e-8
#define TINY_BETA 1.0e-30

// Regularized incomplete beta function I_x(a, b), evaluated with a continued fraction.
// Returns a box holding the value, or an error box when x is outside [0, 1] or the
// continued fraction has not met the stop criterion after 201 iterations.
struct box incbeta(float a, float b, float x)
{
    // Descended from Lewis Van Winkle's incbeta (the link was lost in this copy;
    // presumably https://github.com/codeplea/incbeta -- confirm),
    // but modified to return a box struct and floats instead of doubles.
    // [ ] to do: add attribution in README
    // Original code under this license:
    /*
     * zlib License
     *
     * Regularized Incomplete Beta Function
     *
     * Copyright (c) 2016, 2017 Lewis Van Winkle
     * http://CodePlea.com
     *
     * This software is provided 'as-is', without any express or implied
     * warranty. In no event will the authors be held liable for any damages
     * arising from the use of this software.
     *
     * Permission is granted to anyone to use this software for any purpose,
     * including commercial applications, and to alter it and redistribute it
     * freely, subject to the following restrictions:
     *
     * 1. The origin of this software must not be misrepresented; you must not
     *    claim that you wrote the original software. If you use this software
     *    in a product, an acknowledgement in the product documentation would be
     *    appreciated but is not required.
     * 2. Altered source versions must be plainly marked as such, and must not be
     *    misrepresented as being the original software.
     * 3. This notice may not be removed or altered from any source distribution.
     */
    if (x < 0.0 || x > 1.0) {
        PROCESS_ERROR("x out of bounds [0, 1], in function incbeta");
    }

    /*The continued fraction converges nicely for x < (a+1)/(a+b+2)*/
    if (x > (a + 1.0) / (a + b + 2.0)) {
        // Use the symmetry I_x(a, b) = 1 - I_{1-x}(b, a) to land in the well-behaved region.
        struct box symmetric_incbeta = incbeta(b, a, 1.0 - x);
        if (symmetric_incbeta.empty) {
            return symmetric_incbeta; // propagate error
        } else {
            struct box result = {
                .empty = 0,
                .content = 1 - symmetric_incbeta.content
            };
            return result;
        }
    }

    /*Find the first part before the continued fraction.*/
    const float lbeta_ab = lgamma(a) + lgamma(b) - lgamma(a + b);
    const float front = exp(log(x) * a + log(1.0 - x) * b - lbeta_ab) / a;

    /*Use Lentz's algorithm to evaluate the continued fraction.*/
    float f = 1.0, c = 1.0, d = 0.0;

    int i, m;
    for (i = 0; i <= 200; ++i) {
        m = i / 2;

        float numerator;
        if (i == 0) {
            numerator = 1.0; /*First numerator is 1.0.*/
        } else if (i % 2 == 0) {
            numerator = (m * (b - m) * x) / ((a + 2.0 * m - 1.0) * (a + 2.0 * m)); /*Even term.*/
        } else {
            numerator = -((a + m) * (a + b + m) * x) / ((a + 2.0 * m) * (a + 2.0 * m + 1)); /*Odd term.*/
        }

        /*Do an iteration of Lentz's algorithm.*/
        d = 1.0 + numerator * d;
        if (fabs(d) < TINY_BETA)
            d = TINY_BETA; // avoid dividing by (almost) zero
        d = 1.0 / d;

        c = 1.0 + numerator / c;
        if (fabs(c) < TINY_BETA)
            c = TINY_BETA;

        const float cd = c * d;
        f *= cd;

        /*Check for stop.*/
        if (fabs(1.0 - cd) < STOP_BETA) {
            struct box result = {
                .empty = 0,
                .content = front * (f - 1.0)
            };
            return result;
        }
    }

    PROCESS_ERROR("More loops needed, did not converge, in function incbeta");
}

// cdf of a beta distribution with fixed parameters a = 1 and b = 2023 - 1945 = 78.
// Returns 0 below the support, 1 above it, and incbeta's box (which may be an error) inside it.
struct box cdf_beta(float x)
{
    if (x < 0) {
        struct box result = { .empty = 0, .content = 0 };
        return result;
    } else if (x > 1) {
        struct box result = { .empty = 0, .content = 1 };
        return result;
    } else {
        float successes = 1, failures = (2023 - 1945);
        return incbeta(successes, failures, x);
    }
}

// Inverse cdf at point
// Two versions of this function:
// - raw, dealing with cdfs that return floats
// - box, dealing with cdfs that return a box.

// Inverse cdf
struct box inverse_cdf_float(float cdf(float), float p)
{
    // given a cdf: [-Inf, Inf] => [0,1]
    // returns a box with either
    // x such that cdf(x) = p
    // or an error
    // if EXIT_ON_ERROR is set to 1, it exits instead of providing an error

    float low = -1.0;
    float high = 1.0;

    // 1. Make sure that cdf(low) < p < cdf(high)
    int interval_found = 0;
    while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) {
        // ^ Using FLT_MIN and FLT_MAX is overkill
        // but it's also the *correct* thing to do.

        int low_condition = (cdf(low) < p);
        int high_condition = (p < cdf(high));

        if (low_condition && high_condition) {
            interval_found = 1;
        } else if (!low_condition) {
            low = low * 2;
        } else {
            // only reachable when low_condition holds and high_condition does not
            high = high * 2;
        }
    }

    if (!interval_found) {
        PROCESS_ERROR("Interval containing the target value not found, in function inverse_cdf");
    }

    // 2. Bisect until the midpoint is no longer a new float
    int convergence_condition = 0;
    int count = 0;
    while (!convergence_condition && (count < (INT_MAX / 2))) {
        count++; // makes the iteration cap in the loop condition effective
        float mid = (high + low) / 2;
        int mid_not_new = (mid == low) || (mid == high);
        if (mid_not_new) {
            convergence_condition = 1;
        } else {
            float mid_sign = cdf(mid) - p;
            if (mid_sign < 0) {
                low = mid;
            } else if (mid_sign > 0) {
                high = mid;
            } else if (mid_sign == 0) {
                low = mid;
                high = mid;
            } else {
                // mid_sign compares false against everything, i.e. it is NaN.
                // Without this branch neither bound would move and the search could not progress.
                PROCESS_ERROR("cdf returned NaN during bisection, in function inverse_cdf");
            }
        }
    }

    if (convergence_condition) {
        struct box result = { .empty = 0, .content = low };
        return result;
    } else {
        PROCESS_ERROR("Search process did not converge, in function inverse_cdf");
    }
}

struct box inverse_cdf_box(struct box cdf_box(float), float p)
{
    // given a cdf: [-Inf, Inf] => Box([0,1])
    // returns a box with either
    // x such that cdf(x) = p
    // or an error (including any error reported by cdf_box itself)
    // if EXIT_ON_ERROR is set to 1, it exits instead of providing an error

    float low = -1.0;
    float high = 1.0;

    // 1. Make sure that cdf(low) < p < cdf(high)
    int interval_found = 0;
    while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) {
        // ^ Using FLT_MIN and FLT_MAX is overkill
        // but it's also the *correct* thing to do.
        struct box cdf_low = cdf_box(low);
        if (cdf_low.empty) {
            PROCESS_ERROR(cdf_low.error_msg);
        }

        struct box cdf_high = cdf_box(high);
        if (cdf_high.empty) {
            PROCESS_ERROR(cdf_high.error_msg); // report the box that actually failed
        }

        int low_condition = (cdf_low.content < p);
        int high_condition = (p < cdf_high.content);

        if (low_condition && high_condition) {
            interval_found = 1;
        } else if (!low_condition) {
            low = low * 2;
        } else {
            // only reachable when low_condition holds and high_condition does not
            high = high * 2;
        }
    }

    if (!interval_found) {
        PROCESS_ERROR("Interval containing the target value not found, in function inverse_cdf");
    }

    // 2. Bisect until the midpoint is no longer a new float
    int convergence_condition = 0;
    int count = 0;
    while (!convergence_condition && (count < (INT_MAX / 2))) {
        count++; // makes the iteration cap in the loop condition effective
        float mid = (high + low) / 2;
        int mid_not_new = (mid == low) || (mid == high);
        if (mid_not_new) {
            convergence_condition = 1;
        } else {
            struct box cdf_mid = cdf_box(mid);
            if (cdf_mid.empty) {
                PROCESS_ERROR(cdf_mid.error_msg);
            }
            float mid_sign = cdf_mid.content - p;
            if (mid_sign < 0) {
                low = mid;
            } else if (mid_sign > 0) {
                high = mid;
            } else if (mid_sign == 0) {
                low = mid;
                high = mid;
            } else {
                // mid_sign compares false against everything, i.e. it is NaN.
                // Without this branch neither bound would move and the search could not progress.
                PROCESS_ERROR("cdf returned NaN during bisection, in function inverse_cdf");
            }
        }
    }

    if (convergence_condition) {
        struct box result = { .empty = 0, .content = low };
        return result;
    } else {
        PROCESS_ERROR("Search process did not converge, in function inverse_cdf");
    }
}

// Some randomness functions for:
// - Sampling from a cdf
// - Benchmarking against a previous approach, which will be faster, but less general

// Advance the generator state in *seed and return the new state.
// *seed must be nonzero: a zero state stays zero forever.
uint32_t xorshift32(uint32_t* seed)
{
    // Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
    // See https://en.wikipedia.org/wiki/Xorshift
    // (Further links that followed this note were lost in this copy of the file.)
    uint32_t x = *seed;
    x ^= x << 13;
    x ^= x >> 17;
    x ^= x << 5;
    return *seed = x;
}

// Distribution & sampling functions

// Get random number between 0 and 1.
// Both operands are rounded to float before dividing, so large generator outputs map to exactly 1.0.
float rand_0_to_1(uint32_t* seed)
{
    return ((float)xorshift32(seed)) / ((float)UINT32_MAX);
}

// Sampler based on inverse cdf and randomness function

// Draw one sample from the distribution described by a box-returning cdf.
struct box sampler_box_cdf(struct box cdf(float), uint32_t* seed)
{
    float p = rand_0_to_1(seed);
    struct box result = inverse_cdf_box(cdf, p);
    return result;
}

// Draw one sample from the distribution described by a float-returning cdf.
struct box sampler_float_cdf(float cdf(float), uint32_t* seed)
{
    float p = rand_0_to_1(seed);
    struct box result = inverse_cdf_float(cdf, p);
    return result;
}

// Comparison point with raw normal sampler
const float PI = 3.14159265358979323846;

// Standard normal sample computed directly from two uniform draws (no cdf inversion).
float sampler_normal_0_1(uint32_t* seed)
{
    float u1 = rand_0_to_1(seed);
    float u2 = rand_0_to_1(seed);
    float z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2);
    return z;
}

// Some testers

// Print the inverse of a float-returning cdf at 0.5; exit(1) if it cannot be computed.
void test_inverse_cdf_float(char* cdf_name, float cdf_float(float))
{
    struct box result = inverse_cdf_float(cdf_float, 0.5);
    if (result.empty) {
        printf("Inverse for %s not calculated\n", cdf_name);
        exit(1);
    } else {
        printf("Inverse of %s at %f is: %f\n", cdf_name, 0.5, result.content);
    }
}

// Print the inverse of a box-returning cdf at 0.5; exit(1) if it cannot be computed.
void test_inverse_cdf_box(char* cdf_name, struct box cdf_box(float))
{
    struct box result = inverse_cdf_box(cdf_box, 0.5);
    if (result.empty) {
        printf("Inverse for %s not calculated\n", cdf_name);
        exit(1);
    } else {
        printf("Inverse of %s at %f is: %f\n", cdf_name, 0.5, result.content);
    }
}

// Draw and print NUM_SAMPLES samples from a float-returning cdf, then print the CPU time used.
void test_and_time_sampler_float(char* cdf_name, float cdf_float(float), uint32_t* seed)
{
    printf("\nGetting some samples from %s:\n", cdf_name);
    clock_t begin = clock();
    for (int i = 0; i < NUM_SAMPLES; i++) {
        struct box sample = sampler_float_cdf(cdf_float, seed);
        if (sample.empty) {
            // newline added so that later output does not run onto this line
            printf("Error in sampler function for %s\n", cdf_name);
        } else {
            printf("%f\n", sample.content);
        }
    }
    clock_t end = clock();
    float time_spent = (float)(end - begin) / CLOCKS_PER_SEC;
    printf("Time spent: %f\n", time_spent);
}

// Draw and print NUM_SAMPLES samples from a box-returning cdf, then print the CPU time used.
void test_and_time_sampler_box(char* cdf_name, struct box cdf_box(float), uint32_t* seed)
{
    printf("\nGetting some samples from %s:\n", cdf_name);
    clock_t begin = clock();
    for (int i = 0; i < NUM_SAMPLES; i++) {
        struct box sample = sampler_box_cdf(cdf_box, seed);
        if (sample.empty) {
            // newline added so that later output does not run onto this line
            printf("Error in sampler function for %s\n", cdf_name);
        } else {
            printf("%f\n", sample.content);
        }
    }
    clock_t end = clock();
    float time_spent = (float)(end - begin) / CLOCKS_PER_SEC;
    printf("Time spent: %f\n", time_spent);
}

int main(void)
{
    // Test inverse cdf float
    test_inverse_cdf_float("cdf_uniform_0_1", cdf_uniform_0_1);
    test_inverse_cdf_float("cdf_squared_0_1", cdf_squared_0_1);
    test_inverse_cdf_float("cdf_normal_0_1", cdf_normal_0_1);

    // Test inverse cdf box
    test_inverse_cdf_box("cdf_beta", cdf_beta);

    // Testing samplers
    // set randomness seed
    // (kept on the stack: a single uint32_t needs no heap allocation, so there is nothing to check or free)
    uint32_t seed_state = 1000; // xorshift can't start with 0
    uint32_t* seed = &seed_state;

    // Test float sampler
    test_and_time_sampler_float("cdf_uniform_0_1", cdf_uniform_0_1, seed);
    test_and_time_sampler_float("cdf_squared_0_1", cdf_squared_0_1, seed);
    test_and_time_sampler_float("cdf_normal_0_1", cdf_normal_0_1, seed);

    // Get some normal samples using a previous approach
    printf("\nGetting some samples from sampler_normal_0_1\n");

    clock_t begin_2 = clock();

    for (int i = 0; i < NUM_SAMPLES; i++) {
        float normal_sample = sampler_normal_0_1(seed);
        printf("%f\n", normal_sample);
    }

    clock_t end_2 = clock();
    float time_spent_2 = (float)(end_2 - begin_2) / CLOCKS_PER_SEC;
    printf("Time spent: %f\n", time_spent_2);

    // Test box sampler
    test_and_time_sampler_box("cdf_beta", cdf_beta, seed);

    return 0;
}