From e05baa6fee6c6f760297d9cc256bb880ac4ed3b1 Mon Sep 17 00:00:00 2001 From: NunoSempere Date: Sun, 16 Jul 2023 17:10:36 +0200 Subject: [PATCH] reorder scratchpad stuff --- scratchpad/scratchpad.c | 318 ++++++++++++++-------------------------- 1 file changed, 112 insertions(+), 206 deletions(-) diff --git a/scratchpad/scratchpad.c b/scratchpad/scratchpad.c index 085e767..8e070c7 100644 --- a/scratchpad/scratchpad.c +++ b/scratchpad/scratchpad.c @@ -41,6 +41,7 @@ float cdf_uniform_0_1(float x) float cdf_squared_0_1(float x) { + float result; if (x < 0) { return 0; } else if (x > 1) { @@ -57,73 +58,123 @@ float cdf_normal_0_1(float x) return 0.5 * (1 + erf((x - mean) / (std * sqrt(2)))); // erf from math.h } -// Inverse cdf -struct box inverse_cdf(float cdf(float), float p) +// [x] to do: add beta. +// [x] for the cdf, use this incomplete beta function implementation, based on continuous fractions: +// +// + +#define STOP_BETA 1.0e-8 +#define TINY_BETA 1.0e-30 +struct box incbeta(float a, float b, float x) { - // given a cdf: [-Inf, Inf] => [0,1] - // returns a box with either - // x such that cdf(x) = p - // or an error - // if EXIT_ON_ERROR is set to 1, it exits instead of providing an error + // Descended from , + // but modified to return a box struct and floats instead of doubles. + // [ ] to do: add attribution in README + // Original code under this license: + /* + * zlib License + * + * Regularized Incomplete Beta Function + * + * Copyright (c) 2016, 2017 Lewis Van Winkle + * http://CodePlea.com + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + if (x < 0.0 || x > 1.0) { + PROCESS_ERROR("x out of bounds [0, 1], in function incbeta"); + } - float low = -1.0; - float high = 1.0; - - // 1. Make sure that cdf(low) < p < cdf(high) - int interval_found = 0; - while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) { - // ^ Using FLT_MIN and FLT_MAX is overkill - // but it's also the *correct* thing to do. - - int low_condition = (cdf(low) < p); - int high_condition = (p < cdf(high)); - if (low_condition && high_condition) { - interval_found = 1; - } else if (!low_condition) { - low = low * 2; - } else if (!high_condition) { - high = high * 2; + /*The continued fraction converges nicely for x < (a+1)/(a+b+2)*/ + if (x > (a + 1.0) / (a + b + 2.0)) { + struct box symmetric_incbeta = incbeta(b, a, 1.0 - x); + if (symmetric_incbeta.empty) { + return symmetric_incbeta; // propagate error + } else { + struct box result = { + .empty = 0, + .content = 1 - symmetric_incbeta.content + }; + return result; } } - if (!interval_found) { - PROCESS_ERROR("Interval containing the target value not found, in function inverse_cdf"); - } else { + /*Find the first part before the continued fraction.*/ + const float lbeta_ab = lgamma(a) + lgamma(b) - lgamma(a + b); + const float front = exp(log(x) * a + log(1.0 - x) * b - lbeta_ab) / a; - int convergence_condition = 0; - int count = 0; - while (!convergence_condition && (count < (INT_MAX / 2))) { - float mid = (high + low) / 2; - int mid_not_new = (mid == low) || (mid == high); - // float width = high - low; - // if ((width < 1e-8) || mid_not_new){ - if (mid_not_new) { - convergence_condition = 1; - } else { - float mid_sign = cdf(mid) - p; - if (mid_sign < 0) { - low = mid; - } else if (mid_sign > 0) { - high = mid; - } else if (mid_sign == 0) { - low = mid; - high = mid; - } - } - } + /*Use Lentz's algorithm to evaluate the continued fraction.*/ + float f = 1.0, c = 1.0, d = 0.0; - if (convergence_condition) { - struct box result = {.empty = 0, .content = low}; - return result; + int i, m; + for (i = 0; i <= 200; ++i) { + m = i / 2; + + float numerator; + if (i == 0) { + numerator = 1.0; /*First numerator is 1.0.*/ + } else if (i % 2 == 0) { + numerator = (m * (b - m) * x) / ((a + 2.0 * m - 1.0) * (a + 2.0 * m)); /*Even term.*/ } else { - PROCESS_ERROR("Search process did not converge, in function inverse_cdf"); + numerator = -((a + m) * (a + b + m) * x) / ((a + 2.0 * m) * (a + 2.0 * m + 1)); /*Odd term.*/ } + /*Do an iteration of Lentz's algorithm.*/ + d = 1.0 + numerator * d; + if (fabs(d) < TINY_BETA) + d = TINY_BETA; + d = 1.0 / d; + + c = 1.0 + numerator / c; + if (fabs(c) < TINY_BETA) + c = TINY_BETA; + + const float cd = c * d; + f *= cd; + + /*Check for stop.*/ + if (fabs(1.0 - cd) < STOP_BETA) { + struct box result = { + .empty = 0, + .content = front * (f - 1.0) + }; + return result; + } + } + + PROCESS_ERROR("More loops needed, did not converge, in function incbeta"); +} + +struct box cdf_beta(float x) +{ + if (x < 0) { + struct box result = { .empty = 0, .content = 0 }; + return result; + } else if (x > 1) { + struct box result = { .empty = 0, .content = 1 }; + return result; + } else { + float successes = 1, failures = (2023 - 1945); + return incbeta(successes, failures, x); } } -// Inverse cdf at point, but this time taking a struct box. -struct box inverse_cdf_box(struct box cdf_box(float), float p) +// Inverse cdf at point +struct box inverse_cdf(struct box cdf_box(float), float p) { // given a cdf: [-Inf, Inf] => Box([0,1]) // returns a box with either @@ -200,6 +251,10 @@ struct box inverse_cdf_box(struct box cdf_box(float), float p) } } +// Some randomness functions for: +// - Sampling from a cdf +// - Benchmarking against a previous approach, which will be faster, but less general + // Get random number between 0 and 1 uint32_t xorshift32(uint32_t* seed) { @@ -221,15 +276,15 @@ float rand_0_to_1(uint32_t* seed) return ((float)xorshift32(seed)) / ((float)UINT32_MAX); } -// Sampler based on inverse cdf -struct box sampler(float cdf(float), uint32_t* seed) +// Sampler based on inverse cdf and randomness function +struct box sampler(struct box cdf(float), uint32_t* seed) { float p = rand_0_to_1(seed); struct box result = inverse_cdf(cdf, p); return result; } -// For comparison, raw sampler +// Comparison point with raw normal sampler const float PI = 3.14159265358979323846; float sampler_normal_0_1(uint32_t* seed) { @@ -239,155 +294,6 @@ float sampler_normal_0_1(uint32_t* seed) return z; } -// to do: add beta. -// for the cdf, use this incomplete beta function implementation, based on continuous fractions: -// -// - -#define STOP 1.0e-8 -#define TINY 1.0e-30 - -struct box incbeta(float a, float b, float x) -{ - // Descended from , - // but modified to return a box struct and floats instead of doubles. - // [x] to do: add attribution in README - // Original code under this license: - /* - * zlib License - * - * Regularized Incomplete Beta Function - * - * Copyright (c) 2016, 2017 Lewis Van Winkle - * http://CodePlea.com - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgement in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - if (x < 0.0 || x > 1.0) { - PROCESS_ERROR("x out of bounds [0, 1], in function incbeta"); - } - - /*The continued fraction converges nicely for x < (a+1)/(a+b+2)*/ - if (x > (a + 1.0) / (a + b + 2.0)) { - struct box symmetric_incbeta = incbeta(b, a, 1.0 - x); - if (symmetric_incbeta.empty) { - return symmetric_incbeta; // propagate error - } else { - struct box result = { - .empty = 0, - .content = 1 - symmetric_incbeta.content - }; - return result; - } - } - - /*Find the first part before the continued fraction.*/ - const float lbeta_ab = lgamma(a) + lgamma(b) - lgamma(a + b); - const float front = exp(log(x) * a + log(1.0 - x) * b - lbeta_ab) / a; - - /*Use Lentz's algorithm to evaluate the continued fraction.*/ - float f = 1.0, c = 1.0, d = 0.0; - - int i, m; - for (i = 0; i <= 200; ++i) { - m = i / 2; - - float numerator; - if (i == 0) { - numerator = 1.0; /*First numerator is 1.0.*/ - } else if (i % 2 == 0) { - numerator = (m * (b - m) * x) / ((a + 2.0 * m - 1.0) * (a + 2.0 * m)); /*Even term.*/ - } else { - numerator = -((a + m) * (a + b + m) * x) / ((a + 2.0 * m) * (a + 2.0 * m + 1)); /*Odd term.*/ - } - - /*Do an iteration of Lentz's algorithm.*/ - d = 1.0 + numerator * d; - if (fabs(d) < TINY) - d = TINY; - d = 1.0 / d; - - c = 1.0 + numerator / c; - if (fabs(c) < TINY) - c = TINY; - - const float cd = c * d; - f *= cd; - - /*Check for stop.*/ - if (fabs(1.0 - cd) < STOP) { - struct box result = { - .empty = 0, - .content = front * (f - 1.0) - }; - return result; - } - } - - PROCESS_ERROR("More loops needed, did not converge, in function incbeta"); -} - -struct box cdf_beta(float x) -{ - if (x < 0) { - struct box result = { .empty = 0, .content = 0 }; - return result; - } else if (x > 1) { - struct box result = { .empty = 0, .content = 1 }; - return result; - } else { - float successes = 1, failures = (2023 - 1945); - return incbeta(successes, failures, x); - } -} - -float cdf_dangerous_beta(float x) -{ - // So the thing is, this works - // But it will propagate through the code - // So it doesn't feel like a great architectural choice; - // I prefer my choice of setting a variable which will determine whether to exit on failure or not. - // Ok, so the proper thing to do would be to refactor inverse_cdf - // but, I could also use a GOTO? - // Ok, alternatives are: - // - Refactor inverse_cdf to take a box, take the small complexity + penalty. Add a helper - // - Duplicate the code, have a refactored inverse_cdf as well as a normal cdf - // - Do something hacky - // a. dangerous beta, which exits - // b. clever & hacky go-to statements - // i. They actually look fun to implement - // ii. But they would be hard for others to use. - if (x < 0) { - return 0; - } else if (x > 1) { - return 1; - } else { - float successes = 100, failures = 100; - struct box result = incbeta(successes, failures, x); - if (result.empty) { - printf("%s\n", result.error_msg); - exit(1); - return 1; - } else { - return result.content; - } - } -} - int main() {