move some functions from scratchpad => squiggle.c, reorg

2023-07-16 21:26:33 +02:00 · 2023-07-16 21:26:33 +02:00 · ee9ed34287
commit ee9ed34287
parent 68e7730f24
5 changed files with 232 additions and 233 deletions
--- a/scratchpad/makefile
+++ b/scratchpad/makefile
@ -9,7 +9,7 @@ CC=gcc # required for nested functions
 # CC=tcc # <= faster compilation
 # Main file
-SRC=scratchpad.c
+SRC=scratchpad.c ../squiggle.c
 OUTPUT=./scratchpad
 ## Dependencies
--- a/scratchpad/scratchpad
+++ b/scratchpad/scratchpad
--- a/scratchpad/scratchpad.c
+++ b/scratchpad/scratchpad.c
@ -1,33 +1,13 @@
 #include <float.h> // FLT_MAX, FLT_MIN
 #include <limits.h> // INT_MAX
 #include <math.h> // erf, sqrt
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 // #include <sys/types.h>
 #include <time.h>
 #include "../squiggle.h"
 #define EXIT_ON_ERROR 0
 #define MAX_ERROR_LENGTH 500
 #define PROCESS_ERROR(...)                                                              \
    do {                                                                                \
        if (EXIT_ON_ERROR) {                                                            \
            printf("@, in %s (%d)", __FILE__, __LINE__);                                \
            exit(1);                                                                    \
        } else {                                                                        \
            char error_msg[MAX_ERROR_LENGTH];                                           \
            snprintf(error_msg, MAX_ERROR_LENGTH, "@, in %s (%d)", __FILE__, __LINE__); \
            struct box error = { .empty = 1, .error_msg = error_msg };                  \
            return error;                                                               \
        }                                                                               \
    } while (0)
 #define NUM_SAMPLES 1000000
-
+#define STOP_BETA 1.0e-8
-struct box {
+#define TINY_BETA 1.0e-30
    int empty;
    float content;
    char* error_msg;
 };
 // Example cdf
 float cdf_uniform_0_1(float x)
@ -59,16 +39,10 @@ float cdf_normal_0_1(float x)
    return 0.5 * (1 + erf((x - mean) / (std * sqrt(2)))); // erf from math.h
 }
 // [x] to do: add beta.
 // [x] for the cdf, use this incomplete beta function implementation, based on continuous fractions:
 // <https://codeplea.com/incomplete-beta-function-c>
 // <https://github.com/codeplea/incbeta>
 #define STOP_BETA 1.0e-8
 #define TINY_BETA 1.0e-30
 struct box incbeta(float a, float b, float x)
 {
    // Descended from <https://github.com/codeplea/incbeta/blob/master/incbeta.c>,
    // <https://codeplea.com/incomplete-beta-function-c>
    // but modified to return a box struct and floats instead of doubles.
    // [ ] to do: add attribution in README
    // Original code under this license:
@ -174,200 +148,6 @@ struct box cdf_beta(float x)
    }
 }
 // Inverse cdf at point
 // Two versions of this function:
 //   - raw, dealing with cdfs that return floats
 //   - box, dealing with cdfs that return a box.
 // Inverse cdf
 struct box inverse_cdf_float(float cdf(float), float p)
 {
    // given a cdf: [-Inf, Inf] => [0,1]
    // returns a box with either
    // x such that cdf(x) = p
    // or an error
    // if EXIT_ON_ERROR is set to 1, it exits instead of providing an error
    float low = -1.0;
    float high = 1.0;
    // 1. Make sure that cdf(low) < p < cdf(high)
    int interval_found = 0;
    while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) {
        // ^ Using FLT_MIN and FLT_MAX is overkill
        // but it's also the *correct* thing to do.
        int low_condition = (cdf(low) < p);
        int high_condition = (p < cdf(high));
        if (low_condition && high_condition) {
            interval_found = 1;
        } else if (!low_condition) {
            low = low * 2;
        } else if (!high_condition) {
            high = high * 2;
        }
    }
    if (!interval_found) {
        PROCESS_ERROR("Interval containing the target value not found, in function inverse_cdf");
    } else {
        int convergence_condition = 0;
        int count = 0;
        while (!convergence_condition && (count < (INT_MAX / 2))) {
            float mid = (high + low) / 2;
            int mid_not_new = (mid == low) || (mid == high);
            // float width = high - low;
            // if ((width < 1e-8) || mid_not_new){
            if (mid_not_new) {
                convergence_condition = 1;
            } else {
                float mid_sign = cdf(mid) - p;
                if (mid_sign < 0) {
                    low = mid;
                } else if (mid_sign > 0) {
                    high = mid;
                } else if (mid_sign == 0) {
                    low = mid;
                    high = mid;
                }
            }
        }
        if (convergence_condition) {
            struct box result = { .empty = 0, .content = low };
            return result;
        } else {
            PROCESS_ERROR("Search process did not converge, in function inverse_cdf");
        }
    }
 }
 struct box inverse_cdf_box(struct box cdf_box(float), float p)
 {
    // given a cdf: [-Inf, Inf] => Box([0,1])
    // returns a box with either
    // x such that cdf(x) = p
    // or an error
    // if EXIT_ON_ERROR is set to 1, it exits instead of providing an error
    float low = -1.0;
    float high = 1.0;
    // 1. Make sure that cdf(low) < p < cdf(high)
    int interval_found = 0;
    while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) {
        // ^ Using FLT_MIN and FLT_MAX is overkill
        // but it's also the *correct* thing to do.
        struct box cdf_low = cdf_box(low);
        if (cdf_low.empty) {
            PROCESS_ERROR(cdf_low.error_msg);
        }
        struct box cdf_high = cdf_box(high);
        if (cdf_high.empty) {
            PROCESS_ERROR(cdf_low.error_msg);
        }
        int low_condition = (cdf_low.content < p);
        int high_condition = (p < cdf_high.content);
        if (low_condition && high_condition) {
            interval_found = 1;
        } else if (!low_condition) {
            low = low * 2;
        } else if (!high_condition) {
            high = high * 2;
        }
    }
    if (!interval_found) {
        PROCESS_ERROR("Interval containing the target value not found, in function inverse_cdf");
    } else {
        int convergence_condition = 0;
        int count = 0;
        while (!convergence_condition && (count < (INT_MAX / 2))) {
            float mid = (high + low) / 2;
            int mid_not_new = (mid == low) || (mid == high);
            // float width = high - low;
            if (mid_not_new) {
                // if ((width < 1e-8) || mid_not_new){
                convergence_condition = 1;
            } else {
                struct box cdf_mid = cdf_box(mid);
                if (cdf_mid.empty) {
                    PROCESS_ERROR(cdf_mid.error_msg);
                }
                float mid_sign = cdf_mid.content - p;
                if (mid_sign < 0) {
                    low = mid;
                } else if (mid_sign > 0) {
                    high = mid;
                } else if (mid_sign == 0) {
                    low = mid;
                    high = mid;
                }
            }
        }
        if (convergence_condition) {
            struct box result = { .empty = 0, .content = low };
            return result;
        } else {
            PROCESS_ERROR("Search process did not converge, in function inverse_cdf");
        }
    }
 }
 // Some randomness functions for:
 //   - Sampling from a cdf
 //   - Benchmarking against a previous approach, which will be faster, but less general
 // Get random number between 0 and 1
 uint32_t xorshift32(uint32_t* seed)
 {
    // Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
    // See <https://stackoverflow.com/questions/53886131/how-does-xorshift32-works>
    // https://en.wikipedia.org/wiki/Xorshift
    // Also some drama: <https://www.pcg-random.org/posts/on-vignas-pcg-critique.html>, <https://prng.di.unimi.it/>
    uint32_t x = *seed;
    x ^= x << 13;
    x ^= x >> 17;
    x ^= x << 5;
    return *seed = x;
 }
 // Distribution & sampling functions
 float rand_0_to_1(uint32_t* seed)
 {
    return ((float)xorshift32(seed)) / ((float)UINT32_MAX);
 }
 // Sampler based on inverse cdf and randomness function
 struct box sampler_box_cdf(struct box cdf(float), uint32_t* seed)
 {
    float p = rand_0_to_1(seed);
    struct box result = inverse_cdf_box(cdf, p);
    return result;
 }
 struct box sampler_float_cdf(float cdf(float), uint32_t* seed)
 {
    float p = rand_0_to_1(seed);
    struct box result = inverse_cdf_float(cdf, p);
    return result;
 }
 // Comparison point with raw normal sampler
 const float PI = 3.14159265358979323846;
 float sampler_normal_0_1(uint32_t* seed)
 {
    float u1 = rand_0_to_1(seed);
    float u2 = rand_0_to_1(seed);
    float z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2);
    return z;
 }
 // Some testers
 void test_inverse_cdf_float(char* cdf_name, float cdf_float(float))
 {
@ -445,12 +225,12 @@ int main()
    test_and_time_sampler_float("cdf_normal_0_1", cdf_normal_0_1, seed);
    // Get some normal samples using a previous approach
-    printf("\nGetting some samples from sampler_normal_0_1\n");
+    printf("\nGetting some samples from unit_normal\n");
    clock_t begin_2 = clock();
    for (int i = 0; i < NUM_SAMPLES; i++) {
-        float normal_sample = sampler_normal_0_1(seed);
+        float normal_sample = unit_normal(seed);
        // printf("%f\n", normal_sample);
    }
@ -460,11 +240,11 @@ int main()
    // Test box sampler
    test_and_time_sampler_box("cdf_beta", cdf_beta, seed);
-		// Ok, this is slower than python!!
+    // Ok, this is slower than python!!
-		// Partly this is because I am using a more general algorithm, 
+    // Partly this is because I am using a more general algorithm,
-		//   which applies to any cdf
+    //   which applies to any cdf
-		// But I am also using really anal convergence conditions.
+    // But I am also using really anal convergence conditions.
-		// This could be optimized.
+    // This could be optimized.
    free(seed);
    return 0;
--- a/squiggle.c
+++ b/squiggle.c
@ -1,6 +1,25 @@
 #include <math.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <float.h>
 #include <limits.h>
 #include <time.h>
 // #include <sys/types.h>
 #define EXIT_ON_ERROR 0
 #define MAX_ERROR_LENGTH 500
 #define PROCESS_ERROR(...)                                                              \
    do {                                                                                \
        if (EXIT_ON_ERROR) {                                                            \
            printf("@, in %s (%d)", __FILE__, __LINE__);                                \
            exit(1);                                                                    \
        } else {                                                                        \
            char error_msg[MAX_ERROR_LENGTH];                                           \
            snprintf(error_msg, MAX_ERROR_LENGTH, "@, in %s (%d)", __FILE__, __LINE__); \
            struct box error = { .empty = 1, .error_msg = error_msg };                  \
            return error;                                                               \
        }                                                                               \
    } while (0)
 // PI constant
 const float PI = M_PI; // 3.14159265358979323846;
@ -112,3 +131,171 @@ float mixture(float (*samplers[])(uint32_t*), float* weights, int n_dists, uint3
    free(cumsummed_normalized_weights);
    return result;
 }
 // Sample from an arbitrary cdf 
 struct box {
    int empty;
    float content;
    char* error_msg;
 };
 // Inverse cdf at point
 // Two versions of this function:
 //   - raw, dealing with cdfs that return floats
 //     - input: cdf: float => float, p
 //     - output: Box(number|error)
 //   - box, dealing with cdfs that return a box.
 //     - input: cdf: float => Box(number|error), p
 //     - output: Box(number|error)
 struct box inverse_cdf_float(float cdf(float), float p)
 {
    // given a cdf: [-Inf, Inf] => [0,1]
    // returns a box with either
    // x such that cdf(x) = p
    // or an error
    // if EXIT_ON_ERROR is set to 1, it exits instead of providing an error
    float low = -1.0;
    float high = 1.0;
    // 1. Make sure that cdf(low) < p < cdf(high)
    int interval_found = 0;
    while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) {
        // ^ Using FLT_MIN and FLT_MAX is overkill
        // but it's also the *correct* thing to do.
        int low_condition = (cdf(low) < p);
        int high_condition = (p < cdf(high));
        if (low_condition && high_condition) {
            interval_found = 1;
        } else if (!low_condition) {
            low = low * 2;
        } else if (!high_condition) {
            high = high * 2;
        }
    }
    if (!interval_found) {
        PROCESS_ERROR("Interval containing the target value not found, in function inverse_cdf");
    } else {
        int convergence_condition = 0;
        int count = 0;
        while (!convergence_condition && (count < (INT_MAX / 2))) {
            float mid = (high + low) / 2;
            int mid_not_new = (mid == low) || (mid == high);
            // float width = high - low;
            // if ((width < 1e-8) || mid_not_new){
            if (mid_not_new) {
                convergence_condition = 1;
            } else {
                float mid_sign = cdf(mid) - p;
                if (mid_sign < 0) {
                    low = mid;
                } else if (mid_sign > 0) {
                    high = mid;
                } else if (mid_sign == 0) {
                    low = mid;
                    high = mid;
                }
            }
        }
        if (convergence_condition) {
            struct box result = { .empty = 0, .content = low };
            return result;
        } else {
            PROCESS_ERROR("Search process did not converge, in function inverse_cdf");
        }
    }
 }
 struct box inverse_cdf_box(struct box cdf_box(float), float p)
 {
    // given a cdf: [-Inf, Inf] => Box([0,1])
    // returns a box with either
    // x such that cdf(x) = p
    // or an error
    // if EXIT_ON_ERROR is set to 1, it exits instead of providing an error
    float low = -1.0;
    float high = 1.0;
    // 1. Make sure that cdf(low) < p < cdf(high)
    int interval_found = 0;
    while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) {
        // ^ Using FLT_MIN and FLT_MAX is overkill
        // but it's also the *correct* thing to do.
        struct box cdf_low = cdf_box(low);
        if (cdf_low.empty) {
            PROCESS_ERROR(cdf_low.error_msg);
        }
        struct box cdf_high = cdf_box(high);
        if (cdf_high.empty) {
            PROCESS_ERROR(cdf_low.error_msg);
        }
        int low_condition = (cdf_low.content < p);
        int high_condition = (p < cdf_high.content);
        if (low_condition && high_condition) {
            interval_found = 1;
        } else if (!low_condition) {
            low = low * 2;
        } else if (!high_condition) {
            high = high * 2;
        }
    }
    if (!interval_found) {
        PROCESS_ERROR("Interval containing the target value not found, in function inverse_cdf");
    } else {
        int convergence_condition = 0;
        int count = 0;
        while (!convergence_condition && (count < (INT_MAX / 2))) {
            float mid = (high + low) / 2;
            int mid_not_new = (mid == low) || (mid == high);
            // float width = high - low;
            if (mid_not_new) {
                // if ((width < 1e-8) || mid_not_new){
                convergence_condition = 1;
            } else {
                struct box cdf_mid = cdf_box(mid);
                if (cdf_mid.empty) {
                    PROCESS_ERROR(cdf_mid.error_msg);
                }
                float mid_sign = cdf_mid.content - p;
                if (mid_sign < 0) {
                    low = mid;
                } else if (mid_sign > 0) {
                    high = mid;
                } else if (mid_sign == 0) {
                    low = mid;
                    high = mid;
                }
            }
        }
        if (convergence_condition) {
            struct box result = { .empty = 0, .content = low };
            return result;
        } else {
            PROCESS_ERROR("Search process did not converge, in function inverse_cdf");
        }
    }
 }
 // Sampler based on inverse cdf and randomness function
 struct box sampler_box_cdf(struct box cdf(float), uint32_t* seed)
 {
    float p = rand_0_to_1(seed);
    struct box result = inverse_cdf_box(cdf, p);
    return result;
 }
 struct box sampler_float_cdf(float cdf(float), uint32_t* seed)
 {
    float p = rand_0_to_1(seed);
    struct box result = inverse_cdf_float(cdf, p);
    return result;
 }
--- a/squiggle.h
+++ b/squiggle.h
@ -4,13 +4,29 @@
 // uint32_t header
 #include <stdint.h>
 // Macros
 #define EXIT_ON_ERROR 0
 #define MAX_ERROR_LENGTH 500
 #define PROCESS_ERROR(...)                                                              \
    do {                                                                                \
        if (EXIT_ON_ERROR) {                                                            \
            printf("@, in %s (%d)", __FILE__, __LINE__);                                \
            exit(1);                                                                    \
        } else {                                                                        \
            char error_msg[MAX_ERROR_LENGTH];                                           \
            snprintf(error_msg, MAX_ERROR_LENGTH, "@, in %s (%d)", __FILE__, __LINE__); \
            struct box error = { .empty = 1, .error_msg = error_msg };                  \
            return error;                                                               \
        }                                                                               \
    } while (0)
 // Pseudo Random number generator
 uint32_t xorshift32(uint32_t* seed);
 // Distribution & sampling functions
 float rand_0_to_1(uint32_t* seed);
 float rand_float(float max, uint32_t* seed);
-float ur_normal(uint32_t* seed);
+float unit_normal(uint32_t* seed);
 float random_uniform(float from, float to, uint32_t* seed);
 float random_normal(float mean, float sigma, uint32_t* seed);
 float random_lognormal(float logmean, float logsigma, uint32_t* seed);
@ -23,4 +39,20 @@ void array_cumsum(float* array_to_sum, float* array_cumsummed, int length);
 // Mixture function
 float mixture(float (*samplers[])(uint32_t*), float* weights, int n_dists, uint32_t* seed);
 // Box
 struct box {
    int empty;
    float content;
    char* error_msg;
 };
 // Inverse cdf
 struct box inverse_cdf_float(float cdf(float), float p);
 struct box inverse_cdf_box(struct box cdf_box(float), float p);
 // Samplers from cdf
 struct box sampler_box_cdf(struct box cdf(float), uint32_t* seed);
 struct box sampler_float_cdf(float cdf(float), uint32_t* seed);
 #endif