squiggle.c/scratchpad/scratchpad.c

#include <float.h> // FLT_MAX, FLT_MIN
#include <limits.h> // INT_MAX
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#define VERBOSE 1

// Errors
// https://mccue.dev/pages/7-27-22-c-errors
// Huh, first time I've really needed a struct
// https://en.wikipedia.org/wiki/Struct_(C_programming_language)
// https://www.cs.yale.edu/homes/aspnes/pinewiki/C(2f)Structs.html
// https://stackoverflow.com/questions/9653072/return-a-struct-from-a-function-in-c
// options:
//   - exit
//   - pass structs
// to do: reuse more informative printing from build-your-own-lisp?
struct box {
    int empty;
    float content;
};

// Example cdf
float cdf_uniform_0_1(float x)
{
    if (x < 0) {
        return 0;
    } else if (x > 1) {
        return 1;
    } else {
        return x;
    }
}

float cdf_squared_0_1(float x)
{
    if (x < 0) {
        return 0;
    } else if (x > 1) {
        return 1;
    } else {
        return x * x;
    }
}

float cdf_normal_0_1(float x)
{
    float mean = 0;
    float std = 1;
    return 0.5 * (1 + erf((x - mean) / (std * sqrt(2))));
}

// Inverse cdf
struct box inverse_cdf(float cdf(float), float p)
{
    // given a cdf: [-Inf, Inf] => [0,1]
    // returns x such that cdf(x) = p
    // [x] to do: add bounds, add error checking
    // [x] maybe return a struct or smth.

    struct box result;
    float low = -1.0;
    float high = 1.0;

    // 1. Make sure that cdf(low) < p < cdf(high)
    // [x] to do: do smth with float min and float max?
    int interval_found = 0;
    while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) {
        // ^ Using FLT_MIN and FLT_MAX is overkill
        // but it's also the *correct* thing to do.

        int low_condition = (cdf(low) < p);
        int high_condition = (p < cdf(high));
        if (low_condition && high_condition) {
            interval_found = 1;
        } else if (!low_condition) {
            low = low * 2;
        } else if (!high_condition) {
            high = high * 2;
        }
    }
    if (0) {
        printf("FLT_MIN = %f, FLT_MAX = %f, INT_MAX = %d\n", -FLT_MAX, FLT_MAX, INT_MAX);
        printf("low: %f, high: %f\n", low, high);
        printf("interval_found? %d\n", interval_found);
        int while_condition = (!interval_found) && (low > FLT_MIN / 4) && (high < FLT_MAX / 4);
        printf("while condition: %i\n", while_condition);
    }

    if (!interval_found) {
        result.empty = 1;
        return result;
    } else {

        int convergence_condition = 0;
        int count = 0;
        while (!convergence_condition && (count < (INT_MAX / 2))) {
            float mid = (high + low) / 2;
            int mid_not_new = (mid == low) || (mid == high);
            if (0) {
                printf("while loop\n");
                printf("low: %f, high: %f\n", low, high);
                printf("mid: %f\n", mid);
            }

            if (mid_not_new) {
                convergence_condition = 1;
            } else {
                float mid_sign = cdf(mid) - p;
                if (mid_sign < 0) {
                    low = mid;
                } else if (mid_sign > 0) {
                    high = mid;
                } else if (mid_sign == 0) {
                    low = mid;
                    high = mid;
                }
            }
        }

        if (convergence_condition) {
            result.content = low;
            result.empty = 0;
        } else {
            result.empty = 1;
        }

        return result;
    }
}

// Get random number between 0 and 1
uint32_t xorshift32(uint32_t* seed)
{
    // Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
    // See <https://stackoverflow.com/questions/53886131/how-does-xorshift32-works>
    // https://en.wikipedia.org/wiki/Xorshift
    // Also some drama: <https://www.pcg-random.org/posts/on-vignas-pcg-critique.html>, <https://prng.di.unimi.it/>

    uint32_t x = *seed;
    x ^= x << 13;
    x ^= x >> 17;
    x ^= x << 5;
    return *seed = x;
}

// Distribution & sampling functions

float rand_0_to_1(uint32_t* seed)
{
    return ((float)xorshift32(seed)) / ((float)UINT32_MAX);
}

// sampler based on inverse cdf
struct box sampler(float cdf(float), uint32_t* seed)
{
    struct box result;
    float p = rand_0_to_1(seed);
    result = inverse_cdf(cdf, p);
    return result;
}

// ~~[-] to-do: integrals => beta distribution~~
// => instead, use this incomplete beta function implementation, based on continuous fractions:
// <https://codeplea.com/incomplete-beta-function-c>
// <https://github.com/codeplea/incbeta>

// main with an example
int main()
{

    // Uniform:
    struct box result_1 = inverse_cdf(cdf_uniform_0_1, 0.5);
    char* name_1 = "cdf_uniform_0_1";
    if (result_1.empty) {
        printf("Inverse for %s not calculated\n", name_1);
        exit(1);
    } else {
        printf("Inverse of %s at %f is: %f\n", name_1, 0.5, result_1.content);
    }

    // Squared cdf
    struct box result_2 = inverse_cdf(cdf_squared_0_1, 0.5);
    char* name_2 = "cdf_squared_0_1";
    if (result_2.empty) {
        printf("Inverse for %s not calculated\n", name_2);
        exit(1);
    } else {
        printf("Inverse of %s at %f is: %f\n", name_2, 0.5, result_2.content);
    }

    // Normal cdf
    struct box result_3 = inverse_cdf(cdf_normal_0_1, 0.5);
    char* name_3 = "cdf_normal_0_1";
    if (result_3.empty) {
        printf("Inverse for %s not calculated\n", name_3);
        exit(1);
    } else {
        printf("Inverse of %s at %f is: %f\n", name_3, 0.5, result_3.content);
    }

    // set randomness seed
    uint32_t* seed = malloc(sizeof(uint32_t));
    *seed = 1000; // xorshift can't start with 0

    printf("\n\nGetting some samples from %s:\n", name_3);
    int n = 100;
    for (int i = 0; i < n; i++) {
        struct box sample = sampler(cdf_normal_0_1, seed);
        if (sample.empty) {
            printf("Error in sampler function");
        } else {
            printf("%f\n", sample.content);
        }
    }
    return 0;
}