// squiggle.c/scratchpad/scratchpad.c

#include <limits.h> // INT_MAX
#include <stdint.h>
#include <stdlib.h>
#include <float.h> // FLT_MAX, FLT_MIN
#include <stdio.h>
#include <math.h> // erf, sqrt
#include <time.h>

// Error-handling switch tested by the `if (EXIT_ON_ERROR)` branches below:
// when nonzero, a failing function prints its error and calls exit(1);
// when 0, it returns a box with `empty` set and an error message instead.
#define EXIT_ON_ERROR 0

// Errors
struct box {
    // Set to 1 by the functions below when they fail, and to 0 when they succeed.
    int empty;
    // The computed value; the functions below only fill it in on success.
    float content;
		// Description of the failure; the functions below only fill it in on error.
		char * error_msg;
};

// Example cdf
// CDF of the uniform distribution on [0, 1]:
// 0 to the left of the interval, 1 to the right of it, and x itself inside it.
float cdf_uniform_0_1(float x)
{
    if (x < 0) {
        return 0;
    }
    return (x > 1) ? 1 : x;
}

// CDF that grows as x^2 on [0, 1]; clamped to 0 below the interval and 1 above it.
float cdf_squared_0_1(float x)
{
    if (x < 0) {
        return 0;
    }
    if (x > 1) {
        return 1;
    }
    const float squared = x * x;
    return squared;
}

float cdf_normal_0_1(float x)
{
    float mean = 0;
    float std = 1;
    return 0.5 * (1 + erf((x - mean) / (std * sqrt(2)))); // erf from math.h
}

// Inverse cdf
struct box inverse_cdf(float cdf(float), float p)
{
    // given a cdf: [-Inf, Inf] => [0,1]
    // returns a box with either
		// x such that cdf(x) = p
		// or an error
		// if EXIT_ON_ERROR is set to 1, it exits instead of providing an error

    struct box result;
    float low = -1.0;
    float high = 1.0;

    // 1. Make sure that cdf(low) < p < cdf(high)
    int interval_found = 0;
    while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) {
        // ^ Using FLT_MIN and FLT_MAX is overkill
        // but it's also the *correct* thing to do.

        int low_condition = (cdf(low) < p);
        int high_condition = (p < cdf(high));
        if (low_condition && high_condition) {
            interval_found = 1;
        } else if (!low_condition) {
            low = low * 2;
        } else if (!high_condition) {
            high = high * 2;
        }
    }

		if (!interval_found) {
			  if(EXIT_ON_ERROR){
					printf("Interval containing the target value not found, in function inverse_cdf, in %s (%d)", __FILE__, __LINE__);
					exit(1);
				}else{
					char error_msg[200];
					snprintf(error_msg, 200, "Interval containing the target value not found in function inverse_cdf, in %s (%d)", __FILE__, __LINE__);
					result.empty = 1;
					result.error_msg = error_msg;
					return result;
				}
    } else {

        int convergence_condition = 0;
        int count = 0;
        while (!convergence_condition && (count < (INT_MAX / 2))) {
            float mid = (high + low) / 2;
            int mid_not_new = (mid == low) || (mid == high);
						// float width = high - low;
            if (mid_not_new) {
						// if ((width < 1e-8) || mid_not_new){
                convergence_condition = 1;
            } else {
                float mid_sign = cdf(mid) - p;
                if (mid_sign < 0) {
                    low = mid;
                } else if (mid_sign > 0) {
                    high = mid;
                } else if (mid_sign == 0) {
                    low = mid;
                    high = mid;
                }
            }
        }

        if (convergence_condition) {
            result.content = low;
            result.empty = 0;
        } else {
						if(EXIT_ON_ERROR){
							printf("Search process did not converge, in function inverse_cdf, in %s (%d)", __FILE__, __LINE__);
							exit(1);
						}else{
							char error_msg[200];
							snprintf(error_msg, 200, "Search process did not converge, in function inverse_cdf, in %s (%d)", __FILE__, __LINE__);
							result.empty = 1;
							result.error_msg = error_msg;
							return result;
						}
					}

        return result;
    }
}

// Get random number between 0 and 1
// Advances a 32-bit xorshift generator by one step.
// The new state is written back through `seed` and is also the return value.
uint32_t xorshift32(uint32_t* seed)
{
    // Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
    // See <https://stackoverflow.com/questions/53886131/how-does-xorshift32-works>
    // https://en.wikipedia.org/wiki/Xorshift
    // Also some drama: <https://www.pcg-random.org/posts/on-vignas-pcg-critique.html>, <https://prng.di.unimi.it/>

    uint32_t state = *seed;
    state = state ^ (state << 13);
    state = state ^ (state >> 17);
    state = state ^ (state << 5);
    *seed = state;
    return state;
}

// Distribution & sampling functions
// Draws the next xorshift32 value and divides it by UINT32_MAX,
// giving a float in [0, 1]. Advances the state behind `seed`.
float rand_0_to_1(uint32_t* seed)
{
    const float draw = (float)xorshift32(seed);
    const float largest = (float)UINT32_MAX;
    return draw / largest;
}

// Sampler based on inverse cdf
// Inverse-transform sampling: draws p with rand_0_to_1 and returns the box
// produced by inverse_cdf(cdf, p), so errors from the inversion are passed on as-is.
struct box sampler(float cdf(float), uint32_t* seed)
{
    const float p = rand_0_to_1(seed);
    return inverse_cdf(cdf, p);
}

// For comparison, raw sampler
const float PI = 3.14159265358979323846;

// Direct normal sampler: takes two draws from rand_0_to_1 (u1 first, then u2)
// and combines them as sqrt(-2 * log(u1)) * sin(2 * PI * u2).
float sampler_normal_0_1(uint32_t* seed)
{
    const float u1 = rand_0_to_1(seed);
    const float u2 = rand_0_to_1(seed);

    const float radius = sqrtf(-2.0 * log(u1));
    const double sine = sin(2 * PI * u2);

    float z = radius * sine;
    return z;
}

// to do: add beta.
// for the cdf, use this incomplete beta function implementation, based on continued fractions:
// <https://codeplea.com/incomplete-beta-function-c>
// <https://github.com/codeplea/incbeta>

#define STOP 1.0e-8
#define TINY 1.0e-30

struct box incbeta(float a, float b, float x) {
	  // Descended from <https://github.com/codeplea/incbeta/blob/master/incbeta.c>,
		// but modified to return a box struct and floats instead of doubles.
		// [x] to do: add attribution in README
		// Original code under this license:
		/*
		 * zlib License
		 *
		 * Regularized Incomplete Beta Function
		 *
		 * Copyright (c) 2016, 2017 Lewis Van Winkle
		 * http://CodePlea.com
		 *
		 * This software is provided 'as-is', without any express or implied
		 * warranty. In no event will the authors be held liable for any damages
		 * arising from the use of this software.
		 *
		 * Permission is granted to anyone to use this software for any purpose,
		 * including commercial applications, and to alter it and redistribute it
		 * freely, subject to the following restrictions:
		 *
		 * 1. The origin of this software must not be misrepresented; you must not
		 *    claim that you wrote the original software. If you use this software
		 *    in a product, an acknowledgement in the product documentation would be
		 *    appreciated but is not required.
		 * 2. Altered source versions must be plainly marked as such, and must not be
		 *    misrepresented as being the original software.
		 * 3. This notice may not be removed or altered from any source distribution.
		 */
		struct box result;

    if (x < 0.0 || x > 1.0){
			  if(EXIT_ON_ERROR){
					printf("x out of bounds, in function incbeta, in %s (%d)", __FILE__, __LINE__);
					exit(1);
				}else{
					char error_msg[200];
					snprintf(error_msg, 200, "x out of bounds, in function incbeta, in %s (%d)", __FILE__, __LINE__);
					result.empty = 1;
					result.error_msg = error_msg;
					return result;
				}
		}

    /*The continued fraction converges nicely for x < (a+1)/(a+b+2)*/
    if (x > (a+1.0)/(a+b+2.0)) {
			struct box symmetric_incbeta = incbeta(b,a,1.0-x);
			if(symmetric_incbeta.empty){
				return symmetric_incbeta; // propagate error
			}else{
				result.empty = 0;
				result.content = 1-symmetric_incbeta.content;
				return result;
			}
    }

    /*Find the first part before the continued fraction.*/
    const float lbeta_ab = lgamma(a)+lgamma(b)-lgamma(a+b);
    const float front = exp(log(x)*a+log(1.0-x)*b-lbeta_ab) / a;

    /*Use Lentz's algorithm to evaluate the continued fraction.*/
    float f = 1.0, c = 1.0, d = 0.0;

    int i, m;
    for (i = 0; i <= 200; ++i) {
        m = i/2;

        float numerator;
        if (i == 0) {
            numerator = 1.0; /*First numerator is 1.0.*/
        } else if (i % 2 == 0) {
            numerator = (m*(b-m)*x)/((a+2.0*m-1.0)*(a+2.0*m)); /*Even term.*/
        } else {
            numerator = -((a+m)*(a+b+m)*x)/((a+2.0*m)*(a+2.0*m+1)); /*Odd term.*/
        }

        /*Do an iteration of Lentz's algorithm.*/
        d = 1.0 + numerator * d;
        if (fabs(d) < TINY) d = TINY;
        d = 1.0 / d;

        c = 1.0 + numerator / c;
        if (fabs(c) < TINY) c = TINY;

        const float cd = c*d;
        f *= cd;

        /*Check for stop.*/
        if (fabs(1.0-cd) < STOP) {
            result.content = front * (f-1.0);
						result.empty = 0;
						return result;
        }
    }

		if(EXIT_ON_ERROR){
			printf("More loops needed, did not converge, in function incbeta, in %s (%d)", __FILE__, __LINE__);
			exit(1);
		}else{
			char error_msg[200];
			snprintf(error_msg, 200, "More loops needed, did not converge, in function incbeta, in %s (%d)", __FILE__, __LINE__);
			result.empty = 1;
			result.error_msg = error_msg;
			return result;
		}
}

struct box cdf_beta(float x){
	float successes = 1, failures = (2023-1945);
	return incbeta(successes, failures, x);
}

float cdf_dangerous_beta(float x){
	// So the thing is, this works
	// But it will propagate through the code
	// So it doesn't feel like a great architectural choice;
	// I prefer my choice of setting a variable which will determine whether to exit on failure or not.
	float successes = 1, failures = (2023-1945);
	struct box result =  incbeta(successes, failures, x);
	if(result.empty){
		printf("%s", result.error_msg);
		exit(1);
	}else{
		return result.content;
	}
}
struct box dangerous_beta_sampler(uint32_t* seed)
	// Think through what to do to feed the incbeta box into
{
  return sampler(cdf_dangerous_beta, seed);
}

int main()
{

    // Get the inverse cdf of a [0,1] uniform distribution at 0.5
    struct box result_1 = inverse_cdf(cdf_uniform_0_1, 0.5);
    char* name_1 = "cdf_uniform_0_1";
    if (result_1.empty) {
        printf("Inverse for %s not calculated\n", name_1);
        exit(1);
    } else {
        printf("Inverse of %s at %f is: %f\n", name_1, 0.5, result_1.content);
    }

    // Get the inverse cdf of a [0,1] squared distribution at 0.5
    struct box result_2 = inverse_cdf(cdf_squared_0_1, 0.5);
    char* name_2 = "cdf_squared_0_1";
    if (result_2.empty) {
        printf("Inverse for %s not calculated\n", name_2);
        exit(1);
    } else {
        printf("Inverse of %s at %f is: %f\n", name_2, 0.5, result_2.content);
    }

		// Get the inverse of a normal(0,1) cdf distribution
    struct box result_3 = inverse_cdf(cdf_normal_0_1, 0.5);
    char* name_3 = "cdf_normal_0_1";
    if (result_3.empty) {
        printf("Inverse for %s not calculated\n", name_3);
        exit(1);
    } else {
        printf("Inverse of %s at %f is: %f\n", name_3, 0.5, result_3.content);
    }

		// Use the sampler on a normal(0,1)
    // set randomness seed
    uint32_t* seed = malloc(sizeof(uint32_t));
    *seed = 1000; // xorshift can't start with 0
    int n = 100;

    printf("\n\nGetting some samples from %s:\n", name_3);
		clock_t begin = clock();
		for (int i = 0; i < n; i++) {
        struct box sample = sampler(cdf_normal_0_1, seed);
        if (sample.empty) {
            printf("Error in sampler function");
        } else {
            printf("%f\n", sample.content);
        }
    }
		clock_t end = clock();
		float time_spent = (float)(end - begin) / CLOCKS_PER_SEC;
		printf("Time spent: %f", time_spent);

		// Get some normal samples using the previous method.
		clock_t begin_2 = clock();
    printf("\n\nGetting some samples from sampler_normal_0_1\n");
    for (int i = 0; i < n; i++) {
			float normal_sample = sampler_normal_0_1(seed);
      printf("%f\n", normal_sample);
    }
		clock_t end_2 = clock();
		float time_spent_2 = (float)(end_2 - begin_2) / CLOCKS_PER_SEC;
		printf("Time spent: %f", time_spent_2);

    return 0;
}