squiggle.c/scratchpad/scratchpad.c

#include <math.h> // erf, sqrt
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "../squiggle.h"

#define NUM_SAMPLES 1000000
#define STOP_BETA 1.0e-8
#define TINY_BETA 1.0e-30

// Example cdf
float cdf_uniform_0_1(float x)
{
    if (x < 0) {
        return 0;
    } else if (x > 1) {
        return 1;
    } else {
        return x;
    }
}

float cdf_squared_0_1(float x)
{
    if (x < 0) {
        return 0;
    } else if (x > 1) {
        return 1;
    } else {
        return x * x;
    }
}

float cdf_normal_0_1(float x)
{
    float mean = 0;
    float std = 1;
    return 0.5 * (1 + erf((x - mean) / (std * sqrt(2)))); // erf from math.h
}

struct box incbeta(float a, float b, float x)
{
    // Descended from <https://github.com/codeplea/incbeta/blob/master/incbeta.c>,
    // <https://codeplea.com/incomplete-beta-function-c>
    // but modified to return a box struct and floats instead of doubles.
    // [ ] to do: add attribution in README
    // Original code under this license:
    /*
		 * zlib License
		 *
		 * Regularized Incomplete Beta Function
		 *
		 * Copyright (c) 2016, 2017 Lewis Van Winkle
		 * http://CodePlea.com
		 *
		 * This software is provided 'as-is', without any express or implied
		 * warranty. In no event will the authors be held liable for any damages
		 * arising from the use of this software.
		 *
		 * Permission is granted to anyone to use this software for any purpose,
		 * including commercial applications, and to alter it and redistribute it
		 * freely, subject to the following restrictions:
		 *
		 * 1. The origin of this software must not be misrepresented; you must not
		 *    claim that you wrote the original software. If you use this software
		 *    in a product, an acknowledgement in the product documentation would be
		 *    appreciated but is not required.
		 * 2. Altered source versions must be plainly marked as such, and must not be
		 *    misrepresented as being the original software.
		 * 3. This notice may not be removed or altered from any source distribution.
		 */
    if (x < 0.0 || x > 1.0) {
        PROCESS_ERROR("x out of bounds [0, 1], in function incbeta");
    }

    /*The continued fraction converges nicely for x < (a+1)/(a+b+2)*/
    if (x > (a + 1.0) / (a + b + 2.0)) {
        struct box symmetric_incbeta = incbeta(b, a, 1.0 - x);
        if (symmetric_incbeta.empty) {
            return symmetric_incbeta; // propagate error
        } else {
            struct box result = {
                .empty = 0,
                .content = 1 - symmetric_incbeta.content
            };
            return result;
        }
    }

    /*Find the first part before the continued fraction.*/
    const float lbeta_ab = lgamma(a) + lgamma(b) - lgamma(a + b);
    const float front = exp(log(x) * a + log(1.0 - x) * b - lbeta_ab) / a;

    /*Use Lentz's algorithm to evaluate the continued fraction.*/
    float f = 1.0, c = 1.0, d = 0.0;

    int i, m;
    for (i = 0; i <= 200; ++i) {
        m = i / 2;

        float numerator;
        if (i == 0) {
            numerator = 1.0; /*First numerator is 1.0.*/
        } else if (i % 2 == 0) {
            numerator = (m * (b - m) * x) / ((a + 2.0 * m - 1.0) * (a + 2.0 * m)); /*Even term.*/
        } else {
            numerator = -((a + m) * (a + b + m) * x) / ((a + 2.0 * m) * (a + 2.0 * m + 1)); /*Odd term.*/
        }

        /*Do an iteration of Lentz's algorithm.*/
        d = 1.0 + numerator * d;
        if (fabs(d) < TINY_BETA)
            d = TINY_BETA;
        d = 1.0 / d;

        c = 1.0 + numerator / c;
        if (fabs(c) < TINY_BETA)
            c = TINY_BETA;

        const float cd = c * d;
        f *= cd;

        /*Check for stop.*/
        if (fabs(1.0 - cd) < STOP_BETA) {
            struct box result = {
                .empty = 0,
                .content = front * (f - 1.0)
            };
            return result;
        }
    }

    PROCESS_ERROR("More loops needed, did not converge, in function incbeta");
}

struct box cdf_beta(float x)
{
    if (x < 0) {
        struct box result = { .empty = 0, .content = 0 };
        return result;
    } else if (x > 1) {
        struct box result = { .empty = 0, .content = 1 };
        return result;
    } else {
        float successes = 1, failures = (2023 - 1945);
        return incbeta(successes, failures, x);
    }
}

// Some testers
void test_inverse_cdf_float(char* cdf_name, float cdf_float(float))
{
    struct box result = inverse_cdf_float(cdf_float, 0.5);
    if (result.empty) {
        printf("Inverse for %s not calculated\n", cdf_name);
        exit(1);
    } else {
        printf("Inverse of %s at %f is: %f\n", cdf_name, 0.5, result.content);
    }
}
void test_inverse_cdf_box(char* cdf_name, struct box cdf_box(float))
{
    struct box result = inverse_cdf_box(cdf_box, 0.5);
    if (result.empty) {
        printf("Inverse for %s not calculated\n", cdf_name);
        exit(1);
    } else {
        printf("Inverse of %s at %f is: %f\n", cdf_name, 0.5, result.content);
    }
}

void test_and_time_sampler_float(char* cdf_name, float cdf_float(float), uint32_t* seed)
{
    printf("\nGetting some samples from %s:\n", cdf_name);
    clock_t begin = clock();
    for (int i = 0; i < NUM_SAMPLES; i++) {
        struct box sample = sampler_float_cdf(cdf_float, seed);
        if (sample.empty) {
            printf("Error in sampler function for %s", cdf_name);
        } else {
            // printf("%f\n", sample.content);
        }
    }
    clock_t end = clock();
    float time_spent = (float)(end - begin) / CLOCKS_PER_SEC;
    printf("Time spent: %f\n", time_spent);
}

void test_and_time_sampler_box(char* cdf_name, struct box cdf_box(float), uint32_t* seed)
{
    printf("\nGetting some samples from %s:\n", cdf_name);
    clock_t begin = clock();
    for (int i = 0; i < NUM_SAMPLES; i++) {
        struct box sample = sampler_box_cdf(cdf_box, seed);
        if (sample.empty) {
            printf("Error in sampler function for %s", cdf_name);
        } else {
            // printf("%f\n", sample.content);
        }
    }
    clock_t end = clock();
    float time_spent = (float)(end - begin) / CLOCKS_PER_SEC;
    printf("Time spent: %f\n", time_spent);
}

int main()
{
    // Test inverse cdf float
    test_inverse_cdf_float("cdf_uniform_0_1", cdf_uniform_0_1);
    test_inverse_cdf_float("cdf_squared_0_1", cdf_squared_0_1);
    test_inverse_cdf_float("cdf_normal_0_1", cdf_normal_0_1);

    // Test inverse cdf box
    test_inverse_cdf_box("cdf_beta", cdf_beta);

    // Testing samplers
    // set randomness seed
    uint32_t* seed = malloc(sizeof(uint32_t));
    *seed = 1000; // xorshift can't start with 0

    // Test float sampler
    test_and_time_sampler_float("cdf_uniform_0_1", cdf_uniform_0_1, seed);
    test_and_time_sampler_float("cdf_squared_0_1", cdf_squared_0_1, seed);
    test_and_time_sampler_float("cdf_normal_0_1", cdf_normal_0_1, seed);

    // Get some normal samples using a previous approach
    printf("\nGetting some samples from unit_normal\n");

    clock_t begin_2 = clock();

    for (int i = 0; i < NUM_SAMPLES; i++) {
        float normal_sample = unit_normal(seed);
        // printf("%f\n", normal_sample);
    }

    clock_t end_2 = clock();
    float time_spent_2 = (float)(end_2 - begin_2) / CLOCKS_PER_SEC;
    printf("Time spent: %f\n", time_spent_2);

    // Test box sampler
    test_and_time_sampler_box("cdf_beta", cdf_beta, seed);
    // Ok, this is slower than python!!
    // Partly this is because I am using a more general algorithm,
    //   which applies to any cdf
    // But I am also using really anal convergence conditions.
    // This could be optimized.

    free(seed);
    return 0;
}