squiggle.c/scratchpad/scratchpad.c

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <float.h> // FLT_MAX, FLT_MIN
#include <limits.h> // INT_MAX
#include <math.h>
#define VERBOSE 1

// Errors
// https://mccue.dev/pages/7-27-22-c-errors
// Huh, first time I've really needed a struct
// https://en.wikipedia.org/wiki/Struct_(C_programming_language)
// https://www.cs.yale.edu/homes/aspnes/pinewiki/C(2f)Structs.html
// https://stackoverflow.com/questions/9653072/return-a-struct-from-a-function-in-c
// options:
//   - exit
//   - pass structs
// to do: reuse more informative printing from build-your-own-lisp?
struct box {
	int empty;
	float content;
};

// Example cdf
float cdf_uniform_0_1(float x){
  if(x < 0){
		return 0;
	} else if (x > 1){
		return 1;
	} else {
		return x;
	}
}

float cdf_squared_0_1(float x){
  if(x < 0){
		return 0;
	} else if (x > 1){
		return 1;
	} else {
		return x*x;
	}
}

float cdf_normal_0_1(float x){
	float mean = 0;
	float std = 1;
	return 0.5 * ( 1 + erf((x-mean)/(std * sqrt(2)) ));
}

// Inverse cdf
struct box inverse_cdf(float cdf(float), float p){
	// given a cdf: [-Inf, Inf] => [0,1]
	// returns x such that cdf(x) = p
	// to do: add bounds, add error checking
	// [x] maybe return a struct or smth.

  struct box result;
	float low = -1.0;
	float high = 1.0;

	// 1. Make sure that cdf(low) < p < cdf(high)
	// [x] to do: do smth with float min and float max?
	int interval_found = 0;
	while((!interval_found) && (low > -FLT_MAX/4) && (high < FLT_MAX/4)){
		// ^ Using FLT_MIN and FLT_MAX is overkill
		// but it's also the *correct* thing to do.

		int low_condition = (cdf(low) < p);
		int high_condition = (p < cdf(high));
		if( low_condition && high_condition ){
			interval_found = 1;
		}else if(!low_condition){
			low = low * 2;
		}else if (!high_condition){
			high = high * 2 ;
		}
	}
	if(0){
		printf("FLT_MIN = %f, FLT_MAX = %f, INT_MAX = %d\n", -FLT_MAX, FLT_MAX, INT_MAX);
		printf("low: %f, high: %f\n", low, high);
		printf("interval_found? %d\n", interval_found);
		int while_condition = (!interval_found) && (low > FLT_MIN/4) && (high < FLT_MAX/4);
		printf("while condition: %i\n", while_condition);
	}

	if(!interval_found){
		result.empty = 1;
		return result;
	} else{

		int convergence_condition = 0;
		int count = 0;
		while(!convergence_condition && (count < (INT_MAX/2) )){
			float mid = (high + low)/2;
			int mid_not_new = (mid == low) || (mid == high);
			if(0){
				printf("while loop\n");
				printf("low: %f, high: %f\n", low, high);
				printf("mid: %f\n", mid);
			}

			if(mid_not_new){
				convergence_condition = 1;
			} else{
				float mid_sign = cdf(mid) - p;
				if(mid_sign < 0){
					low = mid;
				} else if  (mid_sign > 0){
					high = mid;
				} else if (mid_sign == 0){
					low = mid;
					high = mid;
				}
			}

		}

		if(convergence_condition){
			result.content = low;
			result.empty = 0;
		} else{
			result.empty = 1;
		}

		return result;
	}

}

// Get random number between 0 and 1
uint32_t xorshift32
(uint32_t* seed)
{
	// Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
  // See <https://stackoverflow.com/questions/53886131/how-does-xorshift32-works>
  // https://en.wikipedia.org/wiki/Xorshift
	// Also some drama: <https://www.pcg-random.org/posts/on-vignas-pcg-critique.html>, <https://prng.di.unimi.it/>

	uint32_t x = *seed;
	x ^= x << 13;
	x ^= x >> 17;
	x ^= x << 5;
	return *seed = x;
}

// Distribution & sampling functions

float rand_0_to_1(uint32_t* seed){
	return ((float) xorshift32(seed)) / ((float) UINT32_MAX);
}

// sampler based on inverse cdf
struct box sampler(float cdf(float), uint32_t* seed){
	struct box result;
	float p = rand_0_to_1(seed);
	result = inverse_cdf(cdf, p);
	return result;
}

// ~~[-] to-do: integrals => beta distribution~~
// => instead, use this incomplete beta function implementation, based on continuous fractions:
// <https://codeplea.com/incomplete-beta-function-c>
// <https://github.com/codeplea/incbeta>

// main with an example
int main(){

	// Uniform:
	struct box result_1 = inverse_cdf(cdf_uniform_0_1, 0.5);
	char* name_1 = "cdf_uniform_0_1";
	if(result_1.empty){
		printf("Inverse for %s not calculated\n", name_1);
		exit(1);
	}else{
		printf("Inverse of %s at %f is: %f\n", name_1, 0.5, result_1.content);
	}

	// Squared cdf
	struct box result_2 = inverse_cdf(cdf_squared_0_1, 0.5);
	char* name_2 = "cdf_squared_0_1";
	if(result_2.empty){
		printf("Inverse for %s not calculated\n", name_2);
		exit(1);
	}else{
		printf("Inverse of %s at %f is: %f\n", name_2, 0.5, result_2.content);
	}

	// Normal cdf
	struct box result_3 = inverse_cdf(cdf_normal_0_1, 0.5);
	char* name_3 = "cdf_normal_0_1";
	if(result_3.empty){
		printf("Inverse for %s not calculated\n", name_3);
		exit(1);
	}else{
		printf("Inverse of %s at %f is: %f\n", name_3, 0.5, result_3.content);
	}

	// set randomness seed
	uint32_t* seed = malloc(sizeof(uint32_t));
	*seed = 1000; // xorshift can't start with 0

	printf("\n\nGetting some samples from %s:\n", name_3);
	int n=100;
	for(int i=0;i<n; i++){
		struct box sample = sampler(cdf_normal_0_1, seed);
		if(sample.empty){
			printf("Error in sampler function");
		}else{
			printf("%f\n", sample.content);
		}
	}
	return 0;
}