224 lines
		
	
	
		
			6.2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			224 lines
		
	
	
		
			6.2 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| #include <limits.h> // INT_MAX
 | |
| #include <stdint.h>
 | |
| #include <stdlib.h>
 | |
| #include <float.h> // FLT_MAX, FLT_MIN
 | |
| #include <stdio.h>
 | |
| #include <math.h> // erf, sqrt
 | |
| 
 | |
| #define EXIT_ON_ERROR 0
 | |
| 
 | |
| // Errors
 | |
| // [ ] to do: reuse more informative printing from build-your-own-lisp?
 | |
| // Another option could be to exit on error. Maybe let the user decide?
 | |
// Result wrapper used by the functions in this file that can fail.
// Either a value is carried in `content`, or the box is flagged as empty
// and `error_msg` describes what went wrong.
struct box {
    // 1 when no value could be produced, 0 when `content` is valid.
    int empty;

    // The computed value; only written on the success path.
    float content;

    // Human-readable description of the failure; only written on the error
    // paths. NOTE(review): confirm the lifetime of the pointed-to buffer
    // before reading this field.
    char *error_msg;
};
 | |
| 
 | |
| // Example cdf
 | |
// Cdf of the uniform distribution on [0, 1]:
// 0 to the left of the interval, 1 to the right of it, and x inside it.
float cdf_uniform_0_1(float x)
{
    if (x < 0) {
        return 0.0f; // below the support
    }
    if (x > 1) {
        return 1.0f; // above the support
    }
    return x;
}
 | |
| 
 | |
// Cdf that grows as x^2 on [0, 1]:
// 0 to the left of the interval, 1 to the right of it, and x * x inside it.
float cdf_squared_0_1(float x)
{
    if (x < 0) {
        return 0.0f; // below the support
    }
    if (x > 1) {
        return 1.0f; // above the support
    }
    return x * x;
}
 | |
| 
 | |
| float cdf_normal_0_1(float x)
 | |
| {
 | |
|     float mean = 0;
 | |
|     float std = 1;
 | |
|     return 0.5 * (1 + erf((x - mean) / (std * sqrt(2)))); // erf from math.h
 | |
| }
 | |
| 
 | |
| // Inverse cdf
 | |
| struct box inverse_cdf(float cdf(float), float p)
 | |
| {
 | |
|     // given a cdf: [-Inf, Inf] => [0,1]
 | |
|     // returns a box with either
 | |
| 		// x such that cdf(x) = p
 | |
| 		// or an error
 | |
| 		// if EXIT_ON_ERROR is set to 1, it exits instead of providing an error
 | |
| 
 | |
|     struct box result;
 | |
|     float low = -1.0;
 | |
|     float high = 1.0;
 | |
| 
 | |
|     // 1. Make sure that cdf(low) < p < cdf(high)
 | |
|     int interval_found = 0;
 | |
|     while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) {
 | |
|         // ^ Using FLT_MIN and FLT_MAX is overkill
 | |
|         // but it's also the *correct* thing to do.
 | |
| 
 | |
|         int low_condition = (cdf(low) < p);
 | |
|         int high_condition = (p < cdf(high));
 | |
|         if (low_condition && high_condition) {
 | |
|             interval_found = 1;
 | |
|         } else if (!low_condition) {
 | |
|             low = low * 2;
 | |
|         } else if (!high_condition) {
 | |
|             high = high * 2;
 | |
|         }
 | |
|     }
 | |
| 
 | |
| 		if (!interval_found) {
 | |
| 			  if(EXIT_ON_ERROR){
 | |
| 					printf("Interval containing the target value not found, in function inverse_cdf, in %s (%d)", __FILE__, __LINE__);
 | |
| 					exit(1);
 | |
| 				}else{
 | |
| 					char error_msg[200];
 | |
| 					snprintf(error_msg, 200, "Interval containing the target value not found in function inverse_cdf, in %s (%d)", __FILE__, __LINE__);
 | |
| 					result.empty = 1;
 | |
| 					result.error_msg = error_msg;
 | |
| 					return result;
 | |
| 				}
 | |
|     } else {
 | |
| 
 | |
|         int convergence_condition = 0;
 | |
|         int count = 0;
 | |
|         while (!convergence_condition && (count < (INT_MAX / 2))) {
 | |
|             float mid = (high + low) / 2;
 | |
|             int mid_not_new = (mid == low) || (mid == high);
 | |
|             if (0) {
 | |
|                 printf("while loop\n");
 | |
|                 printf("low: %f, high: %f\n", low, high);
 | |
|                 printf("mid: %f\n", mid);
 | |
|             }
 | |
| 
 | |
|             if (mid_not_new) {
 | |
|                 convergence_condition = 1;
 | |
|             } else {
 | |
|                 float mid_sign = cdf(mid) - p;
 | |
|                 if (mid_sign < 0) {
 | |
|                     low = mid;
 | |
|                 } else if (mid_sign > 0) {
 | |
|                     high = mid;
 | |
|                 } else if (mid_sign == 0) {
 | |
|                     low = mid;
 | |
|                     high = mid;
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         if (convergence_condition) {
 | |
|             result.content = low;
 | |
|             result.empty = 0;
 | |
|         } else {
 | |
| 						if(EXIT_ON_ERROR){
 | |
| 							printf("Search process did not converge, in function inverse_cdf, in %s (%d)", __FILE__, __LINE__);
 | |
| 							exit(1);
 | |
| 						}else{
 | |
| 							char error_msg[200];
 | |
| 							snprintf(error_msg, 200, "Search process did not converge, in function inverse_cdf, in %s (%d)", __FILE__, __LINE__);
 | |
| 							result.empty = 1;
 | |
| 							result.error_msg = error_msg;
 | |
| 							return result;
 | |
| 						}
 | |
| 								result.empty = 1;
 | |
| 					}
 | |
| 
 | |
|         return result;
 | |
|     }
 | |
| }
 | |
| 
 | |
| // Get random number between 0 and 1
 | |
// Advances the 32-bit xorshift generator stored in *seed by one step,
// writes the new state back to *seed, and returns that same value.
//
// Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
// See <https://stackoverflow.com/questions/53886131/how-does-xorshift32-works>
// https://en.wikipedia.org/wiki/Xorshift
// Also some drama: <https://www.pcg-random.org/posts/on-vignas-pcg-critique.html>, <https://prng.di.unimi.it/>
uint32_t xorshift32(uint32_t* seed)
{
    uint32_t state = *seed;

    state = state ^ (state << 13);
    state = state ^ (state >> 17);
    state = state ^ (state << 5);

    *seed = state;
    return state;
}
 | |
| 
 | |
| // Distribution & sampling functions
 | |
// Draws the next xorshift32 value from *seed (advancing it) and scales it
// into a float by dividing by UINT32_MAX.
//
// NOTE(review): in IEEE-754 single precision (float)UINT32_MAX rounds to
// 2^32, as do draws very close to UINT32_MAX, so this can presumably return
// exactly 1.0 -- confirm that callers tolerate p == 1.
float rand_0_to_1(uint32_t* seed)
{
    const float draw = (float)xorshift32(seed);
    const float range = (float)UINT32_MAX;
    return draw / range;
}
 | |
| 
 | |
| // Sampler based on inverse cdf
 | |
| struct box sampler(float cdf(float), uint32_t* seed)
 | |
| {
 | |
|     struct box result;
 | |
|     float p = rand_0_to_1(seed);
 | |
|     result = inverse_cdf(cdf, p);
 | |
|     return result;
 | |
| }
 | |
| 
 | |
| // to do: add beta. 
 | |
| // for the cdf, use this incomplete beta function implementation, based on continued fractions:
 | |
| // <https://codeplea.com/incomplete-beta-function-c>
 | |
| // <https://github.com/codeplea/incbeta>
 | |
| 
 | |
| int main()
 | |
| {
 | |
| 
 | |
|     // Get the inverse cdf of a [0,1] uniform distribution at 0.5
 | |
|     struct box result_1 = inverse_cdf(cdf_uniform_0_1, 0.5);
 | |
|     char* name_1 = "cdf_uniform_0_1";
 | |
|     if (result_1.empty) {
 | |
|         printf("Inverse for %s not calculated\n", name_1);
 | |
|         exit(1);
 | |
|     } else {
 | |
|         printf("Inverse of %s at %f is: %f\n", name_1, 0.5, result_1.content);
 | |
|     }
 | |
| 
 | |
|     // Get the inverse cdf of a [0,1] squared distribution at 0.5
 | |
|     struct box result_2 = inverse_cdf(cdf_squared_0_1, 0.5);
 | |
|     char* name_2 = "cdf_squared_0_1";
 | |
|     if (result_2.empty) {
 | |
|         printf("Inverse for %s not calculated\n", name_2);
 | |
|         exit(1);
 | |
|     } else {
 | |
|         printf("Inverse of %s at %f is: %f\n", name_2, 0.5, result_2.content);
 | |
|     }
 | |
| 
 | |
| 		// Get the inverse of a normal(0,1) cdf distribution
 | |
|     struct box result_3 = inverse_cdf(cdf_normal_0_1, 0.5);
 | |
|     char* name_3 = "cdf_normal_0_1";
 | |
|     if (result_3.empty) {
 | |
|         printf("Inverse for %s not calculated\n", name_3);
 | |
|         exit(1);
 | |
|     } else {
 | |
|         printf("Inverse of %s at %f is: %f\n", name_3, 0.5, result_3.content);
 | |
|     }
 | |
| 
 | |
| 		// Use the sampler on a normal(0,1)
 | |
|     // set randomness seed
 | |
|     uint32_t* seed = malloc(sizeof(uint32_t));
 | |
|     *seed = 1000; // xorshift can't start with 0
 | |
| 
 | |
|     printf("\n\nGetting some samples from %s:\n", name_3);
 | |
|     int n = 100;
 | |
|     for (int i = 0; i < n; i++) {
 | |
|         struct box sample = sampler(cdf_normal_0_1, seed);
 | |
|         if (sample.empty) {
 | |
|             printf("Error in sampler function");
 | |
|         } else {
 | |
|             printf("%f\n", sample.content);
 | |
|         }
 | |
|     }
 | |
|     return 0;
 | |
| }
 |