#include // INT_MAX #include #include #include // FLT_MAX, FLT_MIN #include #include // erf, sqrt #include #define EXIT_ON_ERROR 0 // Errors // [ ] to do: reuse more informative printing from build-your-own-lisp? // Another option could be to exit on error. Maybe let the user decide? struct box { int empty; float content; char * error_msg; }; // Example cdf float cdf_uniform_0_1(float x) { if (x < 0) { return 0; } else if (x > 1) { return 1; } else { return x; } } float cdf_squared_0_1(float x) { if (x < 0) { return 0; } else if (x > 1) { return 1; } else { return x * x; } } float cdf_normal_0_1(float x) { // float mean = 0; // float std = 1; return 0.5 * (1 + erf((x - 0) / (1 * sqrt(2)))); // erf from math.h } // Inverse cdf struct box inverse_cdf(float cdf(float), float p) { // given a cdf: [-Inf, Inf] => [0,1] // returns a box with either // x such that cdf(x) = p // or an error // if EXIT_ON_ERROR is set to 1, it exits instead of providing an error struct box result; float low = -1.0; float high = 1.0; // 1. Make sure that cdf(low) < p < cdf(high) int interval_found = 0; while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) { // ^ Using FLT_MIN and FLT_MAX is overkill // but it's also the *correct* thing to do. int low_condition = (cdf(low) < p); int high_condition = (p < cdf(high)); if (low_condition && high_condition) { interval_found = 1; } else if (!low_condition) { low = low * 2; } else if (!high_condition) { high = high * 2; } } if (!interval_found) { if(EXIT_ON_ERROR){ printf("Interval containing the target value not found, in function inverse_cdf, in %s (%d)", __FILE__, __LINE__); exit(1); }else{ char error_msg[200]; snprintf(error_msg, 200, "Interval containing the target value not found in function inverse_cdf, in %s (%d)", __FILE__, __LINE__); result.empty = 1; result.error_msg = error_msg; return result; } } else { int convergence_condition = 0; int count = 0; while (!convergence_condition && (count < (INT_MAX / 2))) { float mid = (high + low) / 2; int mid_not_new = (mid == low) || (mid == high); // float width = high - low; if (mid_not_new) { // if ((width < 1e-8) || mid_not_new){ convergence_condition = 1; } else { float mid_sign = cdf(mid) - p; if (mid_sign < 0) { low = mid; } else if (mid_sign > 0) { high = mid; } else if (mid_sign == 0) { low = mid; high = mid; } } } if (convergence_condition) { result.content = low; result.empty = 0; } else { if(EXIT_ON_ERROR){ printf("Search process did not converge, in function inverse_cdf, in %s (%d)", __FILE__, __LINE__); exit(1); }else{ char error_msg[200]; snprintf(error_msg, 200, "Search process did not converge, in function inverse_cdf, in %s (%d)", __FILE__, __LINE__); result.empty = 1; result.error_msg = error_msg; return result; } result.empty = 1; } return result; } } // Get random number between 0 and 1 uint32_t xorshift32(uint32_t* seed) { // Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs" // See // https://en.wikipedia.org/wiki/Xorshift // Also some drama: , uint32_t x = *seed; x ^= x << 13; x ^= x >> 17; x ^= x << 5; return *seed = x; } // Distribution & sampling functions float rand_0_to_1(uint32_t* seed) { return ((float)xorshift32(seed)) / ((float)UINT32_MAX); } // Sampler based on inverse cdf struct box sampler(float cdf(float), uint32_t* seed) { struct box result; float p = rand_0_to_1(seed); result = inverse_cdf(cdf, p); return result; } // For comparison, raw sampler const float PI = 3.14159265358979323846; float sampler_normal_0_1(uint32_t* seed) { float u1 = rand_0_to_1(seed); float u2 = rand_0_to_1(seed); float z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2); return z; } // to do: add beta. // for the cdf, use this incomplete beta function implementation, based on continuous fractions: // // int main() { // Get the inverse cdf of a [0,1] uniform distribution at 0.5 struct box result_1 = inverse_cdf(cdf_uniform_0_1, 0.5); char* name_1 = "cdf_uniform_0_1"; if (result_1.empty) { printf("Inverse for %s not calculated\n", name_1); exit(1); } else { printf("Inverse of %s at %f is: %f\n", name_1, 0.5, result_1.content); } // Get the inverse cdf of a [0,1] squared distribution at 0.5 struct box result_2 = inverse_cdf(cdf_squared_0_1, 0.5); char* name_2 = "cdf_squared_0_1"; if (result_2.empty) { printf("Inverse for %s not calculated\n", name_2); exit(1); } else { printf("Inverse of %s at %f is: %f\n", name_2, 0.5, result_2.content); } // Get the inverse of a normal(0,1) cdf distribution struct box result_3 = inverse_cdf(cdf_normal_0_1, 0.5); char* name_3 = "cdf_normal_0_1"; if (result_3.empty) { printf("Inverse for %s not calculated\n", name_3); exit(1); } else { printf("Inverse of %s at %f is: %f\n", name_3, 0.5, result_3.content); } // Use the sampler on a normal(0,1) // set randomness seed uint32_t* seed = malloc(sizeof(uint32_t)); *seed = 1000; // xorshift can't start with 0 int n = 1000000; printf("\n\nGetting some samples from %s:\n", name_3); clock_t begin = clock(); for (int i = 0; i < n; i++) { struct box sample = sampler(cdf_normal_0_1, seed); if (sample.empty) { printf("Error in sampler function"); } else { // printf("%f\n", sample.content); } } clock_t end = clock(); double time_spent = (double)(end - begin) / CLOCKS_PER_SEC; printf("Time spent: %f", time_spent); // Get some normal samples using the previous method. clock_t begin_2 = clock(); printf("\n\nGetting some samples from sampler_normal_0_1\n"); for (int i = 0; i < n; i++) { float normal_sample = sampler_normal_0_1(seed); // printf("%f\n", normal_sample); } clock_t end_2 = clock(); double time_spent_2 = (double)(end_2 - begin_2) / CLOCKS_PER_SEC; printf("Time spent: %f", time_spent_2); return 0; }