diff --git a/scratchpad/scratchpad b/scratchpad/scratchpad index 5bb58d1..b1224e6 100755 Binary files a/scratchpad/scratchpad and b/scratchpad/scratchpad differ diff --git a/scratchpad/scratchpad.c b/scratchpad/scratchpad.c index 8e070c7..8280379 100644 --- a/scratchpad/scratchpad.c +++ b/scratchpad/scratchpad.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #define EXIT_ON_ERROR 0 @@ -20,6 +21,7 @@ return error; \ } \ } while (0) +#define NUM_SAMPLES 10 struct box { int empty; @@ -41,7 +43,6 @@ float cdf_uniform_0_1(float x) float cdf_squared_0_1(float x) { - float result; if (x < 0) { return 0; } else if (x > 1) { @@ -174,7 +175,76 @@ struct box cdf_beta(float x) } // Inverse cdf at point -struct box inverse_cdf(struct box cdf_box(float), float p) +// Two versions of this function: +// - raw, dealing with cdfs that return floats +// - box, dealing with cdfs that return a box. + +// Inverse cdf +struct box inverse_cdf_float(float cdf(float), float p) +{ + // given a cdf: [-Inf, Inf] => [0,1] + // returns a box with either + // x such that cdf(x) = p + // or an error + // if EXIT_ON_ERROR is set to 1, it exits instead of providing an error + + float low = -1.0; + float high = 1.0; + + // 1. Make sure that cdf(low) < p < cdf(high) + int interval_found = 0; + while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) { + // ^ Using FLT_MIN and FLT_MAX is overkill + // but it's also the *correct* thing to do. + + int low_condition = (cdf(low) < p); + int high_condition = (p < cdf(high)); + if (low_condition && high_condition) { + interval_found = 1; + } else if (!low_condition) { + low = low * 2; + } else if (!high_condition) { + high = high * 2; + } + } + + if (!interval_found) { + PROCESS_ERROR("Interval containing the target value not found, in function inverse_cdf"); + } else { + + int convergence_condition = 0; + int count = 0; + while (!convergence_condition && (count < (INT_MAX / 2))) { + float mid = (high + low) / 2; + int mid_not_new = (mid == low) || (mid == high); + // float width = high - low; + // if ((width < 1e-8) || mid_not_new){ + if (mid_not_new) { + convergence_condition = 1; + } else { + float mid_sign = cdf(mid) - p; + if (mid_sign < 0) { + low = mid; + } else if (mid_sign > 0) { + high = mid; + } else if (mid_sign == 0) { + low = mid; + high = mid; + } + } + } + + if (convergence_condition) { + struct box result = {.empty = 0, .content = low}; + return result; + } else { + PROCESS_ERROR("Search process did not converge, in function inverse_cdf"); + } + + } +} + +struct box inverse_cdf_box(struct box cdf_box(float), float p) { // given a cdf: [-Inf, Inf] => Box([0,1]) // returns a box with either @@ -277,10 +347,16 @@ float rand_0_to_1(uint32_t* seed) } // Sampler based on inverse cdf and randomness function -struct box sampler(struct box cdf(float), uint32_t* seed) +struct box sampler_box_cdf(struct box cdf(float), uint32_t* seed) { float p = rand_0_to_1(seed); - struct box result = inverse_cdf(cdf, p); + struct box result = inverse_cdf_box(cdf, p); + return result; +} +struct box sampler_float_cdf(float cdf(float), uint32_t* seed) +{ + float p = rand_0_to_1(seed); + struct box result = inverse_cdf_float(cdf, p); return result; } @@ -294,83 +370,97 @@ float sampler_normal_0_1(uint32_t* seed) return z; } -int main() -{ - - // Get the inverse cdf of a [0,1] uniform distribution at 0.5 - struct box result_1 = inverse_cdf(cdf_uniform_0_1, 0.5); - char* name_1 = "cdf_uniform_0_1"; - if (result_1.empty) { - printf("Inverse for %s not calculated\n", name_1); +// Some testers +void test_inverse_cdf_float(char* cdf_name, float cdf_float(float)){ + struct box result = inverse_cdf_float(cdf_float, 0.5); + if (result.empty) { + printf("Inverse for %s not calculated\n", cdf_name); exit(1); } else { - printf("Inverse of %s at %f is: %f\n", name_1, 0.5, result_1.content); + printf("Inverse of %s at %f is: %f\n", cdf_name, 0.5, result.content); } - // Get the inverse cdf of a [0,1] squared distribution at 0.5 - struct box result_2 = inverse_cdf(cdf_squared_0_1, 0.5); - char* name_2 = "cdf_squared_0_1"; - if (result_2.empty) { - printf("Inverse for %s not calculated\n", name_2); +} +void test_inverse_cdf_box(char* cdf_name, struct box cdf_box(float)){ + struct box result = inverse_cdf_box(cdf_box, 0.5); + if (result.empty) { + printf("Inverse for %s not calculated\n", cdf_name); exit(1); } else { - printf("Inverse of %s at %f is: %f\n", name_2, 0.5, result_2.content); + printf("Inverse of %s at %f is: %f\n", cdf_name, 0.5, result.content); } - // Get the inverse of a normal(0,1) cdf distribution - struct box result_3 = inverse_cdf(cdf_normal_0_1, 0.5); - char* name_3 = "cdf_normal_0_1"; - if (result_3.empty) { - printf("Inverse for %s not calculated\n", name_3); - exit(1); - } else { - printf("Inverse of %s at %f is: %f\n", name_3, 0.5, result_3.content); - } +} - // Use the sampler on a normal(0,1) - // set randomness seed - uint32_t* seed = malloc(sizeof(uint32_t)); - *seed = 1000; // xorshift can't start with 0 - int n = 100; - - printf("\n\nGetting some samples from %s:\n", name_3); +void test_and_time_sampler_float(char* cdf_name, float cdf_float(float), uint32_t* seed){ + printf("\nGetting some samples from %s:\n", cdf_name); clock_t begin = clock(); - for (int i = 0; i < n; i++) { - struct box sample = sampler(cdf_normal_0_1, seed); + for (int i = 0; i < NUM_SAMPLES; i++) { + struct box sample = sampler_float_cdf(cdf_float, seed); if (sample.empty) { - printf("Error in sampler function"); + printf("Error in sampler function for %s", cdf_name); } else { printf("%f\n", sample.content); } } clock_t end = clock(); float time_spent = (float)(end - begin) / CLOCKS_PER_SEC; - printf("Time spent: %f", time_spent); + printf("Time spent: %f\n", time_spent); +} - // Get some normal samples using the previous method. - clock_t begin_2 = clock(); - printf("\n\nGetting some samples from sampler_normal_0_1\n"); - for (int i = 0; i < n; i++) { - float normal_sample = sampler_normal_0_1(seed); - printf("%f\n", normal_sample); - } - clock_t end_2 = clock(); - float time_spent_2 = (float)(end_2 - begin_2) / CLOCKS_PER_SEC; - printf("Time spent: %f", time_spent_2); - - // Get some beta samples - clock_t begin_3 = clock(); - printf("\n\nGetting some samples from box sampler_dangerous_beta\n"); - for (int i = 0; i < n; i++) { - struct box sample = sampler(cdf_dangerous_beta, seed); +void test_and_time_sampler_box(char* cdf_name, struct box cdf_box(float), uint32_t* seed){ + printf("\nGetting some samples from %s:\n", cdf_name); + clock_t begin = clock(); + for (int i = 0; i < NUM_SAMPLES; i++) { + struct box sample = sampler_box_cdf(cdf_box, seed); if (sample.empty) { - printf("Error in sampler function"); + printf("Error in sampler function for %s", cdf_name); } else { printf("%f\n", sample.content); } } - clock_t end_3 = clock(); - float time_spent_3 = (float)(end_3 - begin_3) / CLOCKS_PER_SEC; - printf("Time spent: %f\n", time_spent_3); + clock_t end = clock(); + float time_spent = (float)(end - begin) / CLOCKS_PER_SEC; + printf("Time spent: %f\n", time_spent); +} + +int main() +{ + // Test inverse cdf float + test_inverse_cdf_float("cdf_uniform_0_1", cdf_uniform_0_1); + test_inverse_cdf_float("cdf_squared_0_1", cdf_squared_0_1); + test_inverse_cdf_float("cdf_normal_0_1", cdf_normal_0_1); + + // Test inverse cdf box + test_inverse_cdf_box("cdf_beta", cdf_beta); + + // Testing samplers + // set randomness seed + uint32_t* seed = malloc(sizeof(uint32_t)); + *seed = 1000; // xorshift can't start with 0 + + // Test float sampler + test_and_time_sampler_float("cdf_uniform_0_1", cdf_uniform_0_1, seed); + test_and_time_sampler_float("cdf_squared_0_1", cdf_squared_0_1, seed); + test_and_time_sampler_float("cdf_normal_0_1", cdf_normal_0_1, seed); + + // Get some normal samples using a previous approach + printf("\nGetting some samples from sampler_normal_0_1\n"); + + clock_t begin_2 = clock(); + + for (int i = 0; i < NUM_SAMPLES; i++) { + float normal_sample = sampler_normal_0_1(seed); + printf("%f\n", normal_sample); + } + + clock_t end_2 = clock(); + float time_spent_2 = (float)(end_2 - begin_2) / CLOCKS_PER_SEC; + printf("Time spent: %f\n", time_spent_2); + + // Test box sampler + test_and_time_sampler_box("cdf_beta", cdf_beta, seed); + + free(seed); return 0; }