readd inverse cdf for raw floats. rework examples.

2023-07-16 17:33:37 +02:00 · 2023-07-16 17:33:37 +02:00 · 487de4a731
commit 487de4a731
parent e05baa6fee
2 changed files with 149 additions and 59 deletions
--- a/scratchpad/scratchpad
+++ b/scratchpad/scratchpad
--- a/scratchpad/scratchpad.c
+++ b/scratchpad/scratchpad.c
@ -4,6 +4,7 @@
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <sys/types.h>
 #include <time.h>

 #define EXIT_ON_ERROR 0
@ -20,6 +21,7 @@
            return error;                                                               \
        }                                                                               \
    } while (0)
+#define NUM_SAMPLES 10

 struct box {
    int empty;
@ -41,7 +43,6 @@ float cdf_uniform_0_1(float x)

 float cdf_squared_0_1(float x)
 {
-	  float result;
    if (x < 0) {
        return 0;
    } else if (x > 1) {
@ -174,7 +175,76 @@ struct box cdf_beta(float x)
 }

 // Inverse cdf at point
-struct box inverse_cdf(struct box cdf_box(float), float p)
+// Two versions of this function: 
+//   - raw, dealing with cdfs that return floats
+//   - box, dealing with cdfs that return a box.
+
+// Inverse cdf
+struct box inverse_cdf_float(float cdf(float), float p)
+{
+    // given a cdf: [-Inf, Inf] => [0,1]
+    // returns a box with either
+    // x such that cdf(x) = p
+    // or an error
+    // if EXIT_ON_ERROR is set to 1, it exits instead of providing an error
+
+    float low = -1.0;
+    float high = 1.0;
+
+    // 1. Make sure that cdf(low) < p < cdf(high)
+    int interval_found = 0;
+    while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) {
+        // ^ Using FLT_MIN and FLT_MAX is overkill
+        // but it's also the *correct* thing to do.
+
+        int low_condition = (cdf(low) < p);
+        int high_condition = (p < cdf(high));
+        if (low_condition && high_condition) {
+            interval_found = 1;
+        } else if (!low_condition) {
+            low = low * 2;
+        } else if (!high_condition) {
+            high = high * 2;
+        }
+    }
+
+    if (!interval_found) {
+        PROCESS_ERROR("Interval containing the target value not found, in function inverse_cdf");
+    } else {
+
+        int convergence_condition = 0;
+        int count = 0;
+        while (!convergence_condition && (count < (INT_MAX / 2))) {
+            float mid = (high + low) / 2;
+            int mid_not_new = (mid == low) || (mid == high);
+            // float width = high - low;
+						// if ((width < 1e-8) || mid_not_new){
+            if (mid_not_new) {
+                convergence_condition = 1;
+            } else {
+                float mid_sign = cdf(mid) - p;
+                if (mid_sign < 0) {
+                    low = mid;
+                } else if (mid_sign > 0) {
+                    high = mid;
+                } else if (mid_sign == 0) {
+                    low = mid;
+                    high = mid;
+                }
+            }
+        }
+
+        if (convergence_condition) {
+						struct box result = {.empty = 0, .content = low};
+						return result;
+        } else {
+            PROCESS_ERROR("Search process did not converge, in function inverse_cdf");
+        }
+
+    }
+}
+
+struct box inverse_cdf_box(struct box cdf_box(float), float p)
 {
    // given a cdf: [-Inf, Inf] => Box([0,1])
    // returns a box with either
@ -277,10 +347,16 @@ float rand_0_to_1(uint32_t* seed)
 }

 // Sampler based on inverse cdf and randomness function
-struct box sampler(struct box cdf(float), uint32_t* seed)
+struct box sampler_box_cdf(struct box cdf(float), uint32_t* seed)
 {
    float p = rand_0_to_1(seed);
-    struct box result = inverse_cdf(cdf, p);
+    struct box result = inverse_cdf_box(cdf, p);
+    return result;
+}
+struct box sampler_float_cdf(float cdf(float), uint32_t* seed)
+{
+    float p = rand_0_to_1(seed);
+    struct box result = inverse_cdf_float(cdf, p);
    return result;
 }

@ -294,83 +370,97 @@ float sampler_normal_0_1(uint32_t* seed)
    return z;
 }

-int main()
-{
-
-    // Get the inverse cdf of a [0,1] uniform distribution at 0.5
-    struct box result_1 = inverse_cdf(cdf_uniform_0_1, 0.5);
-    char* name_1 = "cdf_uniform_0_1";
-    if (result_1.empty) {
-        printf("Inverse for %s not calculated\n", name_1);
+// Some testers
+void test_inverse_cdf_float(char* cdf_name, float cdf_float(float)){
+    struct box result = inverse_cdf_float(cdf_float, 0.5);
+    if (result.empty) {
+        printf("Inverse for %s not calculated\n", cdf_name);
        exit(1);
    } else {
-        printf("Inverse of %s at %f is: %f\n", name_1, 0.5, result_1.content);
+        printf("Inverse of %s at %f is: %f\n", cdf_name, 0.5, result.content);
    }

-    // Get the inverse cdf of a [0,1] squared distribution at 0.5
-    struct box result_2 = inverse_cdf(cdf_squared_0_1, 0.5);
-    char* name_2 = "cdf_squared_0_1";
-    if (result_2.empty) {
-        printf("Inverse for %s not calculated\n", name_2);
+}
+void test_inverse_cdf_box(char* cdf_name, struct box cdf_box(float)){
+    struct box result = inverse_cdf_box(cdf_box, 0.5);
+    if (result.empty) {
+        printf("Inverse for %s not calculated\n", cdf_name);
        exit(1);
    } else {
-        printf("Inverse of %s at %f is: %f\n", name_2, 0.5, result_2.content);
+        printf("Inverse of %s at %f is: %f\n", cdf_name, 0.5, result.content);
    }

-    // Get the inverse of a normal(0,1) cdf distribution
-    struct box result_3 = inverse_cdf(cdf_normal_0_1, 0.5);
-    char* name_3 = "cdf_normal_0_1";
-    if (result_3.empty) {
-        printf("Inverse for %s not calculated\n", name_3);
-        exit(1);
-    } else {
-        printf("Inverse of %s at %f is: %f\n", name_3, 0.5, result_3.content);
 }

-    // Use the sampler on a normal(0,1)
-    // set randomness seed
-    uint32_t* seed = malloc(sizeof(uint32_t));
-    *seed = 1000; // xorshift can't start with 0
-    int n = 100;
-
-    printf("\n\nGetting some samples from %s:\n", name_3);
+void test_and_time_sampler_float(char* cdf_name, float cdf_float(float), uint32_t* seed){
+    printf("\nGetting some samples from %s:\n", cdf_name);
    clock_t begin = clock();
-    for (int i = 0; i < n; i++) {
-        struct box sample = sampler(cdf_normal_0_1, seed);
+    for (int i = 0; i < NUM_SAMPLES; i++) {
+        struct box sample = sampler_float_cdf(cdf_float, seed);
        if (sample.empty) {
-            printf("Error in sampler function");
+            printf("Error in sampler function for %s", cdf_name);
        } else {
            printf("%f\n", sample.content);
        }
    }
    clock_t end = clock();
    float time_spent = (float)(end - begin) / CLOCKS_PER_SEC;
-    printf("Time spent: %f", time_spent);
-
-    // Get some normal samples using the previous method.
-    clock_t begin_2 = clock();
-    printf("\n\nGetting some samples from sampler_normal_0_1\n");
-    for (int i = 0; i < n; i++) {
-        float normal_sample = sampler_normal_0_1(seed);
-        printf("%f\n", normal_sample);
+    printf("Time spent: %f\n", time_spent);
 }
-    clock_t end_2 = clock();
-    float time_spent_2 = (float)(end_2 - begin_2) / CLOCKS_PER_SEC;
-    printf("Time spent: %f", time_spent_2);

-    // Get some beta samples
-    clock_t begin_3 = clock();
-    printf("\n\nGetting some samples from box sampler_dangerous_beta\n");
-    for (int i = 0; i < n; i++) {
-        struct box sample = sampler(cdf_dangerous_beta, seed);
+void test_and_time_sampler_box(char* cdf_name, struct box cdf_box(float), uint32_t* seed){
+    printf("\nGetting some samples from %s:\n", cdf_name);
+    clock_t begin = clock();
+    for (int i = 0; i < NUM_SAMPLES; i++) {
+        struct box sample = sampler_box_cdf(cdf_box, seed);
        if (sample.empty) {
-            printf("Error in sampler function");
+            printf("Error in sampler function for %s", cdf_name);
        } else {
            printf("%f\n", sample.content);
        }
    }
-    clock_t end_3 = clock();
-    float time_spent_3 = (float)(end_3 - begin_3) / CLOCKS_PER_SEC;
-    printf("Time spent: %f\n", time_spent_3);
+    clock_t end = clock();
+    float time_spent = (float)(end - begin) / CLOCKS_PER_SEC;
+    printf("Time spent: %f\n", time_spent);
+}
+
+int main()
+{
+		// Test inverse cdf float
+		test_inverse_cdf_float("cdf_uniform_0_1", cdf_uniform_0_1);
+		test_inverse_cdf_float("cdf_squared_0_1", cdf_squared_0_1);
+		test_inverse_cdf_float("cdf_normal_0_1", cdf_normal_0_1);
+
+		// Test inverse cdf box
+		test_inverse_cdf_box("cdf_beta", cdf_beta);
+		
+		// Testing samplers
+    // set randomness seed
+    uint32_t* seed = malloc(sizeof(uint32_t));
+    *seed = 1000; // xorshift can't start with 0
+		
+		// Test float sampler
+		test_and_time_sampler_float("cdf_uniform_0_1", cdf_uniform_0_1, seed);
+		test_and_time_sampler_float("cdf_squared_0_1", cdf_squared_0_1, seed);
+		test_and_time_sampler_float("cdf_normal_0_1", cdf_normal_0_1, seed);
+    
+		// Get some normal samples using a previous approach
+		printf("\nGetting some samples from sampler_normal_0_1\n");
+    
+    clock_t begin_2 = clock();
+    
+		for (int i = 0; i < NUM_SAMPLES; i++) {
+        float normal_sample = sampler_normal_0_1(seed);
+        printf("%f\n", normal_sample);
+    }
+    
+		clock_t end_2 = clock();
+    float time_spent_2 = (float)(end_2 - begin_2) / CLOCKS_PER_SEC;
+    printf("Time spent: %f\n", time_spent_2);
+
+		// Test box sampler
+		test_and_time_sampler_box("cdf_beta", cdf_beta, seed);
+
+		free(seed);
    return 0;
 }