add possible gotcha to folk wisdom file

2024-02-07 19:44:02 +01:00 · 2024-02-07 19:44:02 +01:00 · 0f5f68f21f
commit 0f5f68f21f
parent 06109a0af1
3 changed files with 86 additions and 139 deletions
--- a/FOLK_WISDOM.md
+++ b/FOLK_WISDOM.md
@ -269,6 +269,65 @@ The last commit that has code to sample from arbitrary cdfs is `8f6919fa2...`. Y

 - Even though the C standard is ambiguous about this, this code assumes that doubles are 64 bit precision (otherwise the xorshift code should be different).

+### Consider this code
+
+Consider sampling from the unit uniform in this manner:
+
+```
+#include "../squiggle.h"
+#include "../squiggle_more.h"
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int main()
+{
+    // Replicate <https://arxiv.org/pdf/1806.02404.pdf>, and in particular the red line in page 11.
+    // Could also be interesting to just produce and save many samples.
+
+    // set randomness seed
+    uint64_t* seed = malloc(sizeof(uint64_t));
+    *seed = UINT64_MAX/64; // xorshift can't start with a seed of 0
+    
+    int n_samples = 100*MILLION;
+    int p_sixteenth = 0;
+    int p_eighth = 0;
+    int p_quarter = 0;
+    int p_half = 0;
+    double sample;
+    for(int i=0; i<n_samples; i++){
+        sample = sample_unit_uniform(seed);
+        // printf("%lf\n", sample);
+        if (sample < 1.0/16.0){
+            p_sixteenth++;
+            p_eighth++;
+            p_quarter++;
+            p_half++;
+        } else if(sample < 0.125){
+            p_eighth++;
+            p_quarter++;
+            p_half++;
+        } else if(sample < 0.25){
+            p_quarter++;
+            p_half++;
+        } else if(sample < 0.5){
+            p_half++;
+        }else{
+            // printf("Sample > 0.5\n");
+        }
+    }
+    printf("p_16th: %lf; p_eighth; %lf; p_quarter: %lf; p_half: %lf", ((double)p_sixteenth)/n_samples, (double)p_eighth/n_samples, (double)p_quarter/n_samples, (double)p_half/n_samples);
+}
+```
+
+What will be printed out? In particular, consider that not all floating points have the same density. 
+
+<details style="border-style: solid; border-width: 2px; padding-top: 20px; padding-left: 20px; padding-right: 20px; margin-bottom: 20px;">
+<summary>Click on the arrow to see the answer</summary>
+The p_eighth will be ~0.125, p_quarter will be ~0.25, p_half will be ~0.5. This is because these random numbers are produced by generating ints and then dividing by the maximum int. There may be additional gotchas here, but this at least ensures that intervals of the same length in [0,1] will have the same number of samples. It's just that those that are smaller will be represented with more precision.
+</details>
+
 ## Related projects

 - [Squiggle](https://www.squiggle-language.com/)
--- a/scratchpad/scratchpad
+++ b/scratchpad/scratchpad
--- a/scratchpad/scratchpad.c
+++ b/scratchpad/scratchpad.c
@ -1,14 +1,10 @@
 #include "../squiggle.h"
-// #include "../squiggle_more.h"
+#include "../squiggle_more.h"
 #include <math.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>

-double sample_loguniform(double a, double b, uint64_t* seed){
-    return exp(sample_uniform(log(a), log(b), seed));
-}
-
 int main()
 {
    // Replicate <https://arxiv.org/pdf/1806.02404.pdf>, and in particular the red line in page 11.
@ -18,140 +14,32 @@ int main()
    uint64_t* seed = malloc(sizeof(uint64_t));
    *seed = UINT64_MAX/64; // xorshift can't start with a seed of 0
    
-    double sample_fermi_naive(uint64_t* seed){
-        double rate_of_star_formation = sample_loguniform(1,100, seed);
-        double fraction_of_stars_with_planets = sample_loguniform(0.1, 1, seed);
-        double number_of_habitable_planets_per_star_system = sample_loguniform(0.1, 1, seed);
-        double rate_of_life_formation_in_habitable_planets = sample_lognormal(1, 50, seed);
-        double fraction_of_habitable_planets_in_which_any_life_appears = -expm1(-rate_of_life_formation_in_habitable_planets);
-        // double fraction_of_habitable_planets_in_which_any_life_appears = 1-exp(-rate_of_life_formation_in_habitable_planets);
-        // but with more precision
-        double fraction_of_planets_with_life_in_which_intelligent_life_appears = sample_loguniform(0.001, 1, seed);
-        double fraction_of_intelligent_planets_which_are_detectable_as_such = sample_loguniform(0.01, 1, seed);
-        double longevity_of_detectable_civilizations = sample_loguniform(100, 10000000000, seed);
-
-        // printf(" rate_of_star_formation = %lf\n", rate_of_star_formation);
-        // printf(" fraction_of_stars_with_planets = %lf\n", fraction_of_stars_with_planets);
-        // printf(" number_of_habitable_planets_per_star_system = %lf\n", number_of_habitable_planets_per_star_system);
-        // printf(" rate_of_life_formation_in_habitable_planets = %.16lf\n", rate_of_life_formation_in_habitable_planets);
-        // printf(" fraction_of_habitable_planets_in_which_any_life_appears = %lf\n", fraction_of_habitable_planets_in_which_any_life_appears);
-        // printf(" fraction_of_planets_with_life_in_which_intelligent_life_appears = %lf\n", fraction_of_planets_with_life_in_which_intelligent_life_appears);
-        // printf(" fraction_of_intelligent_planets_which_are_detectable_as_such = %lf\n", fraction_of_intelligent_planets_which_are_detectable_as_such);
-        // printf(" longevity_of_detectable_civilizations = %lf\n", longevity_of_detectable_civilizations);
-
-        // Expected number of civilizations in the Milky way;
-        // see footnote 3 (p. 5)
-        double n = rate_of_star_formation *
-          fraction_of_stars_with_planets *
-          number_of_habitable_planets_per_star_system * 
-          fraction_of_habitable_planets_in_which_any_life_appears * 
-          fraction_of_planets_with_life_in_which_intelligent_life_appears * 
-          fraction_of_intelligent_planets_which_are_detectable_as_such *
-          longevity_of_detectable_civilizations;
-
-        return n;
-    }
-
-    double sample_fermi_paradox_naive(uint64_t* seed){
-        double n = sample_fermi_naive(seed);
-        return ((n > 1) ? 1 : 0);
-    }
-
-    double n = 1000000;
-    double naive_fermi_proportion = 0;
-    for(int i=0; i<n; i++){
-        double result = sample_fermi_paradox_naive(seed);
-        // printf("result: %lf\n", result);
-        naive_fermi_proportion+=result;
-    }
-    printf("Naïve %% that we are not alone: %lf\n", naive_fermi_proportion/n);
-
-
-    // Thinking in log space
-    double sample_fermi_logspace(uint64_t* seed){
-        double log_rate_of_star_formation = sample_uniform(log(1), log(100), seed);
-        double log_fraction_of_stars_with_planets = sample_uniform(log(0.1), log(1), seed);
-        double log_number_of_habitable_planets_per_star_system = sample_uniform(log(0.1), log(1), seed);
-        double log_fraction_of_planets_with_life_in_which_intelligent_life_appears = sample_uniform(log(0.001), log(1), seed);
-        double log_fraction_of_intelligent_planets_which_are_detectable_as_such = sample_uniform(log(0.01), log(1), seed);
-        double log_longevity_of_detectable_civilizations = sample_uniform(log(100), log(10000000000), seed);
-
-        // printf(" log_rate_of_star_formation = %lf\n", log_rate_of_star_formation);
-        // printf(" log_fraction_of_stars_with_planets = %lf\n", log_fraction_of_stars_with_planets);
-        // printf(" log_number_of_habitable_planets_per_star_system = %lf\n", log_number_of_habitable_planets_per_star_system);
-        // printf(" log_fraction_of_planets_with_life_in_which_intelligent_life_appears = %lf\n", log_fraction_of_planets_with_life_in_which_intelligent_life_appears);
-        // printf(" log_fraction_of_intelligent_planets_which_are_detectable_as_such = %lf\n", log_fraction_of_intelligent_planets_which_are_detectable_as_such);
-        // printf(" log_longevity_of_detectable_civilizations = %lf\n", log_longevity_of_detectable_civilizations);
-
-        double log_n1 = 
-            log_rate_of_star_formation +
-            log_fraction_of_stars_with_planets +
-            log_number_of_habitable_planets_per_star_system + 
-            log_fraction_of_planets_with_life_in_which_intelligent_life_appears + 
-            log_fraction_of_intelligent_planets_which_are_detectable_as_such +
-            log_longevity_of_detectable_civilizations;
-        // printf("first part of calculation: %lf\n", log_n1);
-
-        /* Consider fraction_of_habitable_planets_in_which_any_life_appears separately.
-        Imprecisely, we could do:
-
-        double rate_of_life_formation_in_habitable_planets = sample_lognormal(1, 50, seed);
-        double fraction_of_habitable_planets_in_which_any_life_appears = 1- exp(-rate_of_life_formation_in_habitable_planets);
-        double log_fraction_of_habitable_planets_in_which_any_life_appears = log(1-fraction_of_habitable_planets_in_which_any_life_appears);
-        double n = exp(log_n1) * fraction_of_habitable_planets_in_which_any_life_appears;
-        // or: 
-        double n2 = exp(log_n1 + log(fraction_of_habitable_planets_in_which_any_life_appears))
-
-        However, we lose all precision here.
-
-        Now, say
-        a = underlying normal 
-        b = rate_of_life_formation_in_habitable_planets = exp(underlying normal)
-        c = 1 - exp(-b) = fraction_of_habitable_planets_in_which_any_life_appears
-        d = log(c)
-
-        Now, is there some way we can d more efficiently/precisely?
-        Turns out there is!
-
-        Looking at the Taylor expansion for c = 1 - exp(-b), it's b - b^2/2 + b^3/6 - x^b/24, etc.
-        // https://www.wolframalpha.com/input?i=1-exp%28-x%29
-        When b ~ 0 (as is often the case), this is close to b.
-
-        But now, if b ~ 0
-        c ~ b
-        and d = log(c) ~ log(b) = log(exp(a)) = a
-        */
-        double log_rate_of_life_formation_in_habitable_planets = sample_normal(1, 50, seed);
-        // printf("log_rate_of_life_formation_in_habitable_planets: %lf\n", log_rate_of_life_formation_in_habitable_planets);
-
-        double log_fraction_of_habitable_planets_in_which_any_life_appears;
-        if(log_rate_of_life_formation_in_habitable_planets < -32){
-            log_fraction_of_habitable_planets_in_which_any_life_appears = log_rate_of_life_formation_in_habitable_planets;
-        } else{
-            double rate_of_life_formation_in_habitable_planets = exp(log_rate_of_life_formation_in_habitable_planets);
-            double fraction_of_habitable_planets_in_which_any_life_appears = -expm1(-rate_of_life_formation_in_habitable_planets);
-            log_fraction_of_habitable_planets_in_which_any_life_appears = log(fraction_of_habitable_planets_in_which_any_life_appears);
+    int n_samples = 100*MILLION;
+    int p_sixteenth = 0;
+    int p_eighth = 0;
+    int p_quarter = 0;
+    int p_half = 0;
+    double sample;
+    for(int i=0; i<n_samples; i++){
+        sample = sample_unit_uniform(seed);
+        // printf("%lf\n", sample);
+        if (sample < 1.0/16.0){
+            p_sixteenth++;
+            p_eighth++;
+            p_quarter++;
+            p_half++;
+        } else if(sample < 0.125){
+            p_eighth++;
+            p_quarter++;
+            p_half++;
+        } else if(sample < 0.25){
+            p_quarter++;
+            p_half++;
+        } else if(sample < 0.5){
+            p_half++;
+        }else{
+            // printf("Sample > 0.5\n");
        }
-        // printf(" log_fraction_of_habitable_planets_in_which_any_life_appears: %lf\n", log_fraction_of_habitable_planets_in_which_any_life_appears);
-
-        double log_n = log_n1 + log_fraction_of_habitable_planets_in_which_any_life_appears;
-
-        return log_n;
    }
-
-    double sample_fermi_paradox_logspace(uint64_t* seed){
-        double n = sample_fermi_logspace(seed);
-        return ((n > 0) ? 1 : 0);
-    }
-
-    double logspace_fermi_proportion = 0;
-    for(int i=0; i<n; i++){
-        double result = sample_fermi_paradox_logspace(seed);
-        // printf("result: %lf\n", result);
-        logspace_fermi_proportion+=result;
-    }
-    printf("Using more accurate logspace computations, %% that we are not alone: %lf\n", logspace_fermi_proportion/n);
-    double result2;
-
-    free(seed);
+    printf("p_16th: %lf; p_eighth; %lf; p_quarter: %lf; p_half: %lf", ((double)p_sixteenth)/n_samples, (double)p_eighth/n_samples, (double)p_quarter/n_samples, (double)p_half/n_samples);
 }