tweaks

2024-09-13 17:41:17 -04:00
17 changed files with 87 additions and 39 deletions
--- a/ROADMAP.md
+++ b/ROADMAP.md
@ -7,10 +7,9 @@
  - [x] Make README.md less messy
  - [x] Give examples of new functions
  - [x] Reference commit with cdf functions, even though deleted
 - [ ] Figure out fixed point libraries <https://github.com/PetteriAimonen/libfixmath/>, and overflow guards for operations
 - [ ] Post on suckless subreddit
 - [ ] Look into <https://lite.duckduckgo.com/html/> instead?
 - [ ] Drive in a few more real-life applications
  - [ ] US election modelling?
 - [ ] Look into using size_t instead of int for sample numbers
 - [ ] Reorganize code a little bit to reduce usage of gcc's nested functions
 - [ ] Rename examples
--- a/examples/core/00_example_template/example
+++ b/examples/core/00_example_template/example
--- a/examples/core/01_one_sample/example
+++ b/examples/core/01_one_sample/example
--- a/examples/core/02_time_to_botec/example
+++ b/examples/core/02_time_to_botec/example
--- a/examples/core/03_gcc_nested_function/example
+++ b/examples/core/03_gcc_nested_function/example
--- a/examples/core/04_gamma_beta/example
+++ b/examples/core/04_gamma_beta/example
--- a/examples/core/05_hundred_lognormals/example
+++ b/examples/core/05_hundred_lognormals/example
--- a/examples/core/06_dissolving_fermi_paradox/example
+++ b/examples/core/06_dissolving_fermi_paradox/example
--- a/examples/more/12_time_to_botec_parallel/example
+++ b/examples/more/12_time_to_botec_parallel/example
--- a/examples/more/15_time_to_botec_custom_mixture/example
+++ b/examples/more/15_time_to_botec_custom_mixture/example
--- a/examples/more/15_time_to_botec_custom_mixture/example.c
+++ b/examples/more/15_time_to_botec_custom_mixture/example.c
@ -1,34 +0,0 @@
 #include "../../../squiggle.h"
 #include "../../../squiggle_more.h"
 #include <stdio.h>
 #include <stdlib.h>
 double cumsum_p0 = 0.6;
 double cumsum_p1 = 0.8;
 double cumsum_p2 = 0.9;
 double cumsum_p3 = 1.0;
 // Draws one sample from a piecewise mixture chosen by a single uniform draw u:
 //   u < cumsum_p0  -> the constant 0
 //   u < cumsum_p1  -> the constant 1
 //   u < cumsum_p2  -> sample_to(1, 3, seed)
 //   otherwise      -> sample_to(2, 10, seed)
 // The thresholds are the file-level cumsum_p* variables; seed is forwarded to
 // every sampler call.
 double sampler_result(uint64_t * seed)
 {
    double u = sample_uniform(0, 1, seed);

    if (u < cumsum_p0) {
        return 0;
    }
    if (u < cumsum_p1) {
        return 1;
    }
    if (u < cumsum_p2) {
        return sample_to(1, 3, seed);
    }
    // Everything not caught by the thresholds above lands here.
    return sample_to(2, 10, seed);
 }
 // Fills a buffer of n_samples draws of sampler_result via sampler_parallel
 // (called with n_threads = 16) and prints their average.
 // Returns 0 on success, 1 if the sample buffer cannot be allocated.
 int main()
 {
    int n_samples = 1000 * 1000, n_threads = 16;
    double* results = malloc((size_t)n_samples * sizeof(double));
    if (results == NULL) {
        // Bail out instead of handing a NULL buffer to sampler_parallel.
        fprintf(stderr, "Failed to allocate memory for %d samples\n", n_samples);
        return 1;
    }
    sampler_parallel(sampler_result, results, n_threads, n_samples);
    printf("Avg: %f\n", array_sum(results, n_samples) / n_samples);
    free(results);
    return 0;
 }
--- a/examples/more/makefile
+++ b/examples/more/makefile
@ -52,7 +52,6 @@ all:
 	$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 12_time_to_botec_parallel/$(SRC)       $(DEPS) -o 12_time_to_botec_parallel/$(OUTPUT)
 	$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 13_parallelize_min/$(SRC)              $(DEPS) -o 13_parallelize_min/$(OUTPUT)
 	$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 14_check_confidence_interval/$(SRC)    $(DEPS) -o 14_check_confidence_interval/$(OUTPUT)
 	$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 15_time_to_botec_custom_mixture/$(SRC)    $(DEPS) -o 15_time_to_botec_custom_mixture/$(OUTPUT)
 format-all:
 	$(FORMATTER) 00_example_template/$(SRC)
--- a/scratchpad/ai
+++ b/scratchpad/ai
--- a/scratchpad/ai.c
+++ b/scratchpad/ai.c
@ -0,0 +1,51 @@
 #include "../squiggle.h"
 #include "../squiggle_more.h"
 #include <stdio.h>
 #include <stdlib.h>
 // Estimate functions
 // First estimate: author count scaled by two beta-distributed fractions and
 // divided by the sampled share of the field those authors represent.
 // The three sample_beta draws happen in a fixed order (field share, relevance,
 // academia adjustment) because each one advances the shared seed.
 double sample_ais_1(uint64_t* seed)
 {
    // Authors who published in the stats.ML category on arxiv in 2023.
    // NOTE(review): the original identifier was suffixed _2024 while its comment said 2023 -- confirm the year.
    const double arxiv_ml_authors = 7379;

    // Fraction of the field those authors make up; original note: 0.03 to 0.1.
    // See https://nunosempere.com/blog/2023/03/15/fit-beta/
    double share_of_field = sample_beta(7.41986324742243, 114.487997692331, seed);
    // Fraction of their research that is safety relevant; original note: 0.001 to 0.1.
    double relevant_share = sample_beta(0.8277362357555023, 25.259989675532076, seed);
    // Adjustment because they are from academia; original note: 0.05 0.5.
    double academia_factor = sample_beta(1.9872200324266, 6.36630125578423, seed);

    // Same left-to-right evaluation as a * b * c / d.
    double numerator = arxiv_ml_authors * relevant_share * academia_factor;
    return numerator / share_of_field;
 }
 // Second estimate: same formula as sample_ais_1 but with different beta
 // parameters for the relevance fraction and the academia adjustment.
 // The three sample_beta draws happen in a fixed order (field share, relevance,
 // academia adjustment) because each one advances the shared seed.
 double sample_ais_2(uint64_t* seed)
 {
    // Authors who published in the stats.ML category on arxiv in 2023.
    // NOTE(review): the original identifier was suffixed _2024 while its comment said 2023 -- confirm the year.
    const double arxiv_ml_authors = 7379;

    // Fraction of the field those authors make up; original note: 0.03 to 0.1.
    // See https://nunosempere.com/blog/2023/03/15/fit-beta/
    double share_of_field = sample_beta(7.41986324742243, 114.487997692331, seed);
    // Fraction of their research that is safety relevant.
    // NOTE(review): the original note still said "0.001 to 0.1" although the beta parameters differ from sample_ais_1 -- confirm the intended range.
    double relevant_share = sample_beta(3.28962721497463, 17.7686162987246, seed);
    // Adjustment because they are from academia.
    // NOTE(review): the original note still said "0.05 0.5" although the beta parameters differ from sample_ais_1 -- confirm the intended range.
    double academia_factor = sample_beta(2.23634269185645, 3.73532102339597, seed);

    // Same left-to-right evaluation as a * b * c / d.
    double numerator = arxiv_ml_authors * relevant_share * academia_factor;
    return numerator / share_of_field;
 }
 // Samples both estimators (sample_ais_1, then sample_ais_2) into one shared
 // buffer via sampler_parallel and prints summary statistics and a histogram
 // for each. Returns 0 on success, 1 if the sample buffer cannot be allocated.
 //
 // The original also heap-allocated a seed that no call ever received; it has
 // been removed, since sampler_parallel is not passed a seed here.
 int main()
 {
    int n_samples = 10 * MILLION;

    printf("# AIS 1\n");
    double* xs = malloc(sizeof(double) * (size_t)n_samples);
    if (xs == NULL) {
        // Bail out instead of handing a NULL buffer to sampler_parallel.
        fprintf(stderr, "Failed to allocate memory for %d samples\n", n_samples);
        return 1;
    }
    sampler_parallel(sample_ais_1, xs, 16, n_samples);
    printf("# Stats\n");
    array_print_stats(xs, n_samples);
    printf("\n# Histogram\n");
    array_print_histogram(xs, n_samples, 23);

    // The buffer is reused: the second run overwrites the first run's samples.
    printf("# AIS 2\n");
    sampler_parallel(sample_ais_2, xs, 16, n_samples);
    printf("# Stats\n");
    array_print_stats(xs, n_samples);
    printf("\n# Histogram\n");
    array_print_histogram(xs, n_samples, 23);

    free(xs); // previously never released
    return 0;
 }
--- a/scratchpad/aisfield/example.c
+++ b/scratchpad/aisfield/example.c
@ -0,0 +1,33 @@
 #include "../../../squiggle.h"
 #include "../../../squiggle_more.h"
 #include <stdio.h>
 #include <stdlib.h>
 // Estimate functions
 // Estimate: author count scaled by two beta-distributed fractions and divided
 // by the sampled share of the field those authors represent.
 // The three sample_beta draws happen in a fixed order (field share, relevance,
 // academia discount) because each one advances the shared seed.
 double sample_beta_3_2(uint64_t* seed)
 {
    // Authors who published in the stats.ML category on arxiv in 2023.
    // NOTE(review): the original identifier was suffixed _2024 while its comment said 2023 -- confirm the year.
    const double arxiv_ml_authors = 7379;

    // Fraction of the field those authors make up; original note: 0.03 to 0.1.
    // See https://nunosempere.com/blog/2023/03/15/fit-beta/
    double share_of_field = sample_beta(7.41986324742243, 114.487997692331, seed);
    // Fraction of their research that is safety relevant; original note: 0.001 to 0.1.
    double relevant_share = sample_beta(0.8277362357555023, 25.259989675532076, seed);
    // Discount because they are from academia; original note: 0.05 0.5.
    double academia_factor = sample_beta(1.9872200324266, 6.36630125578423, seed);

    // Same left-to-right evaluation as a * b * c / d.
    double numerator = arxiv_ml_authors * relevant_share * academia_factor;
    return numerator / share_of_field;
 }
 // Samples sample_beta_3_2 into a buffer via sampler_parallel and prints
 // summary statistics and a histogram of the result.
 // Returns 0 on success, 1 if the sample buffer cannot be allocated.
 //
 // The original also heap-allocated a seed that no call ever received; it has
 // been removed, since sampler_parallel is not passed a seed here.
 int main()
 {
    int n_samples = 1 * MILLION;

    double* xs = malloc(sizeof(double) * (size_t)n_samples);
    if (xs == NULL) {
        // Bail out instead of handing a NULL buffer to sampler_parallel.
        fprintf(stderr, "Failed to allocate memory for %d samples\n", n_samples);
        return 1;
    }
    sampler_parallel(sample_beta_3_2, xs, 16, n_samples);

    printf("\n# Stats\n");
    array_print_stats(xs, n_samples);
    printf("\n# Histogram\n");
    array_print_histogram(xs, n_samples, 23);

    free(xs); // previously never released
    return 0;
 }
--- a/scratchpad/scratchpad
+++ b/scratchpad/scratchpad
--- a/squiggle.c
+++ b/squiggle.c
@ -50,7 +50,7 @@ double sample_unit_normal(uint64_t* seed)
    // // See: <https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform>
    double u1 = sample_unit_uniform(seed);
    double u2 = sample_unit_uniform(seed);
-    double z = sqrt(-2.0 * log(u1)) * sin(2.0 * PI * u2);
+    double z = sqrt(-2.0 * log(u1)) * sin(2 * PI * u2);
    return z;
 }
@ -90,7 +90,7 @@ double sample_normal_from_90_ci(double low, double high, uint64_t* seed)
    // 5. If we want a 90% confidence interval from high to low,
    // we can set mean = (high + low)/2; the midpoint, and L = high-low,
    // Normal([high + low]/2, [high - low]/(2 * 1.6448536269514722))
-    double mean = (high + low) * 0.5;
+    double mean = (high + low) / 2.0;
    double std = (high - low) / (2.0 * NORMAL90CONFIDENCE);
    return sample_normal(mean, std, seed);
 }