tweaks before twitter thread

fix constant name 95=>90
reword comments
2023-09-27 15:25:12 +01:00 · 2023-09-27 14:10:40 +01:00 · 2023-09-27 13:51:05 +01:00 · 2023-09-26 20:48:33 +01:00 · 2023-09-24 18:03:34 +01:00 · 2023-09-23 23:24:25 +01:00
26 changed files with 604 additions and 81 deletions
--- a/README.md
+++ b/README.md
@ -2,6 +2,8 @@

 squiggle.c is a self-contained C99 library that provides functions for simple Monte Carlo estimation, based on [Squiggle](https://www.squiggle-language.com/).

+![](./core.png)
+
 ## Why C?

 - Because it is fast
@ -12,7 +14,7 @@ squiggle.c is a self-contained C99 library that provides functions for simple Mo
 - Because if you can implement something in C, you can implement it anywhere else
 - Because it can be made faster if need be
  - e.g., with a multi-threading library like OpenMP, 
-  - or by implementing faster but more complex algorithms
+  - or by implementing faster but more complex algorithms
  - or more simply, by inlining the sampling functions (adding an `inline` directive before their function declaration)
 - **Because there are few abstractions between it and machine code** (C => assembly => machine code with gcc, or C => machine code, with tcc), leading to fewer errors beyond the programmer's control.

@ -34,7 +36,7 @@ You can follow some example usage in the examples/ folder

 ### squiggle.c is short

-[squiggle.c](squiggle.c) is less than 500 lines of C. The reader could just read it and grasp its contents.
+[squiggle.c](squiggle.c) is less than 600 lines of C, with a core of <250 lines. The reader could just read it and grasp its contents.

 ### Core strategy

@ -341,4 +343,18 @@ It emits one warning about something I already took care of, so by default I've
 - [x] Have some more complicated & realistic example
 - [x] Add summarization functions: 90% ci (or all c.i.?) 
 - [x] Link to the examples in the examples section.
- [x] Add a few functions for doing simple algebra on normals, and lognormals?
+- [x] Add a few functions for doing simple algebra on normals, and lognormals
+  - [x] Add prototypes
+  - [x] Use named structs
+  - [x] Add to header file
+  - [x] Provide example algebra
+  - [x] Add conversion between 90% ci and parameters.
+  - [x] Use that conversion in conjunction with small algebra.
+  - [x] Consider ergonomics of using ci instead of c_i
+    - [x] use named struct instead
+    - [x] demonstrate and document feeding a struct directly to a function; my_function((struct c_i){.low = 1, .high = 2});
+  - [ ] Consider desirability of defining shortcuts for those functions. Adds a level of magic, though.
+  - [ ] Test results
+  - [x] Move to own file? Or signpost in file? => signposted in file.
+- [ ] Disambiguate sample_laplace--successes vs failures || successes vs total trials as two distinct and differently named functions
+- [ ] Write twitter thread.
--- a/core.png
+++ b/core.png
--- a/examples/02_many_samples_time_to_botec/example.c
+++ b/examples/02_many_samples_time_to_botec/example.c
@ -1,7 +1,7 @@
-#include <stdint.h>
-#include <stdlib.h>
-#include <stdio.h>
 #include "../../squiggle.h"
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>

 // Estimate functions
 double sample_0(uint64_t* seed)
@ -24,10 +24,11 @@ double sample_many(uint64_t* seed)
    return sample_to(2, 10, seed);
 }

-int main(){
+int main()
+{
    // set randomness seed
-		uint64_t* seed = malloc(sizeof(uint64_t));
-		*seed = 1000; // xorshift can't start with 0
+    uint64_t* seed = malloc(sizeof(uint64_t));
+    *seed = 1000; // xorshift can't start with 0

    double p_a = 0.8;
    double p_b = 0.5;
@ -37,18 +38,18 @@ int main(){
    double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
    double (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many };

-		int n_samples = 1000000;
-		double* result_many = (double *) malloc(n_samples * sizeof(double));
-		for(int i=0; i<n_samples; i++){
-      result_many[i] = sample_mixture(samplers, weights, n_dists, seed);
-		}
-	  printf("Mean: %f\n", array_mean(result_many, n_samples));
-		
-		// printf("result_many: [");
-		// for(int i=0; i<100; i++){
-		//  printf("%.2f, ", result_many[i]);
-		// }
-		// printf("]\n");
+    int n_samples = 1000000;
+    double* result_many = (double*)malloc(n_samples * sizeof(double));
+    for (int i = 0; i < n_samples; i++) {
+        result_many[i] = sample_mixture(samplers, weights, n_dists, seed);
+    }
+    printf("Mean: %f\n", array_mean(result_many, n_samples));

-		free(seed);
+    // printf("result_many: [");
+    // for(int i=0; i<100; i++){
+    //  printf("%.2f, ", result_many[i]);
+    // }
+    // printf("]\n");
+
+    free(seed);
 }
--- a/examples/08_nuclear_war/example
+++ b/examples/08_nuclear_war/example
--- a/examples/08_nuclear_war/example.c
+++ b/examples/08_nuclear_war/example.c
@ -60,9 +60,9 @@ int main()
    }
    printf("... ]\n");

-    struct c_i c_i_90 = get_90_confidence_interval(mixture, seed);
+    ci ci_90 = get_90_confidence_interval(mixture, seed);
    printf("mean: %f\n", array_mean(mixture_result, n));
-    printf("90%% confidence interval: [%f, %f]\n", c_i_90.low, c_i_90.high);
+    printf("90%% confidence interval: [%f, %f]\n", ci_90.low, ci_90.high);

    free(seed);
 }
--- a/examples/10_nuclear_recovery/example
+++ b/examples/10_nuclear_recovery/example
--- a/examples/10_nuclear_recovery/example.c
+++ b/examples/10_nuclear_recovery/example.c
@ -46,8 +46,8 @@ int main()

 		// Before a first nuclear collapse
 		printf("## Before the first nuclear collapse\n");
-    struct c_i c_i_90_2023 = get_90_confidence_interval(yearly_probability_nuclear_collapse_2023, seed);
-    printf("90%% confidence interval: [%f, %f]\n", c_i_90_2023.low, c_i_90_2023.high);
+    ci ci_90_2023 = get_90_confidence_interval(yearly_probability_nuclear_collapse_2023, seed);
+    printf("90%% confidence interval: [%f, %f]\n", ci_90_2023.low, ci_90_2023.high);

    double* yearly_probability_nuclear_collapse_2023_samples = malloc(sizeof(double) * num_samples);
    for (int i = 0; i < num_samples; i++) {
@ -57,8 +57,8 @@ int main()

 		// After the first nuclear collapse
 		printf("\n## After the first nuclear collapse\n");
-    struct c_i c_i_90_2070 = get_90_confidence_interval(yearly_probability_nuclear_collapse_after_recovery_example, seed);
-    printf("90%% confidence interval: [%f, %f]\n", c_i_90_2070.low, c_i_90_2070.high);
+    ci ci_90_2070 = get_90_confidence_interval(yearly_probability_nuclear_collapse_after_recovery_example, seed);
+    printf("90%% confidence interval: [%f, %f]\n", ci_90_2070.low, ci_90_2070.high);

    double* yearly_probability_nuclear_collapse_after_recovery_samples = malloc(sizeof(double) * num_samples);
    for (int i = 0; i < num_samples; i++) {
@ -68,8 +68,8 @@ int main()

 		// After the first nuclear collapse (antiinductive)
 		printf("\n## After the first nuclear collapse (antiinductive)\n");
-    struct c_i c_i_90_antiinductive = get_90_confidence_interval(yearly_probability_nuclear_collapse_after_recovery_antiinductive, seed);
-    printf("90%% confidence interval: [%f, %f]\n", c_i_90_antiinductive.low, c_i_90_antiinductive.high);
+    ci ci_90_antiinductive = get_90_confidence_interval(yearly_probability_nuclear_collapse_after_recovery_antiinductive, seed);
+    printf("90%% confidence interval: [%f, %f]\n", ci_90_antiinductive.low, ci_90_antiinductive.high);

    double* yearly_probability_nuclear_collapse_after_recovery_antiinductive_samples = malloc(sizeof(double) * num_samples);
    for (int i = 0; i < num_samples; i++) {
--- a/examples/11_algebra/example
+++ b/examples/11_algebra/example
--- a/examples/11_algebra/example.c
+++ b/examples/11_algebra/example.c
@ -0,0 +1,26 @@
+#include "../../squiggle.h"
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int main()
+{
+    // set randomness seed
+    uint64_t* seed = malloc(sizeof(uint64_t));
+    *seed = 1000; // xorshift can't start with 0
+
+    normal_params n1 = { .mean = 1.0, .std = 3.0 };
+    normal_params n2 = { .mean = 2.0, .std = 4.0 };
+    normal_params sn = algebra_sum_normals(n1, n2);
+    printf("The sum of Normal(%f, %f) and Normal(%f, %f) is Normal(%f, %f)\n",
+        n1.mean, n1.std, n2.mean, n2.std, sn.mean, sn.std);
+
+    lognormal_params ln1 = { .logmean = 1.0, .logstd = 3.0 };
+    lognormal_params ln2 = { .logmean = 2.0, .logstd = 4.0 };
+    lognormal_params sln = algebra_product_lognormals(ln1, ln2);
+    printf("The product of Lognormal(%f, %f) and Lognormal(%f, %f) is Lognormal(%f, %f)\n",
+        ln1.logmean, ln1.logstd, ln2.logmean, ln2.logstd, sln.logmean, sln.logstd);
+
+    free(seed);
+}
--- a/examples/11_algebra/makefile
+++ b/examples/11_algebra/makefile
@ -0,0 +1,53 @@
+# Interface: 
+#   make
+#   make build
+#   make format
+#   make run
+
+# Compiler
+CC=gcc
+# CC=tcc # <= faster compilation
+
+# Main file
+SRC=example.c ../../squiggle.c
+OUTPUT=example
+
+## Dependencies
+MATH=-lm
+
+## Flags
+DEBUG= #'-g'
+STANDARD=-std=c99
+WARNINGS=-Wall
+OPTIMIZED=-O3  #-Ofast
+# OPENMP=-fopenmp
+
+## Formatter
+STYLE_BLUEPRINT=webkit
+FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
+
+## make build
+build: $(SRC)
+	$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)
+
+format: $(SRC)
+	$(FORMATTER) $(SRC)
+
+run: $(SRC) $(OUTPUT)
+	OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
+
+time-linux: 
+	@echo "Requires /bin/time, found on GNU/Linux systems" && echo
+	
+	@echo "Running 100x and taking avg time $(OUTPUT)"
+	@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do ./$(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
+
+## Profiling
+
+profile-linux: 
+	echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
+	echo "Must be run as sudo"
+	$(CC) $(SRC) $(MATH) -o $(OUTPUT)
+	sudo perf record ./$(OUTPUT)
+	sudo perf report
+	rm perf.data
--- a/examples/12_algebra_and_conversion/example
+++ b/examples/12_algebra_and_conversion/example
--- a/examples/12_algebra_and_conversion/example.c
+++ b/examples/12_algebra_and_conversion/example.c
@ -0,0 +1,33 @@
+#include "../../squiggle.h"
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int main()
+{
+    // set randomness seed
+    uint64_t* seed = malloc(sizeof(uint64_t));
+    *seed = 1000; // xorshift can't start with 0
+    
+    // Round trip: lognormal parameters => 90% confidence interval => lognormal parameters
+    lognormal_params ln1 = { .logmean = 1.0, .logstd = 3.0 };
+    ci ln1_ci = convert_lognormal_params_to_ci(ln1);
+    printf("The 90%% confidence interval of Lognormal(%f, %f) is [%f, %f]\n",
+           ln1.logmean, ln1.logstd,
+           ln1_ci.low, ln1_ci.high);
+    lognormal_params ln1_ci_params = convert_ci_to_lognormal_params(ln1_ci);
+    printf("The lognormal which has 90%% confidence interval [%f, %f] is Lognormal(%f, %f)\n",
+           ln1_ci.low, ln1_ci.high,
+           ln1_ci_params.logmean, ln1_ci_params.logstd); // print the recovered parameters, not the inputs
+
+    // Build two lognormals from their 90% confidence intervals and multiply them
+    lognormal_params ln2 = convert_ci_to_lognormal_params((ci){.low = 1, .high = 10});
+    lognormal_params ln3 = convert_ci_to_lognormal_params((ci){.low = 5, .high = 50});
+    lognormal_params sln = algebra_product_lognormals(ln2, ln3);
+    ci sln_ci = convert_lognormal_params_to_ci(sln);
+
+    printf("Result of some lognormal products: to(%f, %f)\n", sln_ci.low, sln_ci.high);
+
+    free(seed);
+}
--- a/examples/12_algebra_and_conversion/makefile
+++ b/examples/12_algebra_and_conversion/makefile
@ -0,0 +1,53 @@
+# Interface: 
+#   make
+#   make build
+#   make format
+#   make run
+
+# Compiler
+CC=gcc
+# CC=tcc # <= faster compilation
+
+# Main file
+SRC=example.c ../../squiggle.c
+OUTPUT=example
+
+## Dependencies
+MATH=-lm
+
+## Flags
+DEBUG= #'-g'
+STANDARD=-std=c99
+WARNINGS=-Wall
+OPTIMIZED=-O3  #-Ofast
+# OPENMP=-fopenmp
+
+## Formatter
+STYLE_BLUEPRINT=webkit
+FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
+
+## make build
+build: $(SRC)
+	$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)
+
+format: $(SRC)
+	$(FORMATTER) $(SRC)
+
+run: $(SRC) $(OUTPUT)
+	OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
+
+time-linux: 
+	@echo "Requires /bin/time, found on GNU/Linux systems" && echo
+	
+	@echo "Running 100x and taking avg time $(OUTPUT)"
+	@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do ./$(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
+
+## Profiling
+
+profile-linux: 
+	echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
+	echo "Must be run as sudo"
+	$(CC) $(SRC) $(MATH) -o $(OUTPUT)
+	sudo perf record ./$(OUTPUT)
+	sudo perf report
+	rm perf.data
--- a/examples/13_ergonomic_algebra/example
+++ b/examples/13_ergonomic_algebra/example
--- a/examples/13_ergonomic_algebra/example.c
+++ b/examples/13_ergonomic_algebra/example.c
@ -0,0 +1,26 @@
+#include "../../squiggle.h"
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define ln lognormal_params
+#define to(...) convert_ci_to_lognormal_params((ci) __VA_ARGS__)
+#define from(...) convert_lognormal_params_to_ci((ln) __VA_ARGS__)
+#define times(a,b) algebra_product_lognormals(a,b)
+
+int main()
+{
+    // set randomness seed
+    uint64_t* seed = malloc(sizeof(uint64_t));
+    *seed = 1000; // xorshift can't start with 0
+
+    ln a = to({.low = 1, .high = 10});
+    ln b = to({.low = 5, .high = 500});
+    ln c = times(a, b);
+
+    printf("Result: to(%f, %f)\n", from(c).low, from(c).high);
+    printf("One sample from it is: %f\n", sample_lognormal(c.logmean, c.logstd, seed));
+
+    free(seed);
+}
--- a/examples/13_ergonomic_algebra/makefile
+++ b/examples/13_ergonomic_algebra/makefile
@ -0,0 +1,53 @@
+# Interface: 
+#   make
+#   make build
+#   make format
+#   make run
+
+# Compiler
+CC=gcc
+# CC=tcc # <= faster compilation
+
+# Main file
+SRC=example.c ../../squiggle.c
+OUTPUT=example
+
+## Dependencies
+MATH=-lm
+
+## Flags
+DEBUG= #'-g'
+STANDARD=-std=c99
+WARNINGS=-Wall
+OPTIMIZED=-O3  #-Ofast
+# OPENMP=-fopenmp
+
+## Formatter
+STYLE_BLUEPRINT=webkit
+FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
+
+## make build
+build: $(SRC)
+	$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)
+
+format: $(SRC)
+	$(FORMATTER) $(SRC)
+
+run: $(SRC) $(OUTPUT)
+	OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
+
+time-linux: 
+	@echo "Requires /bin/time, found on GNU/Linux systems" && echo
+	
+	@echo "Running 100x and taking avg time $(OUTPUT)"
+	@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do ./$(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
+
+## Profiling
+
+profile-linux: 
+	echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
+	echo "Must be run as sudo"
+	$(CC) $(SRC) $(MATH) -o $(OUTPUT)
+	sudo perf record ./$(OUTPUT)
+	sudo perf report
+	rm perf.data
--- a/examples/14_twitter_thread_example/example
+++ b/examples/14_twitter_thread_example/example
--- a/examples/14_twitter_thread_example/example.c
+++ b/examples/14_twitter_thread_example/example.c
@ -0,0 +1,43 @@
+#include "../../squiggle.h"
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+double sample_0(uint64_t* seed){ 
+    return 0;
+}
+
+double sample_1(uint64_t* seed){
+    return 1;
+}
+
+double sample_normal_mean_1_std_2(uint64_t* seed){
+    return sample_normal(1, 2, seed);
+}
+
+double sample_1_to_3(uint64_t* seed){
+    return sample_to(1, 3, seed);
+}
+
+int main()
+{
+    // set randomness seed
+    uint64_t* seed = malloc(sizeof(uint64_t));
+    *seed = 1000; // xorshift can't start with 0
+
+    int n_dists = 4;
+    double weights[] = { 1, 2, 3, 4 };
+    double (*samplers[])(uint64_t*) = { 
+        sample_0, 
+        sample_1, 
+        sample_normal_mean_1_std_2, 
+        sample_1_to_3 
+    };
+
+    int n_samples = 10;
+    for (int i = 0; i < n_samples; i++) {
+        printf("Sample #%d: %f\n", i, sample_mixture(samplers, weights, n_dists, seed));
+    }
+
+    free(seed);
+}
--- a/examples/14_twitter_thread_example/makefile
+++ b/examples/14_twitter_thread_example/makefile
@ -0,0 +1,53 @@
+# Interface: 
+#   make
+#   make build
+#   make format
+#   make run
+
+# Compiler
+CC=gcc
+# CC=tcc # <= faster compilation
+
+# Main file
+SRC=example.c ../../squiggle.c
+OUTPUT=example
+
+## Dependencies
+MATH=-lm
+
+## Flags
+DEBUG= #'-g'
+STANDARD=-std=c99
+WARNINGS=-Wall
+OPTIMIZED=-O3  #-Ofast
+# OPENMP=-fopenmp
+
+## Formatter
+STYLE_BLUEPRINT=webkit
+FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
+
+## make build
+build: $(SRC)
+	$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)
+
+format: $(SRC)
+	$(FORMATTER) $(SRC)
+
+run: $(SRC) $(OUTPUT)
+	./$(OUTPUT) && echo
+
+time-linux: 
+	@echo "Requires /bin/time, found on GNU/Linux systems" && echo
+	
+	@echo "Running 100x and taking avg time $(OUTPUT)"
+	@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do ./$(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
+
+## Profiling
+
+profile-linux: 
+	echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
+	echo "Must be run as sudo"
+	$(CC) $(SRC) $(MATH) -o $(OUTPUT)
+	sudo perf record ./$(OUTPUT)
+	sudo perf report
+	rm perf.data
--- a/scratchpad/core.c
+++ b/scratchpad/core.c
@ -0,0 +1,27 @@
+
+uint64_t xorshift64(uint64_t* seed)
+{
+    // Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
+    // <https://en.wikipedia.org/wiki/Xorshift>
+    uint64_t x = *seed;
+    x ^= x << 13;
+    x ^= x >> 7;
+    x ^= x << 17;
+    return *seed = x;
+}
+
+double sample_unit_uniform(uint64_t* seed)
+{
+    // samples uniform from [0,1] interval.
+    return ((double)xorshift64(seed)) / ((double)UINT64_MAX);
+}
+
+double sample_unit_normal(uint64_t* seed)
+{
+    // Box-Muller transform, see: <https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform>
+    double u1 = sample_unit_uniform(seed);
+    double u2 = sample_unit_uniform(seed);
+    double z = sqrt(-2.0 * log(u1)) * sin(2 * PI * u2); // sqrt, not sqrtf: keep double precision
+    return z;
+}
+
--- a/scratchpad/makefile
+++ b/scratchpad/makefile
@ -0,0 +1,56 @@
+# Interface: 
+#   make
+#   make build
+#   make format
+#   make run
+
+# Compiler
+CC=gcc
+# CC=tcc # <= faster compilation
+
+# Main file
+SRC=scratchpad.c ../squiggle.c
+OUTPUT=scratchpad
+
+## Dependencies
+MATH=-lm
+
+## Flags
+DEBUG= #'-g'
+STANDARD=-std=c99
+WARNINGS=-Wall
+OPTIMIZED=-O3  #-Ofast
+# OPENMP=-fopenmp
+
+## Formatter
+STYLE_BLUEPRINT=webkit
+FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
+
+## make build
+build: $(SRC)
+	$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)
+
+format: $(SRC)
+	$(FORMATTER) $(SRC)
+
+run: $(SRC) $(OUTPUT)
+	./$(OUTPUT)
+
+verify: $(SRC) $(OUTPUT)
+	./$(OUTPUT) | grep "NOT passed" -A 2 --group-separator='' || true
+
+time-linux: 
+	@echo "Requires /bin/time, found on GNU/Linux systems" && echo
+	
+	@echo "Running 100x and taking avg time $(OUTPUT)"
+	@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do ./$(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
+
+## Profiling
+
+profile-linux: 
+	echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
+	echo "Must be run as sudo"
+	$(CC) $(SRC) $(MATH) -o $(OUTPUT)
+	sudo perf record ./$(OUTPUT)
+	sudo perf report
+	rm perf.data
--- a/scratchpad/scratchpad
+++ b/scratchpad/scratchpad
--- a/scratchpad/scratchpad.c
+++ b/scratchpad/scratchpad.c
@ -0,0 +1,20 @@
+#include "../squiggle.h"
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+int main()
+{
+    // set randomness seed
+    uint64_t* seed = malloc(sizeof(uint64_t));
+    *seed = 1000; // xorshift can't start with a seed of 0
+
+    for (int i = 0; i < 100; i++) {
+        double draw = sample_unit_uniform(seed);
+        printf("%f\n", draw);
+
+    }
+
+    free(seed);
+}
--- a/squiggle.c
+++ b/squiggle.c
@ -7,20 +7,28 @@
 #include <sys/types.h>
 #include <time.h>

+// Some error niceties; these won't be used until later
 #define MAX_ERROR_LENGTH 500
 #define EXIT_ON_ERROR 0
 #define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)

-const double PI = 3.14159265358979323846; // M_PI in gcc gnu99
+#define PI 3.14159265358979323846 // M_PI in gcc gnu99
+#define NORMAL90CONFIDENCE 1.6448536269514722
+
+// # Key functionality
+// Define the minimum number of functions needed to do simple estimation
+// Starts here and runs until the end of the mixture function

 // Pseudo Random number generator
 uint64_t xorshift32(uint32_t* seed)
 {
    // Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
-    // See <https://stackoverflow.com/questions/53886131/how-does-xorshift64-works>
-    // https://en.wikipedia.org/wiki/Xorshift
-    // Also some drama: <https://www.pcg-random.org/posts/on-vignas-pcg-critique.html>, <https://prng.di.unimi.it/>
-    // for floats
+    // See:
+    //   <https://en.wikipedia.org/wiki/Xorshift>
+    //   <https://stackoverflow.com/questions/53886131/how-does-xorshift32-works>,
+    // Also some drama:
+    //   <https://www.pcg-random.org/posts/on-vignas-pcg-critique.html>,
+    //   <https://prng.di.unimi.it/>
    uint64_t x = *seed;
    x ^= x << 13;
    x ^= x >> 17;
@ -30,7 +38,7 @@ uint64_t xorshift32(uint32_t* seed)

 uint64_t xorshift64(uint64_t* seed)
 {
-    // same as above, but for generating doubles
+    // same as above, but for generating doubles instead of floats
    uint64_t x = *seed;
    x ^= x << 13;
    x ^= x >> 7;
@ -48,7 +56,7 @@ double sample_unit_uniform(uint64_t* seed)

 double sample_unit_normal(uint64_t* seed)
 {
-    // See: <https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform>
+    // See: <https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform>
    double u1 = sample_unit_uniform(seed);
    double u2 = sample_unit_uniform(seed);
    double z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2);
@ -71,17 +79,40 @@ double sample_lognormal(double logmean, double logstd, uint64_t* seed)
    return exp(sample_normal(logmean, logstd, seed));
 }

+double sample_normal_from_90_confidence_interval(double low, double high, uint64_t* seed)
+{
+    // Explanation of key idea:
+    // 1. We know that the 90% confidence interval of the unit normal is
+    // [-1.6448536269514722, 1.6448536269514722]
+    // see e.g.: https://stackoverflow.com/questions/20626994/how-to-calculate-the-inverse-of-the-normal-cumulative-distribution-function-in-p
+    // 2. So if we take a unit normal and multiply it by
+    // L / 1.6448536269514722, its new 90% confidence interval will be
+    // [-L, L], i.e., length 2 * L
+    // 3. Instead, if we want to get a confidence interval of length L,
+    // we should multiply the unit normal by
+    // L / (2 * 1.6448536269514722)
+    // Meaning that its standard deviation should be multiplied by that amount
+    // see: https://en.wikipedia.org/wiki/Normal_distribution?lang=en#Operations_on_a_single_normal_variable
+    // 4. So we have learnt that Normal(0, L / (2 * 1.6448536269514722))
+    // has a 90% confidence interval of length L
+    // 5. If we want a 90% confidence interval from low to high,
+    // we can set mean = (high + low)/2; the midpoint, and L = high-low,
+    // Normal([high + low]/2, [high - low]/(2 * 1.6448536269514722))
+    double mean = (high + low) / 2.0;
+    double std = (high - low) / (2.0 * NORMAL90CONFIDENCE);
+    return sample_normal(mean, std, seed);
+}
+
 double sample_to(double low, double high, uint64_t* seed)
 {
    // Given a (positive) 90% confidence interval,
-    // returns a sample from a lognormal
-    // with a matching 90% c.i.
-    const double NORMAL95CONFIDENCE = 1.6448536269514722;
+    // returns a sample from a lognormal with a matching 90% c.i.
+    // Key idea: If we want a lognormal with 90% confidence interval [a, b]
+    // we need but get a normal with 90% confidence interval [log(a), log(b)].
+    // Then see code for sample_normal_from_90_confidence_interval
    double loglow = logf(low);
    double loghigh = logf(high);
-    double logmean = (loglow + loghigh) / 2;
-    double logstd = (loghigh - loglow) / (2.0 * NORMAL95CONFIDENCE);
-    return sample_lognormal(logmean, logstd, seed);
+    return exp(sample_normal_from_90_confidence_interval(loglow, loghigh, seed));
 }

 double sample_gamma(double alpha, uint64_t* seed)
@ -129,14 +160,16 @@ double sample_gamma(double alpha, uint64_t* seed)

 double sample_beta(double a, double b, uint64_t* seed)
 {
+    // See: https://en.wikipedia.org/wiki/Gamma_distribution#Related_distributions
    double gamma_a = sample_gamma(a, seed);
    double gamma_b = sample_gamma(b, seed);
    return gamma_a / (gamma_a + gamma_b);
 }

-double sample_laplace(double successes, double failures, uint64_t* seed){
-	// see <https://wikiless.esmailelbob.xyz/wiki/Beta_distribution?lang=en#Rule_of_succession>
-	return sample_beta(successes + 1, failures + 1, seed);
+double sample_laplace(double successes, double failures, uint64_t* seed)
+{
+    // see <https://en.wikipedia.org/wiki/Beta_distribution?lang=en#Rule_of_succession>
+    return sample_beta(successes + 1, failures + 1, seed);
 }

 // Array helpers
@ -177,8 +210,7 @@ double array_std(double* array, int length)
 // Mixture function
 double sample_mixture(double (*samplers[])(uint64_t*), double* weights, int n_dists, uint64_t* seed)
 {
-    // You can see a simpler version of this function in the git history
-    // or in C-02-better-algorithm-one-thread/
+    // Sample from samplers with frequency proportional to their weights.
    double sum_weights = array_sum(weights, n_dists);
    double* cumsummed_normalized_weights = (double*)malloc(n_dists * sizeof(double));
    cumsummed_normalized_weights[0] = weights[0] / sum_weights;
@ -203,7 +235,11 @@ double sample_mixture(double (*samplers[])(uint64_t*), double* weights, int n_di
    return result;
 }

-// Sample from an arbitrary cdf
+// # More cool stuff
+// This is no longer necessary to do basic estimation,
+// but is still cool
+
+// ## Sample from an arbitrary cdf
 struct box {
    int empty;
    double content;
@ -399,10 +435,10 @@ double sampler_danger(struct box cdf(double), uint64_t* seed)

 // Get confidence intervals, given a sampler

-struct c_i {
+typedef struct ci_t {
    float low;
    float high;
-};
+} ci;
 int compare_doubles(const void* p, const void* q)
 {
    // https://wikiless.esmailelbob.xyz/wiki/Qsort?lang=en
@ -418,7 +454,7 @@ int compare_doubles(const void* p, const void* q)

    return 0;
 }
-struct c_i get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed)
+ci get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed)
 {
    int n = 100 * 1000;
    double* samples_array = malloc(n * sizeof(double));
@ -427,7 +463,7 @@ struct c_i get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* se
    }
    qsort(samples_array, n, sizeof(double), compare_doubles);

-    struct c_i result = {
+    ci result = {
        .low = samples_array[5000],
        .high = samples_array[94999],
    };
@ -436,47 +472,54 @@ struct c_i get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* se
    return result;
 }

-// Do algebra over lognormals and normals
-struct normal_parameters {
+// # Small algebra manipulations
+
+// here I discover named structs,
+// which mean that I don't have to be typing
+// struct blah all the time.
+typedef struct normal_params_t {
    double mean;
    double std;
-};
+} normal_params;

-struct lognormal_parameters {
-    double logmean;
-    double logstd;
-};
-
-struct normal_parameters algebra_sum_normals(struct normal_parameters a, struct normal_parameters b)
+normal_params algebra_sum_normals(normal_params a, normal_params b)
 {
-    struct normal_parameters result = {
+    normal_params result = {
        .mean = a.mean + b.mean,
        .std = sqrt((a.std * a.std) + (b.std * b.std)),
    };
    return result;
 }
-struct normal_parameters algebra_shift_normal(struct normal_parameters a, double shift)
-{
-    struct normal_parameters result = {
-        .mean = a.mean + shift,
-        .std = a.std,
-    };
-    return result;
-}

-struct lognormal_parameters algebra_product_lognormals(struct lognormal_parameters a, struct lognormal_parameters b)
+typedef struct lognormal_params_t {
+    double logmean;
+    double logstd;
+} lognormal_params;
+
+lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params b)
 {
-    struct lognormal_parameters result = {
+    lognormal_params result = {
        .logmean = a.logmean + b.logmean,
        .logstd = sqrt((a.logstd * a.logstd) + (b.logstd * b.logstd)),
    };
    return result;
 }
-struct lognormal_parameters algebra_scale_lognormal(struct lognormal_parameters a, double k)
+
+lognormal_params convert_ci_to_lognormal_params(ci x)
 {
-    struct lognormal_parameters result = {
-        .logmean = a.logmean + k,
-        .logstd = a.logstd,
-    };
+    double loghigh = logf(x.high);
+    double loglow = logf(x.low);
+    double logmean = (loghigh + loglow) / 2.0;
+    double logstd = (loghigh - loglow) / (2.0 * NORMAL90CONFIDENCE);
+    lognormal_params result = { .logmean = logmean, .logstd = logstd };
+    return result;
+}
+
+ci convert_lognormal_params_to_ci(lognormal_params y)
+{
+    double h = y.logstd * NORMAL90CONFIDENCE;
+    double loghigh = y.logmean + h;
+    double loglow = y.logmean - h;
+    ci result = { .low = exp(loglow), .high = exp(loghigh) };
    return result;
 }
--- a/squiggle.h
+++ b/squiggle.h
@ -52,10 +52,30 @@ struct box sampler_cdf_double(double cdf(double), uint64_t* seed);
 struct box sampler_cdf_box(struct box cdf(double), uint64_t* seed);

 // Get 90% confidence interval
-struct c_i {
+typedef struct ci_t {
    float low;
    float high;
-};
-struct c_i get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed);
+} ci;
+ci get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed);
+
+// small algebra manipulations
+
+typedef struct normal_params_t {
+    double mean;
+    double std;
+} normal_params;
+normal_params algebra_sum_normals(normal_params a, normal_params b);
+
+typedef struct lognormal_params_t {
+    double logmean;
+    double logstd;
+} lognormal_params;
+lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params b);
+
+
+lognormal_params convert_ci_to_lognormal_params(ci x);
+
+
+ci convert_lognormal_params_to_ci(lognormal_params y);

 #endif
--- a/test/test
+++ b/test/test
Author	SHA1	Message	Date
NunoSempere	308eecba98	tweaks before twitter thread	2023-09-27 15:25:12 +01:00
NunoSempere	b1a58f9b74	fix constant name 95=>90	2023-09-27 14:10:40 +01:00
NunoSempere	015d33adca	reword comments	2023-09-27 13:51:05 +01:00
NunoSempere	fa714f91ae	fix link typo.	2023-09-26 20:48:33 +01:00
NunoSempere	0bcd9f9b62	update to-dos in readme	2023-09-24 18:03:34 +01:00
NunoSempere	66cbb4132f	add more ergonomic & compact code using macros.	2023-09-23 23:24:25 +01:00
NunoSempere	a4389e605f	move to ci named struct.	2023-09-23 23:08:51 +01:00
NunoSempere	0d31a4f1ab	add converstion between lognormal form and 90% c.i.	2023-09-23 22:35:40 +01:00
NunoSempere	f1834341a9	update README, end for the day.	2023-09-23 22:19:59 +01:00
NunoSempere	ccad14b318	leave out really trivial manipulations, add example, update to-dos	2023-09-23 22:15:48 +01:00
NunoSempere	56ab018469	add many more comments, start adding to header file.	2023-09-23 21:53:45 +01:00
NunoSempere	79dfcf79db	use named structs in small algebra system.	2023-09-23 20:55:18 +01:00