Compare commits
No commits in common. "308eecba9844f0194f892b8d88f22f5e2fe51a22" and "a4fdbc1e2c80460fc2c3861a9f0e96f9577ac26d" have entirely different histories.
308eecba98
...
a4fdbc1e2c
22
README.md
22
README.md
|
@ -2,8 +2,6 @@
|
|||
|
||||
squiggle.c is a self-contained C99 library that provides functions for simple Monte Carlo estimation, based on [Squiggle](https://www.squiggle-language.com/).
|
||||
|
||||
![](./core.png)
|
||||
|
||||
## Why C?
|
||||
|
||||
- Because it is fast
|
||||
|
@ -14,7 +12,7 @@ squiggle.c is a self-contained C99 library that provides functions for simple Mo
|
|||
- Because if you can implement something in C, you can implement it anywhere else
|
||||
- Because it can be made faster if need be
|
||||
- e.g., with a multi-threading library like OpenMP,
|
||||
- o by implementing faster but more complex algorithms
|
||||
- or by implementing faster but more complex algorithms
|
||||
- or more simply, by inlining the sampling functions (adding an `inline` directive before their function declaration)
|
||||
- **Because there are few abstractions between it and machine code** (C => assembly => machine code with gcc, or C => machine code, with tcc), leading to fewer errors beyond the programmer's control.
|
||||
|
||||
|
@ -36,7 +34,7 @@ You can follow some example usage in the examples/ folder
|
|||
|
||||
### squiggle.c is short
|
||||
|
||||
[squiggle.c](squiggle.c) is less than 600 lines of C, with a core of <250 lines. The reader could just read it and grasp its contents.
|
||||
[squiggle.c](squiggle.c) is less than 500 lines of C. The reader could just read it and grasp its contents.
|
||||
|
||||
### Core strategy
|
||||
|
||||
|
@ -343,18 +341,4 @@ It emits one warning about something I already took care of, so by default I've
|
|||
- [x] Have some more complicated & realistic example
|
||||
- [x] Add summarization functions: 90% ci (or all c.i.?)
|
||||
- [x] Link to the examples in the examples section.
|
||||
- [x] Add a few functions for doing simple algebra on normals, and lognormals
|
||||
- [x] Add prototypes
|
||||
- [x] Use named structs
|
||||
- [x] Add to header file
|
||||
- [x] Provide example algebra
|
||||
- [x] Add conversion between 90% ci and parameters.
|
||||
- [x] Use that conversion in conjunction with small algebra.
|
||||
- [x] Consider ergonomics of using ci instead of c_i
|
||||
- [x] use named struct instead
|
||||
- [x] demonstrate and document feeding a struct directly to a function; my_function((struct c_i){.low = 1, .high = 2});
|
||||
- [ ] Consider desirability of defining shortcuts for those functions. Adds a level of magic, though.
|
||||
- [ ] Test results
|
||||
- [x] Move to own file? Or signpost in file? => signposted in file.
|
||||
- [ ] Disambiguate sample_laplace--successes vs failures || successes vs total trials as two distinct and differently named functions
|
||||
- [ ] Write twitter thread.
|
||||
- [x] Add a few functions for doing simple algebra on normals, and lognormals?
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#include "../../squiggle.h"
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "../../squiggle.h"
|
||||
|
||||
// Estimate functions
|
||||
double sample_0(uint64_t* seed)
|
||||
|
@ -24,11 +24,10 @@ double sample_many(uint64_t* seed)
|
|||
return sample_to(2, 10, seed);
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
int main(){
|
||||
// set randomness seed
|
||||
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||
*seed = 1000; // xorshift can't start with 0
|
||||
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||
*seed = 1000; // xorshift can't start with 0
|
||||
|
||||
double p_a = 0.8;
|
||||
double p_b = 0.5;
|
||||
|
@ -38,18 +37,18 @@ int main()
|
|||
double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
|
||||
double (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many };
|
||||
|
||||
int n_samples = 1000000;
|
||||
double* result_many = (double*)malloc(n_samples * sizeof(double));
|
||||
for (int i = 0; i < n_samples; i++) {
|
||||
result_many[i] = sample_mixture(samplers, weights, n_dists, seed);
|
||||
}
|
||||
printf("Mean: %f\n", array_mean(result_many, n_samples));
|
||||
int n_samples = 1000000;
|
||||
double* result_many = (double *) malloc(n_samples * sizeof(double));
|
||||
for(int i=0; i<n_samples; i++){
|
||||
result_many[i] = sample_mixture(samplers, weights, n_dists, seed);
|
||||
}
|
||||
printf("Mean: %f\n", array_mean(result_many, n_samples));
|
||||
|
||||
// printf("result_many: [");
|
||||
// for(int i=0; i<100; i++){
|
||||
// printf("%.2f, ", result_many[i]);
|
||||
// }
|
||||
// printf("]\n");
|
||||
// printf("result_many: [");
|
||||
// for(int i=0; i<100; i++){
|
||||
// printf("%.2f, ", result_many[i]);
|
||||
// }
|
||||
// printf("]\n");
|
||||
|
||||
free(seed);
|
||||
free(seed);
|
||||
}
|
||||
|
|
Binary file not shown.
|
@ -60,9 +60,9 @@ int main()
|
|||
}
|
||||
printf("... ]\n");
|
||||
|
||||
ci ci_90 = get_90_confidence_interval(mixture, seed);
|
||||
struct c_i c_i_90 = get_90_confidence_interval(mixture, seed);
|
||||
printf("mean: %f\n", array_mean(mixture_result, n));
|
||||
printf("90%% confidence interval: [%f, %f]\n", ci_90.low, ci_90.high);
|
||||
printf("90%% confidence interval: [%f, %f]\n", c_i_90.low, c_i_90.high);
|
||||
|
||||
free(seed);
|
||||
}
|
||||
|
|
Binary file not shown.
|
@ -46,8 +46,8 @@ int main()
|
|||
|
||||
// Before a first nuclear collapse
|
||||
printf("## Before the first nuclear collapse\n");
|
||||
ci ci_90_2023 = get_90_confidence_interval(yearly_probability_nuclear_collapse_2023, seed);
|
||||
printf("90%% confidence interval: [%f, %f]\n", ci_90_2023.low, ci_90_2023.high);
|
||||
struct c_i c_i_90_2023 = get_90_confidence_interval(yearly_probability_nuclear_collapse_2023, seed);
|
||||
printf("90%% confidence interval: [%f, %f]\n", c_i_90_2023.low, c_i_90_2023.high);
|
||||
|
||||
double* yearly_probability_nuclear_collapse_2023_samples = malloc(sizeof(double) * num_samples);
|
||||
for (int i = 0; i < num_samples; i++) {
|
||||
|
@ -57,8 +57,8 @@ int main()
|
|||
|
||||
// After the first nuclear collapse
|
||||
printf("\n## After the first nuclear collapse\n");
|
||||
ci ci_90_2070 = get_90_confidence_interval(yearly_probability_nuclear_collapse_after_recovery_example, seed);
|
||||
printf("90%% confidence interval: [%f, %f]\n", ci_90_2070.low, ci_90_2070.high);
|
||||
struct c_i c_i_90_2070 = get_90_confidence_interval(yearly_probability_nuclear_collapse_after_recovery_example, seed);
|
||||
printf("90%% confidence interval: [%f, %f]\n", c_i_90_2070.low, c_i_90_2070.high);
|
||||
|
||||
double* yearly_probability_nuclear_collapse_after_recovery_samples = malloc(sizeof(double) * num_samples);
|
||||
for (int i = 0; i < num_samples; i++) {
|
||||
|
@ -68,8 +68,8 @@ int main()
|
|||
|
||||
// After the first nuclear collapse (antiinductive)
|
||||
printf("\n## After the first nuclear collapse (antiinductive)\n");
|
||||
ci ci_90_antiinductive = get_90_confidence_interval(yearly_probability_nuclear_collapse_after_recovery_antiinductive, seed);
|
||||
printf("90%% confidence interval: [%f, %f]\n", ci_90_antiinductive.low, ci_90_antiinductive.high);
|
||||
struct c_i c_i_90_antiinductive = get_90_confidence_interval(yearly_probability_nuclear_collapse_after_recovery_antiinductive, seed);
|
||||
printf("90%% confidence interval: [%f, %f]\n", c_i_90_antiinductive.low, c_i_90_antiinductive.high);
|
||||
|
||||
double* yearly_probability_nuclear_collapse_after_recovery_antiinductive_samples = malloc(sizeof(double) * num_samples);
|
||||
for (int i = 0; i < num_samples; i++) {
|
||||
|
|
Binary file not shown.
|
@ -1,26 +0,0 @@
|
|||
#include "../../squiggle.h"
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int main()
|
||||
{
|
||||
// set randomness seed
|
||||
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||
*seed = 1000; // xorshift can't start with 0
|
||||
|
||||
normal_params n1 = { .mean = 1.0, .std = 3.0 };
|
||||
normal_params n2 = { .mean = 2.0, .std = 4.0 };
|
||||
normal_params sn = algebra_sum_normals(n1, n2);
|
||||
printf("The sum of Normal(%f, %f) and Normal(%f, %f) is Normal(%f, %f)\n",
|
||||
n1.mean, n1.std, n2.mean, n2.std, sn.mean, sn.std);
|
||||
|
||||
lognormal_params ln1 = { .logmean = 1.0, .logstd = 3.0 };
|
||||
lognormal_params ln2 = { .logmean = 2.0, .logstd = 4.0 };
|
||||
lognormal_params sln = algebra_product_lognormals(ln1, ln2);
|
||||
printf("The product of Lognormal(%f, %f) and Lognormal(%f, %f) is Lognormal(%f, %f)\n",
|
||||
ln1.logmean, ln1.logstd, ln2.logmean, ln2.logstd, sln.logmean, sln.logstd);
|
||||
|
||||
free(seed);
|
||||
}
|
|
@ -1,53 +0,0 @@
|
|||
# Interface:
|
||||
# make
|
||||
# make build
|
||||
# make format
|
||||
# make run
|
||||
|
||||
# Compiler
|
||||
CC=gcc
|
||||
# CC=tcc # <= faster compilation
|
||||
|
||||
# Main file
|
||||
SRC=example.c ../../squiggle.c
|
||||
OUTPUT=example
|
||||
|
||||
## Dependencies
|
||||
MATH=-lm
|
||||
|
||||
## Flags
|
||||
DEBUG= #'-g'
|
||||
STANDARD=-std=c99
|
||||
WARNINGS=-Wall
|
||||
OPTIMIZED=-O3 #-Ofast
|
||||
# OPENMP=-fopenmp
|
||||
|
||||
## Formatter
|
||||
STYLE_BLUEPRINT=webkit
|
||||
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
|
||||
|
||||
## make build
|
||||
build: $(SRC)
|
||||
$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)
|
||||
|
||||
format: $(SRC)
|
||||
$(FORMATTER) $(SRC)
|
||||
|
||||
run: $(SRC) $(OUTPUT)
|
||||
OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
|
||||
|
||||
# Runs the built binary 100 times under /usr/bin/time and prints the
# average wall-clock time per run in milliseconds.
# The binary is invoked as ./$(OUTPUT): a bare name would be looked up on PATH.
time-linux:
	@echo "Requires /bin/time, found on GNU/Linux systems" && echo

	@echo "Running 100x and taking avg time $(OUTPUT)"
	@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do ./$(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
|
||||
|
||||
## Profiling
|
||||
|
||||
profile-linux:
|
||||
echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
|
||||
echo "Must be run as sudo"
|
||||
$(CC) $(SRC) $(MATH) -o $(OUTPUT)
|
||||
sudo perf record ./$(OUTPUT)
|
||||
sudo perf report
|
||||
rm perf.data
|
Binary file not shown.
|
@ -1,33 +0,0 @@
|
|||
#include "../../squiggle.h"
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int main()
|
||||
{
|
||||
// set randomness seed
|
||||
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||
*seed = 1000; // xorshift can't start with 0
|
||||
|
||||
// Convert to 90% confidence interval form and back
|
||||
lognormal_params ln1 = { .logmean = 1.0, .logstd = 3.0 };
|
||||
ci ln1_ci = convert_lognormal_params_to_ci(ln1);
|
||||
printf("The 90%% confidence interval of Lognormal(%f, %f) is [%f, %f]\n",
|
||||
ln1.logmean, ln1.logstd,
|
||||
ln1_ci.low, ln1_ci.high);
|
||||
lognormal_params ln1_ci_paramas = convert_ci_to_lognormal_params(ln1_ci);
|
||||
printf("The lognormal which has 90%% confidence interval [%f, %f] is Lognormal(%f, %f)\n",
|
||||
ln1_ci.low, ln1_ci.high,
|
||||
ln1.logmean, ln1.logstd);
|
||||
|
||||
lognormal_params ln2 = convert_ci_to_lognormal_params((ci){.low = 1, .high = 10});
|
||||
lognormal_params ln3 = convert_ci_to_lognormal_params((ci){.low = 5, .high = 50});
|
||||
|
||||
lognormal_params sln = algebra_product_lognormals(ln2, ln3);
|
||||
ci sln_ci = convert_lognormal_params_to_ci(sln);
|
||||
|
||||
printf("Result of some lognormal products: to(%f, %f)\n", sln_ci.low, sln_ci.high);
|
||||
|
||||
free(seed);
|
||||
}
|
|
@ -1,53 +0,0 @@
|
|||
# Interface:
|
||||
# make
|
||||
# make build
|
||||
# make format
|
||||
# make run
|
||||
|
||||
# Compiler
|
||||
CC=gcc
|
||||
# CC=tcc # <= faster compilation
|
||||
|
||||
# Main file
|
||||
SRC=example.c ../../squiggle.c
|
||||
OUTPUT=example
|
||||
|
||||
## Dependencies
|
||||
MATH=-lm
|
||||
|
||||
## Flags
|
||||
DEBUG= #'-g'
|
||||
STANDARD=-std=c99
|
||||
WARNINGS=-Wall
|
||||
OPTIMIZED=-O3 #-Ofast
|
||||
# OPENMP=-fopenmp
|
||||
|
||||
## Formatter
|
||||
STYLE_BLUEPRINT=webkit
|
||||
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
|
||||
|
||||
## make build
|
||||
build: $(SRC)
|
||||
$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)
|
||||
|
||||
format: $(SRC)
|
||||
$(FORMATTER) $(SRC)
|
||||
|
||||
run: $(SRC) $(OUTPUT)
|
||||
OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
|
||||
|
||||
# Runs the built binary 100 times under /usr/bin/time and prints the
# average wall-clock time per run in milliseconds.
# The binary is invoked as ./$(OUTPUT): a bare name would be looked up on PATH.
time-linux:
	@echo "Requires /bin/time, found on GNU/Linux systems" && echo

	@echo "Running 100x and taking avg time $(OUTPUT)"
	@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do ./$(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
|
||||
|
||||
## Profiling
|
||||
|
||||
profile-linux:
|
||||
echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
|
||||
echo "Must be run as sudo"
|
||||
$(CC) $(SRC) $(MATH) -o $(OUTPUT)
|
||||
sudo perf record ./$(OUTPUT)
|
||||
sudo perf report
|
||||
rm perf.data
|
Binary file not shown.
|
@ -1,26 +0,0 @@
|
|||
#include "../../squiggle.h"
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define ln lognormal_params
|
||||
#define to(...) convert_ci_to_lognormal_params((ci) __VA_ARGS__)
|
||||
#define from(...) convert_lognormal_params_to_ci((ln) __VA_ARGS__)
|
||||
#define times(a,b) algebra_product_lognormals(a,b)
|
||||
|
||||
int main()
|
||||
{
|
||||
// set randomness seed
|
||||
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||
*seed = 1000; // xorshift can't start with 0
|
||||
|
||||
ln a = to({.low = 1, .high = 10});
|
||||
ln b = to({.low = 5, .high = 500});
|
||||
ln c = times(a, b);
|
||||
|
||||
printf("Result: to(%f, %f)\n", from(c).low, from(c).high);
|
||||
printf("One sample from it is: %f\n", sample_lognormal(c.logmean, c.logstd, seed));
|
||||
|
||||
free(seed);
|
||||
}
|
|
@ -1,53 +0,0 @@
|
|||
# Interface:
|
||||
# make
|
||||
# make build
|
||||
# make format
|
||||
# make run
|
||||
|
||||
# Compiler
|
||||
CC=gcc
|
||||
# CC=tcc # <= faster compilation
|
||||
|
||||
# Main file
|
||||
SRC=example.c ../../squiggle.c
|
||||
OUTPUT=example
|
||||
|
||||
## Dependencies
|
||||
MATH=-lm
|
||||
|
||||
## Flags
|
||||
DEBUG= #'-g'
|
||||
STANDARD=-std=c99
|
||||
WARNINGS=-Wall
|
||||
OPTIMIZED=-O3 #-Ofast
|
||||
# OPENMP=-fopenmp
|
||||
|
||||
## Formatter
|
||||
STYLE_BLUEPRINT=webkit
|
||||
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
|
||||
|
||||
## make build
|
||||
build: $(SRC)
|
||||
$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)
|
||||
|
||||
format: $(SRC)
|
||||
$(FORMATTER) $(SRC)
|
||||
|
||||
run: $(SRC) $(OUTPUT)
|
||||
OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
|
||||
|
||||
# Runs the built binary 100 times under /usr/bin/time and prints the
# average wall-clock time per run in milliseconds.
# The binary is invoked as ./$(OUTPUT): a bare name would be looked up on PATH.
time-linux:
	@echo "Requires /bin/time, found on GNU/Linux systems" && echo

	@echo "Running 100x and taking avg time $(OUTPUT)"
	@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do ./$(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
|
||||
|
||||
## Profiling
|
||||
|
||||
profile-linux:
|
||||
echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
|
||||
echo "Must be run as sudo"
|
||||
$(CC) $(SRC) $(MATH) -o $(OUTPUT)
|
||||
sudo perf record ./$(OUTPUT)
|
||||
sudo perf report
|
||||
rm perf.data
|
Binary file not shown.
|
@ -1,43 +0,0 @@
|
|||
#include "../../squiggle.h"
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
// Degenerate sampler: always yields 0. The seed is accepted only so that the
// signature matches the other samplers; it is neither read nor advanced.
double sample_0(uint64_t* seed)
{
    (void)seed;
    return 0.0;
}
|
||||
|
||||
// Degenerate sampler: always yields 1. The seed is accepted only so that the
// signature matches the other samplers; it is neither read nor advanced.
double sample_1(uint64_t* seed)
{
    (void)seed;
    return 1.0;
}
|
||||
|
||||
// Sampler with fixed parameters: forwards to sample_normal(1, 2, seed).
double sample_normal_mean_1_std_2(uint64_t* seed)
{
    double draw = sample_normal(1, 2, seed);
    return draw;
}
|
||||
|
||||
// Sampler with fixed parameters: forwards to sample_to(1, 3, seed).
double sample_1_to_3(uint64_t* seed)
{
    double draw = sample_to(1, 3, seed);
    return draw;
}
|
||||
|
||||
int main()
|
||||
{
|
||||
// set randomness seed
|
||||
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||
*seed = 1000; // xorshift can't start with 0
|
||||
|
||||
int n_dists = 4;
|
||||
double weights[] = { 1, 2, 3, 4 };
|
||||
double (*samplers[])(uint64_t*) = {
|
||||
sample_0,
|
||||
sample_1,
|
||||
sample_normal_mean_1_std_2,
|
||||
sample_1_to_3
|
||||
};
|
||||
|
||||
int n_samples = 10;
|
||||
for (int i = 0; i < n_samples; i++) {
|
||||
printf("Sample #%d: %f\n", i, sample_mixture(samplers, weights, n_dists, seed));
|
||||
}
|
||||
|
||||
free(seed);
|
||||
}
|
|
@ -1,53 +0,0 @@
|
|||
# Interface:
|
||||
# make
|
||||
# make build
|
||||
# make format
|
||||
# make run
|
||||
|
||||
# Compiler
|
||||
CC=gcc
|
||||
# CC=tcc # <= faster compilation
|
||||
|
||||
# Main file
|
||||
SRC=example.c ../../squiggle.c
|
||||
OUTPUT=example
|
||||
|
||||
## Dependencies
|
||||
MATH=-lm
|
||||
|
||||
## Flags
|
||||
DEBUG= #'-g'
|
||||
STANDARD=-std=c99
|
||||
WARNINGS=-Wall
|
||||
OPTIMIZED=-O3 #-Ofast
|
||||
# OPENMP=-fopenmp
|
||||
|
||||
## Formatter
|
||||
STYLE_BLUEPRINT=webkit
|
||||
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
|
||||
|
||||
## make build
|
||||
build: $(SRC)
|
||||
$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)
|
||||
|
||||
format: $(SRC)
|
||||
$(FORMATTER) $(SRC)
|
||||
|
||||
run: $(SRC) $(OUTPUT)
|
||||
./$(OUTPUT) && echo
|
||||
|
||||
time-linux:
|
||||
@echo "Requires /bin/time, found on GNU/Linux systems" && echo
|
||||
|
||||
@echo "Running 100x and taking avg time $(OUTPUT)"
|
||||
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do ./$(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
|
||||
|
||||
## Profiling
|
||||
|
||||
profile-linux:
|
||||
echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
|
||||
echo "Must be run as sudo"
|
||||
$(CC) $(SRC) $(MATH) -o $(OUTPUT)
|
||||
sudo perf record ./$(OUTPUT)
|
||||
sudo perf report
|
||||
rm perf.data
|
|
@ -1,27 +0,0 @@
|
|||
|
||||
// Advances the 64-bit xorshift generator by one step.
// Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs";
// see <https://en.wikipedia.org/wiki/Xorshift>.
// The new state is written back through `seed` and also returned.
uint64_t xorshift64(uint64_t* seed)
{
    uint64_t state = *seed;
    state = state ^ (state << 13);
    state = state ^ (state >> 7);
    state = state ^ (state << 17);
    *seed = state;
    return state;
}
|
||||
|
||||
// Draws one sample from the [0,1] interval by scaling the next xorshift64
// output by UINT64_MAX. Advances the seed by one generator step.
double sample_unit_uniform(uint64_t* seed)
{
    double numerator = (double)xorshift64(seed);
    double denominator = (double)UINT64_MAX;
    return numerator / denominator;
}
|
||||
|
||||
double sample_unit_normal(uint64_t* seed)
|
||||
{
|
||||
// // See: <https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform>
|
||||
double u1 = sample_unit_uniform(seed);
|
||||
double u2 = sample_unit_uniform(seed);
|
||||
double z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2);
|
||||
return z;
|
||||
}
|
||||
|
|
@ -1,56 +0,0 @@
|
|||
# Interface:
|
||||
# make
|
||||
# make build
|
||||
# make format
|
||||
# make run
|
||||
|
||||
# Compiler
|
||||
CC=gcc
|
||||
# CC=tcc # <= faster compilation
|
||||
|
||||
# Main file
|
||||
SRC=scratchpad.c ../squiggle.c
|
||||
OUTPUT=scratchpad
|
||||
|
||||
## Dependencies
|
||||
MATH=-lm
|
||||
|
||||
## Flags
|
||||
DEBUG= #'-g'
|
||||
STANDARD=-std=c99
|
||||
WARNINGS=-Wall
|
||||
OPTIMIZED=-O3 #-Ofast
|
||||
# OPENMP=-fopenmp
|
||||
|
||||
## Formatter
|
||||
STYLE_BLUEPRINT=webkit
|
||||
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
|
||||
|
||||
## make build
|
||||
build: $(SRC)
|
||||
$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)
|
||||
|
||||
format: $(SRC)
|
||||
$(FORMATTER) $(SRC)
|
||||
|
||||
run: $(SRC) $(OUTPUT)
|
||||
./$(OUTPUT)
|
||||
|
||||
verify: $(SRC) $(OUTPUT)
|
||||
./$(OUTPUT) | grep "NOT passed" -A 2 --group-separator='' || true
|
||||
|
||||
# Runs the built binary 100 times under /usr/bin/time and prints the
# average wall-clock time per run in milliseconds.
# The binary is invoked as ./$(OUTPUT): a bare name would be looked up on PATH.
time-linux:
	@echo "Requires /bin/time, found on GNU/Linux systems" && echo

	@echo "Running 100x and taking avg time $(OUTPUT)"
	@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do ./$(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
|
||||
|
||||
## Profiling
|
||||
|
||||
profile-linux:
|
||||
echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
|
||||
echo "Must be run as sudo"
|
||||
$(CC) $(SRC) $(MATH) -o $(OUTPUT)
|
||||
sudo perf record ./$(OUTPUT)
|
||||
sudo perf report
|
||||
rm perf.data
|
Binary file not shown.
|
@ -1,20 +0,0 @@
|
|||
#include "../squiggle.h"
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
// Scratchpad program: prints 100 consecutive draws from sample_unit_uniform,
// one per line.
int main()
{
    // set randomness seed
    uint64_t* seed = malloc(sizeof(uint64_t));
    *seed = 1000; // xorshift can't start with a seed of 0

    int remaining = 100;
    while (remaining > 0) {
        printf("%f\n", sample_unit_uniform(seed));
        remaining--;
    }

    free(seed);
}
|
141
squiggle.c
141
squiggle.c
|
@ -7,28 +7,20 @@
|
|||
#include <sys/types.h>
|
||||
#include <time.h>
|
||||
|
||||
// Some error niceties; these won't be used until later
|
||||
#define MAX_ERROR_LENGTH 500
|
||||
#define EXIT_ON_ERROR 0
|
||||
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
|
||||
|
||||
#define PI 3.14159265358979323846 // M_PI in gcc gnu99
|
||||
#define NORMAL90CONFIDENCE 1.6448536269514722
|
||||
|
||||
// # Key functionality
|
||||
// Define the minimum number of functions needed to do simple estimation
|
||||
// Starts here and runs until the end of the mixture function
|
||||
const double PI = 3.14159265358979323846; // M_PI in gcc gnu99
|
||||
|
||||
// Pseudo Random number generator
|
||||
uint64_t xorshift32(uint32_t* seed)
|
||||
{
|
||||
// Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
|
||||
// See:
|
||||
// <https://en.wikipedia.org/wiki/Xorshift>
|
||||
// <https://stackoverflow.com/questions/53886131/how-does-xorshift32-works>,
|
||||
// Also some drama:
|
||||
// <https://www.pcg-random.org/posts/on-vignas-pcg-critique.html>,
|
||||
// <https://prng.di.unimi.it/>
|
||||
// See <https://stackoverflow.com/questions/53886131/how-does-xorshift64-works>
|
||||
// https://en.wikipedia.org/wiki/Xorshift
|
||||
// Also some drama: <https://www.pcg-random.org/posts/on-vignas-pcg-critique.html>, <https://prng.di.unimi.it/>
|
||||
// for floats
|
||||
uint64_t x = *seed;
|
||||
x ^= x << 13;
|
||||
x ^= x >> 17;
|
||||
|
@ -38,7 +30,7 @@ uint64_t xorshift32(uint32_t* seed)
|
|||
|
||||
uint64_t xorshift64(uint64_t* seed)
|
||||
{
|
||||
// same as above, but for generating doubles instead of floats
|
||||
// same as above, but for generating doubles
|
||||
uint64_t x = *seed;
|
||||
x ^= x << 13;
|
||||
x ^= x >> 7;
|
||||
|
@ -56,7 +48,7 @@ double sample_unit_uniform(uint64_t* seed)
|
|||
|
||||
double sample_unit_normal(uint64_t* seed)
|
||||
{
|
||||
// // See: <https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform>
|
||||
// See: <https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform>
|
||||
double u1 = sample_unit_uniform(seed);
|
||||
double u2 = sample_unit_uniform(seed);
|
||||
double z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2);
|
||||
|
@ -79,40 +71,17 @@ double sample_lognormal(double logmean, double logstd, uint64_t* seed)
|
|||
return exp(sample_normal(logmean, logstd, seed));
|
||||
}
|
||||
|
||||
inline double sample_normal_from_90_confidence_interval(double low, double high, uint64_t* seed)
|
||||
{
|
||||
// Explanation of key idea:
|
||||
// 1. We know that the 90% confidence interval of the unit normal is
|
||||
// [-1.6448536269514722, 1.6448536269514722]
|
||||
// see e.g.: https://stackoverflow.com/questions/20626994/how-to-calculate-the-inverse-of-the-normal-cumulative-distribution-function-in-p
|
||||
// 2. So if we take a unit normal and multiply it by
|
||||
// L / 1.6448536269514722, its new 90% confidence interval will be
|
||||
// [-L, L], i.e., length 2 * L
|
||||
// 3. Instead, if we want to get a confidence interval of length L,
|
||||
// we should multiply the unit normal by
|
||||
// L / (2 * 1.6448536269514722)
|
||||
// Meaning that its standard deviation should be multiplied by that amount
|
||||
// see: https://en.wikipedia.org/wiki/Normal_distribution?lang=en#Operations_on_a_single_normal_variable
|
||||
// 4. So we have learnt that Normal(0, L / (2 * 1.6448536269514722))
|
||||
// has a 90% confidence interval of length L
|
||||
// 5. If we want a 90% confidence interval from high to low,
|
||||
// we can set mean = (high + low)/2; the midpoint, and L = high-low,
|
||||
// Normal([high + low]/2, [high - low]/(2 * 1.6448536269514722))
|
||||
double mean = (high + low) / 2.0;
|
||||
double std = (high - low) / (2.0 * NORMAL90CONFIDENCE);
|
||||
return sample_normal(mean, std, seed);
|
||||
}
|
||||
|
||||
double sample_to(double low, double high, uint64_t* seed)
|
||||
{
|
||||
// Given a (positive) 90% confidence interval,
|
||||
// returns a sample from a lognorma with a matching 90% c.i.
|
||||
// Key idea: If we want a lognormal with 90% confidence interval [a, b]
|
||||
// we need but get a normal with 90% confidence interval [log(a), log(b)].
|
||||
// Then see code for sample_normal_from_90_confidence_interval
|
||||
// returns a sample from a lognormal
|
||||
// with a matching 90% c.i.
|
||||
const double NORMAL95CONFIDENCE = 1.6448536269514722;
|
||||
double loglow = logf(low);
|
||||
double loghigh = logf(high);
|
||||
return exp(sample_normal_from_90_confidence_interval(loglow, loghigh, seed));
|
||||
double logmean = (loglow + loghigh) / 2;
|
||||
double logstd = (loghigh - loglow) / (2.0 * NORMAL95CONFIDENCE);
|
||||
return sample_lognormal(logmean, logstd, seed);
|
||||
}
|
||||
|
||||
double sample_gamma(double alpha, uint64_t* seed)
|
||||
|
@ -160,16 +129,14 @@ double sample_gamma(double alpha, uint64_t* seed)
|
|||
|
||||
double sample_beta(double a, double b, uint64_t* seed)
|
||||
{
|
||||
// See: https://en.wikipedia.org/wiki/Gamma_distribution#Related_distributions
|
||||
double gamma_a = sample_gamma(a, seed);
|
||||
double gamma_b = sample_gamma(b, seed);
|
||||
return gamma_a / (gamma_a + gamma_b);
|
||||
}
|
||||
|
||||
double sample_laplace(double successes, double failures, uint64_t* seed)
|
||||
{
|
||||
// see <https://en.wikipedia.org/wiki/Beta_distribution?lang=en#Rule_of_succession>
|
||||
return sample_beta(successes + 1, failures + 1, seed);
|
||||
double sample_laplace(double successes, double failures, uint64_t* seed){
|
||||
// see <https://wikiless.esmailelbob.xyz/wiki/Beta_distribution?lang=en#Rule_of_succession>
|
||||
return sample_beta(successes + 1, failures + 1, seed);
|
||||
}
|
||||
|
||||
// Array helpers
|
||||
|
@ -210,7 +177,8 @@ double array_std(double* array, int length)
|
|||
// Mixture function
|
||||
double sample_mixture(double (*samplers[])(uint64_t*), double* weights, int n_dists, uint64_t* seed)
|
||||
{
|
||||
// Sample from samples with frequency proportional to their weights.
|
||||
// You can see a simpler version of this function in the git history
|
||||
// or in C-02-better-algorithm-one-thread/
|
||||
double sum_weights = array_sum(weights, n_dists);
|
||||
double* cumsummed_normalized_weights = (double*)malloc(n_dists * sizeof(double));
|
||||
cumsummed_normalized_weights[0] = weights[0] / sum_weights;
|
||||
|
@ -235,11 +203,7 @@ double sample_mixture(double (*samplers[])(uint64_t*), double* weights, int n_di
|
|||
return result;
|
||||
}
|
||||
|
||||
// # More cool stuff
|
||||
// This is no longer necessary to do basic estimation,
|
||||
// but is still cool
|
||||
|
||||
// ## Sample from an arbitrary cdf
|
||||
// Sample from an arbitrary cdf
|
||||
struct box {
|
||||
int empty;
|
||||
double content;
|
||||
|
@ -435,10 +399,10 @@ double sampler_danger(struct box cdf(double), uint64_t* seed)
|
|||
|
||||
// Get confidence intervals, given a sampler
|
||||
|
||||
typedef struct ci_t {
|
||||
struct c_i {
|
||||
float low;
|
||||
float high;
|
||||
} ci;
|
||||
};
|
||||
int compare_doubles(const void* p, const void* q)
|
||||
{
|
||||
// https://wikiless.esmailelbob.xyz/wiki/Qsort?lang=en
|
||||
|
@ -454,7 +418,7 @@ int compare_doubles(const void* p, const void* q)
|
|||
|
||||
return 0;
|
||||
}
|
||||
ci get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed)
|
||||
struct c_i get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed)
|
||||
{
|
||||
int n = 100 * 1000;
|
||||
double* samples_array = malloc(n * sizeof(double));
|
||||
|
@ -463,7 +427,7 @@ ci get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed)
|
|||
}
|
||||
qsort(samples_array, n, sizeof(double), compare_doubles);
|
||||
|
||||
ci result = {
|
||||
struct c_i result = {
|
||||
.low = samples_array[5000],
|
||||
.high = samples_array[94999],
|
||||
};
|
||||
|
@ -472,54 +436,47 @@ ci get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed)
|
|||
return result;
|
||||
}
|
||||
|
||||
// # Small algebra manipulations
|
||||
|
||||
// here I discover named structs,
|
||||
// which means that I don't have to keep typing
|
||||
// struct blah all the time.
|
||||
typedef struct normal_params_t {
|
||||
// Do algebra over lognormals and normals
|
||||
struct normal_parameters {
|
||||
double mean;
|
||||
double std;
|
||||
} normal_params;
|
||||
};
|
||||
|
||||
normal_params algebra_sum_normals(normal_params a, normal_params b)
|
||||
struct lognormal_parameters {
|
||||
double logmean;
|
||||
double logstd;
|
||||
};
|
||||
|
||||
struct normal_parameters algebra_sum_normals(struct normal_parameters a, struct normal_parameters b)
|
||||
{
|
||||
normal_params result = {
|
||||
struct normal_parameters result = {
|
||||
.mean = a.mean + b.mean,
|
||||
.std = sqrt((a.std * a.std) + (b.std * b.std)),
|
||||
};
|
||||
return result;
|
||||
}
|
||||
|
||||
typedef struct lognormal_params_t {
|
||||
double logmean;
|
||||
double logstd;
|
||||
} lognormal_params;
|
||||
|
||||
lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params b)
|
||||
struct normal_parameters algebra_shift_normal(struct normal_parameters a, double shift)
|
||||
{
|
||||
lognormal_params result = {
|
||||
struct normal_parameters result = {
|
||||
.mean = a.mean + shift,
|
||||
.std = a.std,
|
||||
};
|
||||
return result;
|
||||
}
|
||||
|
||||
struct lognormal_parameters algebra_product_lognormals(struct lognormal_parameters a, struct lognormal_parameters b)
|
||||
{
|
||||
struct lognormal_parameters result = {
|
||||
.logmean = a.logmean + b.logmean,
|
||||
.logstd = sqrt((a.logstd * a.logstd) + (b.logstd * b.logstd)),
|
||||
};
|
||||
return result;
|
||||
}
|
||||
|
||||
lognormal_params convert_ci_to_lognormal_params(ci x)
|
||||
struct lognormal_parameters algebra_scale_lognormal(struct lognormal_parameters a, double k)
|
||||
{
|
||||
double loghigh = logf(x.high);
|
||||
double loglow = logf(x.low);
|
||||
double logmean = (loghigh + loglow) / 2.0;
|
||||
double logstd = (loghigh - loglow) / (2.0 * NORMAL90CONFIDENCE);
|
||||
lognormal_params result = { .logmean = logmean, .logstd = logstd };
|
||||
return result;
|
||||
}
|
||||
|
||||
ci convert_lognormal_params_to_ci(lognormal_params y)
|
||||
{
|
||||
double h = y.logstd * NORMAL90CONFIDENCE;
|
||||
double loghigh = y.logmean + h;
|
||||
double loglow = y.logmean - h;
|
||||
ci result = { .low = exp(loglow), .high = exp(loghigh) };
|
||||
struct lognormal_parameters result = {
|
||||
.logmean = a.logmean + k,
|
||||
.logstd = a.logstd,
|
||||
};
|
||||
return result;
|
||||
}
|
||||
|
|
26
squiggle.h
26
squiggle.h
|
@ -52,30 +52,10 @@ struct box sampler_cdf_double(double cdf(double), uint64_t* seed);
|
|||
struct box sampler_cdf_box(struct box cdf(double), uint64_t* seed);
|
||||
|
||||
// Get 90% confidence interval
|
||||
typedef struct ci_t {
|
||||
struct c_i {
|
||||
float low;
|
||||
float high;
|
||||
} ci;
|
||||
ci get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed);
|
||||
|
||||
// small algebra manipulations
|
||||
|
||||
typedef struct normal_params_t {
|
||||
double mean;
|
||||
double std;
|
||||
} normal_params;
|
||||
normal_params algebra_sum_normals(normal_params a, normal_params b);
|
||||
|
||||
typedef struct lognormal_params_t {
|
||||
double logmean;
|
||||
double logstd;
|
||||
} lognormal_params;
|
||||
lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params b);
|
||||
|
||||
|
||||
lognormal_params convert_ci_to_lognormal_params(ci x);
|
||||
|
||||
|
||||
ci convert_lognormal_params_to_ci(lognormal_params y);
|
||||
};
|
||||
struct c_i get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed);
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue
Block a user