forked from personal/squiggle.c
add example of getting confidence interval & misc changes
This commit is contained in:
parent
d531d5571f
commit
e053a726ee
22
README.md
22
README.md
|
@ -11,7 +11,8 @@ A self-contained C99 library that provides a subset of [Squiggle](https://www.sq
|
||||||
- Because it can fit in my head
|
- Because it can fit in my head
|
||||||
- Because if you can implement something in C, you can implement it anywhere else
|
- Because if you can implement something in C, you can implement it anywhere else
|
||||||
- Because it can be made faster if need be
|
- Because it can be made faster if need be
|
||||||
- e.g., with a multi-threading library like OpenMP, or by adding more algorithmic complexity
|
- e.g., with a multi-threading library like OpenMP,
|
||||||
|
- or by implementing faster but more complex algorithms
|
||||||
- or more simply, by inlining the sampling functions (adding an `inline` directive before their function declaration)
|
- or more simply, by inlining the sampling functions (adding an `inline` directive before their function declaration)
|
||||||
- **Because there are few abstractions between it and machine code** (C => assembly => machine code with gcc, or C => machine code, with tcc), leading to fewer errors beyond the programmer's control.
|
- **Because there are few abstractions between it and machine code** (C => assembly => machine code with gcc, or C => machine code, with tcc), leading to fewer errors beyond the programmer's control.
|
||||||
|
|
||||||
|
@ -184,16 +185,11 @@ int main(){
|
||||||
|
|
||||||
## To do list
|
## To do list
|
||||||
|
|
||||||
- [ ] Test summary statistics for each of the distributions.
|
- [ ] Pontificate about lognormal tests
|
||||||
- [ ] Have some more complicated & realistic example
|
- [ ] Have some more complicated & realistic example
|
||||||
- [ ] Add summarization functions: 90% ci (or all c.i.?)
|
- [ ] Add summarization functions: 90% ci (or all c.i.?)
|
||||||
- [ ] Systematize references
|
- [ ] Systematize references
|
||||||
- [ ] Publish online
|
- [ ] Publish online
|
||||||
- [ ] Add efficient sampling from a beta distribution
|
|
||||||
- https://dl.acm.org/doi/10.1145/358407.358414
|
|
||||||
- https://link.springer.com/article/10.1007/bf02293108
|
|
||||||
- https://stats.stackexchange.com/questions/502146/how-does-numpy-generate-samples-from-a-beta-distribution
|
|
||||||
- https://github.com/numpy/numpy/blob/5cae51e794d69dd553104099305e9f92db237c53/numpy/random/src/distributions/distributions.c
|
|
||||||
- [ ] Support all distribution functions in <https://www.squiggle-language.com/docs/Api/Dist>
|
- [ ] Support all distribution functions in <https://www.squiggle-language.com/docs/Api/Dist>
|
||||||
- [ ] Support all distribution functions in <https://www.squiggle-language.com/docs/Api/Dist>, and do so efficiently
|
- [ ] Support all distribution functions in <https://www.squiggle-language.com/docs/Api/Dist>, and do so efficiently
|
||||||
|
|
||||||
|
@ -224,3 +220,15 @@ int main(){
|
||||||
- https://dl.acm.org/doi/pdf/10.1145/358407.358414
|
- https://dl.acm.org/doi/pdf/10.1145/358407.358414
|
||||||
- [x] Explain correlated samples
|
- [x] Explain correlated samples
|
||||||
- [-] ~~Add tests in Stan?~~
|
- [-] ~~Add tests in Stan?~~
|
||||||
|
- [x] Test summary statistics for each of the distributions.
|
||||||
|
- [x] For uniform
|
||||||
|
- [x] For normal
|
||||||
|
- [x] For lognormal
|
||||||
|
- [x] For lognormal (to syntax)
|
||||||
|
- [x] For beta distribution
|
||||||
|
- [x] Clarify gamma/standard gamma
|
||||||
|
- [x] Add efficient sampling from a beta distribution
|
||||||
|
- https://dl.acm.org/doi/10.1145/358407.358414
|
||||||
|
- https://link.springer.com/article/10.1007/bf02293108
|
||||||
|
- https://stats.stackexchange.com/questions/502146/how-does-numpy-generate-samples-from-a-beta-distribution
|
||||||
|
- https://github.com/numpy/numpy/blob/5cae51e794d69dd553104099305e9f92db237c53/numpy/random/src/distributions/distributions.c
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -87,9 +87,9 @@ int main()
|
||||||
printf("\nGetting some samples from sample_unit_normal\n");
|
printf("\nGetting some samples from sample_unit_normal\n");
|
||||||
|
|
||||||
clock_t begin_2 = clock();
|
clock_t begin_2 = clock();
|
||||||
|
double* normal_samples = malloc(NUM_SAMPLES * sizeof(double));
|
||||||
for (int i = 0; i < NUM_SAMPLES; i++) {
|
for (int i = 0; i < NUM_SAMPLES; i++) {
|
||||||
double normal_sample = sample_unit_normal(seed);
|
normal_samples[i] = sample_unit_normal(seed);
|
||||||
// printf("%f\n", normal_sample);
|
// printf("%f\n", normal_sample);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -43,10 +43,3 @@ int main()
|
||||||
free(seed);
|
free(seed);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
Aggregation mechanisms:
|
|
||||||
- Quantiles (requires a sort)
|
|
||||||
- Sum
|
|
||||||
- Average
|
|
||||||
- Std
|
|
||||||
*/
|
|
||||||
|
|
BIN
examples/07_ci_beta/example
Executable file
BIN
examples/07_ci_beta/example
Executable file
Binary file not shown.
21
examples/07_ci_beta/example.c
Normal file
21
examples/07_ci_beta/example.c
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
#include "../../squiggle.h"
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
// Estimate functions
|
||||||
|
double beta_1_2_sampler(uint64_t* seed){
|
||||||
|
return sample_beta(1, 2.0, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
// set randomness seed
|
||||||
|
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
|
*seed = 1000; // xorshift can't start with 0
|
||||||
|
|
||||||
|
struct c_i beta_1_2_ci_90 = get_90_confidence_interval(beta_1_2_sampler, seed);
|
||||||
|
printf("90%% confidence interval of beta(1,2) is [%f, %f]\n", beta_1_2_ci_90.low, beta_1_2_ci_90.high);
|
||||||
|
|
||||||
|
free(seed);
|
||||||
|
}
|
53
examples/07_ci_beta/makefile
Normal file
53
examples/07_ci_beta/makefile
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
# Interface:
|
||||||
|
# make
|
||||||
|
# make build
|
||||||
|
# make format
|
||||||
|
# make run
|
||||||
|
|
||||||
|
# Compiler
|
||||||
|
CC=gcc
|
||||||
|
# CC=tcc # <= faster compilation
|
||||||
|
|
||||||
|
# Main file
|
||||||
|
SRC=example.c ../../squiggle.c
|
||||||
|
OUTPUT=example
|
||||||
|
|
||||||
|
## Dependencies
|
||||||
|
MATH=-lm
|
||||||
|
|
||||||
|
## Flags
|
||||||
|
DEBUG= #'-g'
|
||||||
|
STANDARD=-std=c99
|
||||||
|
WARNINGS=-Wall
|
||||||
|
OPTIMIZED=-O3 #-Ofast
|
||||||
|
# OPENMP=-fopenmp
|
||||||
|
|
||||||
|
## Formatter
|
||||||
|
STYLE_BLUEPRINT=webkit
|
||||||
|
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
|
||||||
|
|
||||||
|
## make build
|
||||||
|
build: $(SRC)
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)
|
||||||
|
|
||||||
|
format: $(SRC)
|
||||||
|
$(FORMATTER) $(SRC)
|
||||||
|
|
||||||
|
run: $(SRC) $(OUTPUT)
|
||||||
|
OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
|
||||||
|
|
||||||
|
time-linux:
|
||||||
|
@echo "Requires /bin/time, found on GNU/Linux systems" && echo
|
||||||
|
|
||||||
|
@echo "Running 100x and taking avg time $(OUTPUT)"
|
||||||
|
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
|
||||||
|
|
||||||
|
## Profiling
|
||||||
|
|
||||||
|
profile-linux:
|
||||||
|
echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
|
||||||
|
echo "Must be run as sudo"
|
||||||
|
$(CC) $(SRC) $(MATH) -o $(OUTPUT)
|
||||||
|
sudo perf record ./$(OUTPUT)
|
||||||
|
sudo perf report
|
||||||
|
rm perf.data
|
1
makefile
1
makefile
|
@ -11,6 +11,7 @@ all:
|
||||||
cd examples/04_sample_from_cdf_simple && make && echo
|
cd examples/04_sample_from_cdf_simple && make && echo
|
||||||
cd examples/05_sample_from_cdf_beta && make && echo
|
cd examples/05_sample_from_cdf_beta && make && echo
|
||||||
cd examples/06_gamma_beta && make && echo
|
cd examples/06_gamma_beta && make && echo
|
||||||
|
cd examples/07_ci_beta && make && echo
|
||||||
|
|
||||||
format: squiggle.c squiggle.h
|
format: squiggle.c squiggle.h
|
||||||
$(FORMATTER) squiggle.c
|
$(FORMATTER) squiggle.c
|
||||||
|
|
43
squiggle.c
43
squiggle.c
|
@ -94,6 +94,12 @@ double sample_gamma(double alpha, uint64_t* seed)
|
||||||
// A Simple Method for Generating Gamma Variables, Marsaglia and Wan Tsang, 2001
|
// A Simple Method for Generating Gamma Variables, Marsaglia and Wan Tsang, 2001
|
||||||
// https://dl.acm.org/doi/pdf/10.1145/358407.358414
|
// https://dl.acm.org/doi/pdf/10.1145/358407.358414
|
||||||
// see also the references/ folder
|
// see also the references/ folder
|
||||||
|
// Note that the Wikipedia page for the gamma distribution includes a scaling parameter
|
||||||
|
// k or beta
|
||||||
|
// https://en.wikipedia.org/wiki/Gamma_distribution
|
||||||
|
// such that gamma_k(alpha, k) = k * gamma(alpha)
|
||||||
|
// or gamma_beta(alpha, beta) = gamma(alpha) / beta
|
||||||
|
// So far I have not needed to use this, and thus the second parameter is by default 1.
|
||||||
if (alpha >= 1) {
|
if (alpha >= 1) {
|
||||||
double d, c, x, v, u;
|
double d, c, x, v, u;
|
||||||
d = alpha - 1.0 / 3.0;
|
d = alpha - 1.0 / 3.0;
|
||||||
|
@ -377,6 +383,43 @@ struct box sampler_cdf_double(double cdf(double), uint64_t* seed)
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Get confidence intervals, given a sampler
|
||||||
|
|
||||||
|
struct c_i {
|
||||||
|
float low;
|
||||||
|
float high;
|
||||||
|
};
|
||||||
|
int compare_doubles(const void *p, const void *q) {
|
||||||
|
// https://wikiless.esmailelbob.xyz/wiki/Qsort?lang=en
|
||||||
|
double x = *(const double *)p;
|
||||||
|
double y = *(const double *)q;
|
||||||
|
|
||||||
|
/* Avoid return x - y, which can cause undefined behaviour
|
||||||
|
because of signed integer overflow. */
|
||||||
|
if (x < y)
|
||||||
|
return -1; // Return -1 if you want ascending, 1 if you want descending order.
|
||||||
|
else if (x > y)
|
||||||
|
return 1; // Return 1 if you want ascending, -1 if you want descending order.
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
struct c_i get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed){
|
||||||
|
int n = 100 * 1000;
|
||||||
|
double* samples_array = malloc(n * sizeof(double));
|
||||||
|
for(int i=0; i<n; i++){
|
||||||
|
samples_array[i] = sampler(seed);
|
||||||
|
}
|
||||||
|
qsort(samples_array, n, sizeof(double), compare_doubles);
|
||||||
|
|
||||||
|
struct c_i result = {
|
||||||
|
.low = samples_array[5000],
|
||||||
|
.high =samples_array[94999],
|
||||||
|
};
|
||||||
|
free(samples_array);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
/* Could also define other variations, e.g.,
|
/* Could also define other variations, e.g.,
|
||||||
double sampler_danger(struct box cdf(double), uint64_t* seed)
|
double sampler_danger(struct box cdf(double), uint64_t* seed)
|
||||||
{
|
{
|
||||||
|
|
|
@ -50,4 +50,11 @@ struct box inverse_cdf_box(struct box cdf_box(double), double p);
|
||||||
struct box sampler_cdf_double(double cdf(double), uint64_t* seed);
|
struct box sampler_cdf_double(double cdf(double), uint64_t* seed);
|
||||||
struct box sampler_cdf_box(struct box cdf(double), uint64_t* seed);
|
struct box sampler_cdf_box(struct box cdf(double), uint64_t* seed);
|
||||||
|
|
||||||
|
// Get 90% confidence interval
|
||||||
|
struct c_i {
|
||||||
|
float low;
|
||||||
|
float high;
|
||||||
|
};
|
||||||
|
struct c_i get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue
Block a user