diff --git a/README.md b/README.md index 192d06a..d800382 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,10 @@ A self-contained C99 library that provides a subset of [Squiggle](https://www.sq - Because it will last long - Because it can fit in my head - Because if you can implement something in C, you can implement it anywhere else -- Because it can be made faster if need be, e.g., with a multi-threading library like OpenMP, or by adding more algorithmic complexity -- **Because there are few abstractions between it and the bare metal: C -> assembly = CPU instructions**, leading to fewer errors beyond the programmer's control. +- Because it can be made faster if need be + - e.g., with a multi-threading library like OpenMP, or by adding more algorithmic complexity + - or more simply, by inlining the sampling functions (adding an `inline` directive before their function declaration) +- **Because there are few abstractions between it and machine code** (C => assembly => machine code with gcc, or C => machine code, with tcc), leading to fewer errors beyond the programmer's control. ## Getting started @@ -79,8 +81,13 @@ Behaviour on error can be toggled by the `EXIT_ON_ERROR` variable. This library - [ ] Have some more complicated & realistic example - [ ] Add summarization functions: 90% ci (or all c.i.?) +- [ ] Systematize references - [ ] Publish online - [ ] Add efficient sampling from a beta distribution + - https://dl.acm.org/doi/10.1145/358407.358414 + - https://link.springer.com/article/10.1007/bf02293108 + - https://stats.stackexchange.com/questions/502146/how-does-numpy-generate-samples-from-a-beta-distribution + - https://github.com/numpy/numpy/blob/5cae51e794d69dd553104099305e9f92db237c53/numpy/random/src/distributions/distributions.c - [ ] Support all distribution functions in - [ ] Support all distribution functions in , and do so efficiently @@ -107,3 +114,5 @@ Behaviour on error can be toggled by the `EXIT_ON_ERROR` variable. This library - [x] Explain individual examples - [x] Rename functions to something more self-explanatory, e.g,. `sample_unit_normal`. - [x] Add summarization functions: mean, std +- [x] Add sampling from a gamma distribution + - https://dl.acm.org/doi/pdf/10.1145/358407.358414 diff --git a/examples/05_sample_from_cdf_beta/example b/examples/05_sample_from_cdf_beta/example index b60681a..4bc0554 100755 Binary files a/examples/05_sample_from_cdf_beta/example and b/examples/05_sample_from_cdf_beta/example differ diff --git a/examples/05_sample_from_cdf_beta/makefile b/examples/05_sample_from_cdf_beta/makefile index 30cebab..2580b8c 100644 --- a/examples/05_sample_from_cdf_beta/makefile +++ b/examples/05_sample_from_cdf_beta/makefile @@ -18,12 +18,13 @@ DEPENDENCIES=$(MATH) # OPENMP=-fopenmp ## Flags +VERBOSE=#-v DEBUG= #'-g' STANDARD=-std=c99 ## gnu99 allows for nested functions. EXTENSIONS= #-fnested-functions WARNINGS=-Wall OPTIMIZED=-O3#-Ofast -CFLAGS=$(DEBUG) $(STANDARD) $(EXTENSIONS) $(WARNINGS) $(OPTIMIZED) +CFLAGS=$(VERBOSE) $(DEBUG) $(STANDARD) $(EXTENSIONS) $(WARNINGS) $(OPTIMIZED) ## Formatter STYLE_BLUEPRINT=webkit diff --git a/references/marsaglia-gamma.pdf b/references/marsaglia-gamma.pdf new file mode 100644 index 0000000..d3a6982 Binary files /dev/null and b/references/marsaglia-gamma.pdf differ diff --git a/squiggle.c b/squiggle.c index 11b66f8..c8e30dd 100644 --- a/squiggle.c +++ b/squiggle.c @@ -73,6 +73,42 @@ float sample_to(float low, float high, uint32_t* seed) return sample_lognormal(logmean, logsigma, seed); } +float sample_gamma(float alpha, uint32_t* seed){ + + // A Simple Method for Generating Gamma Variables, Marsaglia and Wan Tsang, 2001 + // https://dl.acm.org/doi/pdf/10.1145/358407.358414 + // see also the references/ folder + if(alpha >=1){ + float d, c, x, v, u; + d = alpha - 1.0/3.0; + c = 1.0/sqrt(9.0 * d); + while(1){ + + do { + x = sample_unit_normal(seed); + v = 1.0 + c * x; + } while(v <= 0.0); + + v = pow(v, 3); + u = sample_unit_uniform(seed); + if( u < 1.0 - 0.0331 * pow(x, 4)){ // Condition 1 + // the 0.0331 doesn't inspire much confidence + // however, this isn't the whole story + // by knowing that Condition 1 implies condition 2 + // we realize that this is just a way of making the algorithm faster + // i.e., of not using the logarithms + return d*v; + } + if(log(u) < 0.5*pow(x,2) + d*(1.0 - v + log(v))){ // Condition 2 + return d*v; + } + } + }else{ + return sample_gamma(1 + alpha, seed) * pow(sample_unit_uniform(seed), 1/alpha); + // see note in p. 371 of https://dl.acm.org/doi/pdf/10.1145/358407.358414 + } +} + // Array helpers float array_sum(float* array, int length) {