add dangerous beta sampler.

This commit is contained in:
NunoSempere 2023-07-16 13:57:02 +02:00
parent 2acf129ef2
commit e94774ae3a
2 changed files with 143 additions and 11 deletions

Binary file not shown.

View File

@ -9,8 +9,6 @@
#define EXIT_ON_ERROR 0 #define EXIT_ON_ERROR 0
// Errors // Errors
// [ ] to do: reuse more informative printing from build-your-own-lisp?
// Another option could be to exit on error. Maybe let the user decide?
struct box { struct box {
int empty; int empty;
float content; float content;
@ -42,9 +40,9 @@ float cdf_squared_0_1(float x)
float cdf_normal_0_1(float x) float cdf_normal_0_1(float x)
{ {
// float mean = 0; float mean = 0;
// float std = 1; float std = 1;
return 0.5 * (1 + erf((x - 0) / (1 * sqrt(2)))); // erf from math.h return 0.5 * (1 + erf((x - mean) / (std * sqrt(2)))); // erf from math.h
} }
// Inverse cdf // Inverse cdf
@ -126,7 +124,6 @@ struct box inverse_cdf(float cdf(float), float p)
result.error_msg = error_msg; result.error_msg = error_msg;
return result; return result;
} }
result.empty = 1;
} }
return result; return result;
@ -178,6 +175,141 @@ float sampler_normal_0_1(uint32_t* seed)
// <https://codeplea.com/incomplete-beta-function-c> // <https://codeplea.com/incomplete-beta-function-c>
// <https://github.com/codeplea/incbeta> // <https://github.com/codeplea/incbeta>
#define STOP 1.0e-8
#define TINY 1.0e-30
struct box incbeta(float a, float b, float x) {
// Descended from <https://github.com/codeplea/incbeta/blob/master/incbeta.c>,
// but modified to return a box struct and floats instead of doubles.
// [x] to do: add attribution in README
// Original code under this license:
/*
* zlib License
*
* Regularized Incomplete Beta Function
*
* Copyright (c) 2016, 2017 Lewis Van Winkle
* http://CodePlea.com
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgement in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
struct box result;
if (x < 0.0 || x > 1.0){
if(EXIT_ON_ERROR){
printf("x out of bounds, in function incbeta, in %s (%d)", __FILE__, __LINE__);
exit(1);
}else{
char error_msg[200];
snprintf(error_msg, 200, "x out of bounds, in function incbeta, in %s (%d)", __FILE__, __LINE__);
result.empty = 1;
result.error_msg = error_msg;
return result;
}
}
/*The continued fraction converges nicely for x < (a+1)/(a+b+2)*/
if (x > (a+1.0)/(a+b+2.0)) {
struct box symmetric_incbeta = incbeta(b,a,1.0-x);
if(symmetric_incbeta.empty){
return symmetric_incbeta; // propagate error
}else{
result.empty = 0;
result.content = 1-symmetric_incbeta.content;
return result;
}
}
/*Find the first part before the continued fraction.*/
const float lbeta_ab = lgamma(a)+lgamma(b)-lgamma(a+b);
const float front = exp(log(x)*a+log(1.0-x)*b-lbeta_ab) / a;
/*Use Lentz's algorithm to evaluate the continued fraction.*/
float f = 1.0, c = 1.0, d = 0.0;
int i, m;
for (i = 0; i <= 200; ++i) {
m = i/2;
float numerator;
if (i == 0) {
numerator = 1.0; /*First numerator is 1.0.*/
} else if (i % 2 == 0) {
numerator = (m*(b-m)*x)/((a+2.0*m-1.0)*(a+2.0*m)); /*Even term.*/
} else {
numerator = -((a+m)*(a+b+m)*x)/((a+2.0*m)*(a+2.0*m+1)); /*Odd term.*/
}
/*Do an iteration of Lentz's algorithm.*/
d = 1.0 + numerator * d;
if (fabs(d) < TINY) d = TINY;
d = 1.0 / d;
c = 1.0 + numerator / c;
if (fabs(c) < TINY) c = TINY;
const float cd = c*d;
f *= cd;
/*Check for stop.*/
if (fabs(1.0-cd) < STOP) {
result.content = front * (f-1.0);
result.empty = 0;
return result;
}
}
if(EXIT_ON_ERROR){
printf("More loops needed, did not converge, in function incbeta, in %s (%d)", __FILE__, __LINE__);
exit(1);
}else{
char error_msg[200];
snprintf(error_msg, 200, "More loops needed, did not converge, in function incbeta, in %s (%d)", __FILE__, __LINE__);
result.empty = 1;
result.error_msg = error_msg;
return result;
}
}
struct box cdf_beta(float x){
float successes = 1, failures = (2023-1945);
return incbeta(successes, failures, x);
}
float cdf_dangerous_beta(float x){
// So the thing is, this works
// But it will propagate through the code
// So it doesn't feel like a great architectural choice;
// I prefer my choice of setting a variable which will determine whether to exit on failure or not.
float successes = 1, failures = (2023-1945);
struct box result = incbeta(successes, failures, x);
if(result.empty){
printf("%s", result.error_msg);
exit(1);
}else{
return result.content;
}
}
struct box dangerous_beta_sampler(uint32_t* seed)
// Think through what to do to feed the incbeta box into
{
return sampler(cdf_dangerous_beta, seed);
}
int main() int main()
{ {
@ -215,7 +347,7 @@ int main()
// set randomness seed // set randomness seed
uint32_t* seed = malloc(sizeof(uint32_t)); uint32_t* seed = malloc(sizeof(uint32_t));
*seed = 1000; // xorshift can't start with 0 *seed = 1000; // xorshift can't start with 0
int n = 1000000; int n = 100;
printf("\n\nGetting some samples from %s:\n", name_3); printf("\n\nGetting some samples from %s:\n", name_3);
clock_t begin = clock(); clock_t begin = clock();
@ -224,11 +356,11 @@ int main()
if (sample.empty) { if (sample.empty) {
printf("Error in sampler function"); printf("Error in sampler function");
} else { } else {
// printf("%f\n", sample.content); printf("%f\n", sample.content);
} }
} }
clock_t end = clock(); clock_t end = clock();
double time_spent = (double)(end - begin) / CLOCKS_PER_SEC; float time_spent = (float)(end - begin) / CLOCKS_PER_SEC;
printf("Time spent: %f", time_spent); printf("Time spent: %f", time_spent);
// Get some normal samples using the previous method. // Get some normal samples using the previous method.
@ -236,10 +368,10 @@ int main()
printf("\n\nGetting some samples from sampler_normal_0_1\n"); printf("\n\nGetting some samples from sampler_normal_0_1\n");
for (int i = 0; i < n; i++) { for (int i = 0; i < n; i++) {
float normal_sample = sampler_normal_0_1(seed); float normal_sample = sampler_normal_0_1(seed);
// printf("%f\n", normal_sample); printf("%f\n", normal_sample);
} }
clock_t end_2 = clock(); clock_t end_2 = clock();
double time_spent_2 = (double)(end_2 - begin_2) / CLOCKS_PER_SEC; float time_spent_2 = (float)(end_2 - begin_2) / CLOCKS_PER_SEC;
printf("Time spent: %f", time_spent_2); printf("Time spent: %f", time_spent_2);
return 0; return 0;