2023-07-15 22:59:27 +00:00
|
|
|
#include <limits.h> // INT_MAX
|
2023-07-16 10:09:58 +00:00
|
|
|
#include <stdint.h>
|
|
|
|
#include <stdlib.h>
|
2023-07-16 10:26:55 +00:00
|
|
|
#include <float.h> // FLT_MAX, FLT_MIN
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <math.h> // erf, sqrt
|
2023-07-16 11:02:11 +00:00
|
|
|
#include <time.h>
|
2023-07-16 10:26:55 +00:00
|
|
|
|
|
|
|
#define EXIT_ON_ERROR 0
|
2023-07-15 22:59:27 +00:00
|
|
|
|
2023-07-15 22:23:59 +00:00
|
|
|
// Errors
|
2023-07-16 10:26:55 +00:00
|
|
|
// [ ] to do: reuse more informative printing from build-your-own-lisp?
|
|
|
|
// Another option could be to exit on error. Maybe let the user decide?
|
2023-07-15 22:23:59 +00:00
|
|
|
// Result container for a computation that may fail.
// Check `empty` before reading `content`.
struct box {
    int empty;        // nonzero: no value was computed; 0: `content` is valid
    float content;    // the computed value (inverse_cdf writes it on success)
    char * error_msg; // failure description (inverse_cdf writes it on its error paths)
};
|
|
|
|
|
|
|
|
// Example cdf
|
2023-07-16 10:09:58 +00:00
|
|
|
// CDF of the uniform distribution on [0, 1].
// Returns 0 for x < 0, 1 for x > 1, and x itself otherwise.
float cdf_uniform_0_1(float x)
{
    if (x < 0) {
        return 0;
    }
    if (x > 1) {
        return 1;
    }
    return x;
}
|
|
|
|
|
2023-07-16 10:09:58 +00:00
|
|
|
// Example CDF supported on [0, 1] with F(x) = x^2 inside the interval.
// Returns 0 for x < 0 and 1 for x > 1.
float cdf_squared_0_1(float x)
{
    if (x < 0) {
        return 0;
    }
    if (x > 1) {
        return 1;
    }
    return x * x;
}
|
|
|
|
|
2023-07-16 10:09:58 +00:00
|
|
|
float cdf_normal_0_1(float x)
|
|
|
|
{
|
2023-07-16 11:02:11 +00:00
|
|
|
// float mean = 0;
|
|
|
|
// float std = 1;
|
|
|
|
return 0.5 * (1 + erf((x - 0) / (1 * sqrt(2)))); // erf from math.h
|
2023-07-16 09:08:59 +00:00
|
|
|
}
|
|
|
|
|
2023-07-15 22:23:59 +00:00
|
|
|
// Inverse cdf
|
2023-07-16 10:09:58 +00:00
|
|
|
struct box inverse_cdf(float cdf(float), float p)
|
|
|
|
{
|
|
|
|
// given a cdf: [-Inf, Inf] => [0,1]
|
2023-07-16 10:26:55 +00:00
|
|
|
// returns a box with either
|
|
|
|
// x such that cdf(x) = p
|
|
|
|
// or an error
|
|
|
|
// if EXIT_ON_ERROR is set to 1, it exits instead of providing an error
|
2023-07-16 10:09:58 +00:00
|
|
|
|
|
|
|
struct box result;
|
|
|
|
float low = -1.0;
|
|
|
|
float high = 1.0;
|
|
|
|
|
|
|
|
// 1. Make sure that cdf(low) < p < cdf(high)
|
|
|
|
int interval_found = 0;
|
|
|
|
while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) {
|
|
|
|
// ^ Using FLT_MIN and FLT_MAX is overkill
|
|
|
|
// but it's also the *correct* thing to do.
|
|
|
|
|
|
|
|
int low_condition = (cdf(low) < p);
|
|
|
|
int high_condition = (p < cdf(high));
|
|
|
|
if (low_condition && high_condition) {
|
|
|
|
interval_found = 1;
|
|
|
|
} else if (!low_condition) {
|
|
|
|
low = low * 2;
|
|
|
|
} else if (!high_condition) {
|
|
|
|
high = high * 2;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-07-16 10:26:55 +00:00
|
|
|
if (!interval_found) {
|
|
|
|
if(EXIT_ON_ERROR){
|
|
|
|
printf("Interval containing the target value not found, in function inverse_cdf, in %s (%d)", __FILE__, __LINE__);
|
|
|
|
exit(1);
|
|
|
|
}else{
|
|
|
|
char error_msg[200];
|
|
|
|
snprintf(error_msg, 200, "Interval containing the target value not found in function inverse_cdf, in %s (%d)", __FILE__, __LINE__);
|
|
|
|
result.empty = 1;
|
|
|
|
result.error_msg = error_msg;
|
|
|
|
return result;
|
|
|
|
}
|
2023-07-16 10:09:58 +00:00
|
|
|
} else {
|
|
|
|
|
|
|
|
int convergence_condition = 0;
|
|
|
|
int count = 0;
|
|
|
|
while (!convergence_condition && (count < (INT_MAX / 2))) {
|
|
|
|
float mid = (high + low) / 2;
|
|
|
|
int mid_not_new = (mid == low) || (mid == high);
|
2023-07-16 11:02:11 +00:00
|
|
|
// float width = high - low;
|
2023-07-16 10:09:58 +00:00
|
|
|
if (mid_not_new) {
|
2023-07-16 11:02:11 +00:00
|
|
|
// if ((width < 1e-8) || mid_not_new){
|
2023-07-16 10:09:58 +00:00
|
|
|
convergence_condition = 1;
|
|
|
|
} else {
|
|
|
|
float mid_sign = cdf(mid) - p;
|
|
|
|
if (mid_sign < 0) {
|
|
|
|
low = mid;
|
|
|
|
} else if (mid_sign > 0) {
|
|
|
|
high = mid;
|
|
|
|
} else if (mid_sign == 0) {
|
|
|
|
low = mid;
|
|
|
|
high = mid;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (convergence_condition) {
|
|
|
|
result.content = low;
|
|
|
|
result.empty = 0;
|
|
|
|
} else {
|
2023-07-16 10:26:55 +00:00
|
|
|
if(EXIT_ON_ERROR){
|
|
|
|
printf("Search process did not converge, in function inverse_cdf, in %s (%d)", __FILE__, __LINE__);
|
|
|
|
exit(1);
|
|
|
|
}else{
|
|
|
|
char error_msg[200];
|
|
|
|
snprintf(error_msg, 200, "Search process did not converge, in function inverse_cdf, in %s (%d)", __FILE__, __LINE__);
|
|
|
|
result.empty = 1;
|
|
|
|
result.error_msg = error_msg;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
result.empty = 1;
|
|
|
|
}
|
2023-07-16 10:09:58 +00:00
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
2023-07-15 22:23:59 +00:00
|
|
|
}
|
|
|
|
|
2023-07-16 09:08:59 +00:00
|
|
|
// Get random number between 0 and 1
|
2023-07-16 10:09:58 +00:00
|
|
|
// 32-bit xorshift pseudo-random number generator.
// Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs".
// See <https://stackoverflow.com/questions/53886131/how-does-xorshift32-works>
// and <https://en.wikipedia.org/wiki/Xorshift>.
// Also some drama: <https://www.pcg-random.org/posts/on-vignas-pcg-critique.html>, <https://prng.di.unimi.it/>
//
// Reads the current state from *seed, advances it, stores the new state
// back into *seed and returns that same value.
// A state of 0 maps to 0, so the seed must be nonzero to get varying output.
uint32_t xorshift32(uint32_t* seed)
{
    uint32_t x = *seed;
    x ^= x << 13;
    x ^= x >> 17;
    x ^= x << 5;
    *seed = x;
    return x;
}
|
|
|
|
|
|
|
|
// Distribution & sampling functions
|
2023-07-16 10:09:58 +00:00
|
|
|
// Returns a pseudo-random float in [0, 1), advancing *seed via xorshift32.
//
// (float)UINT32_MAX rounds up to 2^32, and so do the largest 32-bit
// generator outputs, so the raw quotient can come out as exactly 1.0f.
// A probability of exactly 1 cannot be inverted for a cdf that never
// exceeds 1, so that edge case is clamped to the largest float below 1.
// Every other value is returned unchanged.
float rand_0_to_1(uint32_t* seed)
{
    float r = ((float)xorshift32(seed)) / ((float)UINT32_MAX);
    if (r >= 1.0f) {
        r = 1.0f - FLT_EPSILON / 2.0f; // largest float strictly below 1
    }
    return r;
}
|
|
|
|
|
2023-07-16 10:26:55 +00:00
|
|
|
// Sampler based on inverse cdf
|
2023-07-16 10:09:58 +00:00
|
|
|
// Draws one sample from the distribution described by `cdf`:
// takes p from rand_0_to_1(seed) and returns inverse_cdf(cdf, p).
// The returned box is empty when inverse_cdf reports an error.
struct box sampler(float cdf(float), uint32_t* seed)
{
    float p = rand_0_to_1(seed);
    return inverse_cdf(cdf, p);
}
|
|
|
|
|
2023-07-16 11:02:11 +00:00
|
|
|
// For comparison, raw sampler
|
|
|
|
const float PI = 3.14159265358979323846;

// Direct normal(0,1) sampler using the Box-Muller transform:
//   z = sqrt(-2 * ln(u1)) * sin(2 * pi * u2)
// with u1 and u2 drawn from rand_0_to_1, so each call advances *seed twice.
// NOTE(review): u1 == 0 would make log(u1) -infinity; confirm that
// rand_0_to_1 cannot return 0 for the seeds in use.
float sampler_normal_0_1(uint32_t* seed)
{
    float u1 = rand_0_to_1(seed);
    float u2 = rand_0_to_1(seed);
    float z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2);
    return z;
}
|
|
|
|
|
2023-07-16 10:26:55 +00:00
|
|
|
// to do: add beta.
|
|
|
|
// for the cdf, use this incomplete beta function implementation, based on continued fractions:
|
2023-07-16 10:09:41 +00:00
|
|
|
// <https://codeplea.com/incomplete-beta-function-c>
|
|
|
|
// <https://github.com/codeplea/incbeta>
|
2023-07-15 22:23:59 +00:00
|
|
|
|
2023-07-16 10:09:58 +00:00
|
|
|
int main()
|
|
|
|
{
|
|
|
|
|
2023-07-16 10:26:55 +00:00
|
|
|
// Get the inverse cdf of a [0,1] uniform distribution at 0.5
|
2023-07-16 10:09:58 +00:00
|
|
|
struct box result_1 = inverse_cdf(cdf_uniform_0_1, 0.5);
|
|
|
|
char* name_1 = "cdf_uniform_0_1";
|
|
|
|
if (result_1.empty) {
|
|
|
|
printf("Inverse for %s not calculated\n", name_1);
|
|
|
|
exit(1);
|
|
|
|
} else {
|
|
|
|
printf("Inverse of %s at %f is: %f\n", name_1, 0.5, result_1.content);
|
|
|
|
}
|
|
|
|
|
2023-07-16 10:26:55 +00:00
|
|
|
// Get the inverse cdf of a [0,1] squared distribution at 0.5
|
2023-07-16 10:09:58 +00:00
|
|
|
struct box result_2 = inverse_cdf(cdf_squared_0_1, 0.5);
|
|
|
|
char* name_2 = "cdf_squared_0_1";
|
|
|
|
if (result_2.empty) {
|
|
|
|
printf("Inverse for %s not calculated\n", name_2);
|
|
|
|
exit(1);
|
|
|
|
} else {
|
|
|
|
printf("Inverse of %s at %f is: %f\n", name_2, 0.5, result_2.content);
|
|
|
|
}
|
|
|
|
|
2023-07-16 10:26:55 +00:00
|
|
|
// Get the inverse of a normal(0,1) cdf distribution
|
2023-07-16 10:09:58 +00:00
|
|
|
struct box result_3 = inverse_cdf(cdf_normal_0_1, 0.5);
|
|
|
|
char* name_3 = "cdf_normal_0_1";
|
|
|
|
if (result_3.empty) {
|
|
|
|
printf("Inverse for %s not calculated\n", name_3);
|
|
|
|
exit(1);
|
|
|
|
} else {
|
|
|
|
printf("Inverse of %s at %f is: %f\n", name_3, 0.5, result_3.content);
|
|
|
|
}
|
|
|
|
|
2023-07-16 10:26:55 +00:00
|
|
|
// Use the sampler on a normal(0,1)
|
2023-07-16 10:09:58 +00:00
|
|
|
// set randomness seed
|
|
|
|
uint32_t* seed = malloc(sizeof(uint32_t));
|
|
|
|
*seed = 1000; // xorshift can't start with 0
|
2023-07-16 11:02:11 +00:00
|
|
|
int n = 1000000;
|
2023-07-16 10:09:58 +00:00
|
|
|
|
|
|
|
printf("\n\nGetting some samples from %s:\n", name_3);
|
2023-07-16 11:02:11 +00:00
|
|
|
clock_t begin = clock();
|
|
|
|
for (int i = 0; i < n; i++) {
|
2023-07-16 10:09:58 +00:00
|
|
|
struct box sample = sampler(cdf_normal_0_1, seed);
|
|
|
|
if (sample.empty) {
|
|
|
|
printf("Error in sampler function");
|
|
|
|
} else {
|
2023-07-16 11:02:11 +00:00
|
|
|
// printf("%f\n", sample.content);
|
2023-07-16 10:09:58 +00:00
|
|
|
}
|
|
|
|
}
|
2023-07-16 11:02:11 +00:00
|
|
|
clock_t end = clock();
|
|
|
|
double time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
|
|
|
|
printf("Time spent: %f", time_spent);
|
|
|
|
|
|
|
|
// Get some normal samples using the previous method.
|
|
|
|
clock_t begin_2 = clock();
|
|
|
|
printf("\n\nGetting some samples from sampler_normal_0_1\n");
|
|
|
|
for (int i = 0; i < n; i++) {
|
|
|
|
float normal_sample = sampler_normal_0_1(seed);
|
|
|
|
// printf("%f\n", normal_sample);
|
|
|
|
}
|
|
|
|
clock_t end_2 = clock();
|
|
|
|
double time_spent_2 = (double)(end_2 - begin_2) / CLOCKS_PER_SEC;
|
|
|
|
printf("Time spent: %f", time_spent_2);
|
|
|
|
|
2023-07-16 10:09:58 +00:00
|
|
|
return 0;
|
2023-07-15 21:26:48 +00:00
|
|
|
}
|