forked from personal/squiggle.c
move some functions from scratchpad => squiggle.c, reorg
This commit is contained in:
parent
68e7730f24
commit
ee9ed34287
|
@ -9,7 +9,7 @@ CC=gcc # required for nested functions
|
|||
# CC=tcc # <= faster compilation
|
||||
|
||||
# Main file
|
||||
SRC=scratchpad.c
|
||||
SRC=scratchpad.c ../squiggle.c
|
||||
OUTPUT=./scratchpad
|
||||
|
||||
## Dependencies
|
||||
|
|
Binary file not shown.
|
@ -1,33 +1,13 @@
|
|||
#include <float.h> // FLT_MAX, FLT_MIN
|
||||
#include <limits.h> // INT_MAX
|
||||
#include <math.h> // erf, sqrt
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
// #include <sys/types.h>
|
||||
#include <time.h>
|
||||
#include "../squiggle.h"
|
||||
|
||||
#define EXIT_ON_ERROR 0
|
||||
#define MAX_ERROR_LENGTH 500
|
||||
#define PROCESS_ERROR(...) \
|
||||
do { \
|
||||
if (EXIT_ON_ERROR) { \
|
||||
printf("@, in %s (%d)", __FILE__, __LINE__); \
|
||||
exit(1); \
|
||||
} else { \
|
||||
char error_msg[MAX_ERROR_LENGTH]; \
|
||||
snprintf(error_msg, MAX_ERROR_LENGTH, "@, in %s (%d)", __FILE__, __LINE__); \
|
||||
struct box error = { .empty = 1, .error_msg = error_msg }; \
|
||||
return error; \
|
||||
} \
|
||||
} while (0)
|
||||
#define NUM_SAMPLES 1000000
|
||||
|
||||
struct box {
|
||||
int empty;
|
||||
float content;
|
||||
char* error_msg;
|
||||
};
|
||||
#define STOP_BETA 1.0e-8
|
||||
#define TINY_BETA 1.0e-30
|
||||
|
||||
// Example cdf
|
||||
float cdf_uniform_0_1(float x)
|
||||
|
@ -59,16 +39,10 @@ float cdf_normal_0_1(float x)
|
|||
return 0.5 * (1 + erf((x - mean) / (std * sqrt(2)))); // erf from math.h
|
||||
}
|
||||
|
||||
// [x] to do: add beta.
|
||||
// [x] for the cdf, use this incomplete beta function implementation, based on continuous fractions:
|
||||
// <https://codeplea.com/incomplete-beta-function-c>
|
||||
// <https://github.com/codeplea/incbeta>
|
||||
|
||||
#define STOP_BETA 1.0e-8
|
||||
#define TINY_BETA 1.0e-30
|
||||
struct box incbeta(float a, float b, float x)
|
||||
{
|
||||
// Descended from <https://github.com/codeplea/incbeta/blob/master/incbeta.c>,
|
||||
// <https://codeplea.com/incomplete-beta-function-c>
|
||||
// but modified to return a box struct and floats instead of doubles.
|
||||
// [ ] to do: add attribution in README
|
||||
// Original code under this license:
|
||||
|
@ -174,200 +148,6 @@ struct box cdf_beta(float x)
|
|||
}
|
||||
}
|
||||
|
||||
// Inverse cdf at point
|
||||
// Two versions of this function:
|
||||
// - raw, dealing with cdfs that return floats
|
||||
// - box, dealing with cdfs that return a box.
|
||||
|
||||
// Inverse cdf
|
||||
struct box inverse_cdf_float(float cdf(float), float p)
|
||||
{
|
||||
// given a cdf: [-Inf, Inf] => [0,1]
|
||||
// returns a box with either
|
||||
// x such that cdf(x) = p
|
||||
// or an error
|
||||
// if EXIT_ON_ERROR is set to 1, it exits instead of providing an error
|
||||
|
||||
float low = -1.0;
|
||||
float high = 1.0;
|
||||
|
||||
// 1. Make sure that cdf(low) < p < cdf(high)
|
||||
int interval_found = 0;
|
||||
while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) {
|
||||
// ^ Using FLT_MIN and FLT_MAX is overkill
|
||||
// but it's also the *correct* thing to do.
|
||||
|
||||
int low_condition = (cdf(low) < p);
|
||||
int high_condition = (p < cdf(high));
|
||||
if (low_condition && high_condition) {
|
||||
interval_found = 1;
|
||||
} else if (!low_condition) {
|
||||
low = low * 2;
|
||||
} else if (!high_condition) {
|
||||
high = high * 2;
|
||||
}
|
||||
}
|
||||
|
||||
if (!interval_found) {
|
||||
PROCESS_ERROR("Interval containing the target value not found, in function inverse_cdf");
|
||||
} else {
|
||||
|
||||
int convergence_condition = 0;
|
||||
int count = 0;
|
||||
while (!convergence_condition && (count < (INT_MAX / 2))) {
|
||||
float mid = (high + low) / 2;
|
||||
int mid_not_new = (mid == low) || (mid == high);
|
||||
// float width = high - low;
|
||||
// if ((width < 1e-8) || mid_not_new){
|
||||
if (mid_not_new) {
|
||||
convergence_condition = 1;
|
||||
} else {
|
||||
float mid_sign = cdf(mid) - p;
|
||||
if (mid_sign < 0) {
|
||||
low = mid;
|
||||
} else if (mid_sign > 0) {
|
||||
high = mid;
|
||||
} else if (mid_sign == 0) {
|
||||
low = mid;
|
||||
high = mid;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (convergence_condition) {
|
||||
struct box result = { .empty = 0, .content = low };
|
||||
return result;
|
||||
} else {
|
||||
PROCESS_ERROR("Search process did not converge, in function inverse_cdf");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct box inverse_cdf_box(struct box cdf_box(float), float p)
|
||||
{
|
||||
// given a cdf: [-Inf, Inf] => Box([0,1])
|
||||
// returns a box with either
|
||||
// x such that cdf(x) = p
|
||||
// or an error
|
||||
// if EXIT_ON_ERROR is set to 1, it exits instead of providing an error
|
||||
|
||||
float low = -1.0;
|
||||
float high = 1.0;
|
||||
|
||||
// 1. Make sure that cdf(low) < p < cdf(high)
|
||||
int interval_found = 0;
|
||||
while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) {
|
||||
// ^ Using FLT_MIN and FLT_MAX is overkill
|
||||
// but it's also the *correct* thing to do.
|
||||
struct box cdf_low = cdf_box(low);
|
||||
if (cdf_low.empty) {
|
||||
PROCESS_ERROR(cdf_low.error_msg);
|
||||
}
|
||||
|
||||
struct box cdf_high = cdf_box(high);
|
||||
if (cdf_high.empty) {
|
||||
PROCESS_ERROR(cdf_low.error_msg);
|
||||
}
|
||||
|
||||
int low_condition = (cdf_low.content < p);
|
||||
int high_condition = (p < cdf_high.content);
|
||||
if (low_condition && high_condition) {
|
||||
interval_found = 1;
|
||||
} else if (!low_condition) {
|
||||
low = low * 2;
|
||||
} else if (!high_condition) {
|
||||
high = high * 2;
|
||||
}
|
||||
}
|
||||
|
||||
if (!interval_found) {
|
||||
PROCESS_ERROR("Interval containing the target value not found, in function inverse_cdf");
|
||||
} else {
|
||||
|
||||
int convergence_condition = 0;
|
||||
int count = 0;
|
||||
while (!convergence_condition && (count < (INT_MAX / 2))) {
|
||||
float mid = (high + low) / 2;
|
||||
int mid_not_new = (mid == low) || (mid == high);
|
||||
// float width = high - low;
|
||||
if (mid_not_new) {
|
||||
// if ((width < 1e-8) || mid_not_new){
|
||||
convergence_condition = 1;
|
||||
} else {
|
||||
struct box cdf_mid = cdf_box(mid);
|
||||
if (cdf_mid.empty) {
|
||||
PROCESS_ERROR(cdf_mid.error_msg);
|
||||
}
|
||||
float mid_sign = cdf_mid.content - p;
|
||||
if (mid_sign < 0) {
|
||||
low = mid;
|
||||
} else if (mid_sign > 0) {
|
||||
high = mid;
|
||||
} else if (mid_sign == 0) {
|
||||
low = mid;
|
||||
high = mid;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (convergence_condition) {
|
||||
struct box result = { .empty = 0, .content = low };
|
||||
return result;
|
||||
} else {
|
||||
PROCESS_ERROR("Search process did not converge, in function inverse_cdf");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Some randomness functions for:
|
||||
// - Sampling from a cdf
|
||||
// - Benchmarking against a previous approach, which will be faster, but less general
|
||||
|
||||
// Get random number between 0 and 1
|
||||
uint32_t xorshift32(uint32_t* seed)
|
||||
{
|
||||
// Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
|
||||
// See <https://stackoverflow.com/questions/53886131/how-does-xorshift32-works>
|
||||
// https://en.wikipedia.org/wiki/Xorshift
|
||||
// Also some drama: <https://www.pcg-random.org/posts/on-vignas-pcg-critique.html>, <https://prng.di.unimi.it/>
|
||||
|
||||
uint32_t x = *seed;
|
||||
x ^= x << 13;
|
||||
x ^= x >> 17;
|
||||
x ^= x << 5;
|
||||
return *seed = x;
|
||||
}
|
||||
|
||||
// Distribution & sampling functions
|
||||
float rand_0_to_1(uint32_t* seed)
|
||||
{
|
||||
return ((float)xorshift32(seed)) / ((float)UINT32_MAX);
|
||||
}
|
||||
|
||||
// Sampler based on inverse cdf and randomness function
|
||||
struct box sampler_box_cdf(struct box cdf(float), uint32_t* seed)
|
||||
{
|
||||
float p = rand_0_to_1(seed);
|
||||
struct box result = inverse_cdf_box(cdf, p);
|
||||
return result;
|
||||
}
|
||||
struct box sampler_float_cdf(float cdf(float), uint32_t* seed)
|
||||
{
|
||||
float p = rand_0_to_1(seed);
|
||||
struct box result = inverse_cdf_float(cdf, p);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Comparison point with raw normal sampler
|
||||
const float PI = 3.14159265358979323846;
|
||||
float sampler_normal_0_1(uint32_t* seed)
|
||||
{
|
||||
float u1 = rand_0_to_1(seed);
|
||||
float u2 = rand_0_to_1(seed);
|
||||
float z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2);
|
||||
return z;
|
||||
}
|
||||
|
||||
// Some testers
|
||||
void test_inverse_cdf_float(char* cdf_name, float cdf_float(float))
|
||||
{
|
||||
|
@ -445,12 +225,12 @@ int main()
|
|||
test_and_time_sampler_float("cdf_normal_0_1", cdf_normal_0_1, seed);
|
||||
|
||||
// Get some normal samples using a previous approach
|
||||
printf("\nGetting some samples from sampler_normal_0_1\n");
|
||||
printf("\nGetting some samples from unit_normal\n");
|
||||
|
||||
clock_t begin_2 = clock();
|
||||
|
||||
for (int i = 0; i < NUM_SAMPLES; i++) {
|
||||
float normal_sample = sampler_normal_0_1(seed);
|
||||
float normal_sample = unit_normal(seed);
|
||||
// printf("%f\n", normal_sample);
|
||||
}
|
||||
|
||||
|
@ -460,11 +240,11 @@ int main()
|
|||
|
||||
// Test box sampler
|
||||
test_and_time_sampler_box("cdf_beta", cdf_beta, seed);
|
||||
// Ok, this is slower than python!!
|
||||
// Partly this is because I am using a more general algorithm,
|
||||
// which applies to any cdf
|
||||
// But I am also using really anal convergence conditions.
|
||||
// This could be optimized.
|
||||
// Ok, this is slower than python!!
|
||||
// Partly this is because I am using a more general algorithm,
|
||||
// which applies to any cdf
|
||||
// But I am also using really anal convergence conditions.
|
||||
// This could be optimized.
|
||||
|
||||
free(seed);
|
||||
return 0;
|
||||
|
|
187
squiggle.c
187
squiggle.c
|
@ -1,6 +1,25 @@
|
|||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <float.h>
|
||||
#include <limits.h>
|
||||
#include <time.h>
|
||||
// #include <sys/types.h>
|
||||
#define EXIT_ON_ERROR 0
|
||||
#define MAX_ERROR_LENGTH 500
|
||||
#define PROCESS_ERROR(...) \
|
||||
do { \
|
||||
if (EXIT_ON_ERROR) { \
|
||||
printf("@, in %s (%d)", __FILE__, __LINE__); \
|
||||
exit(1); \
|
||||
} else { \
|
||||
char error_msg[MAX_ERROR_LENGTH]; \
|
||||
snprintf(error_msg, MAX_ERROR_LENGTH, "@, in %s (%d)", __FILE__, __LINE__); \
|
||||
struct box error = { .empty = 1, .error_msg = error_msg }; \
|
||||
return error; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
// PI constant
|
||||
const float PI = M_PI; // 3.14159265358979323846;
|
||||
|
@ -112,3 +131,171 @@ float mixture(float (*samplers[])(uint32_t*), float* weights, int n_dists, uint3
|
|||
free(cumsummed_normalized_weights);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Sample from an arbitrary cdf
|
||||
struct box {
|
||||
int empty;
|
||||
float content;
|
||||
char* error_msg;
|
||||
};
|
||||
|
||||
// Inverse cdf at point
|
||||
// Two versions of this function:
|
||||
// - raw, dealing with cdfs that return floats
|
||||
// - input: cdf: float => float, p
|
||||
// - output: Box(number|error)
|
||||
// - box, dealing with cdfs that return a box.
|
||||
// - input: cdf: float => Box(number|error), p
|
||||
// - output: Box(number|error)
|
||||
struct box inverse_cdf_float(float cdf(float), float p)
|
||||
{
|
||||
// given a cdf: [-Inf, Inf] => [0,1]
|
||||
// returns a box with either
|
||||
// x such that cdf(x) = p
|
||||
// or an error
|
||||
// if EXIT_ON_ERROR is set to 1, it exits instead of providing an error
|
||||
|
||||
float low = -1.0;
|
||||
float high = 1.0;
|
||||
|
||||
// 1. Make sure that cdf(low) < p < cdf(high)
|
||||
int interval_found = 0;
|
||||
while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) {
|
||||
// ^ Using FLT_MIN and FLT_MAX is overkill
|
||||
// but it's also the *correct* thing to do.
|
||||
|
||||
int low_condition = (cdf(low) < p);
|
||||
int high_condition = (p < cdf(high));
|
||||
if (low_condition && high_condition) {
|
||||
interval_found = 1;
|
||||
} else if (!low_condition) {
|
||||
low = low * 2;
|
||||
} else if (!high_condition) {
|
||||
high = high * 2;
|
||||
}
|
||||
}
|
||||
|
||||
if (!interval_found) {
|
||||
PROCESS_ERROR("Interval containing the target value not found, in function inverse_cdf");
|
||||
} else {
|
||||
|
||||
int convergence_condition = 0;
|
||||
int count = 0;
|
||||
while (!convergence_condition && (count < (INT_MAX / 2))) {
|
||||
float mid = (high + low) / 2;
|
||||
int mid_not_new = (mid == low) || (mid == high);
|
||||
// float width = high - low;
|
||||
// if ((width < 1e-8) || mid_not_new){
|
||||
if (mid_not_new) {
|
||||
convergence_condition = 1;
|
||||
} else {
|
||||
float mid_sign = cdf(mid) - p;
|
||||
if (mid_sign < 0) {
|
||||
low = mid;
|
||||
} else if (mid_sign > 0) {
|
||||
high = mid;
|
||||
} else if (mid_sign == 0) {
|
||||
low = mid;
|
||||
high = mid;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (convergence_condition) {
|
||||
struct box result = { .empty = 0, .content = low };
|
||||
return result;
|
||||
} else {
|
||||
PROCESS_ERROR("Search process did not converge, in function inverse_cdf");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct box inverse_cdf_box(struct box cdf_box(float), float p)
|
||||
{
|
||||
// given a cdf: [-Inf, Inf] => Box([0,1])
|
||||
// returns a box with either
|
||||
// x such that cdf(x) = p
|
||||
// or an error
|
||||
// if EXIT_ON_ERROR is set to 1, it exits instead of providing an error
|
||||
|
||||
float low = -1.0;
|
||||
float high = 1.0;
|
||||
|
||||
// 1. Make sure that cdf(low) < p < cdf(high)
|
||||
int interval_found = 0;
|
||||
while ((!interval_found) && (low > -FLT_MAX / 4) && (high < FLT_MAX / 4)) {
|
||||
// ^ Using FLT_MIN and FLT_MAX is overkill
|
||||
// but it's also the *correct* thing to do.
|
||||
struct box cdf_low = cdf_box(low);
|
||||
if (cdf_low.empty) {
|
||||
PROCESS_ERROR(cdf_low.error_msg);
|
||||
}
|
||||
|
||||
struct box cdf_high = cdf_box(high);
|
||||
if (cdf_high.empty) {
|
||||
PROCESS_ERROR(cdf_low.error_msg);
|
||||
}
|
||||
|
||||
int low_condition = (cdf_low.content < p);
|
||||
int high_condition = (p < cdf_high.content);
|
||||
if (low_condition && high_condition) {
|
||||
interval_found = 1;
|
||||
} else if (!low_condition) {
|
||||
low = low * 2;
|
||||
} else if (!high_condition) {
|
||||
high = high * 2;
|
||||
}
|
||||
}
|
||||
|
||||
if (!interval_found) {
|
||||
PROCESS_ERROR("Interval containing the target value not found, in function inverse_cdf");
|
||||
} else {
|
||||
|
||||
int convergence_condition = 0;
|
||||
int count = 0;
|
||||
while (!convergence_condition && (count < (INT_MAX / 2))) {
|
||||
float mid = (high + low) / 2;
|
||||
int mid_not_new = (mid == low) || (mid == high);
|
||||
// float width = high - low;
|
||||
if (mid_not_new) {
|
||||
// if ((width < 1e-8) || mid_not_new){
|
||||
convergence_condition = 1;
|
||||
} else {
|
||||
struct box cdf_mid = cdf_box(mid);
|
||||
if (cdf_mid.empty) {
|
||||
PROCESS_ERROR(cdf_mid.error_msg);
|
||||
}
|
||||
float mid_sign = cdf_mid.content - p;
|
||||
if (mid_sign < 0) {
|
||||
low = mid;
|
||||
} else if (mid_sign > 0) {
|
||||
high = mid;
|
||||
} else if (mid_sign == 0) {
|
||||
low = mid;
|
||||
high = mid;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (convergence_condition) {
|
||||
struct box result = { .empty = 0, .content = low };
|
||||
return result;
|
||||
} else {
|
||||
PROCESS_ERROR("Search process did not converge, in function inverse_cdf");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sampler based on inverse cdf and randomness function
|
||||
struct box sampler_box_cdf(struct box cdf(float), uint32_t* seed)
|
||||
{
|
||||
float p = rand_0_to_1(seed);
|
||||
struct box result = inverse_cdf_box(cdf, p);
|
||||
return result;
|
||||
}
|
||||
struct box sampler_float_cdf(float cdf(float), uint32_t* seed)
|
||||
{
|
||||
float p = rand_0_to_1(seed);
|
||||
struct box result = inverse_cdf_float(cdf, p);
|
||||
return result;
|
||||
}
|
||||
|
|
34
squiggle.h
34
squiggle.h
|
@ -4,13 +4,29 @@
|
|||
// uint32_t header
|
||||
#include <stdint.h>
|
||||
|
||||
// Macros
|
||||
#define EXIT_ON_ERROR 0
|
||||
#define MAX_ERROR_LENGTH 500
|
||||
#define PROCESS_ERROR(...) \
|
||||
do { \
|
||||
if (EXIT_ON_ERROR) { \
|
||||
printf("@, in %s (%d)", __FILE__, __LINE__); \
|
||||
exit(1); \
|
||||
} else { \
|
||||
char error_msg[MAX_ERROR_LENGTH]; \
|
||||
snprintf(error_msg, MAX_ERROR_LENGTH, "@, in %s (%d)", __FILE__, __LINE__); \
|
||||
struct box error = { .empty = 1, .error_msg = error_msg }; \
|
||||
return error; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
// Pseudo Random number generator
|
||||
uint32_t xorshift32(uint32_t* seed);
|
||||
|
||||
// Distribution & sampling functions
|
||||
float rand_0_to_1(uint32_t* seed);
|
||||
float rand_float(float max, uint32_t* seed);
|
||||
float ur_normal(uint32_t* seed);
|
||||
float unit_normal(uint32_t* seed);
|
||||
float random_uniform(float from, float to, uint32_t* seed);
|
||||
float random_normal(float mean, float sigma, uint32_t* seed);
|
||||
float random_lognormal(float logmean, float logsigma, uint32_t* seed);
|
||||
|
@ -23,4 +39,20 @@ void array_cumsum(float* array_to_sum, float* array_cumsummed, int length);
|
|||
// Mixture function
|
||||
float mixture(float (*samplers[])(uint32_t*), float* weights, int n_dists, uint32_t* seed);
|
||||
|
||||
// Box
|
||||
struct box {
|
||||
int empty;
|
||||
float content;
|
||||
char* error_msg;
|
||||
};
|
||||
|
||||
// Inverse cdf
|
||||
struct box inverse_cdf_float(float cdf(float), float p);
|
||||
struct box inverse_cdf_box(struct box cdf_box(float), float p);
|
||||
|
||||
// Samplers from cdf
|
||||
struct box sampler_box_cdf(struct box cdf(float), uint32_t* seed);
|
||||
struct box sampler_float_cdf(float cdf(float), uint32_t* seed);
|
||||
|
||||
#endif
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user