diff --git a/README.md b/README.md index 4185179..45a3e34 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ This library provides two approaches: ```C struct box { int empty; - float content; + double content; char* error_msg; }; ``` @@ -131,9 +131,9 @@ int main(){ uint64_t* seed = malloc(sizeof(uint64_t)); *seed = 1000; // xorshift can't start with a seed of 0 - float a = sample_to(1, 10, seed); - float b = 2 * a; - float c = b / a; + double a = sample_to(1, 10, seed); + double b = 2 * a; + double c = b / a; printf("a: %f, b: %f, c: %f\n", a, b, c); // a: 0.607162, b: 1.214325, c: 0.500000 @@ -153,7 +153,7 @@ vs #include #include -float draw_xyz(uint64_t* seed){ +double draw_xyz(uint64_t* seed){ // function could also be placed inside main with gcc nested functions extension. return sample_to(1, 20, seed); } @@ -164,9 +164,9 @@ int main(){ uint64_t* seed = malloc(sizeof(uint64_t)); *seed = 1000; // xorshift can't start with a seed of 0 - float a = draw_xyz(seed); - float b = 2 * draw_xyz(seed); - float c = b / a; + double a = draw_xyz(seed); + double b = 2 * draw_xyz(seed); + double c = b / a; printf("a: %f, b: %f, c: %f\n", a, b, c); // a: 0.522484, b: 10.283501, c: 19.681936 diff --git a/examples/01_one_sample/example b/examples/01_one_sample/example index 5038416..0395c5c 100755 Binary files a/examples/01_one_sample/example and b/examples/01_one_sample/example differ diff --git a/examples/01_one_sample/example.c b/examples/01_one_sample/example.c index f1687ae..c0668a8 100644 --- a/examples/01_one_sample/example.c +++ b/examples/01_one_sample/example.c @@ -4,22 +4,22 @@ #include // Estimate functions -float sample_0(uint64_t* seed) +double sample_0(uint64_t* seed) { return 0; } -float sample_1(uint64_t* seed) +double sample_1(uint64_t* seed) { return 1; } -float sample_few(uint64_t* seed) +double sample_few(uint64_t* seed) { return sample_to(1, 3, seed); } -float sample_many(uint64_t* seed) +double sample_many(uint64_t* seed) { return sample_to(2, 10, seed); } @@ -29,15 +29,15 @@ int main(){ uint64_t* seed = malloc(sizeof(uint64_t)); *seed = 1000; // xorshift can't start with 0 - float p_a = 0.8; - float p_b = 0.5; - float p_c = p_a * p_b; + double p_a = 0.8; + double p_b = 0.5; + double p_c = p_a * p_b; int n_dists = 4; - float weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 }; - float (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many }; + double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 }; + double (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many }; - float result_one = sample_mixture(samplers, weights, n_dists, seed); + double result_one = sample_mixture(samplers, weights, n_dists, seed); printf("result_one: %f\n", result_one); free(seed); } diff --git a/examples/02_many_samples/example b/examples/02_many_samples/example index 4b781cf..67494bd 100755 Binary files a/examples/02_many_samples/example and b/examples/02_many_samples/example differ diff --git a/examples/02_many_samples/example.c b/examples/02_many_samples/example.c index 7f3faa0..18ec1eb 100644 --- a/examples/02_many_samples/example.c +++ b/examples/02_many_samples/example.c @@ -4,22 +4,22 @@ #include "../../squiggle.h" // Estimate functions -float sample_0(uint64_t* seed) +double sample_0(uint64_t* seed) { return 0; } -float sample_1(uint64_t* seed) +double sample_1(uint64_t* seed) { return 1; } -float sample_few(uint64_t* seed) +double sample_few(uint64_t* seed) { return sample_to(1, 3, seed); } -float sample_many(uint64_t* seed) +double sample_many(uint64_t* seed) { return sample_to(2, 10, seed); } @@ -29,16 +29,16 @@ int main(){ uint64_t* seed = malloc(sizeof(uint64_t)); *seed = 1000; // xorshift can't start with 0 - float p_a = 0.8; - float p_b = 0.5; - float p_c = p_a * p_b; + double p_a = 0.8; + double p_b = 0.5; + double p_c = p_a * p_b; int n_dists = 4; - float weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 }; - float (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many }; + double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 }; + double (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many }; int n_samples = 1000000; - float* result_many = (float *) malloc(n_samples * sizeof(float)); + double* result_many = (double *) malloc(n_samples * sizeof(double)); for(int i=0; i, // - // but modified to return a box struct and floats instead of doubles. + // but modified to return a box struct and doubles instead of doubles. // [ ] to do: add attribution in README // Original code under this license: /* @@ -60,17 +60,17 @@ struct box incbeta(float a, float b, float x) } /*Find the first part before the continued fraction.*/ - const float lbeta_ab = lgamma(a) + lgamma(b) - lgamma(a + b); - const float front = exp(log(x) * a + log(1.0 - x) * b - lbeta_ab) / a; + const double lbeta_ab = lgamma(a) + lgamma(b) - lgamma(a + b); + const double front = exp(log(x) * a + log(1.0 - x) * b - lbeta_ab) / a; /*Use Lentz's algorithm to evaluate the continued fraction.*/ - float f = 1.0, c = 1.0, d = 0.0; + double f = 1.0, c = 1.0, d = 0.0; int i, m; for (i = 0; i <= 200; ++i) { m = i / 2; - float numerator; + double numerator; if (i == 0) { numerator = 1.0; /*First numerator is 1.0.*/ } else if (i % 2 == 0) { @@ -89,7 +89,7 @@ struct box incbeta(float a, float b, float x) if (fabs(c) < TINY_BETA) c = TINY_BETA; - const float cd = c * d; + const double cd = c * d; f *= cd; /*Check for stop.*/ @@ -105,7 +105,7 @@ struct box incbeta(float a, float b, float x) return PROCESS_ERROR("More loops needed, did not converge, in function incbeta"); } -struct box cdf_beta(float x) +struct box cdf_beta(double x) { if (x < 0) { struct box result = { .empty = 0, .content = 0 }; @@ -114,13 +114,13 @@ struct box cdf_beta(float x) struct box result = { .empty = 0, .content = 1 }; return result; } else { - float successes = 1, failures = (2023 - 1945); + double successes = 1, failures = (2023 - 1945); return incbeta(successes, failures, x); } } // Some testers -void test_inverse_cdf_box(char* cdf_name, struct box cdf_box(float)) +void test_inverse_cdf_box(char* cdf_name, struct box cdf_box(double)) { struct box result = inverse_cdf_box(cdf_box, 0.5); if (result.empty) { @@ -131,7 +131,7 @@ void test_inverse_cdf_box(char* cdf_name, struct box cdf_box(float)) } } -void test_and_time_sampler_box(char* cdf_name, struct box cdf_box(float), uint64_t* seed) +void test_and_time_sampler_box(char* cdf_name, struct box cdf_box(double), uint64_t* seed) { printf("\nGetting some samples from %s:\n", cdf_name); clock_t begin = clock(); @@ -144,7 +144,7 @@ void test_and_time_sampler_box(char* cdf_name, struct box cdf_box(float), uint64 } } clock_t end = clock(); - float time_spent = (float)(end - begin) / CLOCKS_PER_SEC; + double time_spent = (double)(end - begin) / CLOCKS_PER_SEC; printf("Time spent: %f\n", time_spent); } diff --git a/examples/06_gamma_beta/example b/examples/06_gamma_beta/example index 20513bc..11e0a92 100755 Binary files a/examples/06_gamma_beta/example and b/examples/06_gamma_beta/example differ diff --git a/examples/06_gamma_beta/example.c b/examples/06_gamma_beta/example.c index 8c11342..3a7d422 100644 --- a/examples/06_gamma_beta/example.c +++ b/examples/06_gamma_beta/example.c @@ -14,15 +14,15 @@ int main() int n = 1000 * 1000; /* for (int i = 0; i < n; i++) { - float gamma_0 = sample_gamma(0.0, seed); + double gamma_0 = sample_gamma(0.0, seed); // printf("sample_gamma(0.0): %f\n", gamma_0); } printf("\n"); */ - float* gamma_1_array = malloc(sizeof(float) * n); + double* gamma_1_array = malloc(sizeof(double) * n); for (int i = 0; i < n; i++) { - float gamma_1 = sample_gamma(1.0, seed); + double gamma_1 = sample_gamma(1.0, seed); // printf("sample_gamma(1.0): %f\n", gamma_1); gamma_1_array[i] = gamma_1; } @@ -30,9 +30,9 @@ int main() free(gamma_1_array); printf("\n"); - float* beta_1_2_array = malloc(sizeof(float) * n); + double* beta_1_2_array = malloc(sizeof(double) * n); for (int i = 0; i < n; i++) { - float beta_1_2 = sample_beta(1, 2.0, seed); + double beta_1_2 = sample_beta(1, 2.0, seed); // printf("sample_beta(1.0, 2.0): %f\n", beta_1_2); beta_1_2_array[i] = beta_1_2; } diff --git a/squiggle.c b/squiggle.c index 3d3f4e6..8832c36 100644 --- a/squiggle.c +++ b/squiggle.c @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -11,7 +11,7 @@ #define EXIT_ON_ERROR 0 #define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__) -const float PI = 3.14159265358979323846; // M_PI in gcc gnu99 +const double PI = 3.14159265358979323846; // M_PI in gcc gnu99 // Pseudo Random number generator uint64_t xorshift32(uint32_t* seed) @@ -44,58 +44,58 @@ uint64_t xorshift64(uint64_t* seed) // Distribution & sampling functions // Unit distributions -float sample_unit_uniform(uint64_t* seed) +double sample_unit_uniform(uint64_t* seed) { // samples uniform from [0,1] interval. - return ((float)xorshift64(seed)) / ((float)UINT64_MAX); + return ((double)xorshift64(seed)) / ((double)UINT64_MAX); } -float sample_unit_normal(uint64_t* seed) +double sample_unit_normal(uint64_t* seed) { // See: - float u1 = sample_unit_uniform(seed); - float u2 = sample_unit_uniform(seed); - float z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2); + double u1 = sample_unit_uniform(seed); + double u2 = sample_unit_uniform(seed); + double z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2); return z; } // Composite distributions -float sample_uniform(float start, float end, uint64_t* seed) +double sample_uniform(double start, double end, uint64_t* seed) { return sample_unit_uniform(seed) * (end - start) + start; } -float sample_normal(float mean, float sigma, uint64_t* seed) +double sample_normal(double mean, double sigma, uint64_t* seed) { return (mean + sigma * sample_unit_normal(seed)); } -float sample_lognormal(float logmean, float logsigma, uint64_t* seed) +double sample_lognormal(double logmean, double logsigma, uint64_t* seed) { return expf(sample_normal(logmean, logsigma, seed)); } -float sample_to(float low, float high, uint64_t* seed) +double sample_to(double low, double high, uint64_t* seed) { // Given a (positive) 90% confidence interval, // returns a sample from a lognormal // with a matching 90% c.i. - const float NORMAL95CONFIDENCE = 1.6448536269514722; - float loglow = logf(low); - float loghigh = logf(high); - float logmean = (loglow + loghigh) / 2; - float logsigma = (loghigh - loglow) / (2.0 * NORMAL95CONFIDENCE); + const double NORMAL95CONFIDENCE = 1.6448536269514722; + double loglow = logf(low); + double loghigh = logf(high); + double logmean = (loglow + loghigh) / 2; + double logsigma = (loghigh - loglow) / (2.0 * NORMAL95CONFIDENCE); return sample_lognormal(logmean, logsigma, seed); } -float sample_gamma(float alpha, uint64_t* seed) +double sample_gamma(double alpha, uint64_t* seed) { // A Simple Method for Generating Gamma Variables, Marsaglia and Wan Tsang, 2001 // https://dl.acm.org/doi/pdf/10.1145/358407.358414 // see also the references/ folder if (alpha >= 1) { - float d, c, x, v, u; + double d, c, x, v, u; d = alpha - 1.0 / 3.0; c = 1.0 / sqrt(9.0 * d); while (1) { @@ -125,24 +125,24 @@ float sample_gamma(float alpha, uint64_t* seed) } } -float sample_beta(float a, float b, uint64_t* seed) +double sample_beta(double a, double b, uint64_t* seed) { - float gamma_a = sample_gamma(a, seed); - float gamma_b = sample_gamma(b, seed); + double gamma_a = sample_gamma(a, seed); + double gamma_b = sample_gamma(b, seed); return gamma_a / (gamma_a + gamma_b); } // Array helpers -float array_sum(float* array, int length) +double array_sum(double* array, int length) { - float sum = 0.0; + double sum = 0.0; for (int i = 0; i < length; i++) { sum += array[i]; } return sum; } -void array_cumsum(float* array_to_sum, float* array_cumsummed, int length) +void array_cumsum(double* array_to_sum, double* array_cumsummed, int length) { array_cumsummed[0] = array_to_sum[0]; for (int i = 1; i < length; i++) { @@ -150,16 +150,16 @@ void array_cumsum(float* array_to_sum, float* array_cumsummed, int length) } } -float array_mean(float* array, int length) +double array_mean(double* array, int length) { - float sum = array_sum(array, length); + double sum = array_sum(array, length); return sum / length; } -float array_std(float* array, int length) +double array_std(double* array, int length) { - float mean = array_mean(array, length); - float std = 0.0; + double mean = array_mean(array, length); + double std = 0.0; for (int i = 0; i < length; i++) { std += (array[i] - mean); std *= std; @@ -169,20 +169,20 @@ float array_std(float* array, int length) } // Mixture function -float sample_mixture(float (*samplers[])(uint64_t*), float* weights, int n_dists, uint64_t* seed) +double sample_mixture(double (*samplers[])(uint64_t*), double* weights, int n_dists, uint64_t* seed) { // You can see a simpler version of this function in the git history // or in C-02-better-algorithm-one-thread/ - float sum_weights = array_sum(weights, n_dists); - float* cumsummed_normalized_weights = (float*)malloc(n_dists * sizeof(float)); + double sum_weights = array_sum(weights, n_dists); + double* cumsummed_normalized_weights = (double*)malloc(n_dists * sizeof(double)); cumsummed_normalized_weights[0] = weights[0] / sum_weights; for (int i = 1; i < n_dists; i++) { cumsummed_normalized_weights[i] = cumsummed_normalized_weights[i - 1] + weights[i] / sum_weights; } - float result; + double result; int result_set_flag = 0; - float p = sample_uniform(0, 1, seed); + double p = sample_uniform(0, 1, seed); for (int k = 0; k < n_dists; k++) { if (p < cumsummed_normalized_weights[k]) { result = samplers[k](seed); @@ -200,7 +200,7 @@ float sample_mixture(float (*samplers[])(uint64_t*), float* weights, int n_dists // Sample from an arbitrary cdf struct box { int empty; - float content; + double content; char* error_msg; }; @@ -219,13 +219,13 @@ struct box process_error(const char* error_msg, int should_exit, char* file, int // Inverse cdf at point // Two versions of this function: -// - raw, dealing with cdfs that return floats -// - input: cdf: float => float, p +// - raw, dealing with cdfs that return doubles +// - input: cdf: double => double, p // - output: Box(number|error) // - box, dealing with cdfs that return a box. -// - input: cdf: float => Box(number|error), p +// - input: cdf: double => Box(number|error), p // - output: Box(number|error) -struct box inverse_cdf_float(float cdf(float), float p) +struct box inverse_cdf_double(double cdf(double), double p) { // given a cdf: [-Inf, Inf] => [0,1] // returns a box with either @@ -233,8 +233,8 @@ struct box inverse_cdf_float(float cdf(float), float p) // or an error // if EXIT_ON_ERROR is set to 1, it exits instead of providing an error - float low = -1.0; - float high = 1.0; + double low = -1.0; + double high = 1.0; // 1. Make sure that cdf(low) < p < cdf(high) int interval_found = 0; @@ -260,14 +260,14 @@ struct box inverse_cdf_float(float cdf(float), float p) int convergence_condition = 0; int count = 0; while (!convergence_condition && (count < (INT_MAX / 2))) { - float mid = (high + low) / 2; + double mid = (high + low) / 2; int mid_not_new = (mid == low) || (mid == high); - // float width = high - low; + // double width = high - low; // if ((width < 1e-8) || mid_not_new){ if (mid_not_new) { convergence_condition = 1; } else { - float mid_sign = cdf(mid) - p; + double mid_sign = cdf(mid) - p; if (mid_sign < 0) { low = mid; } else if (mid_sign > 0) { @@ -288,7 +288,7 @@ struct box inverse_cdf_float(float cdf(float), float p) } } -struct box inverse_cdf_box(struct box cdf_box(float), float p) +struct box inverse_cdf_box(struct box cdf_box(double), double p) { // given a cdf: [-Inf, Inf] => Box([0,1]) // returns a box with either @@ -296,8 +296,8 @@ struct box inverse_cdf_box(struct box cdf_box(float), float p) // or an error // if EXIT_ON_ERROR is set to 1, it exits instead of providing an error - float low = -1.0; - float high = 1.0; + double low = -1.0; + double high = 1.0; // 1. Make sure that cdf(low) < p < cdf(high) int interval_found = 0; @@ -332,9 +332,9 @@ struct box inverse_cdf_box(struct box cdf_box(float), float p) int convergence_condition = 0; int count = 0; while (!convergence_condition && (count < (INT_MAX / 2))) { - float mid = (high + low) / 2; + double mid = (high + low) / 2; int mid_not_new = (mid == low) || (mid == high); - // float width = high - low; + // double width = high - low; if (mid_not_new) { // if ((width < 1e-8) || mid_not_new){ convergence_condition = 1; @@ -343,7 +343,7 @@ struct box inverse_cdf_box(struct box cdf_box(float), float p) if (cdf_mid.empty) { return PROCESS_ERROR(cdf_mid.error_msg); } - float mid_sign = cdf_mid.content - p; + double mid_sign = cdf_mid.content - p; if (mid_sign < 0) { low = mid; } else if (mid_sign > 0) { @@ -365,23 +365,23 @@ struct box inverse_cdf_box(struct box cdf_box(float), float p) } // Sampler based on inverse cdf and randomness function -struct box sampler_cdf_box(struct box cdf(float), uint64_t* seed) +struct box sampler_cdf_box(struct box cdf(double), uint64_t* seed) { - float p = sample_unit_uniform(seed); + double p = sample_unit_uniform(seed); struct box result = inverse_cdf_box(cdf, p); return result; } -struct box sampler_cdf_float(float cdf(float), uint64_t* seed) +struct box sampler_cdf_double(double cdf(double), uint64_t* seed) { - float p = sample_unit_uniform(seed); - struct box result = inverse_cdf_float(cdf, p); + double p = sample_unit_uniform(seed); + struct box result = inverse_cdf_double(cdf, p); return result; } /* Could also define other variations, e.g., -float sampler_danger(struct box cdf(float), uint64_t* seed) +double sampler_danger(struct box cdf(double), uint64_t* seed) { - float p = sample_unit_uniform(seed); + double p = sample_unit_uniform(seed); struct box result = inverse_cdf_box(cdf, p); if(result.empty){ exit(1); diff --git a/squiggle.h b/squiggle.h index aa3399b..6e2106b 100644 --- a/squiggle.h +++ b/squiggle.h @@ -8,31 +8,31 @@ uint64_t xorshift64(uint64_t* seed); // Basic distribution sampling functions -float sample_unit_uniform(uint64_t* seed); -float sample_unit_normal(uint64_t* seed); +double sample_unit_uniform(uint64_t* seed); +double sample_unit_normal(uint64_t* seed); // Composite distribution sampling functions -float sample_uniform(float start, float end, uint64_t* seed); -float sample_normal(float mean, float sigma, uint64_t* seed); -float sample_lognormal(float logmean, float logsigma, uint64_t* seed); -float sample_to(float low, float high, uint64_t* seed); +double sample_uniform(double start, double end, uint64_t* seed); +double sample_normal(double mean, double sigma, uint64_t* seed); +double sample_lognormal(double logmean, double logsigma, uint64_t* seed); +double sample_to(double low, double high, uint64_t* seed); -float sample_gamma(float alpha, uint64_t* seed); -float sample_beta(float a, float b, uint64_t* seed); +double sample_gamma(double alpha, uint64_t* seed); +double sample_beta(double a, double b, uint64_t* seed); // Array helpers -float array_sum(float* array, int length); -void array_cumsum(float* array_to_sum, float* array_cumsummed, int length); -float array_mean(float* array, int length); -float array_std(float* array, int length); +double array_sum(double* array, int length); +void array_cumsum(double* array_to_sum, double* array_cumsummed, int length); +double array_mean(double* array, int length); +double array_std(double* array, int length); // Mixture function -float sample_mixture(float (*samplers[])(uint64_t*), float* weights, int n_dists, uint64_t* seed); +double sample_mixture(double (*samplers[])(uint64_t*), double* weights, int n_dists, uint64_t* seed); // Box struct box { int empty; - float content; + double content; char* error_msg; }; @@ -43,11 +43,11 @@ struct box { struct box process_error(const char* error_msg, int should_exit, char* file, int line); // Inverse cdf -struct box inverse_cdf_float(float cdf(float), float p); -struct box inverse_cdf_box(struct box cdf_box(float), float p); +struct box inverse_cdf_double(double cdf(double), double p); +struct box inverse_cdf_box(struct box cdf_box(double), double p); // Samplers from cdf -struct box sampler_cdf_float(float cdf(float), uint64_t* seed); -struct box sampler_cdf_box(struct box cdf(float), uint64_t* seed); +struct box sampler_cdf_double(double cdf(double), uint64_t* seed); +struct box sampler_cdf_box(struct box cdf(double), uint64_t* seed); #endif diff --git a/test/test b/test/test index 708e1ff..f6a849c 100755 Binary files a/test/test and b/test/test differ diff --git a/test/test.c b/test/test.c index 7e472f6..2c8f909 100644 --- a/test/test.c +++ b/test/test.c @@ -4,22 +4,22 @@ #include #include -#define N 1000 * 1000 +#define N 100 void test_unit_uniform(uint64_t* seed){ - float* unit_uniform_array = malloc(sizeof(float) * N); + double* unit_uniform_array = malloc(sizeof(double) * N); for(int i=0; i width * 1.0/1000.0){ printf("[-] Mean test for [%.1f, %.1f] uniform NOT passed.\n", start, end); printf("Mean of [%.1f, %.1f] uniform: %f, vs expected mean: %f, delta: %f\n", start, end, mean, expected_mean, mean - expected_mean); @@ -67,6 +66,10 @@ void test_uniform(float start, float end, uint64_t* seed){ if(fabs(delta_std) > width * 1.0/1000.0){ printf("[-] Std test for [%.1f, %.1f] uniform NOT passed.\n", start, end); printf("Std of [%.1f, %.1f] uniform: %f, vs expected std: %f, delta: %f\n", start, end, std, expected_std, std - expected_std); + for(int i=0; i start){ test_uniform(start, end, seed); }