Compare commits
6 Commits
b352cdf3ba
...
5d28295a15
Author | SHA1 | Date | |
---|---|---|---|
5d28295a15 | |||
86b12db894 | |||
fa832cbd17 | |||
279fb12dee | |||
6199e43ae4 | |||
27f9d76e9a |
|
@ -381,7 +381,9 @@ Overall, I'd describe the error handling capabilities of this library as pretty
|
|||
### To do
|
||||
|
||||
- [ ] Drive in a few more real-life applications
|
||||
- [ ] US election modelling?
|
||||
- [ ] Look into using size_t instead of int for sample numbers
|
||||
- [ ] Reorganize code a little bit to reduce usage of gcc's nested functions
|
||||
|
||||
### Done
|
||||
|
||||
|
|
Binary file not shown.
|
@ -3,33 +3,13 @@
|
|||
#include <stdlib.h>
|
||||
|
||||
// Estimate functions
|
||||
double sample_0(uint64_t* seed)
|
||||
{
|
||||
UNUSED(seed);
|
||||
return 0;
|
||||
}
|
||||
|
||||
double sample_1(uint64_t* seed)
|
||||
{
|
||||
UNUSED(seed);
|
||||
return 1;
|
||||
}
|
||||
double sample_0(uint64_t* seed) { UNUSED(seed); return 0; }
|
||||
double sample_1(uint64_t* seed) { UNUSED(seed); return 1; }
|
||||
double sample_few(uint64_t* seed) { return sample_to(1, 3, seed); }
|
||||
double sample_many(uint64_t* seed) { return sample_to(2, 10, seed); }
|
||||
|
||||
double sample_few(uint64_t* seed)
|
||||
{
|
||||
return sample_to(1, 3, seed);
|
||||
}
|
||||
|
||||
double sample_many(uint64_t* seed)
|
||||
{
|
||||
return sample_to(2, 10, seed);
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
// set randomness seed
|
||||
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||
*seed = 1000; // xorshift can't start with 0
|
||||
double sample_model(uint64_t* seed){
|
||||
|
||||
double p_a = 0.8;
|
||||
double p_b = 0.5;
|
||||
|
@ -38,8 +18,17 @@ int main()
|
|||
int n_dists = 4;
|
||||
double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
|
||||
double (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many };
|
||||
double result = sample_mixture(samplers, weights, n_dists, seed);
|
||||
|
||||
double result_one = sample_mixture(samplers, weights, n_dists, seed);
|
||||
printf("result_one: %f\n", result_one);
|
||||
return result;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
// set randomness seed
|
||||
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||
*seed = 1000; // xorshift can't start with 0
|
||||
|
||||
printf("result_one: %f\n", sample_model(seed));
|
||||
free(seed);
|
||||
}
|
||||
|
|
Binary file not shown.
|
@ -2,37 +2,35 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
double sample_0(uint64_t* seed) { UNUSED(seed); return 0; }
|
||||
double sample_1(uint64_t* seed) { UNUSED(seed); return 1; }
|
||||
double sample_few(uint64_t* seed) { return sample_to(1, 3, seed); }
|
||||
double sample_many(uint64_t* seed) { return sample_to(2, 10, seed); }
|
||||
|
||||
double sample_model(uint64_t* seed){
|
||||
|
||||
double p_a = 0.8;
|
||||
double p_b = 0.5;
|
||||
double p_c = p_a * p_b;
|
||||
|
||||
int n_dists = 4;
|
||||
double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
|
||||
double (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many };
|
||||
double result = sample_mixture(samplers, weights, n_dists, seed);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
// set randomness seed
|
||||
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||
*seed = 1000; // xorshift can't start with 0
|
||||
|
||||
double p_a = 0.8;
|
||||
double p_b = 0.5;
|
||||
double p_c = p_a * p_b;
|
||||
|
||||
double sample_0(uint64_t * seed)
|
||||
{
|
||||
UNUSED(seed);
|
||||
return 0;
|
||||
}
|
||||
double sample_1(uint64_t * seed)
|
||||
{
|
||||
UNUSED(seed);
|
||||
return 1;
|
||||
}
|
||||
double sample_few(uint64_t * seed) { return sample_to(1, 3, seed); }
|
||||
double sample_many(uint64_t * seed) { return sample_to(2, 10, seed); }
|
||||
|
||||
int n_dists = 4;
|
||||
double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
|
||||
double (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many };
|
||||
|
||||
int n_samples = 1000000;
|
||||
double* result_many = (double*)malloc((size_t)n_samples * sizeof(double));
|
||||
for (int i = 0; i < n_samples; i++) {
|
||||
result_many[i] = sample_mixture(samplers, weights, n_dists, seed);
|
||||
result_many[i] = sample_model(seed);
|
||||
}
|
||||
printf("Mean: %f\n", array_mean(result_many, n_samples));
|
||||
|
||||
|
|
Binary file not shown.
|
@ -2,39 +2,36 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
double sample_model(uint64_t* seed){
|
||||
|
||||
double sample_0(uint64_t* seed) { UNUSED(seed); return 0; }
|
||||
// Using a gcc extension, you can define a function inside another function
|
||||
double sample_1(uint64_t* seed) { UNUSED(seed); return 1; }
|
||||
double sample_few(uint64_t* seed) { return sample_to(1, 3, seed); }
|
||||
double sample_many(uint64_t* seed) { return sample_to(2, 10, seed); }
|
||||
|
||||
double p_a = 0.8;
|
||||
double p_b = 0.5;
|
||||
double p_c = p_a * p_b;
|
||||
|
||||
int n_dists = 4;
|
||||
double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
|
||||
double (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many };
|
||||
double result = sample_mixture(samplers, weights, n_dists, seed);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
// set randomness seed
|
||||
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||
*seed = 1000; // xorshift can't start with 0
|
||||
|
||||
double p_a = 0.8;
|
||||
double p_b = 0.5;
|
||||
double p_c = p_a * p_b;
|
||||
|
||||
int n_dists = 4;
|
||||
|
||||
// These are nested functions. They will not compile without gcc.
|
||||
double sample_0(uint64_t * seed)
|
||||
{
|
||||
UNUSED(seed);
|
||||
return 0;
|
||||
}
|
||||
double sample_1(uint64_t * seed)
|
||||
{
|
||||
UNUSED(seed);
|
||||
return 1;
|
||||
}
|
||||
double sample_few(uint64_t * seed) { return sample_to(1, 3, seed); }
|
||||
double sample_many(uint64_t * seed) { return sample_to(2, 10, seed); }
|
||||
|
||||
double (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many };
|
||||
double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
|
||||
|
||||
int n_samples = 1000000;
|
||||
double* result_many = (double*)malloc((size_t)n_samples * sizeof(double));
|
||||
for (int i = 0; i < n_samples; i++) {
|
||||
result_many[i] = sample_mixture(samplers, weights, n_dists, seed);
|
||||
result_many[i] = sample_model(seed);
|
||||
}
|
||||
|
||||
printf("result_many: [");
|
||||
|
@ -42,5 +39,6 @@ int main()
|
|||
printf("%.2f, ", result_many[i]);
|
||||
}
|
||||
printf("]\n");
|
||||
|
||||
free(seed);
|
||||
}
|
||||
|
|
Binary file not shown.
|
@ -2,8 +2,6 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
// Estimate functions
|
||||
|
||||
int main()
|
||||
{
|
||||
// set randomness seed
|
||||
|
@ -11,33 +9,21 @@ int main()
|
|||
*seed = 1000; // xorshift can't start with 0
|
||||
|
||||
int n = 1000 * 1000;
|
||||
/*
|
||||
for (int i = 0; i < n; i++) {
|
||||
double gamma_0 = sample_gamma(0.0, seed);
|
||||
// printf("sample_gamma(0.0): %f\n", gamma_0);
|
||||
}
|
||||
printf("\n");
|
||||
*/
|
||||
|
||||
double* gamma_1_array = malloc(sizeof(double) * (size_t)n);
|
||||
double* gamma_array = malloc(sizeof(double) * (size_t)n);
|
||||
for (int i = 0; i < n; i++) {
|
||||
double gamma_1 = sample_gamma(1.0, seed);
|
||||
// printf("sample_gamma(1.0): %f\n", gamma_1);
|
||||
gamma_1_array[i] = gamma_1;
|
||||
gamma_array[i] = sample_gamma(1.0, seed);
|
||||
}
|
||||
printf("gamma(1) summary statistics = mean: %f, std: %f\n", array_mean(gamma_1_array, n), array_std(gamma_1_array, n));
|
||||
free(gamma_1_array);
|
||||
printf("gamma(1) summary statistics = mean: %f, std: %f\n", array_mean(gamma_array, n), array_std(gamma_array, n));
|
||||
printf("\n");
|
||||
|
||||
double* beta_1_2_array = malloc(sizeof(double) * (size_t)n);
|
||||
double* beta_array = malloc(sizeof(double) * (size_t)n);
|
||||
for (int i = 0; i < n; i++) {
|
||||
double beta_1_2 = sample_beta(1, 2.0, seed);
|
||||
// printf("sample_beta(1.0, 2.0): %f\n", beta_1_2);
|
||||
beta_1_2_array[i] = beta_1_2;
|
||||
beta_array[i] = sample_beta(1, 2.0, seed);
|
||||
}
|
||||
printf("beta(1,2) summary statistics: mean: %f, std: %f\n", array_mean(beta_1_2_array, n), array_std(beta_1_2_array, n));
|
||||
free(beta_1_2_array);
|
||||
printf("beta(1,2) summary statistics: mean: %f, std: %f\n", array_mean(beta_array, n), array_std(beta_array, n));
|
||||
printf("\n");
|
||||
|
||||
free(gamma_array);
|
||||
free(beta_array);
|
||||
free(seed);
|
||||
}
|
||||
|
|
Binary file not shown.
|
@ -4,87 +4,88 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int main()
|
||||
double sample_fermi_logspace(uint64_t * seed)
|
||||
{
|
||||
// Replicate <https://arxiv.org/pdf/1806.02404.pdf>, and in particular the red line in page 11.
|
||||
// You can see a simple version of this function in naive.c in this same folder
|
||||
double log_rate_of_star_formation = sample_uniform(log(1), log(100), seed);
|
||||
double log_fraction_of_stars_with_planets = sample_uniform(log(0.1), log(1), seed);
|
||||
double log_number_of_habitable_planets_per_star_system = sample_uniform(log(0.1), log(1), seed);
|
||||
|
||||
double log_rate_of_life_formation_in_habitable_planets = sample_normal(1, 50, seed);
|
||||
double log_fraction_of_habitable_planets_in_which_any_life_appears;
|
||||
/*
|
||||
Consider:
|
||||
a = underlying normal
|
||||
b = rate_of_life_formation_in_habitable_planets = exp(underlying normal) = exp(a)
|
||||
c = 1 - exp(-b) = fraction_of_habitable_planets_in_which_any_life_appears
|
||||
d = log(c)
|
||||
|
||||
Looking at the Taylor expansion for c = 1 - exp(-b), it's
|
||||
b - b^2/2 + b^3/6 - x^b/24, etc.
|
||||
<https://www.wolframalpha.com/input?i=1-exp%28-x%29>
|
||||
When b ~ 0 (as is often the case), this is close to b.
|
||||
|
||||
But now, if b ~ 0, c ~ b
|
||||
and d = log(c) ~ log(b) = log(exp(a)) = a
|
||||
|
||||
Now, we could play around with estimating errors,
|
||||
and indeed if we want b^2/2 = exp(a)^2/2 < 10^(-n), i.e., to have n decimal digits of precision,
|
||||
we could compute this as e.g., a < (nlog(10) + log(2))/2
|
||||
so for example if we want ten digits of precision, that's a < -11
|
||||
|
||||
Empirically, the two numbers as calculated in C do become really close around 11 or so,
|
||||
and at 38 that calculation results in a -inf (so probably a floating point error or similar.)
|
||||
So we should be using that formula for somewhere between -38 << a < -11
|
||||
|
||||
I chose -16 as a happy medium after playing around with
|
||||
double invert(double x){
|
||||
return log(1-exp(-exp(-x)));
|
||||
}
|
||||
for(int i=0; i<64; i++){
|
||||
double j = i;
|
||||
printf("for %lf, log(1-exp(-exp(-x))) is calculated as... %lf\n", j, invert(j));
|
||||
}
|
||||
and <https://www.wolframalpha.com/input?i=log%281-exp%28-exp%28-16%29%29%29>
|
||||
*/
|
||||
if (log_rate_of_life_formation_in_habitable_planets < -16) {
|
||||
log_fraction_of_habitable_planets_in_which_any_life_appears = log_rate_of_life_formation_in_habitable_planets;
|
||||
} else {
|
||||
double rate_of_life_formation_in_habitable_planets = exp(log_rate_of_life_formation_in_habitable_planets);
|
||||
double fraction_of_habitable_planets_in_which_any_life_appears = -expm1(-rate_of_life_formation_in_habitable_planets);
|
||||
log_fraction_of_habitable_planets_in_which_any_life_appears = log(fraction_of_habitable_planets_in_which_any_life_appears);
|
||||
}
|
||||
|
||||
double log_fraction_of_planets_with_life_in_which_intelligent_life_appears = sample_uniform(log(0.001), log(1), seed);
|
||||
double log_fraction_of_intelligent_planets_which_are_detectable_as_such = sample_uniform(log(0.01), log(1), seed);
|
||||
double log_longevity_of_detectable_civilizations = sample_uniform(log(100), log(10000000000), seed);
|
||||
|
||||
double log_n =
|
||||
log_rate_of_star_formation +
|
||||
log_fraction_of_stars_with_planets +
|
||||
log_number_of_habitable_planets_per_star_system +
|
||||
log_fraction_of_habitable_planets_in_which_any_life_appears +
|
||||
log_fraction_of_planets_with_life_in_which_intelligent_life_appears +
|
||||
log_fraction_of_intelligent_planets_which_are_detectable_as_such +
|
||||
log_longevity_of_detectable_civilizations;
|
||||
return log_n;
|
||||
}
|
||||
|
||||
double sample_are_we_alone_logspace(uint64_t * seed)
|
||||
{
|
||||
double log_n = sample_fermi_logspace(seed);
|
||||
return ((log_n > 0) ? 1 : 0);
|
||||
// log_n > 0 => n > 1
|
||||
}
|
||||
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
// set randomness seed
|
||||
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||
*seed = 1001; // xorshift can't start with a seed of 0
|
||||
|
||||
double sample_fermi_logspace(uint64_t * seed)
|
||||
{
|
||||
// You can see a simple version of this function in naive.c in this same folder
|
||||
double log_rate_of_star_formation = sample_uniform(log(1), log(100), seed);
|
||||
double log_fraction_of_stars_with_planets = sample_uniform(log(0.1), log(1), seed);
|
||||
double log_number_of_habitable_planets_per_star_system = sample_uniform(log(0.1), log(1), seed);
|
||||
|
||||
double log_rate_of_life_formation_in_habitable_planets = sample_normal(1, 50, seed);
|
||||
double log_fraction_of_habitable_planets_in_which_any_life_appears;
|
||||
/*
|
||||
Consider:
|
||||
a = underlying normal
|
||||
b = rate_of_life_formation_in_habitable_planets = exp(underlying normal) = exp(a)
|
||||
c = 1 - exp(-b) = fraction_of_habitable_planets_in_which_any_life_appears
|
||||
d = log(c)
|
||||
|
||||
Looking at the Taylor expansion for c = 1 - exp(-b), it's
|
||||
b - b^2/2 + b^3/6 - x^b/24, etc.
|
||||
<https://www.wolframalpha.com/input?i=1-exp%28-x%29>
|
||||
When b ~ 0 (as is often the case), this is close to b.
|
||||
|
||||
But now, if b ~ 0, c ~ b
|
||||
and d = log(c) ~ log(b) = log(exp(a)) = a
|
||||
|
||||
Now, we could play around with estimating errors,
|
||||
and indeed if we want b^2/2 = exp(a)^2/2 < 10^(-n), i.e., to have n decimal digits of precision,
|
||||
we could compute this as e.g., a < (nlog(10) + log(2))/2
|
||||
so for example if we want ten digits of precision, that's a < -11
|
||||
|
||||
Empirically, the two numbers as calculated in C do become really close around 11 or so,
|
||||
and at 38 that calculation results in a -inf (so probably a floating point error or similar.)
|
||||
So we should be using that formula for somewhere between -38 << a < -11
|
||||
|
||||
I chose -16 as a happy medium after playing around with
|
||||
double invert(double x){
|
||||
return log(1-exp(-exp(-x)));
|
||||
}
|
||||
for(int i=0; i<64; i++){
|
||||
double j = i;
|
||||
printf("for %lf, log(1-exp(-exp(-x))) is calculated as... %lf\n", j, invert(j));
|
||||
}
|
||||
and <https://www.wolframalpha.com/input?i=log%281-exp%28-exp%28-16%29%29%29>
|
||||
*/
|
||||
if (log_rate_of_life_formation_in_habitable_planets < -16) {
|
||||
log_fraction_of_habitable_planets_in_which_any_life_appears = log_rate_of_life_formation_in_habitable_planets;
|
||||
} else {
|
||||
double rate_of_life_formation_in_habitable_planets = exp(log_rate_of_life_formation_in_habitable_planets);
|
||||
double fraction_of_habitable_planets_in_which_any_life_appears = -expm1(-rate_of_life_formation_in_habitable_planets);
|
||||
log_fraction_of_habitable_planets_in_which_any_life_appears = log(fraction_of_habitable_planets_in_which_any_life_appears);
|
||||
}
|
||||
|
||||
double log_fraction_of_planets_with_life_in_which_intelligent_life_appears = sample_uniform(log(0.001), log(1), seed);
|
||||
double log_fraction_of_intelligent_planets_which_are_detectable_as_such = sample_uniform(log(0.01), log(1), seed);
|
||||
double log_longevity_of_detectable_civilizations = sample_uniform(log(100), log(10000000000), seed);
|
||||
|
||||
double log_n =
|
||||
log_rate_of_star_formation +
|
||||
log_fraction_of_stars_with_planets +
|
||||
log_number_of_habitable_planets_per_star_system +
|
||||
log_fraction_of_habitable_planets_in_which_any_life_appears +
|
||||
log_fraction_of_planets_with_life_in_which_intelligent_life_appears +
|
||||
log_fraction_of_intelligent_planets_which_are_detectable_as_such +
|
||||
log_longevity_of_detectable_civilizations;
|
||||
return log_n;
|
||||
}
|
||||
|
||||
double sample_are_we_alone_logspace(uint64_t * seed)
|
||||
{
|
||||
double log_n = sample_fermi_logspace(seed);
|
||||
return ((log_n > 0) ? 1 : 0);
|
||||
// log_n > 0 => n > 1
|
||||
}
|
||||
|
||||
double logspace_fermi_proportion = 0;
|
||||
int n_samples = 1000 * 1000;
|
||||
for (int i = 0; i < n_samples; i++) {
|
||||
|
|
Binary file not shown.
|
@ -3,6 +3,10 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
double sample_model(uint64_t* seed){
|
||||
return sample_to(1, 10, seed);
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
// set randomness seed
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -3,6 +3,10 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
double sample_model(uint64_t * seed)
|
||||
{
|
||||
return sample_lognormal(0, 10, seed);
|
||||
}
|
||||
// Estimate functions
|
||||
int main()
|
||||
{
|
||||
|
@ -13,13 +17,9 @@ int main()
|
|||
|
||||
int n_samples = 1000 * 1000 * 1000;
|
||||
int n_threads = 16;
|
||||
double sampler(uint64_t * seed)
|
||||
{
|
||||
return sample_lognormal(0, 10, seed);
|
||||
}
|
||||
double* results = malloc((size_t)n_samples * sizeof(double));
|
||||
|
||||
sampler_parallel(sampler, results, n_threads, n_samples);
|
||||
sampler_parallel(sample_model, results, n_threads, n_samples);
|
||||
double avg = array_sum(results, n_samples) / n_samples;
|
||||
printf("Average of 1B lognormal(0,10): %f\n", avg);
|
||||
|
||||
|
|
Binary file not shown.
|
@ -3,7 +3,7 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int main()
|
||||
double sampler_result(uint64_t * seed)
|
||||
{
|
||||
double p_a = 0.8;
|
||||
double p_b = 0.5;
|
||||
|
@ -17,10 +17,11 @@ int main()
|
|||
int n_dists = 4;
|
||||
double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
|
||||
double (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many };
|
||||
double sampler_result(uint64_t * seed)
|
||||
{
|
||||
return sample_mixture(samplers, weights, n_dists, seed);
|
||||
}
|
||||
return sample_mixture(samplers, weights, n_dists, seed);
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
int n_samples = 1000 * 1000, n_threads = 16;
|
||||
double* results = malloc((size_t)n_samples * sizeof(double));
|
||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -63,6 +63,8 @@ void sampler_parallel(double (*sampler)(uint64_t* seed), double* results, int n_
|
|||
{
|
||||
#pragma omp for
|
||||
for (i = 0; i < n_threads; i++) {
|
||||
// It's possible I don't need the for, and could instead call omp
|
||||
// in some different way and get the thread number with omp_get_thread_num()
|
||||
int lower_bound_inclusive = i * quotient;
|
||||
int upper_bound_not_inclusive = ((i + 1) * quotient); // note the < in the for loop below,
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user