Revert "Revert "Merge branch 'master' into quickselect""
This reverts commit 4d218468cf
.
This commit is contained in:
parent
fb123dd14c
commit
58a329bcc3
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -15,8 +15,9 @@ int main()
|
||||||
uint64_t* seed = malloc(sizeof(uint64_t));
|
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
*seed = 1000; // xorshift can't start with 0
|
*seed = 1000; // xorshift can't start with 0
|
||||||
|
|
||||||
ci beta_1_2_ci_90 = get_90_confidence_interval(beta_1_2_sampler, seed);
|
ci beta_1_2_ci_90 = sampler_get_90_ci(beta_1_2_sampler, 1000000, seed);
|
||||||
printf("90%% confidence interval of beta(1,2) is [%f, %f]\n", beta_1_2_ci_90.low, beta_1_2_ci_90.high);
|
printf("90%% confidence interval of beta(1,2) is [%f, %f]\n", beta_1_2_ci_90.low, beta_1_2_ci_90.high);
|
||||||
|
printf("You can check this in <https://nunosempere.com/blog/2023/03/15/fit-beta/>\n");
|
||||||
|
|
||||||
free(seed);
|
free(seed);
|
||||||
}
|
}
|
||||||
|
|
Binary file not shown.
|
@ -60,7 +60,7 @@ int main()
|
||||||
}
|
}
|
||||||
printf("... ]\n");
|
printf("... ]\n");
|
||||||
|
|
||||||
ci ci_90 = get_90_confidence_interval(mixture, seed);
|
ci ci_90 = sampler_get_90_ci(mixture, 1000000, seed);
|
||||||
printf("mean: %f\n", array_mean(mixture_result, n));
|
printf("mean: %f\n", array_mean(mixture_result, n));
|
||||||
printf("90%% confidence interval: [%f, %f]\n", ci_90.low, ci_90.high);
|
printf("90%% confidence interval: [%f, %f]\n", ci_90.low, ci_90.high);
|
||||||
|
|
||||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -41,7 +41,7 @@ int main()
|
||||||
}
|
}
|
||||||
printf("... ]\n");
|
printf("... ]\n");
|
||||||
|
|
||||||
ci ci_90 = get_90_confidence_interval(sample_minutes_per_day_jumping_rope_needed_to_burn_10kg, seed);
|
ci ci_90 = sampler_get_90_ci(sample_minutes_per_day_jumping_rope_needed_to_burn_10kg, 1000000, seed);
|
||||||
printf("90%% confidence interval: [%f, %f]\n", ci_90.low, ci_90.high);
|
printf("90%% confidence interval: [%f, %f]\n", ci_90.low, ci_90.high);
|
||||||
|
|
||||||
free(seed);
|
free(seed);
|
||||||
|
|
Binary file not shown.
|
@ -50,7 +50,7 @@ int main()
|
||||||
|
|
||||||
// Before a first nuclear collapse
|
// Before a first nuclear collapse
|
||||||
printf("## Before the first nuclear collapse\n");
|
printf("## Before the first nuclear collapse\n");
|
||||||
ci ci_90_2023 = get_90_confidence_interval(yearly_probability_nuclear_collapse_2023, seed);
|
ci ci_90_2023 = sampler_get_90_ci(yearly_probability_nuclear_collapse_2023, 1000000, seed);
|
||||||
printf("90%% confidence interval: [%f, %f]\n", ci_90_2023.low, ci_90_2023.high);
|
printf("90%% confidence interval: [%f, %f]\n", ci_90_2023.low, ci_90_2023.high);
|
||||||
|
|
||||||
double* yearly_probability_nuclear_collapse_2023_samples = malloc(sizeof(double) * num_samples);
|
double* yearly_probability_nuclear_collapse_2023_samples = malloc(sizeof(double) * num_samples);
|
||||||
|
@ -61,7 +61,7 @@ int main()
|
||||||
|
|
||||||
// After the first nuclear collapse
|
// After the first nuclear collapse
|
||||||
printf("\n## After the first nuclear collapse\n");
|
printf("\n## After the first nuclear collapse\n");
|
||||||
ci ci_90_2070 = get_90_confidence_interval(yearly_probability_nuclear_collapse_after_recovery_example, seed);
|
ci ci_90_2070 = sampler_get_90_ci(yearly_probability_nuclear_collapse_after_recovery_example, 1000000, seed);
|
||||||
printf("90%% confidence interval: [%f, %f]\n", ci_90_2070.low, ci_90_2070.high);
|
printf("90%% confidence interval: [%f, %f]\n", ci_90_2070.low, ci_90_2070.high);
|
||||||
|
|
||||||
double* yearly_probability_nuclear_collapse_after_recovery_samples = malloc(sizeof(double) * num_samples);
|
double* yearly_probability_nuclear_collapse_after_recovery_samples = malloc(sizeof(double) * num_samples);
|
||||||
|
@ -72,7 +72,7 @@ int main()
|
||||||
|
|
||||||
// After the first nuclear collapse (antiinductive)
|
// After the first nuclear collapse (antiinductive)
|
||||||
printf("\n## After the first nuclear collapse (antiinductive)\n");
|
printf("\n## After the first nuclear collapse (antiinductive)\n");
|
||||||
ci ci_90_antiinductive = get_90_confidence_interval(yearly_probability_nuclear_collapse_after_recovery_antiinductive, seed);
|
ci ci_90_antiinductive = sampler_get_90_ci(yearly_probability_nuclear_collapse_after_recovery_antiinductive, 1000000, seed);
|
||||||
printf("90%% confidence interval: [%f, %f]\n", ci_90_antiinductive.low, ci_90_antiinductive.high);
|
printf("90%% confidence interval: [%f, %f]\n", ci_90_antiinductive.low, ci_90_antiinductive.high);
|
||||||
|
|
||||||
double* yearly_probability_nuclear_collapse_after_recovery_antiinductive_samples = malloc(sizeof(double) * num_samples);
|
double* yearly_probability_nuclear_collapse_after_recovery_antiinductive_samples = malloc(sizeof(double) * num_samples);
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -9,21 +9,22 @@ int main()
|
||||||
// set randomness seed
|
// set randomness seed
|
||||||
// uint64_t* seed = malloc(sizeof(uint64_t));
|
// uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
// *seed = 1000; // xorshift can't start with 0
|
// *seed = 1000; // xorshift can't start with 0
|
||||||
// ^ not necessary, because parallel_sampler takes care of the seed.
|
// ^ not necessary, because sampler_parallel takes care of the seed.
|
||||||
|
|
||||||
int n_samples = 1000 * 1000 * 1000;
|
int n_samples = 1000 * 1000 * 1000;
|
||||||
int n_threads = 16;
|
int n_threads = 16;
|
||||||
double sampler(uint64_t* seed){
|
double sampler(uint64_t * seed)
|
||||||
|
{
|
||||||
return sample_lognormal(0, 10, seed);
|
return sample_lognormal(0, 10, seed);
|
||||||
}
|
}
|
||||||
double* results = malloc(n_samples * sizeof(double));
|
double* results = malloc(n_samples * sizeof(double));
|
||||||
|
|
||||||
parallel_sampler(sampler, results, n_threads, n_samples);
|
sampler_parallel(sampler, results, n_threads, n_samples);
|
||||||
double avg = array_sum(results, n_samples) / n_samples;
|
double avg = array_sum(results, n_samples) / n_samples;
|
||||||
printf("Average of 1B lognormal(0,10): %f", avg);
|
printf("Average of 1B lognormal(0,10): %f", avg);
|
||||||
|
|
||||||
free(results);
|
free(results);
|
||||||
|
|
||||||
// free(seed);
|
// free(seed);
|
||||||
// ^ not necessary, because parallel_sampler takes care of the seed.
|
// ^ not necessary, because sampler_parallel takes care of the seed.
|
||||||
}
|
}
|
||||||
|
|
Binary file not shown.
|
@ -17,13 +17,14 @@ int main()
|
||||||
int n_dists = 4;
|
int n_dists = 4;
|
||||||
double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
|
double weights[] = { 1 - p_c, p_c / 2, p_c / 4, p_c / 4 };
|
||||||
double (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many };
|
double (*samplers[])(uint64_t*) = { sample_0, sample_1, sample_few, sample_many };
|
||||||
double sampler_result(uint64_t* seed) {
|
double sampler_result(uint64_t * seed)
|
||||||
|
{
|
||||||
return sample_mixture(samplers, weights, n_dists, seed);
|
return sample_mixture(samplers, weights, n_dists, seed);
|
||||||
}
|
}
|
||||||
|
|
||||||
int n_samples = 1000 * 1000, n_threads = 16;
|
int n_samples = 1000 * 1000, n_threads = 16;
|
||||||
double* results = malloc(n_samples * sizeof(double));
|
double* results = malloc(n_samples * sizeof(double));
|
||||||
parallel_sampler(sampler_result, results, n_threads, n_samples);
|
sampler_parallel(sampler_result, results, n_threads, n_samples);
|
||||||
printf("Avg: %f\n", array_sum(results, n_samples) / n_samples);
|
printf("Avg: %f\n", array_sum(results, n_samples) / n_samples);
|
||||||
free(results);
|
free(results);
|
||||||
}
|
}
|
||||||
|
|
Binary file not shown.
|
@ -13,9 +13,10 @@ int main()
|
||||||
|
|
||||||
/* Option 1: parallelize taking from n samples */
|
/* Option 1: parallelize taking from n samples */
|
||||||
// Question being asked: what is the distribution of sampling 1000 times and taking the min?
|
// Question being asked: what is the distribution of sampling 1000 times and taking the min?
|
||||||
double sample_min_of_n(uint64_t* seed, int n){
|
double sample_min_of_n(uint64_t * seed, int n)
|
||||||
|
{
|
||||||
double min = sample_normal(5, 2, seed);
|
double min = sample_normal(5, 2, seed);
|
||||||
for(int i=0; i<(n-1); i++){
|
for (int i = 0; i < (n - 2); i++) {
|
||||||
double sample = sample_normal(5, 2, seed);
|
double sample = sample_normal(5, 2, seed);
|
||||||
if (sample < min) {
|
if (sample < min) {
|
||||||
min = sample;
|
min = sample;
|
||||||
|
@ -23,47 +24,47 @@ int main()
|
||||||
}
|
}
|
||||||
return min;
|
return min;
|
||||||
}
|
}
|
||||||
double sampler_min_of_1000(uint64_t* seed) {
|
double sample_min_of_1000(uint64_t * seed)
|
||||||
|
{
|
||||||
return sample_min_of_n(seed, 1000);
|
return sample_min_of_n(seed, 1000);
|
||||||
}
|
}
|
||||||
|
|
||||||
int n_samples = 10000, n_threads = 16;
|
int n_samples = 1000000, n_threads = 16;
|
||||||
double* results = malloc(n_samples * sizeof(double));
|
double* results = malloc(n_samples * sizeof(double));
|
||||||
parallel_sampler(sampler_min_of_1000, results, n_threads, n_samples);
|
sampler_parallel(sample_min_of_1000, results, n_threads, n_samples);
|
||||||
printf("Mean of the distribution of (taking the min of 1000 samples of a normal(5,2)): %f\n", array_mean(results, n_samples));
|
printf("Mean of the distribution of (taking the min of 1000 samples of a normal(5,2)): %f\n", array_mean(results, n_samples));
|
||||||
free(results);
|
free(results);
|
||||||
|
|
||||||
/* Option 2: take the min from n samples cleverly using parallelism */
|
/* Option 2: take the min from n samples cleverly using parallelism */
|
||||||
// Question being asked: can we take the min of n samples cleverly?
|
// Question being asked: can we take the min of n samples cleverly?
|
||||||
double sample_n_parallel(int n){
|
double sample_n_parallel(int n)
|
||||||
|
{
|
||||||
|
|
||||||
int n_threads = 16;
|
int n_threads = 16;
|
||||||
int quotient = n / 16;
|
int quotient = n / 16;
|
||||||
int remainder = n % 16;
|
int remainder = n % 16;
|
||||||
|
|
||||||
uint64_t seed = 100;
|
uint64_t seed = 1000;
|
||||||
double result_remainder = sample_min_of_n(&seed, remainder);
|
double result_remainder = sample_min_of_n(&seed, remainder);
|
||||||
|
|
||||||
double sample_min_of_quotient(uint64_t* seed) {
|
double sample_min_of_quotient(uint64_t * seed)
|
||||||
double result = sample_min_of_n(seed, quotient);
|
{
|
||||||
// printf("Result: %f\n", result);
|
return sample_min_of_n(seed, quotient);
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
double* results = malloc(n_threads * sizeof(double));
|
double* results_quotient = malloc(quotient * sizeof(double));
|
||||||
parallel_sampler(sample_min_of_quotient, results, n_threads, n_threads);
|
sampler_parallel(sample_min_of_quotient, results_quotient, n_threads, quotient);
|
||||||
|
|
||||||
double min = results[0];
|
double min = results_quotient[0];
|
||||||
for(int i=1; i<n_threads; i++){
|
for (int i = 1; i < quotient; i++) {
|
||||||
if(min > results[i]){
|
if (min > results_quotient[i]) {
|
||||||
min = results[i];
|
min = results_quotient[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (min > result_remainder) {
|
if (min > result_remainder) {
|
||||||
min = result_remainder;
|
min = result_remainder;
|
||||||
}
|
}
|
||||||
free(results);
|
free(results_quotient);
|
||||||
return min;
|
return min;
|
||||||
}
|
}
|
||||||
printf("Minimum of 10M samples of normal(5,2): %f\n", sample_n_parallel(1000 * 1000));
|
printf("Minimum of 1M samples of normal(5,2): %f\n", sample_n_parallel(1000000));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
BIN
examples/more/14_check_confidence_interval/example
Executable file
BIN
examples/more/14_check_confidence_interval/example
Executable file
Binary file not shown.
21
examples/more/14_check_confidence_interval/example.c
Normal file
21
examples/more/14_check_confidence_interval/example.c
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
#include "../../../squiggle.h"
|
||||||
|
#include "../../../squiggle_more.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
// set randomness seed
|
||||||
|
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
|
*seed = 1000; // xorshift can't start with a seed of 0
|
||||||
|
|
||||||
|
int n = 1000000;
|
||||||
|
double* xs = malloc(sizeof(double) * n);
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
xs[i] = sample_to(10, 100, seed);
|
||||||
|
}
|
||||||
|
ci ci_90 = array_get_90_ci(xs, n);
|
||||||
|
printf("Recovering confidence interval of sample_to(10, 100):\n low: %f, high: %f\n", ci_90.low, ci_90.high);
|
||||||
|
|
||||||
|
free(seed);
|
||||||
|
}
|
|
@ -49,6 +49,7 @@ all:
|
||||||
$(CC) $(OPTIMIZED) $(DEBUG) 11_billion_lognormals_paralell/$(SRC) $(DEPS) -o 11_billion_lognormals_paralell/$(OUTPUT)
|
$(CC) $(OPTIMIZED) $(DEBUG) 11_billion_lognormals_paralell/$(SRC) $(DEPS) -o 11_billion_lognormals_paralell/$(OUTPUT)
|
||||||
$(CC) $(OPTIMIZED) $(DEBUG) 12_time_to_botec_parallel/$(SRC) $(DEPS) -o 12_time_to_botec_parallel/$(OUTPUT)
|
$(CC) $(OPTIMIZED) $(DEBUG) 12_time_to_botec_parallel/$(SRC) $(DEPS) -o 12_time_to_botec_parallel/$(OUTPUT)
|
||||||
$(CC) $(OPTIMIZED) $(DEBUG) 13_parallelize_min/$(SRC) $(DEPS) -o 13_parallelize_min/$(OUTPUT)
|
$(CC) $(OPTIMIZED) $(DEBUG) 13_parallelize_min/$(SRC) $(DEPS) -o 13_parallelize_min/$(OUTPUT)
|
||||||
|
$(CC) $(OPTIMIZED) $(DEBUG) 14_check_confidence_interval/$(SRC) $(DEPS) -o 14_check_confidence_interval/$(OUTPUT)
|
||||||
|
|
||||||
format-all:
|
format-all:
|
||||||
$(FORMATTER) 00_example_template/$(SRC)
|
$(FORMATTER) 00_example_template/$(SRC)
|
||||||
|
@ -65,6 +66,7 @@ format-all:
|
||||||
$(FORMATTER) 11_billion_lognormals_paralell/$(SRC)
|
$(FORMATTER) 11_billion_lognormals_paralell/$(SRC)
|
||||||
$(FORMATTER) 12_time_to_botec_parallel/$(SRC)
|
$(FORMATTER) 12_time_to_botec_parallel/$(SRC)
|
||||||
$(FORMATTER) 13_parallelize_min/$(SRC)
|
$(FORMATTER) 13_parallelize_min/$(SRC)
|
||||||
|
$(FORMATTER) 14_check_confidence_interval/$(SRC)
|
||||||
|
|
||||||
run-all:
|
run-all:
|
||||||
00_example_template/$(OUTPUT)
|
00_example_template/$(OUTPUT)
|
||||||
|
@ -81,6 +83,7 @@ run-all:
|
||||||
11_billion_lognormals_paralell/$(OUTPUT)
|
11_billion_lognormals_paralell/$(OUTPUT)
|
||||||
12_time_to_botec_parallel/$(OUTPUT)
|
12_time_to_botec_parallel/$(OUTPUT)
|
||||||
13_parallelize_min/$(OUTPUT)
|
13_parallelize_min/$(OUTPUT)
|
||||||
|
14_check_confidence_interval/$(OUTPUT)
|
||||||
|
|
||||||
## make one DIR=06_nuclear_recovery
|
## make one DIR=06_nuclear_recovery
|
||||||
one: $(DIR)/$(SRC)
|
one: $(DIR)/$(SRC)
|
||||||
|
|
4
makefile
4
makefile
|
@ -13,8 +13,8 @@ format-examples:
|
||||||
cd examples/more && make format-all
|
cd examples/more && make format-all
|
||||||
|
|
||||||
format: squiggle.c squiggle.h
|
format: squiggle.c squiggle.h
|
||||||
$(FORMATTER) squiggle.c
|
$(FORMATTER) squiggle.c squiggle.h
|
||||||
$(FORMATTER) squiggle.h
|
$(FORMATTER) squiggle_more.c squiggle_more.h
|
||||||
|
|
||||||
lint:
|
lint:
|
||||||
clang-tidy squiggle.c -- -lm
|
clang-tidy squiggle.c -- -lm
|
||||||
|
|
|
@ -1,27 +0,0 @@
|
||||||
|
|
||||||
uint64_t xorshift64(uint64_t* seed)
|
|
||||||
{
|
|
||||||
// Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
|
|
||||||
// <https://en.wikipedia.org/wiki/Xorshift>
|
|
||||||
uint64_t x = *seed;
|
|
||||||
x ^= x << 13;
|
|
||||||
x ^= x >> 7;
|
|
||||||
x ^= x << 17;
|
|
||||||
return *seed = x;
|
|
||||||
}
|
|
||||||
|
|
||||||
double sample_unit_uniform(uint64_t* seed)
|
|
||||||
{
|
|
||||||
// samples uniform from [0,1] interval.
|
|
||||||
return ((double)xorshift64(seed)) / ((double)UINT64_MAX);
|
|
||||||
}
|
|
||||||
|
|
||||||
double sample_unit_normal(uint64_t* seed)
|
|
||||||
{
|
|
||||||
// // See: <https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform>
|
|
||||||
double u1 = sample_unit_uniform(seed);
|
|
||||||
double u2 = sample_unit_uniform(seed);
|
|
||||||
double z = sqrtf(-2.0 * log(u1)) * sin(2 * PI * u2);
|
|
||||||
return z;
|
|
||||||
}
|
|
||||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -10,25 +10,14 @@ int main()
|
||||||
// set randomness seed
|
// set randomness seed
|
||||||
uint64_t* seed = malloc(sizeof(uint64_t));
|
uint64_t* seed = malloc(sizeof(uint64_t));
|
||||||
*seed = 1000; // xorshift can't start with a seed of 0
|
*seed = 1000; // xorshift can't start with a seed of 0
|
||||||
/*
|
|
||||||
for (int i = 0; i < 100; i++) {
|
|
||||||
double draw = sample_unit_uniform(seed);
|
|
||||||
printf("%f\n", draw);
|
|
||||||
|
|
||||||
}*/
|
int n = 1000000;
|
||||||
// Test division
|
double* xs = malloc(sizeof(double) * n);
|
||||||
// printf("\n%d\n", 10 % 3);
|
for (int i = 0; i < n; i++) {
|
||||||
//
|
xs[i] = sample_to(10, 100, seed);
|
||||||
|
|
||||||
int n_samples = 100, n_threads = 16;
|
|
||||||
double* results = malloc(n_samples * sizeof(double));
|
|
||||||
double sampler_scratchpad(uint64_t* seed){
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
parallel_sampler(sampler_scratchpad, results, n_threads, n_samples);
|
|
||||||
for(int i=0; i<n_samples; i++){
|
|
||||||
printf("Sample %d: %f\n", i, results[i]);
|
|
||||||
}
|
}
|
||||||
|
ci ci_90 = array_get_90_ci(xs, n);
|
||||||
|
printf("Recovering confidence interval of sample_to(10, 100):\n low: %f, high: %f\n", ci_90.low, ci_90.high);
|
||||||
|
|
||||||
free(seed);
|
free(seed);
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
#define NORMAL90CONFIDENCE 1.6448536269514727
|
#define NORMAL90CONFIDENCE 1.6448536269514727
|
||||||
|
|
||||||
// Pseudo Random number generator
|
// Pseudo Random number generator
|
||||||
uint64_t xorshift32(uint32_t* seed)
|
static uint64_t xorshift32(uint32_t* seed)
|
||||||
{
|
{
|
||||||
// Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
|
// Algorithm "xor" from p. 4 of Marsaglia, "Xorshift RNGs"
|
||||||
// See:
|
// See:
|
||||||
|
@ -24,7 +24,7 @@ uint64_t xorshift32(uint32_t* seed)
|
||||||
return *seed = x;
|
return *seed = x;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t xorshift64(uint64_t* seed)
|
static uint64_t xorshift64(uint64_t* seed)
|
||||||
{
|
{
|
||||||
// same as above, but for generating doubles instead of floats
|
// same as above, but for generating doubles instead of floats
|
||||||
uint64_t x = *seed;
|
uint64_t x = *seed;
|
||||||
|
|
313
squiggle_more.c
313
squiggle_more.c
|
@ -1,67 +1,195 @@
|
||||||
|
#include "squiggle.h"
|
||||||
#include <float.h>
|
#include <float.h>
|
||||||
#include <math.h>
|
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
|
#include <math.h>
|
||||||
#include <omp.h>
|
#include <omp.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include "squiggle.h"
|
|
||||||
|
|
||||||
/* Math constants */
|
/* Parallel sampler */
|
||||||
#define PI 3.14159265358979323846 // M_PI in gcc gnu99
|
void sampler_parallel(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples)
|
||||||
#define NORMAL90CONFIDENCE 1.6448536269514727
|
{
|
||||||
|
if ((n_samples % n_threads) != 0) {
|
||||||
|
fprintf(stderr, "Number of samples isn't divisible by number of threads, aborting\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
uint64_t** seeds = malloc(n_threads * sizeof(uint64_t*));
|
||||||
|
for (uint64_t i = 0; i < n_threads; i++) {
|
||||||
|
seeds[i] = malloc(sizeof(uint64_t));
|
||||||
|
*seeds[i] = i + 1; // xorshift can't start with 0
|
||||||
|
}
|
||||||
|
|
||||||
/* Some error niceties */
|
int i;
|
||||||
// These won't be used until later
|
#pragma omp parallel private(i)
|
||||||
#define MAX_ERROR_LENGTH 500
|
{
|
||||||
#define EXIT_ON_ERROR 0
|
#pragma omp for
|
||||||
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
|
for (i = 0; i < n_threads; i++) {
|
||||||
|
int lower_bound = i * (n_samples / n_threads);
|
||||||
|
int upper_bound = ((i + 1) * (n_samples / n_threads)) - 1;
|
||||||
|
// printf("Lower bound: %d, upper bound: %d\n", lower_bound, upper_bound);
|
||||||
|
for (int j = lower_bound; j < upper_bound; j++) {
|
||||||
|
results[j] = sampler(seeds[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint64_t i = 0; i < n_threads; i++) {
|
||||||
|
free(seeds[i]);
|
||||||
|
}
|
||||||
|
free(seeds);
|
||||||
|
}
|
||||||
|
|
||||||
/* Get confidence intervals, given a sampler */
|
/* Get confidence intervals, given a sampler */
|
||||||
// Not in core yet because I'm not sure how much I like the struct
|
// Not in core yet because I'm not sure how much I like the struct
|
||||||
// and the built-in 100k samples
|
// and the built-in 100k samples
|
||||||
// to do: add n to function parameters and document
|
// to do: add n to function parameters and document
|
||||||
|
|
||||||
typedef struct ci_t {
|
typedef struct ci_t {
|
||||||
float low;
|
double low;
|
||||||
float high;
|
double high;
|
||||||
} ci;
|
} ci;
|
||||||
int compare_doubles(const void* p, const void* q)
|
|
||||||
|
static void swp(int i, int j, double xs[])
|
||||||
{
|
{
|
||||||
// https://wikiless.esmailelbob.xyz/wiki/Qsort?lang=en
|
double tmp = xs[i];
|
||||||
double x = *(const double*)p;
|
xs[i] = xs[j];
|
||||||
double y = *(const double*)q;
|
xs[j] = tmp;
|
||||||
|
|
||||||
/* Avoid returning x - y, which can cause undefined behaviour
|
|
||||||
because of signed integer overflow. */
|
|
||||||
if (x < y)
|
|
||||||
return -1; // Return -1 if you want ascending, 1 if you want descending order.
|
|
||||||
else if (x > y)
|
|
||||||
return 1; // Return 1 if you want ascending, -1 if you want descending order.
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
ci get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed)
|
|
||||||
|
static int partition(int low, int high, double xs[], int length)
|
||||||
{
|
{
|
||||||
int n = 100 * 1000;
|
// To understand this function:
|
||||||
double* samples_array = malloc(n * sizeof(double));
|
// - see the note after gt variable definition
|
||||||
for (int i = 0; i < n; i++) {
|
// - go to commit 578bfa27 and the scratchpad/ folder in it, which has printfs sprinkled throughout
|
||||||
samples_array[i] = sampler(seed);
|
int pivot = low + floor((high - low) / 2);
|
||||||
|
double pivot_value = xs[pivot];
|
||||||
|
swp(pivot, high, xs);
|
||||||
|
int gt = low; /* This pointer will iterate until finding an element which is greater than the pivot. Then it will move elements that are smaller before it--more specifically, it will move elements to its position and then increment. As a result all elements between gt and i will be greater than the pivot. */
|
||||||
|
for (int i = low; i < high; i++) {
|
||||||
|
if (xs[i] < pivot_value) {
|
||||||
|
swp(gt, i, xs);
|
||||||
|
gt++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
swp(high, gt, xs);
|
||||||
|
return gt;
|
||||||
}
|
}
|
||||||
qsort(samples_array, n, sizeof(double), compare_doubles);
|
|
||||||
|
|
||||||
|
static double quickselect(int k, double xs[], int n)
|
||||||
|
{
|
||||||
|
// https://en.wikipedia.org/wiki/Quickselect
|
||||||
|
int low = 0;
|
||||||
|
int high = n - 1;
|
||||||
|
for (;;) {
|
||||||
|
if (low == high) {
|
||||||
|
return xs[low];
|
||||||
|
}
|
||||||
|
int pivot = partition(low, high, xs, n);
|
||||||
|
if (pivot == k) {
|
||||||
|
return xs[pivot];
|
||||||
|
} else if (k < pivot) {
|
||||||
|
high = pivot - 1;
|
||||||
|
} else {
|
||||||
|
low = pivot + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ci array_get_ci(ci interval, double* xs, int n)
|
||||||
|
{
|
||||||
|
|
||||||
|
int low_k = floor(interval.low * n);
|
||||||
|
int high_k = ceil(interval.high * n);
|
||||||
ci result = {
|
ci result = {
|
||||||
.low = samples_array[5000],
|
.low = quickselect(low_k, xs, n),
|
||||||
.high = samples_array[94999],
|
.high = quickselect(high_k, xs, n),
|
||||||
};
|
};
|
||||||
free(samples_array);
|
return result;
|
||||||
|
}
|
||||||
|
ci array_get_90_ci(double xs[], int n)
|
||||||
|
{
|
||||||
|
return array_get_ci((ci) { .low = 0.05, .high = 0.95 }, xs, n);
|
||||||
|
}
|
||||||
|
|
||||||
|
ci sampler_get_ci(ci interval, double (*sampler)(uint64_t*), int n, uint64_t* seed)
|
||||||
|
{
|
||||||
|
double* xs = malloc(n * sizeof(double));
|
||||||
|
for (int i = 0; i < n; i++) {
|
||||||
|
xs[i] = sampler(seed);
|
||||||
|
}
|
||||||
|
ci result = array_get_ci(interval, xs, n);
|
||||||
|
free(xs);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
ci sampler_get_90_ci(double (*sampler)(uint64_t*), int n, uint64_t* seed)
|
||||||
|
{
|
||||||
|
return sampler_get_ci((ci) { .low = 0.05, .high = 0.95 }, sampler, n, seed);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Algebra manipulations */
|
||||||
|
// here I discover named structs,
|
||||||
|
// which mean that I don't have to be typing
|
||||||
|
// struct blah all the time.
|
||||||
|
|
||||||
|
#define NORMAL90CONFIDENCE 1.6448536269514727
|
||||||
|
|
||||||
|
typedef struct normal_params_t {
|
||||||
|
double mean;
|
||||||
|
double std;
|
||||||
|
} normal_params;
|
||||||
|
|
||||||
|
normal_params algebra_sum_normals(normal_params a, normal_params b)
|
||||||
|
{
|
||||||
|
normal_params result = {
|
||||||
|
.mean = a.mean + b.mean,
|
||||||
|
.std = sqrt((a.std * a.std) + (b.std * b.std)),
|
||||||
|
};
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef struct lognormal_params_t {
|
||||||
|
double logmean;
|
||||||
|
double logstd;
|
||||||
|
} lognormal_params;
|
||||||
|
|
||||||
|
lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params b)
|
||||||
|
{
|
||||||
|
lognormal_params result = {
|
||||||
|
.logmean = a.logmean + b.logmean,
|
||||||
|
.logstd = sqrt((a.logstd * a.logstd) + (b.logstd * b.logstd)),
|
||||||
|
};
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
lognormal_params convert_ci_to_lognormal_params(ci x)
|
||||||
|
{
|
||||||
|
double loghigh = logf(x.high);
|
||||||
|
double loglow = logf(x.low);
|
||||||
|
double logmean = (loghigh + loglow) / 2.0;
|
||||||
|
double logstd = (loghigh - loglow) / (2.0 * NORMAL90CONFIDENCE);
|
||||||
|
lognormal_params result = { .logmean = logmean, .logstd = logstd };
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
ci convert_lognormal_params_to_ci(lognormal_params y)
|
||||||
|
{
|
||||||
|
double h = y.logstd * NORMAL90CONFIDENCE;
|
||||||
|
double loghigh = y.logmean + h;
|
||||||
|
double loglow = y.logmean - h;
|
||||||
|
ci result = { .low = exp(loglow), .high = exp(loghigh) };
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Scaffolding to handle errors */
|
/* Scaffolding to handle errors */
|
||||||
// We are building towards sample from an arbitrary cdf
|
// We will sample from an arbitrary cdf
|
||||||
// and that operation might fail
|
// and that operation might fail
|
||||||
// so we build some scaffolding here
|
// so we build some scaffolding here
|
||||||
|
|
||||||
|
#define MAX_ERROR_LENGTH 500
|
||||||
|
#define EXIT_ON_ERROR 0
|
||||||
|
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
|
||||||
|
|
||||||
struct box {
|
struct box {
|
||||||
int empty;
|
int empty;
|
||||||
double content;
|
double content;
|
||||||
|
@ -253,115 +381,14 @@ double sampler_cdf_danger(struct box cdf(double), uint64_t* seed)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Algebra manipulations */
|
/* array print: potentially useful for debugging */
|
||||||
// here I discover named structs,
|
|
||||||
// which mean that I don't have to be typing
|
|
||||||
// struct blah all the time.
|
|
||||||
typedef struct normal_params_t {
|
|
||||||
double mean;
|
|
||||||
double std;
|
|
||||||
} normal_params;
|
|
||||||
|
|
||||||
normal_params algebra_sum_normals(normal_params a, normal_params b)
|
void array_print(double xs[], int n)
|
||||||
{
|
{
|
||||||
normal_params result = {
|
printf("[");
|
||||||
.mean = a.mean + b.mean,
|
for (int i = 0; i < n - 1; i++) {
|
||||||
.std = sqrt((a.std * a.std) + (b.std * b.std)),
|
printf("%f, ", xs[i]);
|
||||||
};
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
printf("%f", xs[n - 1]);
|
||||||
typedef struct lognormal_params_t {
|
printf("]\n");
|
||||||
double logmean;
|
|
||||||
double logstd;
|
|
||||||
} lognormal_params;
|
|
||||||
|
|
||||||
lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params b)
|
|
||||||
{
|
|
||||||
lognormal_params result = {
|
|
||||||
.logmean = a.logmean + b.logmean,
|
|
||||||
.logstd = sqrt((a.logstd * a.logstd) + (b.logstd * b.logstd)),
|
|
||||||
};
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
lognormal_params convert_ci_to_lognormal_params(ci x)
|
|
||||||
{
|
|
||||||
double loghigh = logf(x.high);
|
|
||||||
double loglow = logf(x.low);
|
|
||||||
double logmean = (loghigh + loglow) / 2.0;
|
|
||||||
double logstd = (loghigh - loglow) / (2.0 * NORMAL90CONFIDENCE);
|
|
||||||
lognormal_params result = { .logmean = logmean, .logstd = logstd };
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
ci convert_lognormal_params_to_ci(lognormal_params y)
|
|
||||||
{
|
|
||||||
double h = y.logstd * NORMAL90CONFIDENCE;
|
|
||||||
double loghigh = y.logmean + h;
|
|
||||||
double loglow = y.logmean - h;
|
|
||||||
ci result = { .low = exp(loglow), .high = exp(loghigh) };
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Parallel sampler */
|
|
||||||
void parallel_sampler(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples){
|
|
||||||
|
|
||||||
// Division terminology:
|
|
||||||
// a = b * quotient + reminder
|
|
||||||
// a = (a/b)*b + (a%b)
|
|
||||||
// dividend: a
|
|
||||||
// divisor: b
|
|
||||||
// quotient = a / b
|
|
||||||
// reminder = a % b
|
|
||||||
// "divisor's multiple" := (a/b)*b
|
|
||||||
|
|
||||||
// now, we have n_samples and n_threads
|
|
||||||
// to make our life easy, each thread will have a number of samples of: a/b (quotient)
|
|
||||||
// and we'll compute the remainder of samples separately
|
|
||||||
// to possibly do by Jorge: improve so that the remainder is included in the threads
|
|
||||||
|
|
||||||
int quotient = n_samples / n_threads;
|
|
||||||
int remainder = n_samples % n_threads;
|
|
||||||
int divisor_multiple = quotient * n_threads;
|
|
||||||
|
|
||||||
uint64_t** seeds = malloc(n_threads * sizeof(uint64_t*));
|
|
||||||
// printf("UINT64_MAX: %lu\n", UINT64_MAX);
|
|
||||||
srand(1);
|
|
||||||
for (uint64_t i = 0; i < n_threads; i++) {
|
|
||||||
seeds[i] = malloc(sizeof(uint64_t));
|
|
||||||
// Constraints:
|
|
||||||
// - xorshift can't start with 0
|
|
||||||
// - the seeds should be reasonably separated and not correlated
|
|
||||||
*seeds[i] = (uint64_t) rand() * (UINT64_MAX / RAND_MAX);
|
|
||||||
// printf("#%ld: %lu\n",i, *seeds[i]);
|
|
||||||
|
|
||||||
// Other initializations tried:
|
|
||||||
// *seeds[i] = 1 + i;
|
|
||||||
// *seeds[i] = (i + 0.5)*(UINT64_MAX/n_threads);
|
|
||||||
// *seeds[i] = (i + 0.5)*(UINT64_MAX/n_threads) + constant * i;
|
|
||||||
}
|
|
||||||
|
|
||||||
int i;
|
|
||||||
#pragma omp parallel private(i)
|
|
||||||
{
|
|
||||||
#pragma omp for
|
|
||||||
for (i = 0; i < n_threads; i++) {
|
|
||||||
int lower_bound_inclusive = i * quotient;
|
|
||||||
int upper_bound_not_inclusive = ((i+1) * quotient); // note the < in the for loop below,
|
|
||||||
// printf("Lower bound: %d, upper bound: %d\n", lower_bound, upper_bound);
|
|
||||||
for (int j = lower_bound_inclusive; j < upper_bound_not_inclusive; j++) {
|
|
||||||
results[j] = sampler(seeds[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for(int j=divisor_multiple; j<n_samples; j++){
|
|
||||||
results[j] = sampler(seeds[0]);
|
|
||||||
// we can just reuse a seed, this isn't problematic because we are not doing multithreading
|
|
||||||
}
|
|
||||||
|
|
||||||
for (uint64_t i = 0; i < n_threads; i++) {
|
|
||||||
free(seeds[i]);
|
|
||||||
}
|
|
||||||
free(seeds);
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,35 +1,20 @@
|
||||||
#ifndef SQUIGGLE_C_EXTRA
|
#ifndef SQUIGGLE_C_EXTRA
|
||||||
#define SQUIGGLE_C_EXTRA
|
#define SQUIGGLE_C_EXTRA
|
||||||
|
|
||||||
// Box
|
/* Parallel sampling */
|
||||||
struct box {
|
void sampler_parallel(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples);
|
||||||
int empty;
|
|
||||||
double content;
|
|
||||||
char* error_msg;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Macros to handle errors
|
/* Get 90% confidence interval */
|
||||||
#define MAX_ERROR_LENGTH 500
|
|
||||||
#define EXIT_ON_ERROR 0
|
|
||||||
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
|
|
||||||
struct box process_error(const char* error_msg, int should_exit, char* file, int line);
|
|
||||||
|
|
||||||
// Inverse cdf
|
|
||||||
struct box inverse_cdf_double(double cdf(double), double p);
|
|
||||||
struct box inverse_cdf_box(struct box cdf_box(double), double p);
|
|
||||||
|
|
||||||
// Samplers from cdf
|
|
||||||
struct box sampler_cdf_double(double cdf(double), uint64_t* seed);
|
|
||||||
struct box sampler_cdf_box(struct box cdf(double), uint64_t* seed);
|
|
||||||
|
|
||||||
// Get 90% confidence interval
|
|
||||||
typedef struct ci_t {
|
typedef struct ci_t {
|
||||||
float low;
|
double low;
|
||||||
float high;
|
double high;
|
||||||
} ci;
|
} ci;
|
||||||
ci get_90_confidence_interval(double (*sampler)(uint64_t*), uint64_t* seed);
|
ci array_get_ci(ci interval, double* xs, int n);
|
||||||
|
ci array_get_90_ci(double xs[], int n);
|
||||||
|
ci sampler_get_ci(ci interval, double (*sampler)(uint64_t*), int n, uint64_t* seed);
|
||||||
|
ci sampler_get_90_ci(double (*sampler)(uint64_t*), int n, uint64_t* seed);
|
||||||
|
|
||||||
// small algebra manipulations
|
/* Algebra manipulations */
|
||||||
|
|
||||||
typedef struct normal_params_t {
|
typedef struct normal_params_t {
|
||||||
double mean;
|
double mean;
|
||||||
|
@ -46,6 +31,24 @@ lognormal_params algebra_product_lognormals(lognormal_params a, lognormal_params
|
||||||
lognormal_params convert_ci_to_lognormal_params(ci x);
|
lognormal_params convert_ci_to_lognormal_params(ci x);
|
||||||
ci convert_lognormal_params_to_ci(lognormal_params y);
|
ci convert_lognormal_params_to_ci(lognormal_params y);
|
||||||
|
|
||||||
void parallel_sampler(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples);
|
/* Error handling */
|
||||||
|
struct box {
|
||||||
|
int empty;
|
||||||
|
double content;
|
||||||
|
char* error_msg;
|
||||||
|
};
|
||||||
|
#define MAX_ERROR_LENGTH 500
|
||||||
|
#define EXIT_ON_ERROR 0
|
||||||
|
#define PROCESS_ERROR(error_msg) process_error(error_msg, EXIT_ON_ERROR, __FILE__, __LINE__)
|
||||||
|
struct box process_error(const char* error_msg, int should_exit, char* file, int line);
|
||||||
|
void array_print(double* array, int length);
|
||||||
|
|
||||||
|
/* Inverse cdf */
|
||||||
|
struct box inverse_cdf_double(double cdf(double), double p);
|
||||||
|
struct box inverse_cdf_box(struct box cdf_box(double), double p);
|
||||||
|
|
||||||
|
/* Samplers from cdf */
|
||||||
|
struct box sampler_cdf_double(double cdf(double), uint64_t* seed);
|
||||||
|
struct box sampler_cdf_box(struct box cdf(double), uint64_t* seed);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue
Block a user