Compare commits

...

6 Commits

Author SHA1 Message Date
1d89eb6231 formatting pass, upkeep 2024-01-20 14:30:20 +01:00
199e76bdfb add fermi paradox to examples 2024-01-20 14:28:20 +01:00
bb91d78859 add fermi paradox example 2024-01-20 14:02:59 +01:00
b99a9cb3f5 readme tweak 2024-01-13 13:01:58 +01:00
4a4e90f492 reformat remake 2024-01-13 12:47:56 +01:00
8e2f918dd3 add comment about cache analysis 2024-01-13 12:47:14 +01:00
26 changed files with 389 additions and 18 deletions

View File

@ -334,6 +334,8 @@ But for more complicated use cases, my recommendation would be to not use parall
- If you run the `sampler_parallel` function on two different inputs, their outputs will be correlated. E.g., if you run two lognormals, indices which have higher samples in one will tend to have higher samples in the other one. Why? - If you run the `sampler_parallel` function on two different inputs, their outputs will be correlated. E.g., if you run two lognormals, indices which have higher samples in one will tend to have higher samples in the other one. Why?
- For a small amount of samples, if you run the `sampler_parallel` function, you will get better spread out random numbers than if you run things serially. Why? - For a small amount of samples, if you run the `sampler_parallel` function, you will get better spread out random numbers than if you run things serially. Why?
That said, I found adding parallelism to be an interesting an engaging task. Most recently, I even optimized the code to ensure that two threads weren't accessing the same cache line at the same time, and it was very satisfying to see a 30% improvement as a result.
#### Extra: Algebraic manipulations #### Extra: Algebraic manipulations
`squiggle_more.c` has some functions to do some simple algebra manipulations: sums of normals and products of lognormals. You can see some example usage [here](examples/more/07_algebra/example.c) and [here](examples/more/08_algebra_and_conversion/example.c). `squiggle_more.c` has some functions to do some simple algebra manipulations: sums of normals and products of lognormals. You can see some example usage [here](examples/more/07_algebra/example.c) and [here](examples/more/08_algebra_and_conversion/example.c).

View File

@ -15,8 +15,16 @@ int main()
int n_dists = 4; int n_dists = 4;
// These are nested functions. They will not compile without gcc. // These are nested functions. They will not compile without gcc.
double sample_0(uint64_t * seed) { UNUSED(seed); return 0; } double sample_0(uint64_t * seed)
double sample_1(uint64_t * seed) { UNUSED(seed); return 1; } {
UNUSED(seed);
return 0;
}
double sample_1(uint64_t * seed)
{
UNUSED(seed);
return 1;
}
double sample_few(uint64_t * seed) { return sample_to(1, 3, seed); } double sample_few(uint64_t * seed) { return sample_to(1, 3, seed); }
double sample_many(uint64_t * seed) { return sample_to(2, 10, seed); } double sample_many(uint64_t * seed) { return sample_to(2, 10, seed); }

Binary file not shown.

View File

@ -0,0 +1,147 @@
#include "../../../squiggle.h"
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
double sample_loguniform(double a, double b, uint64_t* seed)
{
return exp(sample_uniform(log(a), log(b), seed));
}
int main()
{
// Replicate <https://arxiv.org/pdf/1806.02404.pdf>, and in particular the red line in page 11.
// Could also be interesting to just produce and save many samples.
// set randomness seed
uint64_t* seed = malloc(sizeof(uint64_t));
*seed = UINT64_MAX / 64; // xorshift can't start with a seed of 0
double sample_fermi_naive(uint64_t * seed)
{
double rate_of_star_formation = sample_loguniform(1, 100, seed);
double fraction_of_stars_with_planets = sample_loguniform(0.1, 1, seed);
double number_of_habitable_planets_per_star_system = sample_loguniform(0.1, 1, seed);
double rate_of_life_formation_in_habitable_planets = sample_lognormal(1, 50, seed);
double fraction_of_habitable_planets_in_which_any_life_appears = -expm1(-rate_of_life_formation_in_habitable_planets);
// double fraction_of_habitable_planets_in_which_any_life_appears = 1-exp(-rate_of_life_formation_in_habitable_planets);
// but with more precision
double fraction_of_planets_with_life_in_which_intelligent_life_appears = sample_loguniform(0.001, 1, seed);
double fraction_of_intelligent_planets_which_are_detectable_as_such = sample_loguniform(0.01, 1, seed);
double longevity_of_detectable_civilizations = sample_loguniform(100, 10000000000, seed);
// printf(" rate_of_star_formation = %lf\n", rate_of_star_formation);
// printf(" fraction_of_stars_with_planets = %lf\n", fraction_of_stars_with_planets);
// printf(" number_of_habitable_planets_per_star_system = %lf\n", number_of_habitable_planets_per_star_system);
// printf(" rate_of_life_formation_in_habitable_planets = %.16lf\n", rate_of_life_formation_in_habitable_planets);
// printf(" fraction_of_habitable_planets_in_which_any_life_appears = %lf\n", fraction_of_habitable_planets_in_which_any_life_appears);
// printf(" fraction_of_planets_with_life_in_which_intelligent_life_appears = %lf\n", fraction_of_planets_with_life_in_which_intelligent_life_appears);
// printf(" fraction_of_intelligent_planets_which_are_detectable_as_such = %lf\n", fraction_of_intelligent_planets_which_are_detectable_as_such);
// printf(" longevity_of_detectable_civilizations = %lf\n", longevity_of_detectable_civilizations);
// Expected number of civilizations in the Milky way;
// see footnote 3 (p. 5)
double n = rate_of_star_formation * fraction_of_stars_with_planets * number_of_habitable_planets_per_star_system * fraction_of_habitable_planets_in_which_any_life_appears * fraction_of_planets_with_life_in_which_intelligent_life_appears * fraction_of_intelligent_planets_which_are_detectable_as_such * longevity_of_detectable_civilizations;
return n;
}
double sample_fermi_paradox_naive(uint64_t * seed)
{
double n = sample_fermi_naive(seed);
return ((n > 1) ? 1 : 0);
}
double n = 1000000;
double naive_fermi_proportion = 0;
for (int i = 0; i < n; i++) {
double result = sample_fermi_paradox_naive(seed);
// printf("result: %lf\n", result);
naive_fermi_proportion += result;
}
printf("Naïve %% that we are not alone: %lf\n", naive_fermi_proportion / n);
// Thinking in log space
double sample_fermi_logspace(uint64_t * seed)
{
double log_rate_of_star_formation = sample_uniform(log(1), log(100), seed);
double log_fraction_of_stars_with_planets = sample_uniform(log(0.1), log(1), seed);
double log_number_of_habitable_planets_per_star_system = sample_uniform(log(0.1), log(1), seed);
double log_fraction_of_planets_with_life_in_which_intelligent_life_appears = sample_uniform(log(0.001), log(1), seed);
double log_fraction_of_intelligent_planets_which_are_detectable_as_such = sample_uniform(log(0.01), log(1), seed);
double log_longevity_of_detectable_civilizations = sample_uniform(log(100), log(10000000000), seed);
// printf(" log_rate_of_star_formation = %lf\n", log_rate_of_star_formation);
// printf(" log_fraction_of_stars_with_planets = %lf\n", log_fraction_of_stars_with_planets);
// printf(" log_number_of_habitable_planets_per_star_system = %lf\n", log_number_of_habitable_planets_per_star_system);
// printf(" log_fraction_of_planets_with_life_in_which_intelligent_life_appears = %lf\n", log_fraction_of_planets_with_life_in_which_intelligent_life_appears);
// printf(" log_fraction_of_intelligent_planets_which_are_detectable_as_such = %lf\n", log_fraction_of_intelligent_planets_which_are_detectable_as_such);
// printf(" log_longevity_of_detectable_civilizations = %lf\n", log_longevity_of_detectable_civilizations);
double log_n1 = log_rate_of_star_formation + log_fraction_of_stars_with_planets + log_number_of_habitable_planets_per_star_system + log_fraction_of_planets_with_life_in_which_intelligent_life_appears + log_fraction_of_intelligent_planets_which_are_detectable_as_such + log_longevity_of_detectable_civilizations;
// printf("first part of calculation: %lf\n", log_n1);
/* Consider fraction_of_habitable_planets_in_which_any_life_appears separately.
Imprecisely, we could do:
double rate_of_life_formation_in_habitable_planets = sample_lognormal(1, 50, seed);
double fraction_of_habitable_planets_in_which_any_life_appears = 1- exp(-rate_of_life_formation_in_habitable_planets);
double log_fraction_of_habitable_planets_in_which_any_life_appears = log(1-fraction_of_habitable_planets_in_which_any_life_appears);
double n = exp(log_n1) * fraction_of_habitable_planets_in_which_any_life_appears;
// or:
double n2 = exp(log_n1 + log(fraction_of_habitable_planets_in_which_any_life_appears))
However, we lose all precision here.
Now, say
a = underlying normal
b = rate_of_life_formation_in_habitable_planets = exp(underlying normal)
c = 1 - exp(-b) = fraction_of_habitable_planets_in_which_any_life_appears
d = log(c)
Now, is there some way we can d more efficiently/precisely?
Turns out there is!
Looking at the Taylor expansion for c = 1 - exp(-b), it's b - b^2/2 + b^3/6 - x^b/24, etc.
// https://www.wolframalpha.com/input?i=1-exp%28-x%29
When b ~ 0 (as is often the case), this is close to b.
But now, if b ~ 0
c ~ b
and d = log(c) ~ log(b) = log(exp(a)) = a
*/
double log_rate_of_life_formation_in_habitable_planets = sample_normal(1, 50, seed);
// printf("log_rate_of_life_formation_in_habitable_planets: %lf\n", log_rate_of_life_formation_in_habitable_planets);
double log_fraction_of_habitable_planets_in_which_any_life_appears;
if (log_rate_of_life_formation_in_habitable_planets < -32) {
log_fraction_of_habitable_planets_in_which_any_life_appears = log_rate_of_life_formation_in_habitable_planets;
} else {
double rate_of_life_formation_in_habitable_planets = exp(log_rate_of_life_formation_in_habitable_planets);
double fraction_of_habitable_planets_in_which_any_life_appears = -expm1(-rate_of_life_formation_in_habitable_planets);
log_fraction_of_habitable_planets_in_which_any_life_appears = log(fraction_of_habitable_planets_in_which_any_life_appears);
}
// printf(" log_fraction_of_habitable_planets_in_which_any_life_appears: %lf\n", log_fraction_of_habitable_planets_in_which_any_life_appears);
double log_n = log_n1 + log_fraction_of_habitable_planets_in_which_any_life_appears;
return log_n;
}
double sample_fermi_paradox_logspace(uint64_t * seed)
{
double n = sample_fermi_logspace(seed);
return ((n > 0) ? 1 : 0);
}
double logspace_fermi_proportion = 0;
for (int i = 0; i < n; i++) {
double result = sample_fermi_paradox_logspace(seed);
// printf("result: %lf\n", result);
logspace_fermi_proportion += result;
}
printf("Using more accurate logspace computations, %% that we are not alone: %lf\n", logspace_fermi_proportion / n);
free(seed);
}

Binary file not shown.

View File

@ -0,0 +1,56 @@
# Interface:
# make
# make build
# make format
# make run
# Compiler
CC=gcc
# CC=tcc # <= faster compilation
# Main file
SRC=scratchpad.c ../squiggle.c ../squiggle_more.c
OUTPUT=scratchpad
## Dependencies
MATH=-lm
## Flags
DEBUG= #'-g'
STANDARD=-std=c99
WARNINGS=-Wall
OPTIMIZED=-O3 #-Ofast
# OPENMP=-fopenmp
## Formatter
STYLE_BLUEPRINT=webkit
FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
## make build
build: $(SRC)
$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(MATH) -o $(OUTPUT)
format: $(SRC)
$(FORMATTER) $(SRC)
run: $(SRC) $(OUTPUT)
./$(OUTPUT)
verify: $(SRC) $(OUTPUT)
./$(OUTPUT) | grep "NOT passed" -A 2 --group-separator='' || true
time-linux:
@echo "Requires /bin/time, found on GNU/Linux systems" && echo
@echo "Running 100x and taking avg time $(OUTPUT)"
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 1 thread: |" | sed 's|$$|ms|' && echo
## Profiling
profile-linux:
echo "Requires perf, which depends on the kernel version, and might be in linux-tools package or similar"
echo "Must be run as sudo"
$(CC) $(SRC) $(MATH) -o $(OUTPUT)
sudo perf record ./$(OUTPUT)
sudo perf report
rm perf.data

Binary file not shown.

View File

@ -41,6 +41,8 @@ all:
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 03_gcc_nested_function/$(SRC) $(DEPS) -o 03_gcc_nested_function/$(OUTPUT) $(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 03_gcc_nested_function/$(SRC) $(DEPS) -o 03_gcc_nested_function/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 04_gamma_beta/$(SRC) $(DEPS) -o 04_gamma_beta/$(OUTPUT) $(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 04_gamma_beta/$(SRC) $(DEPS) -o 04_gamma_beta/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 05_hundred_lognormals/$(SRC) $(DEPS) -o 05_hundred_lognormals/$(OUTPUT) $(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 05_hundred_lognormals/$(SRC) $(DEPS) -o 05_hundred_lognormals/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) $(WARN) 06_dissolving_fermi_paradox/$(SRC) $(DEPS) -o 06_dissolving_fermi_paradox/$(OUTPUT)
format-all: format-all:
$(FORMATTER) 00_example_template/$(SRC) $(FORMATTER) 00_example_template/$(SRC)
@ -49,6 +51,7 @@ format-all:
$(FORMATTER) 03_gcc_nested_function/$(SRC) $(FORMATTER) 03_gcc_nested_function/$(SRC)
$(FORMATTER) 04_gamma_beta/$(SRC) $(FORMATTER) 04_gamma_beta/$(SRC)
$(FORMATTER) 05_hundred_lognormals/$(SRC) $(FORMATTER) 05_hundred_lognormals/$(SRC)
$(FORMATTER) 06_dissolving_fermi_paradox/$(SRC)
run-all: run-all:
00_example_template/$(OUTPUT) 00_example_template/$(OUTPUT)
@ -57,6 +60,7 @@ run-all:
03_gcc_nested_function/$(OUTPUT) 03_gcc_nested_function/$(OUTPUT)
04_gamma_beta/$(OUTPUT) 04_gamma_beta/$(OUTPUT)
05_hundred_lognormals/$(OUTPUT) 05_hundred_lognormals/$(OUTPUT)
06_dissolving_fermi_paradox/$(OUTPUT)
## make one DIR=01_one_sample ## make one DIR=01_one_sample
one: $(DIR)/$(SRC) one: $(DIR)/$(SRC)

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,25 +1,157 @@
#include "../squiggle.h" #include "../squiggle.h"
#include "../squiggle_more.h" // #include "../squiggle_more.h"
#include <math.h> #include <math.h>
#include <stdint.h> #include <stdint.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
double sample_loguniform(double a, double b, uint64_t* seed){
return exp(sample_uniform(log(a), log(b), seed));
}
int main() int main()
{ {
// Replicate <https://arxiv.org/pdf/1806.02404.pdf>, and in particular the red line in page 11.
// Could also be interesting to just produce and save many samples.
// set randomness seed // set randomness seed
uint64_t* seed = malloc(sizeof(uint64_t)); uint64_t* seed = malloc(sizeof(uint64_t));
*seed = 1000; // xorshift can't start with a seed of 0 *seed = UINT64_MAX/64; // xorshift can't start with a seed of 0
int n = 1000000; double sample_fermi_naive(uint64_t* seed){
double* xs = malloc(sizeof(double) * n); double rate_of_star_formation = sample_loguniform(1,100, seed);
for (int i = 0; i < n; i++) { double fraction_of_stars_with_planets = sample_loguniform(0.1, 1, seed);
xs[i] = sample_to(10, 100, seed); double number_of_habitable_planets_per_star_system = sample_loguniform(0.1, 1, seed);
double rate_of_life_formation_in_habitable_planets = sample_lognormal(1, 50, seed);
double fraction_of_habitable_planets_in_which_any_life_appears = -expm1(-rate_of_life_formation_in_habitable_planets);
// double fraction_of_habitable_planets_in_which_any_life_appears = 1-exp(-rate_of_life_formation_in_habitable_planets);
// but with more precision
double fraction_of_planets_with_life_in_which_intelligent_life_appears = sample_loguniform(0.001, 1, seed);
double fraction_of_intelligent_planets_which_are_detectable_as_such = sample_loguniform(0.01, 1, seed);
double longevity_of_detectable_civilizations = sample_loguniform(100, 10000000000, seed);
// printf(" rate_of_star_formation = %lf\n", rate_of_star_formation);
// printf(" fraction_of_stars_with_planets = %lf\n", fraction_of_stars_with_planets);
// printf(" number_of_habitable_planets_per_star_system = %lf\n", number_of_habitable_planets_per_star_system);
// printf(" rate_of_life_formation_in_habitable_planets = %.16lf\n", rate_of_life_formation_in_habitable_planets);
// printf(" fraction_of_habitable_planets_in_which_any_life_appears = %lf\n", fraction_of_habitable_planets_in_which_any_life_appears);
// printf(" fraction_of_planets_with_life_in_which_intelligent_life_appears = %lf\n", fraction_of_planets_with_life_in_which_intelligent_life_appears);
// printf(" fraction_of_intelligent_planets_which_are_detectable_as_such = %lf\n", fraction_of_intelligent_planets_which_are_detectable_as_such);
// printf(" longevity_of_detectable_civilizations = %lf\n", longevity_of_detectable_civilizations);
// Expected number of civilizations in the Milky way;
// see footnote 3 (p. 5)
double n = rate_of_star_formation *
fraction_of_stars_with_planets *
number_of_habitable_planets_per_star_system *
fraction_of_habitable_planets_in_which_any_life_appears *
fraction_of_planets_with_life_in_which_intelligent_life_appears *
fraction_of_intelligent_planets_which_are_detectable_as_such *
longevity_of_detectable_civilizations;
return n;
} }
ci ci_90 = array_get_90_ci(xs, n);
printf("Recovering confidence interval of sample_to(10, 100):\n low: %f, high: %f\n", ci_90.low, ci_90.high); double sample_fermi_paradox_naive(uint64_t* seed){
double n = sample_fermi_naive(seed);
return ((n > 1) ? 1 : 0);
}
double n = 1000000;
double naive_fermi_proportion = 0;
for(int i=0; i<n; i++){
double result = sample_fermi_paradox_naive(seed);
// printf("result: %lf\n", result);
naive_fermi_proportion+=result;
}
printf("Naïve %% that we are not alone: %lf\n", naive_fermi_proportion/n);
printf("Size of uint64_t: %ld", sizeof(uint64_t*)); // Thinking in log space
double sample_fermi_logspace(uint64_t* seed){
double log_rate_of_star_formation = sample_uniform(log(1), log(100), seed);
double log_fraction_of_stars_with_planets = sample_uniform(log(0.1), log(1), seed);
double log_number_of_habitable_planets_per_star_system = sample_uniform(log(0.1), log(1), seed);
double log_fraction_of_planets_with_life_in_which_intelligent_life_appears = sample_uniform(log(0.001), log(1), seed);
double log_fraction_of_intelligent_planets_which_are_detectable_as_such = sample_uniform(log(0.01), log(1), seed);
double log_longevity_of_detectable_civilizations = sample_uniform(log(100), log(10000000000), seed);
// printf(" log_rate_of_star_formation = %lf\n", log_rate_of_star_formation);
// printf(" log_fraction_of_stars_with_planets = %lf\n", log_fraction_of_stars_with_planets);
// printf(" log_number_of_habitable_planets_per_star_system = %lf\n", log_number_of_habitable_planets_per_star_system);
// printf(" log_fraction_of_planets_with_life_in_which_intelligent_life_appears = %lf\n", log_fraction_of_planets_with_life_in_which_intelligent_life_appears);
// printf(" log_fraction_of_intelligent_planets_which_are_detectable_as_such = %lf\n", log_fraction_of_intelligent_planets_which_are_detectable_as_such);
// printf(" log_longevity_of_detectable_civilizations = %lf\n", log_longevity_of_detectable_civilizations);
double log_n1 =
log_rate_of_star_formation +
log_fraction_of_stars_with_planets +
log_number_of_habitable_planets_per_star_system +
log_fraction_of_planets_with_life_in_which_intelligent_life_appears +
log_fraction_of_intelligent_planets_which_are_detectable_as_such +
log_longevity_of_detectable_civilizations;
// printf("first part of calculation: %lf\n", log_n1);
/* Consider fraction_of_habitable_planets_in_which_any_life_appears separately.
Imprecisely, we could do:
double rate_of_life_formation_in_habitable_planets = sample_lognormal(1, 50, seed);
double fraction_of_habitable_planets_in_which_any_life_appears = 1- exp(-rate_of_life_formation_in_habitable_planets);
double log_fraction_of_habitable_planets_in_which_any_life_appears = log(1-fraction_of_habitable_planets_in_which_any_life_appears);
double n = exp(log_n1) * fraction_of_habitable_planets_in_which_any_life_appears;
// or:
double n2 = exp(log_n1 + log(fraction_of_habitable_planets_in_which_any_life_appears))
However, we lose all precision here.
Now, say
a = underlying normal
b = rate_of_life_formation_in_habitable_planets = exp(underlying normal)
c = 1 - exp(-b) = fraction_of_habitable_planets_in_which_any_life_appears
d = log(c)
Now, is there some way we can d more efficiently/precisely?
Turns out there is!
Looking at the Taylor expansion for c = 1 - exp(-b), it's b - b^2/2 + b^3/6 - x^b/24, etc.
// https://www.wolframalpha.com/input?i=1-exp%28-x%29
When b ~ 0 (as is often the case), this is close to b.
But now, if b ~ 0
c ~ b
and d = log(c) ~ log(b) = log(exp(a)) = a
*/
double log_rate_of_life_formation_in_habitable_planets = sample_normal(1, 50, seed);
// printf("log_rate_of_life_formation_in_habitable_planets: %lf\n", log_rate_of_life_formation_in_habitable_planets);
double log_fraction_of_habitable_planets_in_which_any_life_appears;
if(log_rate_of_life_formation_in_habitable_planets < -32){
log_fraction_of_habitable_planets_in_which_any_life_appears = log_rate_of_life_formation_in_habitable_planets;
} else{
double rate_of_life_formation_in_habitable_planets = exp(log_rate_of_life_formation_in_habitable_planets);
double fraction_of_habitable_planets_in_which_any_life_appears = -expm1(-rate_of_life_formation_in_habitable_planets);
log_fraction_of_habitable_planets_in_which_any_life_appears = log(fraction_of_habitable_planets_in_which_any_life_appears);
}
// printf(" log_fraction_of_habitable_planets_in_which_any_life_appears: %lf\n", log_fraction_of_habitable_planets_in_which_any_life_appears);
double log_n = log_n1 + log_fraction_of_habitable_planets_in_which_any_life_appears;
return log_n;
}
double sample_fermi_paradox_logspace(uint64_t* seed){
double n = sample_fermi_logspace(seed);
return ((n > 0) ? 1 : 0);
}
double logspace_fermi_proportion = 0;
for(int i=0; i<n; i++){
double result = sample_fermi_paradox_logspace(seed);
// printf("result: %lf\n", result);
logspace_fermi_proportion+=result;
}
printf("Using more accurate logspace computations, %% that we are not alone: %lf\n", logspace_fermi_proportion/n);
double result2;
free(seed); free(seed);
} }

View File

@ -51,7 +51,6 @@ void sampler_parallel(double (*sampler)(uint64_t* seed), double* results, int n_
// - xorshift can't start with 0 // - xorshift can't start with 0
// - the seeds should be reasonably separated and not correlated // - the seeds should be reasonably separated and not correlated
cache_box[i].seed = (uint64_t)rand() * (UINT64_MAX / RAND_MAX); cache_box[i].seed = (uint64_t)rand() * (UINT64_MAX / RAND_MAX);
// printf("#%ld: %lu\n",i, *seeds[i]);
// Other initializations tried: // Other initializations tried:
// *seeds[i] = 1 + i; // *seeds[i] = 1 + i;
@ -69,17 +68,40 @@ void sampler_parallel(double (*sampler)(uint64_t* seed), double* results, int n_
for (int j = lower_bound_inclusive; j < upper_bound_not_inclusive; j++) { for (int j = lower_bound_inclusive; j < upper_bound_not_inclusive; j++) {
results[j] = sampler(&(cache_box[i].seed)); results[j] = sampler(&(cache_box[i].seed));
// In principle, these results[j] could also result in two threads competing for the same cache line. /*
// In practice, though, t starts at 0 and ends at T
// a) this would happen infrequently at t=0,
// b) trying to unroll loops actually makes the code slower thread i accesses: results[i*quotient +0],
// c) 8 results[j] are 8 doubles, which fit a cache line. If n_samples/n_threads thread i+1 acccesses: results[(i+1)*quotient +0]
at t=T
thread i accesses: results[(i+1)*quotient -1]
thread i+1 acccesses: results[(i+2)*quotient -1]
The results[j] that are directly adjacent are
results[(i+1)*quotient -1] (accessed by thread i at time T)
results[(i+1)*quotient +0] (accessed by thread i+1 at time 0)
and these are themselves adjacent to
results[(i+1)*quotient -2] (accessed by thread i at time T-1)
results[(i+1)*quotient +1] (accessed by thread i+1 at time 2)
If T is large enough, which it is, two threads won't access the same
cache line at the same time.
Pictorially:
at t=0 ....i.........I.........
at t=T .............i.........I
and the two never overlap
Note that results[j] is a double, a double has 8 bytes (64 bits)
8 doubles fill a cache line of 64 bytes.
So we specifically won't get problems as long as n_samples/n_threads > 8
n_threads is normally 16, so n_samples > 128
Note also that this is only a problem in terms of speed, if n_samples<128
the results are still computed, it'll just be slower
*/
} }
} }
} }
for (int j = divisor_multiple; j < n_samples; j++) { for (int j = divisor_multiple; j < n_samples; j++) {
results[j] = sampler(&(cache_box[0].seed)); results[j] = sampler(&(cache_box[0].seed));
// we can just reuse a seed, this isn't problematic because we are not doing multithreading // we can just reuse a seed,
// this isn't problematic because we;ve now stopped doing multithreading
} }
free(cache_box); free(cache_box);