Compare commits

..

2 Commits

21 changed files with 115 additions and 9 deletions

View File

@ -308,6 +308,11 @@ Overall, I'd describe the error handling capabilities of this library as pretty
## To do list
- [ ] Write better confidence interval code that:
- Gets number of samples as an input
- Gets either a sampler function or a list of samples
- is O(n), not O(nlog(n))
- Parallelizes stuff
- [ ] Document paralellism
- [ ] Document confidence intervals
- [ ] Point out that, even though the C standard is ambiguous about this, this code assumes that doubles are 64 bit precision (otherwise the xorshift should be different).

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,67 @@
#include "../../../squiggle.h"
#include "../../../squiggle_more.h"
#include <stdio.h>
#include <stdlib.h>
int main()
{
/* Question: can we parallelize this?
A = normal(5,2)
B = min(A)
B * 20
*/
/* Option 1: parallelize taking from n samples */
// Question being asked: what is the distribution of sampling 1000 times and taking the min?
double sample_min_of_n(uint64_t* seed, int n){
double min = sample_normal(5, 2, seed);
for(int i=0; i<(n-2); i++){
double sample = sample_normal(5, 2, seed);
if(sample < min){
min = sample;
}
}
return min;
}
double sampler_min_of_1000(uint64_t* seed) {
return sample_min_of_n(seed, 1000);
}
int n_samples = 1000000, n_threads = 16;
double* results = malloc(n_samples * sizeof(double));
parallel_sampler(sampler_min_of_1000, results, n_threads, n_samples);
printf("Mean of the distribution of (taking the min of 1000 samples of a normal(5,2)): %f\n", array_mean(results, n_samples));
free(results);
/* Option 2: take the min from n samples cleverly using parallelism */
// Question being asked: can we take the min of n samples cleverly?
double sample_n_parallel(int n){
int n_threads = 16;
int quotient = n / 16;
int remainder = n % 16;
uint64_t seed = 1000;
double result_remainder = sample_min_of_n(&seed, remainder);
double sample_min_of_quotient(uint64_t* seed) {
return sample_min_of_n(seed, quotient);
}
double* results_quotient = malloc(quotient * sizeof(double));
parallel_sampler(sample_min_of_quotient, results_quotient, n_threads, quotient);
double min = results_quotient[0];
for(int i=1; i<quotient; i++){
if(min > results_quotient[i]){
min = results_quotient[i];
}
}
if(min > result_remainder){
min = result_remainder;
}
free(results_quotient);
return min;
}
printf("Minimum of 1M samples of normal(5,2): %f\n", sample_n_parallel(1000000));
}

View File

@ -48,6 +48,7 @@ all:
$(CC) $(OPTIMIZED) $(DEBUG) 10_twitter_thread_example/$(SRC) $(DEPS) -o 10_twitter_thread_example/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) 11_billion_lognormals_paralell/$(SRC) $(DEPS) -o 11_billion_lognormals_paralell/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) 12_time_to_botec_parallel/$(SRC) $(DEPS) -o 12_time_to_botec_parallel/$(OUTPUT)
$(CC) $(OPTIMIZED) $(DEBUG) 13_parallelize_min/$(SRC) $(DEPS) -o 13_parallelize_min/$(OUTPUT)
format-all:
$(FORMATTER) 00_example_template/$(SRC)
@ -63,6 +64,7 @@ format-all:
$(FORMATTER) 10_twitter_thread_example/$(SRC)
$(FORMATTER) 11_billion_lognormals_paralell/$(SRC)
$(FORMATTER) 12_time_to_botec_parallel/$(SRC)
$(FORMATTER) 13_parallelize_min/$(SRC)
run-all:
00_example_template/$(OUTPUT)
@ -78,6 +80,7 @@ run-all:
10_twitter_thread_example/$(OUTPUT)
11_billion_lognormals_paralell/$(OUTPUT)
12_time_to_botec_parallel/$(OUTPUT)
13_parallelize_min/$(OUTPUT)
## make one DIR=06_nuclear_recovery
one: $(DIR)/$(SRC)

View File

@ -9,7 +9,7 @@ CC=gcc
# CC=tcc # <= faster compilation
# Main file
SRC=scratchpad.c ../squiggle.c
SRC=scratchpad.c ../squiggle.c ../squiggle_more.c
OUTPUT=scratchpad
## Dependencies

Binary file not shown.

View File

@ -1,4 +1,5 @@
#include "../squiggle.h"
#include "../squiggle_more.h"
#include <math.h>
#include <stdint.h>
#include <stdio.h>
@ -16,7 +17,18 @@ int main()
}*/
// Test division
printf("\n%d\n", 10 % 3);
// printf("\n%d\n", 10 % 3);
//
int n_samples = 100, n_threads = 16;
double* results = malloc(n_samples * sizeof(double));
double sampler_scratchpad(uint64_t* seed){
return 1;
}
parallel_sampler(sampler_scratchpad, results, n_threads, n_samples);
for(int i=0; i<n_samples; i++){
printf("Sample %d: %f\n", i, results[i]);
}
free(seed);
}

View File

@ -306,10 +306,25 @@ ci convert_lognormal_params_to_ci(lognormal_params y)
/* Parallel sampler */
void parallel_sampler(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples){
if((n_samples % n_threads) != 0){
fprintf(stderr, "Number of samples isn't divisible by number of threads, aborting\n");
exit(1);
}
// Division terminology:
// a = b * quotient + reminder
// a = (a/b)*b + (a%b)
// dividend: a
// divisor: b
// quotient = a / b
// reminder = a % b
// "divisor's multiple" := (a/b)*b
// now, we have n_samples and n_threads
// to make our life easy, each thread will have a number of samples of: a/b (quotient)
// and we'll compute the remainder of samples separately
// to possibly do by Jorge: improve so that the remainder is included in the threads
int quotient = n_samples / n_threads;
int remainder = n_samples % n_threads;
int divisor_multiple = quotient * n_threads;
uint64_t** seeds = malloc(n_threads * sizeof(uint64_t*));
for (uint64_t i = 0; i < n_threads; i++) {
seeds[i] = malloc(sizeof(uint64_t));
@ -321,14 +336,18 @@ void parallel_sampler(double (*sampler)(uint64_t* seed), double* results, int n_
{
#pragma omp for
for (i = 0; i < n_threads; i++) {
int lower_bound = i * (n_samples / n_threads);
int upper_bound = ((i+1) * (n_samples / n_threads)) - 1;
int lower_bound_inclusive = i * quotient;
int upper_bound_not_inclusive = ((i+1) * quotient); // note the < in the for loop below,
// printf("Lower bound: %d, upper bound: %d\n", lower_bound, upper_bound);
for (int j = lower_bound; j < upper_bound; j++) {
for (int j = lower_bound_inclusive; j < upper_bound_not_inclusive; j++) {
results[j] = sampler(seeds[i]);
}
}
}
for(int j=divisor_multiple; j<n_samples; j++){
results[j] = sampler(seeds[0]);
// we can just reuse a seed, this isn't problematic because we are not doing multithreading
}
for (uint64_t i = 0; i < n_threads; i++) {
free(seeds[i]);