forked from personal/squiggle.c
generalize parallelism to arbitrary num of samples, fix off by one error
This commit is contained in:
parent
5f6cc0fe4f
commit
66d33c0fb7
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
examples/more/13_parallelize_min/example
Executable file
BIN
examples/more/13_parallelize_min/example
Executable file
Binary file not shown.
67
examples/more/13_parallelize_min/example.c
Normal file
67
examples/more/13_parallelize_min/example.c
Normal file
|
@ -0,0 +1,67 @@
|
|||
#include "../../../squiggle.h"
|
||||
#include "../../../squiggle_more.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
int main()
|
||||
{
|
||||
/* Question: can we parallelize this?
|
||||
A = normal(5,2)
|
||||
B = min(A)
|
||||
B * 20
|
||||
*/
|
||||
|
||||
/* Option 1: parallelize taking from n samples */
|
||||
// Question being asked: what is the distribution of sampling 1000 times and taking the min?
|
||||
double sample_min_of_n(uint64_t* seed, int n){
|
||||
double min = sample_normal(5, 2, seed);
|
||||
for(int i=0; i<(n-2); i++){
|
||||
double sample = sample_normal(5, 2, seed);
|
||||
if(sample < min){
|
||||
min = sample;
|
||||
}
|
||||
}
|
||||
return min;
|
||||
}
|
||||
double sampler_min_of_1000(uint64_t* seed) {
|
||||
return sample_min_of_n(seed, 1000);
|
||||
}
|
||||
|
||||
int n_samples = 1000000, n_threads = 16;
|
||||
double* results = malloc(n_samples * sizeof(double));
|
||||
parallel_sampler(sampler_min_of_1000, results, n_threads, n_samples);
|
||||
printf("Mean of the distribution of (taking the min of 1000 samples of a normal(5,2)): %f\n", array_mean(results, n_samples));
|
||||
free(results);
|
||||
|
||||
/* Option 2: take the min from n samples cleverly using parallelism */
|
||||
// Question being asked: can we take the min of n samples cleverly?
|
||||
double sample_n_parallel(int n){
|
||||
|
||||
int n_threads = 16;
|
||||
int quotient = n / 16;
|
||||
int remainder = n % 16;
|
||||
|
||||
uint64_t seed = 1000;
|
||||
double result_remainder = sample_min_of_n(&seed, remainder);
|
||||
|
||||
double sample_min_of_quotient(uint64_t* seed) {
|
||||
return sample_min_of_n(seed, quotient);
|
||||
}
|
||||
double* results_quotient = malloc(quotient * sizeof(double));
|
||||
parallel_sampler(sample_min_of_quotient, results_quotient, n_threads, quotient);
|
||||
|
||||
double min = results_quotient[0];
|
||||
for(int i=1; i<quotient; i++){
|
||||
if(min > results_quotient[i]){
|
||||
min = results_quotient[i];
|
||||
}
|
||||
}
|
||||
if(min > result_remainder){
|
||||
min = result_remainder;
|
||||
}
|
||||
free(results_quotient);
|
||||
return min;
|
||||
}
|
||||
printf("Minimum of 1M samples of normal(5,2): %f\n", sample_n_parallel(1000000));
|
||||
|
||||
}
|
|
@ -48,6 +48,7 @@ all:
|
|||
$(CC) $(OPTIMIZED) $(DEBUG) 10_twitter_thread_example/$(SRC) $(DEPS) -o 10_twitter_thread_example/$(OUTPUT)
|
||||
$(CC) $(OPTIMIZED) $(DEBUG) 11_billion_lognormals_paralell/$(SRC) $(DEPS) -o 11_billion_lognormals_paralell/$(OUTPUT)
|
||||
$(CC) $(OPTIMIZED) $(DEBUG) 12_time_to_botec_parallel/$(SRC) $(DEPS) -o 12_time_to_botec_parallel/$(OUTPUT)
|
||||
$(CC) $(OPTIMIZED) $(DEBUG) 13_parallelize_min/$(SRC) $(DEPS) -o 13_parallelize_min/$(OUTPUT)
|
||||
|
||||
format-all:
|
||||
$(FORMATTER) 00_example_template/$(SRC)
|
||||
|
@ -63,6 +64,7 @@ format-all:
|
|||
$(FORMATTER) 10_twitter_thread_example/$(SRC)
|
||||
$(FORMATTER) 11_billion_lognormals_paralell/$(SRC)
|
||||
$(FORMATTER) 12_time_to_botec_parallel/$(SRC)
|
||||
$(FORMATTER) 13_parallelize_min/$(SRC)
|
||||
|
||||
run-all:
|
||||
00_example_template/$(OUTPUT)
|
||||
|
@ -78,6 +80,7 @@ run-all:
|
|||
10_twitter_thread_example/$(OUTPUT)
|
||||
11_billion_lognormals_paralell/$(OUTPUT)
|
||||
12_time_to_botec_parallel/$(OUTPUT)
|
||||
13_parallelize_min/$(OUTPUT)
|
||||
|
||||
## make one DIR=06_nuclear_recovery
|
||||
one: $(DIR)/$(SRC)
|
||||
|
|
|
@ -9,7 +9,7 @@ CC=gcc
|
|||
# CC=tcc # <= faster compilation
|
||||
|
||||
# Main file
|
||||
SRC=scratchpad.c ../squiggle.c
|
||||
SRC=scratchpad.c ../squiggle.c ../squiggle_more.c
|
||||
OUTPUT=scratchpad
|
||||
|
||||
## Dependencies
|
||||
|
|
Binary file not shown.
|
@ -1,4 +1,5 @@
|
|||
#include "../squiggle.h"
|
||||
#include "../squiggle_more.h"
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
@ -16,7 +17,18 @@ int main()
|
|||
|
||||
}*/
|
||||
// Test division
|
||||
printf("\n%d\n", 10 % 3);
|
||||
// printf("\n%d\n", 10 % 3);
|
||||
//
|
||||
|
||||
int n_samples = 100, n_threads = 16;
|
||||
double* results = malloc(n_samples * sizeof(double));
|
||||
double sampler_scratchpad(uint64_t* seed){
|
||||
return 1;
|
||||
}
|
||||
parallel_sampler(sampler_scratchpad, results, n_threads, n_samples);
|
||||
for(int i=0; i<n_samples; i++){
|
||||
printf("Sample %d: %f\n", i, results[i]);
|
||||
}
|
||||
|
||||
free(seed);
|
||||
}
|
||||
|
|
|
@ -306,10 +306,25 @@ ci convert_lognormal_params_to_ci(lognormal_params y)
|
|||
|
||||
/* Parallel sampler */
|
||||
void parallel_sampler(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples){
|
||||
if((n_samples % n_threads) != 0){
|
||||
fprintf(stderr, "Number of samples isn't divisible by number of threads, aborting\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Division terminology:
|
||||
// a = b * quotient + reminder
|
||||
// a = (a/b)*b + (a%b)
|
||||
// dividend: a
|
||||
// divisor: b
|
||||
// quotient = a / b
|
||||
// reminder = a % b
|
||||
// "divisor's multiple" := (a/b)*b
|
||||
|
||||
// now, we have n_samples and n_threads
|
||||
// to make our life easy, each thread will have a number of samples of: a/b (quotient)
|
||||
// and we'll compute the remainder of samples separately
|
||||
// to possibly do by Jorge: improve so that the remainder is included in the threads
|
||||
|
||||
int quotient = n_samples / n_threads;
|
||||
int remainder = n_samples % n_threads;
|
||||
int divisor_multiple = quotient * n_threads;
|
||||
|
||||
uint64_t** seeds = malloc(n_threads * sizeof(uint64_t*));
|
||||
for (uint64_t i = 0; i < n_threads; i++) {
|
||||
seeds[i] = malloc(sizeof(uint64_t));
|
||||
|
@ -321,14 +336,18 @@ void parallel_sampler(double (*sampler)(uint64_t* seed), double* results, int n_
|
|||
{
|
||||
#pragma omp for
|
||||
for (i = 0; i < n_threads; i++) {
|
||||
int lower_bound = i * (n_samples / n_threads);
|
||||
int upper_bound = ((i+1) * (n_samples / n_threads)) - 1;
|
||||
int lower_bound_inclusive = i * quotient;
|
||||
int upper_bound_not_inclusive = ((i+1) * quotient); // note the < in the for loop below,
|
||||
// printf("Lower bound: %d, upper bound: %d\n", lower_bound, upper_bound);
|
||||
for (int j = lower_bound; j < upper_bound; j++) {
|
||||
for (int j = lower_bound_inclusive; j < upper_bound_not_inclusive; j++) {
|
||||
results[j] = sampler(seeds[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
for(int j=divisor_multiple; j<n_samples; j++){
|
||||
results[j] = sampler(seeds[0]);
|
||||
// we can just reuse a seed, this isn't problematic because we are not doing multithreading
|
||||
}
|
||||
|
||||
for (uint64_t i = 0; i < n_threads; i++) {
|
||||
free(seeds[i]);
|
||||
|
|
Loading…
Reference in New Issue
Block a user