forked from personal/squiggle.c
test if there might be some "false sharing" going on with OpenMP
This commit is contained in:
parent
95e4532c2c
commit
73ea6f32c2
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -9,6 +9,12 @@
|
|||
#include <string.h> // memcpy
|
||||
|
||||
/* Parallel sampler */
|
||||
#define CACHE_LINE_SIZE 64
|
||||
typedef struct padded_seed_t {
|
||||
uint64_t* seed;
|
||||
char padding[CACHE_LINE_SIZE - sizeof(uint64_t*)];
|
||||
} padded_seed;
|
||||
|
||||
void sampler_parallel(double (*sampler)(uint64_t* seed), double* results, int n_threads, int n_samples)
|
||||
{
|
||||
|
||||
|
@ -29,14 +35,15 @@ void sampler_parallel(double (*sampler)(uint64_t* seed), double* results, int n_
|
|||
int quotient = n_samples / n_threads;
|
||||
int divisor_multiple = quotient * n_threads;
|
||||
|
||||
uint64_t** seeds = malloc((size_t)n_threads * sizeof(uint64_t*));
|
||||
// uint64_t** seeds = malloc((size_t)n_threads * sizeof(uint64_t*));
|
||||
padded_seed* seeds = (padded_seed*) malloc(sizeof(padded_seed) * (size_t)n_threads);
|
||||
srand(1);
|
||||
for (int i = 0; i < n_threads; i++) {
|
||||
seeds[i] = malloc(sizeof(uint64_t));
|
||||
seeds[i].seed = malloc(sizeof(uint64_t*));
|
||||
// Constraints:
|
||||
// - xorshift can't start with 0
|
||||
// - the seeds should be reasonably separated and not correlated
|
||||
*seeds[i] = (uint64_t)rand() * (UINT64_MAX / RAND_MAX);
|
||||
*(seeds[i].seed) = (uint64_t)rand() * (UINT64_MAX / RAND_MAX);
|
||||
// printf("#%ld: %lu\n",i, *seeds[i]);
|
||||
|
||||
// Other initializations tried:
|
||||
|
@ -53,17 +60,17 @@ void sampler_parallel(double (*sampler)(uint64_t* seed), double* results, int n_
|
|||
int lower_bound_inclusive = i * quotient;
|
||||
int upper_bound_not_inclusive = ((i + 1) * quotient); // note the < in the for loop below,
|
||||
for (int j = lower_bound_inclusive; j < upper_bound_not_inclusive; j++) {
|
||||
results[j] = sampler(seeds[i]);
|
||||
results[j] = sampler(seeds[i].seed);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int j = divisor_multiple; j < n_samples; j++) {
|
||||
results[j] = sampler(seeds[0]);
|
||||
results[j] = sampler(seeds[0].seed);
|
||||
// we can just reuse a seed, this isn't problematic because we are not doing multithreading
|
||||
}
|
||||
|
||||
for (int i = 0; i < n_threads; i++) {
|
||||
free(seeds[i]);
|
||||
free(seeds[i].seed);
|
||||
}
|
||||
free(seeds);
|
||||
}
|
||||
|
@ -109,7 +116,7 @@ static double quickselect(int k, double xs[], int n)
|
|||
|
||||
double *ys = malloc((size_t)n * sizeof(double));
|
||||
memcpy(ys, xs, (size_t)n * sizeof(double));
|
||||
// ^: don't make this operation "destructive"
|
||||
// ^: don't rearrange item order in the original array
|
||||
|
||||
int low = 0;
|
||||
int high = n - 1;
|
||||
|
|
Loading…
Reference in New Issue
Block a user