forked from personal/squiggle.c
tweak: don't use unneeded pointers to get cache math right
This commit is contained in:
parent
bbe0116381
commit
14a18276c0
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -11,7 +11,7 @@
|
|||
/* Parallel sampler */
|
||||
/* Assumed size in bytes of one cache line; each per-thread box is padded to this size. */
#define CACHE_LINE_SIZE 64

/*
 * Per-thread PRNG state, padded so that each box is CACHE_LINE_SIZE bytes.
 *
 * Consecutive elements of an array of boxes therefore keep their seeds
 * CACHE_LINE_SIZE bytes apart, so two threads updating their own seeds never
 * write to the same cache line.
 *
 * The seed is stored by value, so the padding is computed from
 * sizeof(uint64_t) (the member actually stored) rather than from a pointer
 * size. The two only coincide on platforms with 8-byte pointers; with 4-byte
 * pointers the old expression made the struct larger than one cache line.
 */
typedef struct seed_cache_box_t {
    uint64_t seed;
    char padding[CACHE_LINE_SIZE - sizeof(uint64_t)];
} seed_cache_box;
|
||||
// This avoids "false sharing", i.e., different threads competing for the same cache line
|
||||
|
@ -43,11 +43,10 @@ void sampler_parallel(double (*sampler)(uint64_t* seed), double* results, int n_
|
|||
seed_cache_box* cache_box = (seed_cache_box*) malloc(sizeof(seed_cache_box) * (size_t)n_threads);
|
||||
srand(1);
|
||||
for (int i = 0; i < n_threads; i++) {
|
||||
cache_box[i].seed = malloc(sizeof(uint64_t*));
|
||||
// Constraints:
|
||||
// - xorshift can't start with 0
|
||||
// - the seeds should be reasonably separated and not correlated
|
||||
*(cache_box[i].seed) = (uint64_t)rand() * (UINT64_MAX / RAND_MAX);
|
||||
cache_box[i].seed = (uint64_t)rand() * (UINT64_MAX / RAND_MAX);
|
||||
// printf("#%ld: %lu\n",i, *seeds[i]);
|
||||
|
||||
// Other initializations tried:
|
||||
|
@ -57,25 +56,26 @@ void sampler_parallel(double (*sampler)(uint64_t* seed), double* results, int n_
|
|||
}
|
||||
|
||||
int i;
|
||||
#pragma omp parallel private(i, quotient)
|
||||
#pragma omp parallel private(i)
|
||||
{
|
||||
#pragma omp for
|
||||
for (i = 0; i < n_threads; i++) {
|
||||
int lower_bound_inclusive = i * quotient;
|
||||
int upper_bound_not_inclusive = ((i + 1) * quotient); // note the < in the for loop below,
|
||||
for (int j = lower_bound_inclusive; j < upper_bound_not_inclusive; j++) {
|
||||
results[j] = sampler(cache_box[i].seed);
|
||||
results[j] = sampler(&(cache_box[i].seed));
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int j = divisor_multiple; j < n_samples; j++) {
|
||||
results[j] = sampler(cache_box[0].seed);
|
||||
results[j] = sampler(&(cache_box[0].seed));
|
||||
// We can simply reuse seed 0 here: this loop runs after the parallel region has ended, so no other thread is using that seed.
|
||||
}
|
||||
|
||||
/*
|
||||
for (int i = 0; i < n_threads; i++) {
|
||||
free(cache_box[i].seed);
|
||||
}
|
||||
*/
|
||||
free(cache_box);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user