use omp reductions to shave off 0.6ms

This commit is contained in:
NunoSempere 2023-06-03 00:52:31 -06:00
parent 15e65534e4
commit 75b9e38694
2 changed files with 12 additions and 25 deletions

Binary file not shown.

View File

@@ -62,22 +62,19 @@ void split_array_free(float** meta_array, int divided_into)
float split_array_sum(float** meta_array, int length, int divided_into) float split_array_sum(float** meta_array, int length, int divided_into)
{ {
int i; int i;
float output; float output = 0;
float* partial_sum = malloc(divided_into * sizeof(float));
#pragma omp private(i) shared(partial_sum) #pragma omp parallel for reduction(+:output)
for (int i = 0; i < divided_into; i++) { for (int i = 0; i < divided_into; i++) {
float own_partial_sum = 0; float own_partial_sum = 0;
int own_length = split_array_get_my_length(i, length, divided_into); int own_length = split_array_get_my_length(i, length, divided_into);
for (int j = 0; j < own_length; j++) { for (int j = 0; j < own_length; j++) {
own_partial_sum += meta_array[i][j]; own_partial_sum += meta_array[i][j];
} }
partial_sum[i] = own_partial_sum; output += own_partial_sum;
}
for (int i = 0; i < divided_into; i++) {
output += partial_sum[i];
} }
return output; return output;
} }
// Distribution & sampling functions // Distribution & sampling functions
@@ -128,16 +125,6 @@ void mixture(float (*samplers[])(unsigned int*), float* weights, int n_dists, fl
// You can see a simpler version of this function in the git history // You can see a simpler version of this function in the git history
// or in C-02-better-algorithm-one-thread/ // or in C-02-better-algorithm-one-thread/
float sum_weights = array_sum(weights, n_dists); float sum_weights = array_sum(weights, n_dists);
/*float* normalized_weights = malloc(n_dists * sizeof(float));
// float normalized_weights[n_dists];
for (int i = 0; i < n_dists; i++) {
normalized_weights[i] = weights[i] / sum_weights;
}
float* cummulative_weights = malloc(n_dists * sizeof(float));
// float cummulative_weights[n_dists];
array_cumsum(normalized_weights, cummulative_weights, n_dists);
*/
float* cumsummed_normalized_weights = malloc(n_dists * sizeof(float)); float* cumsummed_normalized_weights = malloc(n_dists * sizeof(float));
cumsummed_normalized_weights[0] = weights[0]/sum_weights; cumsummed_normalized_weights[0] = weights[0]/sum_weights;
for (int i = 1; i < n_dists; i++) { for (int i = 1; i < n_dists; i++) {