diff --git a/C/makefile b/C/makefile index 94fcb5f2..5e5029b4 100644 --- a/C/makefile +++ b/C/makefile @@ -33,14 +33,12 @@ FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT) ## make build build: $(SRC) $(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(OPENMP) $(MATH) -o $(OUTPUT) - $(CC) $(OPTIMIZED) $(DEBUG) $(SRC_ONE_THREAD) $(OPENMP) $(MATH) -o $(OUTPUT_ONE_THREAD) format: $(SRC) $(FORMATTER) $(SRC) run: $(SRC) $(OUTPUT) OMP_NUM_THREADS=1 ./$(OUTPUT) && echo - ./$(OUTPUT_ONE_THREAD) multi: OMP_NUM_THREADS=1 ./$(OUTPUT) && echo @@ -48,7 +46,6 @@ multi: OMP_NUM_THREADS=4 ./$(OUTPUT) && echo OMP_NUM_THREADS=8 ./$(OUTPUT) && echo OMP_NUM_THREADS=16 ./$(OUTPUT) && echo - ./$(OUTPUT_ONE_THREAD) && echo time-linux: @echo "Requires /bin/time, found on GNU/Linux systems" && echo @@ -68,6 +65,10 @@ time-linux: @echo "Running 100x and taking avg time: OMP_NUM_THREADS=16 $(OUTPUT)" @t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do OMP_NUM_THREADS=16 $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 16 threads: |" | sed 's|$$|ms|' && echo +time-linux-fastest: + @echo "Running 100x and taking avg time: OMP_NUM_THREADS=16 $(OUTPUT)" + @t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do OMP_NUM_THREADS=16 $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 16 threads: |" | sed 's|$$|ms|' && echo + time-linux-simple: @echo "Requires /bin/time, found on GNU/Linux systems" && echo OMP_NUM_THREADS=1 /bin/time -f "Time: %es" ./$(OUTPUT) && echo @@ -75,7 +76,6 @@ time-linux-simple: OMP_NUM_THREADS=4 /bin/time -f "Time: %es" ./$(OUTPUT) && echo OMP_NUM_THREADS=8 /bin/time -f "Time: %es" ./$(OUTPUT) && echo OMP_NUM_THREADS=16 /bin/time -f "Time: %es" ./$(OUTPUT) && echo - /bin/time -f "Time: %es" ./$(OUTPUT_ONE_THREAD) && echo debian-install-dependencies: sudo apt-get install libomp-dev diff --git a/C/out/samples b/C/out/samples index 7ce57e22..66adf472 100755 Binary files a/C/out/samples and b/C/out/samples differ diff --git a/C/samples.c b/C/samples.c index 0a16ee9b..08020ebf 100644 --- a/C/samples.c +++ b/C/samples.c @@ -128,19 +128,29 @@ void mixture(float (*samplers[])(unsigned int*), float* weights, int n_dists, fl // You can see a simpler version of this function in the git history // or in C-02-better-algorithm-one-thread/ float sum_weights = array_sum(weights, n_dists); - float* normalized_weights = malloc(n_dists * sizeof(float)); + /*float* normalized_weights = malloc(n_dists * sizeof(float)); + // float normalized_weights[n_dists]; for (int i = 0; i < n_dists; i++) { normalized_weights[i] = weights[i] / sum_weights; } float* cummulative_weights = malloc(n_dists * sizeof(float)); + // float cummulative_weights[n_dists]; array_cumsum(normalized_weights, cummulative_weights, n_dists); + */ + float* cumsummed_normalized_weights = malloc(n_dists * sizeof(float)); + cumsummed_normalized_weights[0] = weights[0]/sum_weights; + for (int i = 1; i < n_dists; i++) { + cumsummed_normalized_weights[i] = cumsummed_normalized_weights[i - 1] + weights[i]/sum_weights; + } //create var holders float p1; int sample_index, i, own_length; - unsigned int* seeds[n_threads]; - for (unsigned int i = 0; i < n_threads; i++) { + + // unsigned int* seeds[n_threads]; + unsigned int** seeds = malloc(n_threads * sizeof(unsigned int*)); + for (unsigned int i = 0; i < n_threads; i++) { seeds[i] = malloc(sizeof(unsigned int)); *seeds[i] = i; } @@ -153,7 +163,7 @@ void mixture(float (*samplers[])(unsigned int*), float* weights, int n_dists, fl for (int j = 0; j < own_length; j++) { p1 = random_uniform(0, 1, seeds[i]); for (int k = 0; k < n_dists; k++) { - if (p1 < cummulative_weights[k]) { + if (p1 < cumsummed_normalized_weights[k]) { results[i][j] = samplers[k](seeds[i]); break; } @@ -161,11 +171,13 @@ void mixture(float (*samplers[])(unsigned int*), float* weights, int n_dists, fl } } } - free(normalized_weights); - free(cummulative_weights); + // free(normalized_weights); + // free(cummulative_weights); + free(cumsummed_normalized_weights); for (unsigned int i = 0; i < n_threads; i++) { free(seeds[i]); } + free(seeds); } // Functions used for the BOTEC.