Normalize and cumulatively sum the weights array in a single for loop.
This commit is contained in:
parent
52260630de
commit
15e65534e4
|
@ -33,14 +33,12 @@ FORMATTER=clang-format -i -style=$(STYLE_BLUEPRINT)
|
||||||
## make build
|
## make build
|
||||||
build: $(SRC)
|
build: $(SRC)
|
||||||
$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(OPENMP) $(MATH) -o $(OUTPUT)
|
$(CC) $(OPTIMIZED) $(DEBUG) $(SRC) $(OPENMP) $(MATH) -o $(OUTPUT)
|
||||||
$(CC) $(OPTIMIZED) $(DEBUG) $(SRC_ONE_THREAD) $(OPENMP) $(MATH) -o $(OUTPUT_ONE_THREAD)
|
|
||||||
|
|
||||||
format: $(SRC)
|
format: $(SRC)
|
||||||
$(FORMATTER) $(SRC)
|
$(FORMATTER) $(SRC)
|
||||||
|
|
||||||
run: $(SRC) $(OUTPUT)
|
run: $(SRC) $(OUTPUT)
|
||||||
OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
|
OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
|
||||||
./$(OUTPUT_ONE_THREAD)
|
|
||||||
|
|
||||||
multi:
|
multi:
|
||||||
OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
|
OMP_NUM_THREADS=1 ./$(OUTPUT) && echo
|
||||||
|
@ -48,7 +46,6 @@ multi:
|
||||||
OMP_NUM_THREADS=4 ./$(OUTPUT) && echo
|
OMP_NUM_THREADS=4 ./$(OUTPUT) && echo
|
||||||
OMP_NUM_THREADS=8 ./$(OUTPUT) && echo
|
OMP_NUM_THREADS=8 ./$(OUTPUT) && echo
|
||||||
OMP_NUM_THREADS=16 ./$(OUTPUT) && echo
|
OMP_NUM_THREADS=16 ./$(OUTPUT) && echo
|
||||||
./$(OUTPUT_ONE_THREAD) && echo
|
|
||||||
|
|
||||||
time-linux:
|
time-linux:
|
||||||
@echo "Requires /bin/time, found on GNU/Linux systems" && echo
|
@echo "Requires /bin/time, found on GNU/Linux systems" && echo
|
||||||
|
@ -68,6 +65,10 @@ time-linux:
|
||||||
@echo "Running 100x and taking avg time: OMP_NUM_THREADS=16 $(OUTPUT)"
|
@echo "Running 100x and taking avg time: OMP_NUM_THREADS=16 $(OUTPUT)"
|
||||||
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do OMP_NUM_THREADS=16 $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 16 threads: |" | sed 's|$$|ms|' && echo
|
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do OMP_NUM_THREADS=16 $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 16 threads: |" | sed 's|$$|ms|' && echo
|
||||||
|
|
||||||
|
time-linux-fastest:
|
||||||
|
@echo "Running 100x and taking avg time: OMP_NUM_THREADS=16 $(OUTPUT)"
|
||||||
|
@t=$$(/usr/bin/time -f "%e" -p bash -c 'for i in {1..100}; do OMP_NUM_THREADS=16 $(OUTPUT); done' 2>&1 >/dev/null | grep real | awk '{print $$2}' ); echo "scale=2; 1000 * $$t / 100" | bc | sed "s|^|Time using 16 threads: |" | sed 's|$$|ms|' && echo
|
||||||
|
|
||||||
time-linux-simple:
|
time-linux-simple:
|
||||||
@echo "Requires /bin/time, found on GNU/Linux systems" && echo
|
@echo "Requires /bin/time, found on GNU/Linux systems" && echo
|
||||||
OMP_NUM_THREADS=1 /bin/time -f "Time: %es" ./$(OUTPUT) && echo
|
OMP_NUM_THREADS=1 /bin/time -f "Time: %es" ./$(OUTPUT) && echo
|
||||||
|
@ -75,7 +76,6 @@ time-linux-simple:
|
||||||
OMP_NUM_THREADS=4 /bin/time -f "Time: %es" ./$(OUTPUT) && echo
|
OMP_NUM_THREADS=4 /bin/time -f "Time: %es" ./$(OUTPUT) && echo
|
||||||
OMP_NUM_THREADS=8 /bin/time -f "Time: %es" ./$(OUTPUT) && echo
|
OMP_NUM_THREADS=8 /bin/time -f "Time: %es" ./$(OUTPUT) && echo
|
||||||
OMP_NUM_THREADS=16 /bin/time -f "Time: %es" ./$(OUTPUT) && echo
|
OMP_NUM_THREADS=16 /bin/time -f "Time: %es" ./$(OUTPUT) && echo
|
||||||
/bin/time -f "Time: %es" ./$(OUTPUT_ONE_THREAD) && echo
|
|
||||||
|
|
||||||
debian-install-dependencies:
|
debian-install-dependencies:
|
||||||
sudo apt-get install libomp-dev
|
sudo apt-get install libomp-dev
|
||||||
|
|
BIN
C/out/samples
BIN
C/out/samples
Binary file not shown.
24
C/samples.c
24
C/samples.c
|
@ -128,19 +128,29 @@ void mixture(float (*samplers[])(unsigned int*), float* weights, int n_dists, fl
|
||||||
// You can see a simpler version of this function in the git history
|
// You can see a simpler version of this function in the git history
|
||||||
// or in C-02-better-algorithm-one-thread/
|
// or in C-02-better-algorithm-one-thread/
|
||||||
float sum_weights = array_sum(weights, n_dists);
|
float sum_weights = array_sum(weights, n_dists);
|
||||||
float* normalized_weights = malloc(n_dists * sizeof(float));
|
/*float* normalized_weights = malloc(n_dists * sizeof(float));
|
||||||
|
// float normalized_weights[n_dists];
|
||||||
for (int i = 0; i < n_dists; i++) {
|
for (int i = 0; i < n_dists; i++) {
|
||||||
normalized_weights[i] = weights[i] / sum_weights;
|
normalized_weights[i] = weights[i] / sum_weights;
|
||||||
}
|
}
|
||||||
|
|
||||||
float* cummulative_weights = malloc(n_dists * sizeof(float));
|
float* cummulative_weights = malloc(n_dists * sizeof(float));
|
||||||
|
// float cummulative_weights[n_dists];
|
||||||
array_cumsum(normalized_weights, cummulative_weights, n_dists);
|
array_cumsum(normalized_weights, cummulative_weights, n_dists);
|
||||||
|
*/
|
||||||
|
float* cumsummed_normalized_weights = malloc(n_dists * sizeof(float));
|
||||||
|
cumsummed_normalized_weights[0] = weights[0]/sum_weights;
|
||||||
|
for (int i = 1; i < n_dists; i++) {
|
||||||
|
cumsummed_normalized_weights[i] = cumsummed_normalized_weights[i - 1] + weights[i]/sum_weights;
|
||||||
|
}
|
||||||
|
|
||||||
//create var holders
|
//create var holders
|
||||||
float p1;
|
float p1;
|
||||||
int sample_index, i, own_length;
|
int sample_index, i, own_length;
|
||||||
unsigned int* seeds[n_threads];
|
|
||||||
for (unsigned int i = 0; i < n_threads; i++) {
|
// unsigned int* seeds[n_threads];
|
||||||
|
unsigned int** seeds = malloc(n_threads * sizeof(unsigned int*));
|
||||||
|
for (unsigned int i = 0; i < n_threads; i++) {
|
||||||
seeds[i] = malloc(sizeof(unsigned int));
|
seeds[i] = malloc(sizeof(unsigned int));
|
||||||
*seeds[i] = i;
|
*seeds[i] = i;
|
||||||
}
|
}
|
||||||
|
@ -153,7 +163,7 @@ void mixture(float (*samplers[])(unsigned int*), float* weights, int n_dists, fl
|
||||||
for (int j = 0; j < own_length; j++) {
|
for (int j = 0; j < own_length; j++) {
|
||||||
p1 = random_uniform(0, 1, seeds[i]);
|
p1 = random_uniform(0, 1, seeds[i]);
|
||||||
for (int k = 0; k < n_dists; k++) {
|
for (int k = 0; k < n_dists; k++) {
|
||||||
if (p1 < cummulative_weights[k]) {
|
if (p1 < cumsummed_normalized_weights[k]) {
|
||||||
results[i][j] = samplers[k](seeds[i]);
|
results[i][j] = samplers[k](seeds[i]);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -161,11 +171,13 @@ void mixture(float (*samplers[])(unsigned int*), float* weights, int n_dists, fl
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
free(normalized_weights);
|
// free(normalized_weights);
|
||||||
free(cummulative_weights);
|
// free(cummulative_weights);
|
||||||
|
free(cumsummed_normalized_weights);
|
||||||
for (unsigned int i = 0; i < n_threads; i++) {
|
for (unsigned int i = 0; i < n_threads; i++) {
|
||||||
free(seeds[i]);
|
free(seeds[i]);
|
||||||
}
|
}
|
||||||
|
free(seeds);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Functions used for the BOTEC.
|
// Functions used for the BOTEC.
|
||||||
|
|
Loading…
Reference in New Issue
Block a user