formatting tweaks
This commit is contained in:
parent
f64fedc398
commit
28d443a6cf
|
@ -1,10 +1,19 @@
|
||||||
# C-Optimized
|
# C-Optimized
|
||||||
An optimized version of the original C implementation. <br>
|
|
||||||
The main changes are an optimization of the mixture function (it passes the functions instead of the whole arrays, reducing in great measure the memory usage and the computation time) and the implementation of multi-threading with OpenMP. <br>
|
An optimized version of the original C implementation.
|
||||||
|
|
||||||
|
The main changes are:
|
||||||
|
|
||||||
|
- an optimization of the mixture function (it passes the functions instead of the whole arrays, reducing in great measure the memory usage and the computation time) and
|
||||||
|
- the implementation of multi-threading with OpenMP.
|
||||||
|
|
||||||
The mean time of execution is 6 ms. With the following distribution:
|
The mean time of execution is 6 ms. With the following distribution:
|
||||||
![Time histogram](https://i.imgur.com/6iT2PkF.png) <br>
|
|
||||||
Take into account that the multi-threading introduces a bit of dispersion in the execution time due to the creation and destruction of threads.
|
![Time histogram](https://i.imgur.com/6iT2PkF.png)
|
||||||
Also, the time data has been collected by executing the interior of the main() function 1000 times in a for loop, not executing the program itself 1000 times. <br>
|
|
||||||
|
|
||||||
The hardware used has been an AMD 5800x3D and 16GB of DDR4-3200 MHz.
|
The hardware used has been an AMD 5800x3D and 16GB of DDR4-3200 MHz.
|
||||||
|
|
||||||
|
Take into account that the multi-threading introduces a bit of dispersion in the execution time due to the creation and destruction of threads.
|
||||||
|
|
||||||
|
Also, the time data has been collected by executing the interior of the main() function 1000 times in a for loop, not executing the program itself 1000 times.
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
# make run
|
# make run
|
||||||
|
|
||||||
# Compiler
|
# Compiler
|
||||||
CC=gcc -v
|
CC=gcc
|
||||||
# CC=tcc # <= faster compilation
|
# CC=tcc # <= faster compilation
|
||||||
|
|
||||||
# Main file
|
# Main file
|
||||||
|
@ -37,7 +37,15 @@ format: $(SRC)
|
||||||
$(FORMATTER) $(SRC)
|
$(FORMATTER) $(SRC)
|
||||||
|
|
||||||
run: $(SRC) $(OUTPUT)
|
run: $(SRC) $(OUTPUT)
|
||||||
./$(OUTPUT)
|
export OMP_NUM_THREADS=4; ./$(OUTPUT)
|
||||||
|
|
||||||
|
test: $(SRC) $(OUTPUT)
|
||||||
|
OMP_NUM_THREADS=1 ./$(OUTPUT)
|
||||||
|
echo ""
|
||||||
|
OMP_NUM_THREADS=2 ./$(OUTPUT)
|
||||||
|
echo ""
|
||||||
|
OMP_NUM_THREADS=4 ./$(OUTPUT)
|
||||||
|
|
||||||
# echo "Increasing stack size limit, because we are dealing with 1M samples"
|
# echo "Increasing stack size limit, because we are dealing with 1M samples"
|
||||||
# # ulimit: increase stack size limit
|
# # ulimit: increase stack size limit
|
||||||
# # -Ss: the soft limit. If you set the hard limit, you then can't raise it
|
# # -Ss: the soft limit. If you set the hard limit, you then can't raise it
|
||||||
|
|
|
@ -21,9 +21,9 @@ void array_print(float* array, int length)
|
||||||
void array_fill(float* array, int length, float item)
|
void array_fill(float* array, int length, float item)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
#pragma omp private(i)
|
#pragma omp private(i)
|
||||||
{
|
{
|
||||||
#pragma omp for
|
#pragma omp for
|
||||||
for (i = 0; i < length; i++) {
|
for (i = 0; i < length; i++) {
|
||||||
array[i] = item;
|
array[i] = item;
|
||||||
}
|
}
|
||||||
|
@ -88,9 +88,9 @@ inline float random_to(float low, float high)
|
||||||
void array_random_to(float* array, int length, float low, float high)
|
void array_random_to(float* array, int length, float low, float high)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
#pragma omp private(i)
|
#pragma omp private(i)
|
||||||
{
|
{
|
||||||
#pragma omp for
|
#pragma omp for
|
||||||
for (i = 0; i < length; i++) {
|
for (i = 0; i < length; i++) {
|
||||||
array[i] = random_to(low, high);
|
array[i] = random_to(low, high);
|
||||||
}
|
}
|
||||||
|
@ -118,9 +118,9 @@ void mixture(float* dists[], float* weights, int n_dists, float* results)
|
||||||
float p1, p2;
|
float p1, p2;
|
||||||
int index_found, index_counter, sample_index, i;
|
int index_found, index_counter, sample_index, i;
|
||||||
|
|
||||||
#pragma omp parallel private(i, p1, p2, index_found, index_counter, sample_index)
|
#pragma omp parallel private(i, p1, p2, index_found, index_counter, sample_index)
|
||||||
{
|
{
|
||||||
#pragma omp for
|
#pragma omp for
|
||||||
for (i = 0; i < N; i++) {
|
for (i = 0; i < N; i++) {
|
||||||
p1 = random_uniform(0, 1);
|
p1 = random_uniform(0, 1);
|
||||||
p2 = random_uniform(0, 1);
|
p2 = random_uniform(0, 1);
|
||||||
|
@ -161,9 +161,9 @@ void mixture_f(float (*samplers[])(void), float* weights, int n_dists, float** r
|
||||||
float p1;
|
float p1;
|
||||||
int sample_index, i, own_length;
|
int sample_index, i, own_length;
|
||||||
|
|
||||||
#pragma omp parallel private(i, p1, sample_index, own_length)
|
#pragma omp parallel private(i, p1, sample_index, own_length)
|
||||||
{
|
{
|
||||||
#pragma omp for
|
#pragma omp for
|
||||||
for (i = 0; i < n_threads; i++) {
|
for (i = 0; i < n_threads; i++) {
|
||||||
own_length = split_array_get_my_length(i, N, n_threads);
|
own_length = split_array_get_my_length(i, N, n_threads);
|
||||||
for (int j = 0; j < own_length; j++) {
|
for (int j = 0; j < own_length; j++) {
|
||||||
|
@ -225,7 +225,7 @@ float split_array_sum(float** meta_array, int length, int divided_into)
|
||||||
float output;
|
float output;
|
||||||
float* partial_sum = malloc(divided_into * sizeof(float));
|
float* partial_sum = malloc(divided_into * sizeof(float));
|
||||||
|
|
||||||
#pragma omp private(i) shared(partial_sum)
|
#pragma omp private(i) shared(partial_sum)
|
||||||
for (int i = 0; i < divided_into; i++) {
|
for (int i = 0; i < divided_into; i++) {
|
||||||
float own_partial_sum = 0;
|
float own_partial_sum = 0;
|
||||||
int own_length = split_array_get_my_length(i, length, divided_into);
|
int own_length = split_array_get_my_length(i, length, divided_into);
|
||||||
|
@ -246,10 +246,12 @@ int main()
|
||||||
start = clock();
|
start = clock();
|
||||||
//initialize randomness
|
//initialize randomness
|
||||||
srand(time(NULL));
|
srand(time(NULL));
|
||||||
//Toy example
|
// Toy example
|
||||||
// Declare variables in play
|
// Declare variables in play
|
||||||
float p_a, p_b, p_c;
|
float p_a, p_b, p_c;
|
||||||
int n_threads = omp_get_max_threads();
|
int n_threads = omp_get_max_threads();
|
||||||
|
// printf("Max threads: %d\n", n_threads);
|
||||||
|
// omp_set_num_threads(n_threads);
|
||||||
float** dist_mixture = malloc(n_threads * sizeof(float*));
|
float** dist_mixture = malloc(n_threads * sizeof(float*));
|
||||||
split_array_allocate(dist_mixture, N, n_threads);
|
split_array_allocate(dist_mixture, N, n_threads);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user