| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | | | #include "kutuzov_i_elem_vec_average/mpi/include/ops_mpi.hpp" |
| 2 | | | |
| 3 | | | #include <mpi.h> |
| 4 | | | |
| 5 | | | #include <cmath> |
| 6 | | | #include <vector> |
| 7 | | | |
| 8 | | | #include "kutuzov_i_elem_vec_average/common/include/common.hpp" |
| 9 | | | |
| 10 | | | namespace kutuzov_i_elem_vec_average { |
| 11 | | | |
| 12 | 1/2 ✓ Branch 1 taken 8 times. ✗ Branch 2 not taken. | 8 | KutuzovIElemVecAverageMPI::KutuzovIElemVecAverageMPI(const InType &in) { |
| 13 | | | SetTypeOfTask(GetStaticTypeOfTask()); |
| 14 | 1/2 ✓ Branch 1 taken 8 times. ✗ Branch 2 not taken. | 8 | GetInput() = in; |
| 15 | | 8 | GetOutput() = 0.0; |
| 16 | | 8 | } |
| 17 | | | |
| 18 | | 8 | bool KutuzovIElemVecAverageMPI::ValidationImpl() { |
| 19 | | 8 | return !GetInput().empty(); |
| 20 | | | } |
| 21 | | | |
| 22 | | 8 | bool KutuzovIElemVecAverageMPI::PreProcessingImpl() { |
| 23 | | 8 | return true; |
| 24 | | | } |
| 25 | | | |
| 26 | | 8 | bool KutuzovIElemVecAverageMPI::RunImpl() { |
| 27 | | 8 | GetOutput() = 0.0; |
| 28 | | | |
| 29 | | 8 | double result = 0.0; |
| 30 | | 8 | double global_sum = 0.0; |
| 31 | | | |
| 32 | | | // MPI Data |
| 33 | | 8 | int rank = 0; |
| 34 | | 8 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); |
| 35 | | | |
| 36 | | 8 | int num_processes = 0; |
| 37 | | 8 | MPI_Comm_size(MPI_COMM_WORLD, &num_processes); |
| 38 | | | |
| 39 | | | // Syncing input size data |
| 40 | | 8 | int total_elements_num = 0; |
| 41 | 2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 4 times. | 8 | if (rank == 0) { |
| 42 | | 4 | total_elements_num = static_cast<int>(GetInput().size()); |
| 43 | | | } |
| 44 | | 8 | MPI_Bcast(&total_elements_num, 1, MPI_INT, 0, MPI_COMM_WORLD); |
| 45 | | | |
| 46 | | | // Calculating batch size |
| 47 | | | int batch_size = 0; |
| 48 | | 8 | batch_size = total_elements_num / num_processes; |
| 49 | | | |
| 50 | | | // If the batch size is positive: scatter the data among processes, |
| 51 | | | // sum it locally and reduce the partial sums back to process 0 |
| 52 | 2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times. | 8 | if (batch_size > 0) { |
| 53 | | 6 | auto *local_buffer = new double[batch_size]; |
| 54 | | 6 | MPI_Scatter(GetInput().data(), batch_size, MPI_DOUBLE, local_buffer, batch_size, MPI_DOUBLE, 0, MPI_COMM_WORLD); |
| 55 | | | |
| 56 | | 6 | double sum = 0.0; |
| 57 | 2/2 ✓ Branch 0 taken 11010 times. ✓ Branch 1 taken 6 times. | 11016 | for (int i = 0; i < batch_size; i++) { |
| 58 | | 11010 | sum += local_buffer[i]; |
| 59 | | | } |
| 60 | | | |
| 61 | | 6 | MPI_Reduce(&sum, &global_sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); |
| 62 | | 6 | delete[] local_buffer; |
| 63 | | | } |
| 64 | | | |
| 65 | 2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 4 times. | 8 | if (rank == 0) { |
| 66 | | | // Add remaining elements on process-0 |
| 67 | 2/2 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 3 times. | 4 | if (num_processes * batch_size < total_elements_num) { |
| 68 | 2/2 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 1 times. | 2 | for (int i = num_processes * batch_size; i < total_elements_num; i++) { |
| 69 | | 1 | global_sum += GetInput()[i]; |
| 70 | | | } |
| 71 | | | } |
| 72 | | | // Get the average |
| 73 | | 4 | result = global_sum / static_cast<double>(total_elements_num); |
| 74 | | | } |
| 75 | | | |
| 76 | | | // Broadcast the result to all the processes |
| 77 | | 8 | MPI_Bcast(&result, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); |
| 78 | | 8 | GetOutput() = result; |
| 79 | | | |
| 80 | | | // Wait for all processes to finish working |
| 81 | | 8 | MPI_Barrier(MPI_COMM_WORLD); |
| 82 | | 8 | return true; |
| 83 | | | } |
| 84 | | | |
| 85 | | 8 | bool KutuzovIElemVecAverageMPI::PostProcessingImpl() { |
| 86 | | 8 | return true; |
| 87 | | | } |
| 88 | | | |
| 89 | | | } // namespace kutuzov_i_elem_vec_average |
| 90 | | | |
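
The covered `RunImpl()` above follows a broadcast-size / scatter / local-sum / reduce pattern, with any remainder elements (when the vector length is not divisible by the number of processes) summed on rank 0, which is the branch taken once at source line 67. The sketch below reproduces that pattern as a standalone program for reference; it is an illustration only, not part of the measured source: the `main()` wrapper, the sample vector, and the use of `std::vector` instead of the raw `new[]`/`delete[]` buffer at lines 53 and 62 are choices made here for brevity.

```cpp
#include <mpi.h>

#include <iostream>
#include <vector>

// Standalone sketch of the scatter / local-sum / reduce pattern covered above.
// Illustrative only: the sample data and the std::vector buffer are not taken
// from the measured source, which uses new[]/delete[] (lines 53 and 62).
int main(int argc, char **argv) {
  MPI_Init(&argc, &argv);

  int rank = 0;
  int num_processes = 0;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &num_processes);

  // Rank 0 owns the full vector; only its size is broadcast to every process.
  std::vector<double> input;
  int total = 0;
  if (rank == 0) {
    input = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0};
    total = static_cast<int>(input.size());
  }
  MPI_Bcast(&total, 1, MPI_INT, 0, MPI_COMM_WORLD);

  const int batch = total / num_processes;
  double global_sum = 0.0;

  if (batch > 0) {
    // Each process receives an equal batch, sums it locally, and the partial
    // sums are reduced onto rank 0.
    std::vector<double> local(batch);
    MPI_Scatter(input.data(), batch, MPI_DOUBLE, local.data(), batch, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    double sum = 0.0;
    for (double v : local) {
      sum += v;
    }
    MPI_Reduce(&sum, &global_sum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
  }

  double average = 0.0;
  if (rank == 0) {
    // Elements that did not fit into an even batch are summed on rank 0 only.
    for (int i = num_processes * batch; i < total; i++) {
      global_sum += input[i];
    }
    average = global_sum / static_cast<double>(total);
  }
  // Every process ends up with the same result, as in RunImpl().
  MPI_Bcast(&average, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);

  if (rank == 0) {
    std::cout << "average = " << average << "\n";  // 4 for the sample vector
  }

  MPI_Finalize();
  return 0;
}
```

Run with, for example, `mpirun -np 3 ./average_sketch` (binary name hypothetical): with 3 processes and the 7-element sample, the batch size is 2, so one trailing element goes through the remainder loop, the same path covered at source lines 67-69 above.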