| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "gasenin_l_image_smooth/mpi/include/ops_mpi.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <cstdint> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "gasenin_l_image_smooth/common/include/common.hpp" | ||
| 12 | |||
| 13 | namespace gasenin_l_image_smooth { | ||
| 14 | |||
| 15 |
1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
|
12 | GaseninLImageSmoothMPI::GaseninLImageSmoothMPI(const InType &in) { |
| 16 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 17 | GetInput() = in; | ||
| 18 | 12 | } | |
| 19 | |||
| 20 | 12 | bool GaseninLImageSmoothMPI::ValidationImpl() { | |
| 21 | 12 | int rank = 0; | |
| 22 | 12 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 23 | |||
| 24 | 12 | std::array<int, 4> params = {0, 0, 0, 0}; | |
| 25 | |||
| 26 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
|
12 | if (rank == 0) { |
| 27 | 6 | params[1] = GetInput().width; | |
| 28 | 6 | params[2] = GetInput().height; | |
| 29 | 6 | params[3] = GetInput().kernel_size; | |
| 30 | |||
| 31 |
3/6✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 6 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 6 times.
✗ Branch 5 not taken.
|
6 | if (params[1] > 0 && params[2] > 0 && params[3] > 0) { |
| 32 | 6 | params[0] = 1; | |
| 33 | } else { | ||
| 34 | params[0] = 0; | ||
| 35 | } | ||
| 36 | } | ||
| 37 | |||
| 38 | 12 | MPI_Bcast(params.data(), 4, MPI_INT, 0, MPI_COMM_WORLD); | |
| 39 | |||
| 40 |
1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
|
12 | if (params[0] == 0) { |
| 41 | return false; | ||
| 42 | } | ||
| 43 | |||
| 44 | 12 | GetInput().width = params[1]; | |
| 45 | 12 | GetInput().height = params[2]; | |
| 46 | 12 | GetInput().kernel_size = params[3]; | |
| 47 | |||
| 48 | 12 | return true; | |
| 49 | } | ||
| 50 | |||
| 51 | 12 | bool GaseninLImageSmoothMPI::PreProcessingImpl() { | |
| 52 | 12 | int rank = 0; | |
| 53 | 12 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 54 | |||
| 55 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
|
12 | if (rank == 0) { |
| 56 | GetOutput() = GetInput(); | ||
| 57 | std::fill(GetOutput().data.begin(), GetOutput().data.end(), 0); | ||
| 58 | } else { | ||
| 59 | 6 | GetOutput().width = GetInput().width; | |
| 60 | 6 | GetOutput().height = GetInput().height; | |
| 61 | 6 | GetOutput().kernel_size = GetInput().kernel_size; | |
| 62 | } | ||
| 63 | 12 | return true; | |
| 64 | } | ||
| 65 | |||
| 66 | namespace { | ||
| 67 | |||
// Computes the contiguous band of rows owned by `rank` when `height` rows are
// split across `size` ranks. Each rank gets height / size rows, and the first
// height % size ranks get one extra row.
//
// Outputs: start_row is the first owned row, end_row is one past the last
// owned row, and local_rows is the number of owned rows (end_row - start_row).
void CalculateRowDistribution(int rank, int size, int height, int &start_row, int &end_row, int &local_rows) {
  const int quotient = height / size;
  const int remainder = height % size;

  // Ranks below `remainder` each absorb one leftover row.
  const int bonus_row = (rank < remainder) ? 1 : 0;
  const int first_row = (rank * quotient) + std::min(rank, remainder);
  const int row_count = quotient + bonus_row;

  start_row = first_row;
  end_row = first_row + row_count;
  local_rows = row_count;
}
| 76 | |||
// Fills the per-rank element counts and displacements used to scatter the
// image from rank 0. Does nothing on any other rank.
//
// Each rank's slice is its own row band (derived from base_rows / extra_rows)
// widened by kernel_radius rows on both sides and clipped to [0, height).
// Counts and displacements are in elements (rows * width). A rank that owns no
// rows gets count 0 and displacement 0.
void PrepareScatterParameters(int rank, int size, int width, int height, int kernel_radius, int base_rows,
                              int extra_rows, std::vector<int> &sendcounts, std::vector<int> &displs) {
  if (rank != 0) {
    return;
  }

  // Start from all-zero tables; only ranks that own rows are overwritten below.
  sendcounts.assign(size, 0);
  displs.assign(size, 0);

  for (int proc = 0; proc < size; ++proc) {
    const int owned_begin = (proc * base_rows) + std::min(proc, extra_rows);
    const int owned_end = owned_begin + base_rows + (proc < extra_rows ? 1 : 0);
    if (owned_end <= owned_begin) {
      continue;
    }

    // Extend the band with halo rows, clipped to the image.
    const int slice_begin = std::max(0, owned_begin - kernel_radius);
    const int slice_end = std::min(height, owned_end + kernel_radius);

    sendcounts[proc] = (slice_end - slice_begin) * width;
    displs[proc] = slice_begin * width;
  }
}
| 98 | |||
// Averages the full kernel_size x kernel_size window around one pixel whose
// window is addressed without any bounds handling, and stores the truncated
// mean in local_result.
//
// buffer_y is the pixel's row inside local_data, col is its column, and
// local_row_idx is its row inside local_result. Both buffers are row-major
// with `width` elements per row. The window starts kernel_size / 2 rows above
// and kernel_size / 2 columns left of the pixel.
void ProcessInteriorPixel(int buffer_y, int col, int width, int kernel_size, const std::vector<uint8_t> &local_data,
                          std::vector<uint8_t> &local_result, int local_row_idx) {
  const int half = kernel_size / 2;

  // Top-left corner of the window.
  const uint8_t *const window_origin =
      local_data.data() + (static_cast<std::ptrdiff_t>(buffer_y - half) * width) + (col - half);

  int total = 0;
  for (int dy = 0; dy < kernel_size; ++dy) {
    const uint8_t *const row_begin = window_origin + (static_cast<std::ptrdiff_t>(dy) * width);
    const uint8_t *const row_end = row_begin + kernel_size;
    for (const uint8_t *px = row_begin; px != row_end; ++px) {
      total += *px;
    }
  }

  const int window_area = kernel_size * kernel_size;
  local_result[(local_row_idx * width) + col] = static_cast<uint8_t>(total / window_area);
}
| 115 | |||
| 116 | 1505 | void ProcessBorderPixel(int global_y, int col, int width, int height, int kernel_radius, int actual_start, | |
| 117 | const std::vector<uint8_t> &local_data, std::vector<uint8_t> &local_result, int local_row_idx) { | ||
| 118 | int sum = 0; | ||
| 119 | int count = 0; | ||
| 120 | |||
| 121 |
2/2✓ Branch 0 taken 8435 times.
✓ Branch 1 taken 1505 times.
|
9940 | for (int kernel_y = -kernel_radius; kernel_y <= kernel_radius; ++kernel_y) { |
| 122 | 8435 | const int global_y_offset = global_y + kernel_y; | |
| 123 |
2/2✓ Branch 0 taken 7780 times.
✓ Branch 1 taken 655 times.
|
8435 | const int clamped_y = Clamp(global_y_offset, 0, height - 1); |
| 124 | 8435 | const int buffer_y = clamped_y - actual_start; | |
| 125 | |||
| 126 |
2/2✓ Branch 0 taken 50761 times.
✓ Branch 1 taken 8435 times.
|
59196 | for (int kernel_x = -kernel_radius; kernel_x <= kernel_radius; ++kernel_x) { |
| 127 |
2/2✓ Branch 0 taken 46912 times.
✓ Branch 1 taken 3849 times.
|
50761 | const int global_x_offset = Clamp(col + kernel_x, 0, width - 1); |
| 128 | 50761 | sum += local_data[(buffer_y * width) + global_x_offset]; | |
| 129 | 50761 | ++count; | |
| 130 | } | ||
| 131 | } | ||
| 132 | |||
| 133 | 1505 | const int index = (local_row_idx * width) + col; | |
| 134 |
1/2✓ Branch 0 taken 1505 times.
✗ Branch 1 not taken.
|
1505 | local_result[index] = (count > 0) ? static_cast<uint8_t>(sum / count) : 0; |
| 135 | 1505 | } | |
| 136 | |||
| 137 | 11 | void ProcessLocalRows(int start_row, int local_rows, int width, int height, int kernel_size, int kernel_radius, | |
| 138 | int actual_start, const std::vector<uint8_t> &local_data, std::vector<uint8_t> &local_result) { | ||
| 139 | 11 | const int row_offset_in_buffer = start_row - actual_start; | |
| 140 | |||
| 141 |
2/2✓ Branch 0 taken 199 times.
✓ Branch 1 taken 11 times.
|
210 | for (int local_row_idx = 0; local_row_idx < local_rows; ++local_row_idx) { |
| 142 | 199 | const int global_y = start_row + local_row_idx; | |
| 143 | 199 | const int buffer_y = row_offset_in_buffer + local_row_idx; | |
| 144 |
4/4✓ Branch 0 taken 191 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 7 times.
✓ Branch 3 taken 184 times.
|
199 | const bool is_global_border_y = (global_y < kernel_radius) || (global_y >= height - kernel_radius); |
| 145 | |||
| 146 |
2/2✓ Branch 0 taken 12329 times.
✓ Branch 1 taken 199 times.
|
12528 | for (int col = 0; col < width; ++col) { |
| 147 |
4/4✓ Branch 0 taken 11942 times.
✓ Branch 1 taken 387 times.
✓ Branch 2 taken 11555 times.
✓ Branch 3 taken 387 times.
|
12329 | const bool is_border_col = (col < kernel_radius) || (col >= width - kernel_radius); |
| 148 | |||
| 149 |
2/2✓ Branch 0 taken 10824 times.
✓ Branch 1 taken 731 times.
|
11555 | if (!is_global_border_y && !is_border_col) { |
| 150 | 10824 | ProcessInteriorPixel(buffer_y, col, width, kernel_size, local_data, local_result, local_row_idx); | |
| 151 | } else { | ||
| 152 | 1505 | ProcessBorderPixel(global_y, col, width, height, kernel_radius, actual_start, local_data, local_result, | |
| 153 | local_row_idx); | ||
| 154 | } | ||
| 155 | } | ||
| 156 | } | ||
| 157 | 11 | } | |
| 158 | |||
// Fills the per-rank element counts and displacements used to gather the
// processed rows on rank 0. Does nothing on any other rank.
//
// Rows are split as height / size per rank, with the first height % size
// ranks taking one extra row. Counts and displacements are in elements
// (rows * width).
void PrepareGatherParameters(int rank, int size, int width, int height, std::vector<int> &sendcounts,
                             std::vector<int> &displs) {
  if (rank != 0) {
    return;
  }

  sendcounts.resize(size);
  displs.resize(size);

  const int quotient = height / size;
  const int remainder = height % size;

  for (int proc = 0; proc < size; ++proc) {
    const int first_row = (proc * quotient) + std::min(proc, remainder);
    const int past_last_row = first_row + quotient + (proc < remainder ? 1 : 0);

    displs[proc] = first_row * width;
    sendcounts[proc] = (past_last_row - first_row) * width;
  }
}
| 176 | |||
| 177 | } // namespace | ||
| 178 | |||
| 179 | 12 | bool GaseninLImageSmoothMPI::RunImpl() { | |
| 180 | 12 | int rank = 0; | |
| 181 | 12 | int size = 0; | |
| 182 | 12 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 183 | 12 | MPI_Comm_size(MPI_COMM_WORLD, &size); | |
| 184 | |||
| 185 | 12 | const int width = GetInput().width; | |
| 186 | 12 | const int height = GetInput().height; | |
| 187 | 12 | const int kernel_size = GetInput().kernel_size; | |
| 188 | 12 | const int kernel_radius = kernel_size / 2; | |
| 189 | |||
| 190 | int start_row = 0; | ||
| 191 | int end_row = 0; | ||
| 192 | int local_rows = 0; | ||
| 193 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 1 times.
|
12 | CalculateRowDistribution(rank, size, height, start_row, end_row, local_rows); |
| 194 | |||
| 195 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 1 times.
|
12 | if (local_rows <= 0) { |
| 196 | return true; | ||
| 197 | } | ||
| 198 | |||
| 199 | const int overlap_top = kernel_radius; | ||
| 200 | const int overlap_bottom = kernel_radius; | ||
| 201 | 11 | const int actual_start = std::max(0, start_row - overlap_top); | |
| 202 | 11 | const int actual_end = std::min(height, end_row + overlap_bottom); | |
| 203 | 11 | const int extended_rows = actual_end - actual_start; | |
| 204 | 11 | const int recv_count = extended_rows * width; | |
| 205 | |||
| 206 | 11 | std::vector<uint8_t> local_data(recv_count); | |
| 207 | |||
| 208 | 11 | const int base_rows = height / size; | |
| 209 | 11 | const int extra_rows = height % size; | |
| 210 | 11 | std::vector<int> sendcounts; | |
| 211 | 11 | std::vector<int> displs; | |
| 212 | |||
| 213 |
1/2✓ Branch 1 taken 11 times.
✗ Branch 2 not taken.
|
11 | PrepareScatterParameters(rank, size, width, height, kernel_radius, base_rows, extra_rows, sendcounts, displs); |
| 214 | |||
| 215 |
3/4✓ Branch 0 taken 6 times.
✓ Branch 1 taken 5 times.
✓ Branch 3 taken 11 times.
✗ Branch 4 not taken.
|
17 | MPI_Scatterv((rank == 0 ? GetInput().data.data() : nullptr), sendcounts.data(), displs.data(), MPI_UNSIGNED_CHAR, |
| 216 | local_data.data(), recv_count, MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD); | ||
| 217 | |||
| 218 |
1/4✓ Branch 1 taken 11 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
11 | std::vector<uint8_t> local_result(static_cast<size_t>(local_rows) * width); |
| 219 | |||
| 220 | 11 | ProcessLocalRows(start_row, local_rows, width, height, kernel_size, kernel_radius, actual_start, local_data, | |
| 221 | local_result); | ||
| 222 | |||
| 223 |
1/2✓ Branch 1 taken 11 times.
✗ Branch 2 not taken.
|
11 | PrepareGatherParameters(rank, size, width, height, sendcounts, displs); |
| 224 | |||
| 225 |
1/2✓ Branch 1 taken 11 times.
✗ Branch 2 not taken.
|
11 | MPI_Gatherv(local_result.data(), local_rows * width, MPI_UNSIGNED_CHAR, |
| 226 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 5 times.
|
11 | (rank == 0 ? GetOutput().data.data() : nullptr), sendcounts.data(), displs.data(), MPI_UNSIGNED_CHAR, 0, |
| 227 | MPI_COMM_WORLD); | ||
| 228 | |||
| 229 | return true; | ||
| 230 | } | ||
| 231 | |||
| 232 | 12 | bool GaseninLImageSmoothMPI::PostProcessingImpl() { | |
| 233 | 12 | return true; | |
| 234 | } | ||
| 235 | |||
| 236 | } // namespace gasenin_l_image_smooth | ||
| 237 |