| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "smyshlaev_a_gauss_filt/mpi/include/ops_mpi.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cmath> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <cstdint> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "smyshlaev_a_gauss_filt/common/include/common.hpp" | ||
| 13 | #include "smyshlaev_a_gauss_filt/seq/include/ops_seq.hpp" | ||
| 14 | |||
| 15 | namespace smyshlaev_a_gauss_filt { | ||
| 16 | |||
namespace {

// 3x3 Gaussian kernel weights in row-major order (top-left to bottom-right).
// constexpr std::array avoids the heap allocation and dynamic initialisation
// a namespace-scope std::vector would need.
constexpr std::array<int, 9> kErnel = {1, 2, 1, 2, 4, 2, 1, 2, 1};

// Sum of all kErnel weights; used to normalise the weighted sum.
constexpr int kErnelSum = 16;

// Picks the factorisation size == grid_rows * grid_cols whose two factors are
// closest to each other, with grid_rows <= grid_cols.
//
// size      - number of processes to arrange.
// grid_rows - output: number of rows in the process grid.
// grid_cols - output: number of columns in the process grid.
//
// For size <= 0 no divisor is examined and the result is grid_rows = 1,
// grid_cols = size.
void FindOptimalGrid(int size, int &grid_rows, int &grid_cols) {
  grid_rows = 1;
  grid_cols = size;
  int best_diff = size;

  // `rows <= size / rows` is the overflow-free form of `rows * rows <= size`.
  for (int rows = 1; rows <= size / rows; ++rows) {
    if (size % rows != 0) {
      continue;
    }
    const int cols = size / rows;
    // rows never exceeds sqrt(size), hence cols >= rows and the gap is non-negative.
    const int diff = cols - rows;
    if (diff < best_diff) {
      best_diff = diff;
      grid_rows = rows;
      grid_cols = cols;
    }
  }
}

// Computes one channel of one output pixel by convolving the 3x3 neighbourhood
// around (x, y) with kErnel and dividing by kErnelSum (integer division).
//
// padded_data  - interleaved pixel buffer that includes a border around the
//                area being filtered.
// x, y         - pixel coordinates inside padded_data; the caller must ensure
//                all eight neighbours of (x, y) are inside the buffer.
// padded_width - row length of padded_data in pixels.
// channels     - number of interleaved channels per pixel.
// channel      - channel index to filter.
uint8_t ApplyGaussianFilter(const std::vector<uint8_t> &padded_data, int x, int y, int padded_width, int channels,
                            int channel) {
  const auto row_stride = static_cast<size_t>(padded_width);
  const auto pixel_stride = static_cast<size_t>(channels);

  int sum = 0;
  for (int ky = -1; ky <= 1; ++ky) {
    const auto row = static_cast<size_t>(y + ky);
    for (int kx = -1; kx <= 1; ++kx) {
      const auto col = static_cast<size_t>(x + kx);
      // Index arithmetic is done in size_t so large images cannot overflow int.
      const size_t index = (((row * row_stride) + col) * pixel_stride) + static_cast<size_t>(channel);
      const int weight = kErnel[static_cast<size_t>(((ky + 1) * 3) + (kx + 1))];
      sum += static_cast<int>(padded_data[index]) * weight;
    }
  }
  return static_cast<uint8_t>(sum / kErnelSum);
}

}  // namespace
| 54 | |||
| 55 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | SmyshlaevAGaussFiltMPI::SmyshlaevAGaussFiltMPI(const InType &in) { |
| 56 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 57 | 16 | int rank = 0; | |
| 58 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); |
| 59 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | if (rank == 0) { |
| 60 | GetInput() = in; | ||
| 61 | } | ||
| 62 | 16 | } | |
| 63 | |||
| 64 | 16 | bool SmyshlaevAGaussFiltMPI::ValidationImpl() { | |
| 65 | 16 | int rank = 0; | |
| 66 | 16 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 67 | 16 | int error_flag = 0; | |
| 68 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | if (rank == 0) { |
| 69 | const InType &input_img = GetInput(); | ||
| 70 |
4/8✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 8 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 8 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 8 times.
|
8 | if (input_img.width <= 0 || input_img.height <= 0 || input_img.channels <= 0 || input_img.data.empty()) { |
| 71 | ✗ | error_flag = 1; | |
| 72 | } | ||
| 73 | } | ||
| 74 | 16 | MPI_Bcast(&error_flag, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 75 | 16 | return (error_flag == 0); | |
| 76 | } | ||
| 77 | |||
| 78 | 16 | bool SmyshlaevAGaussFiltMPI::PreProcessingImpl() { | |
| 79 | 16 | return true; | |
| 80 | } | ||
| 81 | |||
| 82 | 16 | void SmyshlaevAGaussFiltMPI::BroadcastImageDimensions(int &width, int &height, int &channels) { | |
| 83 | 16 | int rank = 0; | |
| 84 | 16 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 85 | |||
| 86 | 16 | std::array<int, 3> dims = {0, 0, 0}; | |
| 87 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | if (rank == 0) { |
| 88 | 8 | dims[0] = GetInput().width; | |
| 89 | 8 | dims[1] = GetInput().height; | |
| 90 | 8 | dims[2] = GetInput().channels; | |
| 91 | } | ||
| 92 | 16 | MPI_Bcast(dims.data(), 3, MPI_INT, 0, MPI_COMM_WORLD); | |
| 93 | 16 | width = dims[0]; | |
| 94 | 16 | height = dims[1]; | |
| 95 | 16 | channels = dims[2]; | |
| 96 | 16 | } | |
| 97 | |||
| 98 | ✗ | bool SmyshlaevAGaussFiltMPI::RunSequential() { | |
| 99 | ✗ | int rank = 0; | |
| 100 | ✗ | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 101 | ✗ | if (rank == 0) { | |
| 102 | ✗ | SmyshlaevAGaussFiltSEQ seq_task(GetInput()); | |
| 103 | ✗ | if (seq_task.Validation()) { | |
| 104 | ✗ | seq_task.PreProcessing(); | |
| 105 | ✗ | seq_task.Run(); | |
| 106 | ✗ | seq_task.PostProcessing(); | |
| 107 | GetOutput() = seq_task.GetOutput(); | ||
| 108 | } | ||
| 109 | } | ||
| 110 | ✗ | return true; | |
| 111 | } | ||
| 112 | |||
| 113 | 16 | void SmyshlaevAGaussFiltMPI::SetupDecomposition(DecompositionInfo &info, int width, int height, int channels) { | |
| 114 | 16 | int rank = 0; | |
| 115 | 16 | int size = 0; | |
| 116 | 16 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 117 | 16 | MPI_Comm_size(MPI_COMM_WORLD, &size); | |
| 118 | |||
| 119 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | if (rank == 0) { |
| 120 | 8 | FindOptimalGrid(size, info.grid_rows, info.grid_cols); | |
| 121 | } | ||
| 122 | |||
| 123 | 16 | MPI_Bcast(&info.grid_rows, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 124 | 16 | MPI_Bcast(&info.grid_cols, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 125 | |||
| 126 | 16 | info.blocks.resize(size); | |
| 127 | 16 | info.sendcounts.resize(size); | |
| 128 | 16 | info.displs.resize(size); | |
| 129 | |||
| 130 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | if (rank == 0) { |
| 131 | 8 | int block_height = (height + info.grid_rows - 1) / info.grid_rows; | |
| 132 | 8 | int block_width = (width + info.grid_cols - 1) / info.grid_cols; | |
| 133 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 8 times.
|
24 | for (int pdx = 0; pdx < size; ++pdx) { |
| 134 | 16 | int grid_row = pdx / info.grid_cols; | |
| 135 | 16 | int grid_col = pdx % info.grid_cols; | |
| 136 | 16 | int start_row = grid_row * block_height; | |
| 137 | 16 | int start_col = grid_col * block_width; | |
| 138 | 16 | int actual_block_height = std::min(block_height, height - start_row); | |
| 139 | 16 | int actual_block_width = std::min(block_width, width - start_col); | |
| 140 | |||
| 141 | int padded_top = 1; | ||
| 142 | int padded_bottom = 1; | ||
| 143 | int padded_left = 1; | ||
| 144 | int padded_right = 1; | ||
| 145 | |||
| 146 | 16 | info.blocks[pdx].start_row = start_row; | |
| 147 | 16 | info.blocks[pdx].start_col = start_col; | |
| 148 | 16 | info.blocks[pdx].block_height = actual_block_height; | |
| 149 | 16 | info.blocks[pdx].block_width = actual_block_width; | |
| 150 | 16 | info.blocks[pdx].padded_height = actual_block_height + padded_top + padded_bottom; | |
| 151 | 16 | info.blocks[pdx].padded_width = actual_block_width + padded_left + padded_right; | |
| 152 | 16 | info.blocks[pdx].count = info.blocks[pdx].padded_height * info.blocks[pdx].padded_width * channels; | |
| 153 | 16 | info.sendcounts[pdx] = info.blocks[pdx].count; | |
| 154 | } | ||
| 155 | 8 | info.displs[0] = 0; | |
| 156 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | for (int pdx = 1; pdx < size; ++pdx) { |
| 157 | 8 | info.displs[pdx] = info.displs[pdx - 1] + info.sendcounts[pdx - 1]; | |
| 158 | } | ||
| 159 | } | ||
| 160 | |||
| 161 | 16 | MPI_Bcast(info.sendcounts.data(), size, MPI_INT, 0, MPI_COMM_WORLD); | |
| 162 | 16 | MPI_Bcast(info.displs.data(), size, MPI_INT, 0, MPI_COMM_WORLD); | |
| 163 | |||
| 164 | 16 | std::vector<int> block_info_buffer(static_cast<size_t>(size) * 6); | |
| 165 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | if (rank == 0) { |
| 166 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 8 times.
|
24 | for (int pdx = 0; pdx < size; ++pdx) { |
| 167 | 16 | block_info_buffer[(pdx * 6) + 0] = info.blocks[pdx].start_row; | |
| 168 | 16 | block_info_buffer[(pdx * 6) + 1] = info.blocks[pdx].start_col; | |
| 169 | 16 | block_info_buffer[(pdx * 6) + 2] = info.blocks[pdx].block_height; | |
| 170 | 16 | block_info_buffer[(pdx * 6) + 3] = info.blocks[pdx].block_width; | |
| 171 | 16 | block_info_buffer[(pdx * 6) + 4] = info.blocks[pdx].padded_height; | |
| 172 | 16 | block_info_buffer[(pdx * 6) + 5] = info.blocks[pdx].padded_width; | |
| 173 | } | ||
| 174 | } | ||
| 175 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | MPI_Bcast(block_info_buffer.data(), size * 6, MPI_INT, 0, MPI_COMM_WORLD); |
| 176 | |||
| 177 |
2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 16 times.
|
48 | for (int pdx = 0; pdx < size; ++pdx) { |
| 178 | 32 | info.blocks[pdx].start_row = block_info_buffer[(pdx * 6) + 0]; | |
| 179 | 32 | info.blocks[pdx].start_col = block_info_buffer[(pdx * 6) + 1]; | |
| 180 | 32 | info.blocks[pdx].block_height = block_info_buffer[(pdx * 6) + 2]; | |
| 181 | 32 | info.blocks[pdx].block_width = block_info_buffer[(pdx * 6) + 3]; | |
| 182 | 32 | info.blocks[pdx].padded_height = block_info_buffer[(pdx * 6) + 4]; | |
| 183 | 32 | info.blocks[pdx].padded_width = block_info_buffer[(pdx * 6) + 5]; | |
| 184 | } | ||
| 185 | 16 | } | |
| 186 | |||
| 187 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | std::vector<uint8_t> SmyshlaevAGaussFiltMPI::PrepareScatterBuffer(const DecompositionInfo &info, int width, int height, |
| 188 | int channels) { | ||
| 189 | 8 | int size = static_cast<int>(info.sendcounts.size()); | |
| 190 | const auto &input_image = GetInput(); | ||
| 191 | |||
| 192 | 8 | std::vector<uint8_t> scatter_buffer; | |
| 193 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | scatter_buffer.resize(info.displs[size - 1] + info.sendcounts[size - 1]); |
| 194 | |||
| 195 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 8 times.
|
24 | for (int pdx = 0; pdx < size; ++pdx) { |
| 196 | 16 | uint8_t *buffer_ptr = scatter_buffer.data() + info.displs[pdx]; | |
| 197 | const auto &block = info.blocks[pdx]; | ||
| 198 | 16 | int src_y_start = block.start_row - 1; | |
| 199 | 16 | int src_x_start = block.start_col - 1; | |
| 200 |
2/2✓ Branch 0 taken 456 times.
✓ Branch 1 taken 16 times.
|
472 | for (int idy = 0; idy < block.padded_height; ++idy) { |
| 201 |
2/2✓ Branch 0 taken 7646 times.
✓ Branch 1 taken 456 times.
|
8102 | for (int idx = 0; idx < block.padded_width; ++idx) { |
| 202 | 7646 | int global_y = std::clamp(src_y_start + idy, 0, height - 1); | |
| 203 | 7646 | int global_x = std::clamp(src_x_start + idx, 0, width - 1); | |
| 204 |
2/2✓ Branch 0 taken 19970 times.
✓ Branch 1 taken 7646 times.
|
27616 | for (int ch = 0; ch < channels; ++ch) { |
| 205 | 19970 | buffer_ptr[(((idy * block.padded_width) + idx) * channels) + ch] = | |
| 206 | 19970 | input_image.data[(((global_y * width) + global_x) * channels) + ch]; | |
| 207 | } | ||
| 208 | } | ||
| 209 | } | ||
| 210 | } | ||
| 211 | 8 | return scatter_buffer; | |
| 212 | } | ||
| 213 | |||
| 214 | 16 | std::vector<uint8_t> SmyshlaevAGaussFiltMPI::ProcessLocalBlock(const DecompositionInfo &info, int width, int height, | |
| 215 | int channels) { | ||
| 216 | 16 | int rank = 0; | |
| 217 | 16 | int size = 0; | |
| 218 | 16 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 219 | 16 | MPI_Comm_size(MPI_COMM_WORLD, &size); | |
| 220 | |||
| 221 | 16 | std::vector<uint8_t> scatter_buffer; | |
| 222 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | if (rank == 0) { |
| 223 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
16 | scatter_buffer = PrepareScatterBuffer(info, width, height, channels); |
| 224 | } | ||
| 225 | |||
| 226 |
1/4✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
16 | std::vector<uint8_t> local_block_data(info.sendcounts[rank]); |
| 227 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | MPI_Scatterv(scatter_buffer.data(), info.sendcounts.data(), info.displs.data(), MPI_UNSIGNED_CHAR, |
| 228 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | local_block_data.data(), info.sendcounts[rank], MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD); |
| 229 | |||
| 230 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | const auto &my_block = info.blocks[rank]; |
| 231 | |||
| 232 | int x_offset = 1; | ||
| 233 | int y_offset = 1; | ||
| 234 | |||
| 235 |
1/4✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
16 | std::vector<uint8_t> local_output_data(static_cast<size_t>(my_block.block_height) * my_block.block_width * channels); |
| 236 |
2/2✓ Branch 0 taken 424 times.
✓ Branch 1 taken 16 times.
|
440 | for (int idy = 0; idy < my_block.block_height; ++idy) { |
| 237 |
2/2✓ Branch 0 taken 6326 times.
✓ Branch 1 taken 424 times.
|
6750 | for (int idx = 0; idx < my_block.block_width; ++idx) { |
| 238 |
2/2✓ Branch 0 taken 16354 times.
✓ Branch 1 taken 6326 times.
|
22680 | for (int ch = 0; ch < channels; ++ch) { |
| 239 | 16354 | local_output_data[((idy * my_block.block_width + idx) * channels) + ch] = | |
| 240 | 16354 | ApplyGaussianFilter(local_block_data, idx + x_offset, idy + y_offset, my_block.padded_width, channels, ch); | |
| 241 | } | ||
| 242 | } | ||
| 243 | } | ||
| 244 | 16 | return local_output_data; | |
| 245 | } | ||
| 246 | |||
| 247 | 16 | void SmyshlaevAGaussFiltMPI::CollectResult(const std::vector<uint8_t> &local_result, const DecompositionInfo &info, | |
| 248 | int width, int height, int channels) { | ||
| 249 | 16 | int rank = 0; | |
| 250 | 16 | int size = 0; | |
| 251 | 16 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 252 | 16 | MPI_Comm_size(MPI_COMM_WORLD, &size); | |
| 253 | |||
| 254 | 16 | std::vector<int> recvcounts(size); | |
| 255 |
1/4✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
16 | std::vector<int> recv_displs(size, 0); |
| 256 | 16 | std::vector<uint8_t> gathered_data; | |
| 257 | |||
| 258 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | if (rank == 0) { |
| 259 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 8 times.
|
24 | for (int pdx = 0; pdx < size; ++pdx) { |
| 260 | 16 | recvcounts[pdx] = info.blocks[pdx].block_height * info.blocks[pdx].block_width * channels; | |
| 261 | } | ||
| 262 | 8 | recv_displs[0] = 0; | |
| 263 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | for (int pdx = 1; pdx < size; ++pdx) { |
| 264 | 8 | recv_displs[pdx] = recv_displs[pdx - 1] + recvcounts[pdx - 1]; | |
| 265 | } | ||
| 266 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | gathered_data.resize(static_cast<size_t>(width) * height * channels); |
| 267 | } | ||
| 268 | |||
| 269 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | MPI_Gatherv(local_result.data(), static_cast<int>(local_result.size()), MPI_UNSIGNED_CHAR, gathered_data.data(), |
| 270 | recvcounts.data(), recv_displs.data(), MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD); | ||
| 271 | |||
| 272 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | if (rank == 0) { |
| 273 | OutType &output_image = GetOutput(); | ||
| 274 | 8 | output_image.width = width; | |
| 275 | 8 | output_image.height = height; | |
| 276 | 8 | output_image.channels = channels; | |
| 277 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | output_image.data.resize(static_cast<size_t>(width) * height * channels); |
| 278 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 8 times.
|
24 | for (int pdx = 0; pdx < size; ++pdx) { |
| 279 | 16 | const uint8_t *src_ptr = gathered_data.data() + recv_displs[pdx]; | |
| 280 | const auto &block = info.blocks[pdx]; | ||
| 281 |
2/2✓ Branch 0 taken 424 times.
✓ Branch 1 taken 16 times.
|
440 | for (int idy = 0; idy < block.block_height; ++idy) { |
| 282 | 424 | int global_y = block.start_row + idy; | |
| 283 |
1/2✓ Branch 0 taken 424 times.
✗ Branch 1 not taken.
|
424 | uint8_t *dst_ptr = &output_image.data[(static_cast<size_t>(global_y) * width + block.start_col) * channels]; |
| 284 |
1/2✓ Branch 0 taken 424 times.
✗ Branch 1 not taken.
|
424 | std::copy_n(src_ptr + (static_cast<size_t>(idy) * block.block_width * channels), block.block_width * channels, |
| 285 | dst_ptr); | ||
| 286 | } | ||
| 287 | } | ||
| 288 | } | ||
| 289 | 16 | } | |
| 290 | |||
| 291 | 16 | bool SmyshlaevAGaussFiltMPI::RunImpl() { | |
| 292 | 16 | int rank = 0; | |
| 293 | 16 | int size = 0; | |
| 294 | 16 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 295 | 16 | MPI_Comm_size(MPI_COMM_WORLD, &size); | |
| 296 | |||
| 297 | 16 | int img_width = 0; | |
| 298 | 16 | int img_height = 0; | |
| 299 | 16 | int img_channels = 0; | |
| 300 | 16 | BroadcastImageDimensions(img_width, img_height, img_channels); | |
| 301 | |||
| 302 | int grid_rows = 0; | ||
| 303 | int grid_cols = 0; | ||
| 304 | 16 | FindOptimalGrid(size, grid_rows, grid_cols); | |
| 305 | |||
| 306 |
2/4✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 16 times.
|
16 | if (img_height < grid_rows || img_width < grid_cols) { |
| 307 | ✗ | RunSequential(); | |
| 308 | ✗ | return true; | |
| 309 | } | ||
| 310 | |||
| 311 | 16 | DecompositionInfo decomp_info; | |
| 312 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | SetupDecomposition(decomp_info, img_width, img_height, img_channels); |
| 313 | |||
| 314 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | std::vector<uint8_t> local_result = ProcessLocalBlock(decomp_info, img_width, img_height, img_channels); |
| 315 | |||
| 316 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | CollectResult(local_result, decomp_info, img_width, img_height, img_channels); |
| 317 | |||
| 318 | return true; | ||
| 319 | 16 | } | |
| 320 | |||
| 321 | 16 | bool SmyshlaevAGaussFiltMPI::PostProcessingImpl() { | |
| 322 | 16 | int rank = 0; | |
| 323 | 16 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 324 | auto &output = GetOutput(); | ||
| 325 | 16 | std::array<int, 3> dims = {0, 0, 0}; | |
| 326 | |||
| 327 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | if (rank == 0) { |
| 328 | 8 | dims[0] = output.width; | |
| 329 | 8 | dims[1] = output.height; | |
| 330 | 8 | dims[2] = output.channels; | |
| 331 | } | ||
| 332 | 16 | MPI_Bcast(dims.data(), 3, MPI_INT, 0, MPI_COMM_WORLD); | |
| 333 | |||
| 334 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | if (rank != 0) { |
| 335 | 8 | output.width = dims[0]; | |
| 336 | 8 | output.height = dims[1]; | |
| 337 | 8 | output.channels = dims[2]; | |
| 338 |
3/6✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 8 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 8 times.
✗ Branch 5 not taken.
|
8 | if (dims[0] > 0 && dims[1] > 0 && dims[2] > 0) { |
| 339 | 8 | output.data.resize(static_cast<size_t>(dims[0]) * dims[1] * dims[2]); | |
| 340 | } else { | ||
| 341 | output.data.clear(); | ||
| 342 | } | ||
| 343 | } | ||
| 344 | |||
| 345 |
1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
|
16 | if (!output.data.empty()) { |
| 346 | 16 | MPI_Bcast(output.data.data(), static_cast<int>(output.data.size()), MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD); | |
| 347 | } | ||
| 348 | |||
| 349 | 16 | return true; | |
| 350 | } | ||
| 351 | |||
| 352 | } // namespace smyshlaev_a_gauss_filt | ||
| 353 |