GCC Code Coverage Report


Directory: ./
File: tasks/melnik_i_gauss_block_part/mpi/src/ops_mpi.cpp
Date: 2026-01-27 01:59:34
Exec Total Coverage
Lines: 253 274 92.3%
Functions: 23 25 92.0%
Branches: 150 212 70.8%

Line Branch Exec Source
1 #include "melnik_i_gauss_block_part/mpi/include/ops_mpi.hpp"
2
3 #include <mpi.h>
4
5 #include <algorithm>
6 #include <array>
7 #include <cmath>
8 #include <cstddef>
9 #include <cstdint>
10 #include <limits>
11 #include <ranges>
12 #include <utility>
13 #include <vector>
14
15 #include "melnik_i_gauss_block_part/common/include/common.hpp"
16 #include "task/include/task.hpp"
17
18 namespace melnik_i_gauss_block_part {
19
20 namespace {
21
22 inline std::size_t Idx(int y, int x, int width) {
23 153 return (static_cast<std::size_t>(y) * static_cast<std::size_t>(width)) + static_cast<std::size_t>(x);
24 }
25
26 inline std::size_t ExtIdx(int y, int x, int ext_w) {
27
4/6
✓ Branch 0 taken 11 times.
✓ Branch 1 taken 7 times.
✓ Branch 3 taken 18 times.
✗ Branch 4 not taken.
✓ Branch 6 taken 18 times.
✗ Branch 7 not taken.
224 return (static_cast<std::size_t>(y) * static_cast<std::size_t>(ext_w)) + static_cast<std::size_t>(x);
28 }
29
30 } // namespace
31
32 namespace {
33
34 inline std::uint8_t SelectCornerValue(bool prefer_first, std::uint8_t first, bool prefer_second, std::uint8_t second,
35 std::uint8_t fallback) {
36 72 if (prefer_first) {
37 return first;
38 }
39
8/8
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 9 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 9 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 9 times.
44 if (prefer_second) {
40 8 return second;
41 }
42 return fallback;
43 }
44
45 } // namespace
46
47 18 MelnikIGaussBlockPartMPI::Neighbours MelnikIGaussBlockPartMPI::ComputeNeighbours(
48 const BlockInfo &blk, int grid_rows, int grid_cols, int rank, const std::vector<BlockInfo> &all_blocks) {
49
1/2
✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
18 Neighbours nbh;
50 if (blk.Empty()) {
51 return nbh;
52 }
53
54 18 const int pr = rank / grid_cols;
55 18 const int pc = rank % grid_cols;
56
57 144 auto get_rank = [&](int npr, int npc) -> int {
58
8/8
✓ Branch 0 taken 96 times.
✓ Branch 1 taken 48 times.
✓ Branch 2 taken 48 times.
✓ Branch 3 taken 48 times.
✓ Branch 4 taken 33 times.
✓ Branch 5 taken 15 times.
✓ Branch 6 taken 18 times.
✓ Branch 7 taken 15 times.
144 if (npr < 0 || npr >= grid_rows || npc < 0 || npc >= grid_cols) {
59 return MPI_PROC_NULL;
60 }
61 18 const int neighbour = (npr * grid_cols) + npc;
62
1/2
✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
18 const int blocks_size = static_cast<int>(all_blocks.size());
63
1/2
✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
18 if (neighbour < 0 || neighbour >= blocks_size) {
64 return MPI_PROC_NULL;
65 }
66
1/2
✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
18 if (all_blocks[static_cast<std::size_t>(neighbour)].Empty()) {
67 return MPI_PROC_NULL;
68 }
69 return neighbour;
70 18 };
71
72 18 nbh.up = get_rank(pr - 1, pc);
73 18 nbh.down = get_rank(pr + 1, pc);
74 18 nbh.left = get_rank(pr, pc - 1);
75 18 nbh.right = get_rank(pr, pc + 1);
76 18 nbh.up_left = get_rank(pr - 1, pc - 1);
77 18 nbh.up_right = get_rank(pr - 1, pc + 1);
78 18 nbh.down_left = get_rank(pr + 1, pc - 1);
79 18 nbh.down_right = get_rank(pr + 1, pc + 1);
80 18 return nbh;
81 }
82
83 18 void MelnikIGaussBlockPartMPI::ExchangeRowHalos(const BlockInfo &blk, const Neighbours &nbh, int ext_w,
84 std::vector<std::uint8_t> &ext) {
85 18 std::vector<std::uint8_t> recv_row(static_cast<std::size_t>(blk.width), 0);
86
87 18 MPI_Sendrecv(ext.data() + ExtIdx(1, 1, ext_w), blk.width, MPI_BYTE, nbh.up, 10, recv_row.data(), blk.width, MPI_BYTE,
88
1/2
✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
18 nbh.up, 11, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
89
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 16 times.
18 if (nbh.up != MPI_PROC_NULL) {
90 std::ranges::copy(recv_row, ext.begin() + static_cast<std::ptrdiff_t>(ExtIdx(0, 1, ext_w)));
91 }
92
93 18 MPI_Sendrecv(ext.data() + ExtIdx(blk.height, 1, ext_w), blk.width, MPI_BYTE, nbh.down, 11, recv_row.data(), blk.width,
94
1/2
✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
18 MPI_BYTE, nbh.down, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
95
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 16 times.
18 if (nbh.down != MPI_PROC_NULL) {
96
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
2 std::ranges::copy(recv_row, ext.begin() + static_cast<std::ptrdiff_t>(ExtIdx(blk.height + 1, 1, ext_w)));
97 }
98 18 }
99
100 18 void MelnikIGaussBlockPartMPI::ExchangeColHalos(const BlockInfo &blk, const Neighbours &nbh, int ext_w,
101 std::vector<std::uint8_t> &ext) {
102 18 std::vector<std::uint8_t> send_col(static_cast<std::size_t>(blk.height), 0);
103
1/4
✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
18 std::vector<std::uint8_t> recv_col(static_cast<std::size_t>(blk.height), 0);
104
105
2/2
✓ Branch 0 taken 59 times.
✓ Branch 1 taken 18 times.
77 for (int row = 0; row < blk.height; ++row) {
106 59 send_col[static_cast<std::size_t>(row)] = ext[ExtIdx(row + 1, 1, ext_w)];
107 }
108
1/2
✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
18 MPI_Sendrecv(send_col.data(), blk.height, MPI_BYTE, nbh.left, 20, recv_col.data(), blk.height, MPI_BYTE, nbh.left, 21,
109 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
110
2/2
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 11 times.
18 if (nbh.left != MPI_PROC_NULL) {
111
2/2
✓ Branch 0 taken 22 times.
✓ Branch 1 taken 7 times.
29 for (int row = 0; row < blk.height; ++row) {
112 22 ext[ExtIdx(row + 1, 0, ext_w)] = recv_col[static_cast<std::size_t>(row)];
113 }
114 }
115
116
2/2
✓ Branch 0 taken 59 times.
✓ Branch 1 taken 18 times.
77 for (int row = 0; row < blk.height; ++row) {
117 59 send_col[static_cast<std::size_t>(row)] = ext[ExtIdx(row + 1, blk.width, ext_w)];
118 }
119
1/2
✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
18 MPI_Sendrecv(send_col.data(), blk.height, MPI_BYTE, nbh.right, 21, recv_col.data(), blk.height, MPI_BYTE, nbh.right,
120 20, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
121
2/2
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 11 times.
18 if (nbh.right != MPI_PROC_NULL) {
122
2/2
✓ Branch 0 taken 22 times.
✓ Branch 1 taken 7 times.
29 for (int row = 0; row < blk.height; ++row) {
123 22 ext[ExtIdx(row + 1, blk.width + 1, ext_w)] = recv_col[static_cast<std::size_t>(row)];
124 }
125 }
126 18 }
127
128 18 void MelnikIGaussBlockPartMPI::ExchangeCornerHalos(const BlockInfo &blk, const Neighbours &nbh, int ext_w,
129 std::vector<std::uint8_t> &ext) {
130 18 std::uint8_t send_val = 0;
131 18 std::uint8_t recv_val = 0;
132
133 // - up_left <-> down_right : tags (30, 31)
134 // - up_right <-> down_left : tags (32, 33)
135 18 send_val = ext[ExtIdx(1, 1, ext_w)];
136 18 MPI_Sendrecv(&send_val, 1, MPI_BYTE, nbh.up_left, 30, &recv_val, 1, MPI_BYTE, nbh.up_left, 31, MPI_COMM_WORLD,
137 MPI_STATUS_IGNORE);
138
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 18 times.
18 if (nbh.up_left != MPI_PROC_NULL) {
139 ext[ExtIdx(0, 0, ext_w)] = recv_val;
140 }
141
142 18 send_val = ext[ExtIdx(1, blk.width, ext_w)];
143 18 MPI_Sendrecv(&send_val, 1, MPI_BYTE, nbh.up_right, 32, &recv_val, 1, MPI_BYTE, nbh.up_right, 33, MPI_COMM_WORLD,
144 MPI_STATUS_IGNORE);
145
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 18 times.
18 if (nbh.up_right != MPI_PROC_NULL) {
146 ext[ExtIdx(0, blk.width + 1, ext_w)] = recv_val;
147 }
148
149 18 send_val = ext[ExtIdx(blk.height, 1, ext_w)];
150 18 MPI_Sendrecv(&send_val, 1, MPI_BYTE, nbh.down_left, 33, &recv_val, 1, MPI_BYTE, nbh.down_left, 32, MPI_COMM_WORLD,
151 MPI_STATUS_IGNORE);
152
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 18 times.
18 if (nbh.down_left != MPI_PROC_NULL) {
153 ext[ExtIdx(blk.height + 1, 0, ext_w)] = recv_val;
154 }
155
156 18 send_val = ext[ExtIdx(blk.height, blk.width, ext_w)];
157 18 MPI_Sendrecv(&send_val, 1, MPI_BYTE, nbh.down_right, 31, &recv_val, 1, MPI_BYTE, nbh.down_right, 30, MPI_COMM_WORLD,
158 MPI_STATUS_IGNORE);
159
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 18 times.
18 if (nbh.down_right != MPI_PROC_NULL) {
160 ext[ExtIdx(blk.height + 1, blk.width + 1, ext_w)] = recv_val;
161 }
162 18 }
163
164 18 void MelnikIGaussBlockPartMPI::FixCornersWithoutDiagonal(const BlockInfo &blk, const Neighbours &nbh, int ext_w,
165 std::vector<std::uint8_t> &ext) {
166 18 const bool has_up = nbh.up != MPI_PROC_NULL;
167 18 const bool has_down = nbh.down != MPI_PROC_NULL;
168 18 const bool has_left = nbh.left != MPI_PROC_NULL;
169 18 const bool has_right = nbh.right != MPI_PROC_NULL;
170
171
1/2
✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
18 if (nbh.up_left == MPI_PROC_NULL) {
172
2/2
✓ Branch 0 taken 11 times.
✓ Branch 1 taken 7 times.
29 ext[ExtIdx(0, 0, ext_w)] = SelectCornerValue(has_left, ext[ExtIdx(1, 0, ext_w)], has_up, ext[ExtIdx(0, 1, ext_w)],
173 ext[ExtIdx(1, 1, ext_w)]);
174 }
175
176
1/2
✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
18 if (nbh.up_right == MPI_PROC_NULL) {
177
2/2
✓ Branch 0 taken 11 times.
✓ Branch 1 taken 7 times.
18 const int col = blk.width + 1;
178
2/2
✓ Branch 0 taken 11 times.
✓ Branch 1 taken 7 times.
29 ext[ExtIdx(0, col, ext_w)] = SelectCornerValue(has_right, ext[ExtIdx(1, col, ext_w)], has_up,
179 ext[ExtIdx(0, blk.width, ext_w)], ext[ExtIdx(1, blk.width, ext_w)]);
180 }
181
182
1/2
✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
18 if (nbh.down_left == MPI_PROC_NULL) {
183
2/2
✓ Branch 0 taken 11 times.
✓ Branch 1 taken 7 times.
18 const int row = blk.height + 1;
184
2/2
✓ Branch 0 taken 11 times.
✓ Branch 1 taken 7 times.
29 ext[ExtIdx(row, 0, ext_w)] = SelectCornerValue(has_left, ext[ExtIdx(blk.height, 0, ext_w)], has_down,
185 ext[ExtIdx(row, 1, ext_w)], ext[ExtIdx(blk.height, 1, ext_w)]);
186 }
187
188
1/2
✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
18 if (nbh.down_right == MPI_PROC_NULL) {
189 18 const int row = blk.height + 1;
190
2/2
✓ Branch 0 taken 11 times.
✓ Branch 1 taken 7 times.
18 const int col = blk.width + 1;
191 18 ext[ExtIdx(row, col, ext_w)] =
192
2/2
✓ Branch 0 taken 11 times.
✓ Branch 1 taken 7 times.
18 SelectCornerValue(has_right, ext[ExtIdx(blk.height, col, ext_w)], has_down, ext[ExtIdx(row, blk.width, ext_w)],
193 ext[ExtIdx(blk.height, blk.width, ext_w)]);
194 }
195 18 }
196
197
1/2
✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
18 MelnikIGaussBlockPartMPI::MelnikIGaussBlockPartMPI(const InType &in) {
198 SetTypeOfTask(GetStaticTypeOfTask());
199 GetInput() = in;
200 GetOutput().clear();
201 18 }
202
203 18 bool MelnikIGaussBlockPartMPI::ValidationImpl() {
204 18 int rank = 0;
205 18 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
206
207 18 int width = 0;
208 18 int height = 0;
209 18 int valid_flag = 0;
210
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
18 if (rank == 0) {
211 const auto &[data, w, h] = GetInput();
212 9 width = w;
213 9 height = h;
214 const std::size_t expected =
215
1/2
✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
9 (width > 0 && height > 0) ? (static_cast<std::size_t>(width) * static_cast<std::size_t>(height)) : 0U;
216
3/6
✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 9 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 9 times.
9 valid_flag = (width > 0 && height > 0 && data.size() == expected) ? 1 : 0;
217 }
218
219 18 MPI_Bcast(&width, 1, MPI_INT, 0, MPI_COMM_WORLD);
220 18 MPI_Bcast(&height, 1, MPI_INT, 0, MPI_COMM_WORLD);
221 18 MPI_Bcast(&valid_flag, 1, MPI_INT, 0, MPI_COMM_WORLD);
222 18 return valid_flag == 1;
223 }
224
225 18 bool MelnikIGaussBlockPartMPI::PreProcessingImpl() {
226 // Enforce "only rank 0 owns full input data": other ranks can drop the buffer to save memory.
227 18 int rank = 0;
228 18 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
229
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
18 if (rank != 0) {
230 auto &in = GetInput();
231 std::get<0>(in).clear();
232 std::get<0>(in).shrink_to_fit();
233 }
234 18 return true;
235 }
236
237 int MelnikIGaussBlockPartMPI::ClampInt(int v, int low, int high) {
238 return std::max(low, std::min(v, high));
239 }
240
241 18 std::pair<int, int> MelnikIGaussBlockPartMPI::ComputeProcessGrid(int comm_size, int width, int height) {
242 // Choose factorization close to square and roughly matching aspect ratio.
243 int best_r = 1;
244 int best_c = comm_size;
245 double best_cost = std::numeric_limits<double>::infinity();
246
247 18 const double aspect = static_cast<double>(height) / static_cast<double>(width);
248
249
2/2
✓ Branch 0 taken 36 times.
✓ Branch 1 taken 18 times.
54 for (int rows = 1; rows <= comm_size; ++rows) {
250
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 36 times.
36 if (comm_size % rows != 0) {
251 continue;
252 }
253 36 const int cols = comm_size / rows;
254 36 const double grid_aspect = static_cast<double>(rows) / static_cast<double>(cols);
255
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 22 times.
36 const double cost = std::abs(grid_aspect - aspect) + (0.01 * std::abs(rows - cols));
256
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 22 times.
36 if (cost < best_cost) {
257 best_cost = cost;
258 best_r = rows;
259 best_c = cols;
260 }
261 }
262
263 18 return {best_r, best_c};
264 }
265
266 MelnikIGaussBlockPartMPI::BlockInfo MelnikIGaussBlockPartMPI::ComputeBlockInfoByCoords(int pr, int pc, int grid_rows,
267 int grid_cols, int width,
268 int height) {
269 36 const int base_w = width / grid_cols;
270 36 const int rem_w = width % grid_cols;
271 36 const int base_h = height / grid_rows;
272 36 const int rem_h = height % grid_rows;
273
274 const int local_w = base_w + (pc < rem_w ? 1 : 0);
275
2/4
✓ Branch 0 taken 34 times.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
36 const int local_h = base_h + (pr < rem_h ? 1 : 0);
276
277 36 const int start_x = (pc * base_w) + std::min(pc, rem_w);
278 36 const int start_y = (pr * base_h) + std::min(pr, rem_h);
279
280 return BlockInfo{.start_x = start_x, .start_y = start_y, .width = local_w, .height = local_h};
281 }
282
283 36 MelnikIGaussBlockPartMPI::BlockInfo MelnikIGaussBlockPartMPI::ComputeBlockInfo(int rank, int grid_rows, int grid_cols,
284 int width, int height) {
285 36 const int pr = rank / grid_cols;
286
2/2
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 6 times.
36 const int pc = rank % grid_cols;
287 36 return ComputeBlockInfoByCoords(pr, pc, grid_rows, grid_cols, width, height);
288 }
289
290 18 void MelnikIGaussBlockPartMPI::FillExtendedWithClamp(const std::vector<std::uint8_t> &local, const BlockInfo &blk,
291 int ext_w, std::vector<std::uint8_t> &ext) {
292 18 const int ext_h = blk.height + 2;
293 18 ext.assign(static_cast<std::size_t>(ext_w) * static_cast<std::size_t>(ext_h), 0);
294
295 // Interior
296
2/2
✓ Branch 0 taken 59 times.
✓ Branch 1 taken 18 times.
77 for (int row = 0; row < blk.height; ++row) {
297
2/2
✓ Branch 0 taken 153 times.
✓ Branch 1 taken 59 times.
212 for (int col = 0; col < blk.width; ++col) {
298 153 ext[ExtIdx(row + 1, col + 1, ext_w)] = local[Idx(row, col, blk.width)];
299 }
300 }
301
302 // Clamp borders based on own interior
303
2/2
✓ Branch 0 taken 49 times.
✓ Branch 1 taken 18 times.
67 for (int col = 1; col <= blk.width; ++col) {
304 49 ext[ExtIdx(0, col, ext_w)] = ext[ExtIdx(1, col, ext_w)];
305 49 ext[ExtIdx(blk.height + 1, col, ext_w)] = ext[ExtIdx(blk.height, col, ext_w)];
306 }
307
2/2
✓ Branch 0 taken 59 times.
✓ Branch 1 taken 18 times.
77 for (int row = 1; row <= blk.height; ++row) {
308 59 ext[ExtIdx(row, 0, ext_w)] = ext[ExtIdx(row, 1, ext_w)];
309 59 ext[ExtIdx(row, blk.width + 1, ext_w)] = ext[ExtIdx(row, blk.width, ext_w)];
310 }
311 18 ext[ExtIdx(0, 0, ext_w)] = ext[ExtIdx(1, 1, ext_w)];
312 18 ext[ExtIdx(0, blk.width + 1, ext_w)] = ext[ExtIdx(1, blk.width, ext_w)];
313 18 ext[ExtIdx(blk.height + 1, 0, ext_w)] = ext[ExtIdx(blk.height, 1, ext_w)];
314 18 ext[ExtIdx(blk.height + 1, blk.width + 1, ext_w)] = ext[ExtIdx(blk.height, blk.width, ext_w)];
315 18 }
316
317
1/2
✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
18 void MelnikIGaussBlockPartMPI::ExchangeHalos(const BlockInfo &blk, int grid_rows, int grid_cols, int rank,
318 const std::vector<BlockInfo> &all_blocks, std::vector<std::uint8_t> &ext) {
319 if (blk.Empty()) {
320 return;
321 }
322 18 const Neighbours nbh = ComputeNeighbours(blk, grid_rows, grid_cols, rank, all_blocks);
323 18 const int ext_w = blk.width + 2;
324
325 18 ExchangeRowHalos(blk, nbh, ext_w, ext);
326 18 ExchangeColHalos(blk, nbh, ext_w, ext);
327 18 ExchangeCornerHalos(blk, nbh, ext_w, ext);
328 18 FixCornersWithoutDiagonal(blk, nbh, ext_w, ext);
329 }
330
331
1/2
✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
18 void MelnikIGaussBlockPartMPI::ApplyGaussianFromExtended(const BlockInfo &blk, const std::vector<std::uint8_t> &ext,
332 std::vector<std::uint8_t> &local_out) {
333 static constexpr std::array<int, 9> kKernel = {1, 2, 1, 2, 4, 2, 1, 2, 1};
334 static constexpr int kSum = 16;
335
336 if (blk.Empty()) {
337 local_out.clear();
338 return;
339 }
340
341 18 const int ext_w = blk.width + 2;
342 18 local_out.resize(static_cast<std::size_t>(blk.width) * static_cast<std::size_t>(blk.height));
343
344
2/2
✓ Branch 0 taken 59 times.
✓ Branch 1 taken 18 times.
77 for (int row = 0; row < blk.height; ++row) {
345
2/2
✓ Branch 0 taken 153 times.
✓ Branch 1 taken 59 times.
212 for (int col = 0; col < blk.width; ++col) {
346 int acc = 0;
347 std::size_t kernel_idx = 0;
348
2/2
✓ Branch 0 taken 459 times.
✓ Branch 1 taken 153 times.
612 for (int dy = 0; dy < 3; ++dy) {
349
2/2
✓ Branch 0 taken 1377 times.
✓ Branch 1 taken 459 times.
1836 for (int dx = 0; dx < 3; ++dx) {
350 1377 acc += kKernel.at(kernel_idx) * static_cast<int>(ext[ExtIdx(row + dy, col + dx, ext_w)]);
351 1377 ++kernel_idx;
352 }
353 }
354 153 local_out[Idx(row, col, blk.width)] = static_cast<std::uint8_t>((acc + kSum / 2) / kSum);
355 }
356 }
357 }
358
359 18 bool MelnikIGaussBlockPartMPI::RunImpl() {
360 18 int rank = 0;
361 18 int comm_size = 1;
362 18 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
363 18 MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
364
365 18 int width = 0;
366 18 int height = 0;
367 const std::vector<std::uint8_t> *root_ptr = nullptr;
368
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
18 if (rank == 0) {
369 const auto &[data, w, h] = GetInput();
370 root_ptr = &data;
371 9 width = w;
372 9 height = h;
373 }
374
375 18 BroadcastImageSize(rank, width, height);
376 18 const auto [grid_rows, grid_cols] = ComputeProcessGrid(comm_size, width, height);
377 18 const std::vector<BlockInfo> blocks = BuildAllBlocks(comm_size, grid_rows, grid_cols, width, height);
378
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
18 const BlockInfo my_blk = blocks[static_cast<std::size_t>(rank)];
379
380 18 const std::vector<std::uint8_t> empty_data{};
381
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
18 const std::vector<std::uint8_t> &root_data = (root_ptr == nullptr) ? empty_data : *root_ptr;
382
1/2
✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
18 const std::vector<std::uint8_t> local_data = ScatterBlock(rank, comm_size, width, height, blocks, my_blk, root_data);
383
1/2
✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
18 const std::vector<std::uint8_t> local_out = ComputeLocal(my_blk, grid_rows, grid_cols, rank, blocks, local_data);
384
1/2
✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
18 std::vector<std::uint8_t> global_out = GatherGlobal(rank, comm_size, width, height, blocks, my_blk, local_out);
385
1/2
✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
18 FinalizeOutput(rank, width, height, global_out);
386 18 return true;
387 }
388
389 18 void MelnikIGaussBlockPartMPI::BroadcastImageSize(int rank, int &width, int &height) {
390 18 MPI_Bcast(&width, 1, MPI_INT, 0, MPI_COMM_WORLD);
391 18 MPI_Bcast(&height, 1, MPI_INT, 0, MPI_COMM_WORLD);
392 (void)rank;
393 18 }
394
395 18 std::vector<MelnikIGaussBlockPartMPI::BlockInfo> MelnikIGaussBlockPartMPI::BuildAllBlocks(int comm_size, int grid_rows,
396 int grid_cols, int width,
397 int height) {
398 18 std::vector<BlockInfo> blocks(static_cast<std::size_t>(comm_size));
399
2/2
✓ Branch 0 taken 36 times.
✓ Branch 1 taken 18 times.
54 for (int rank_idx = 0; rank_idx < comm_size; ++rank_idx) {
400 36 blocks[static_cast<std::size_t>(rank_idx)] = ComputeBlockInfo(rank_idx, grid_rows, grid_cols, width, height);
401 }
402 18 return blocks;
403 }
404
405 9 void MelnikIGaussBlockPartMPI::SendBlocksToOthers(int comm_size, int width, int height,
406 const std::vector<BlockInfo> &blocks,
407 const std::vector<std::uint8_t> &root_data) {
408
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
18 for (int dest = 1; dest < comm_size; ++dest) {
409
1/2
✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
9 const auto &blk = blocks[static_cast<std::size_t>(dest)];
410 if (blk.Empty()) {
411 MPI_Send(nullptr, 0, MPI_BYTE, dest, 0, MPI_COMM_WORLD);
412 continue;
413 }
414
415 9 MPI_Datatype sub{};
416 9 const std::array<int, 2> sizes = {height, width};
417 9 const std::array<int, 2> subs = {blk.height, blk.width};
418 9 const std::array<int, 2> starts = {blk.start_y, blk.start_x};
419 9 MPI_Type_create_subarray(2, sizes.data(), subs.data(), starts.data(), MPI_ORDER_C, MPI_BYTE, &sub);
420 9 MPI_Type_commit(&sub);
421 9 MPI_Send(root_data.data(), 1, sub, dest, 0, MPI_COMM_WORLD);
422 9 MPI_Type_free(&sub);
423 }
424 9 }
425
426 18 std::vector<std::uint8_t> MelnikIGaussBlockPartMPI::ScatterBlock(int rank, int comm_size, int width, int height,
427 const std::vector<BlockInfo> &blocks,
428 const BlockInfo &my_blk,
429 const std::vector<std::uint8_t> &root_data) {
430
1/2
✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
18 std::vector<std::uint8_t> local_data;
431 if (!my_blk.Empty()) {
432
1/2
✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
18 local_data.resize(static_cast<std::size_t>(my_blk.width) * static_cast<std::size_t>(my_blk.height));
433 }
434
435
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
18 if (rank == 0) {
436 // Copy root local block
437 if (!my_blk.Empty()) {
438
2/2
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 9 times.
39 for (int row = 0; row < my_blk.height; ++row) {
439 30 const int global_row = my_blk.start_y + row;
440 30 const std::size_t src_off = Idx(global_row, my_blk.start_x, width);
441 30 const std::size_t dst_off = Idx(row, 0, my_blk.width);
442 std::ranges::copy(
443 root_data.begin() + static_cast<std::ptrdiff_t>(src_off),
444 root_data.begin() + static_cast<std::ptrdiff_t>(src_off + static_cast<std::size_t>(my_blk.width)),
445 local_data.begin() + static_cast<std::ptrdiff_t>(dst_off));
446 }
447 }
448
1/2
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
9 SendBlocksToOthers(comm_size, width, height, blocks, root_data);
449 } else {
450 if (!my_blk.Empty()) {
451
1/2
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
9 const int recv_count = my_blk.width * my_blk.height;
452
1/2
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
9 MPI_Recv(local_data.data(), recv_count, MPI_BYTE, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
453 } else {
454 MPI_Recv(nullptr, 0, MPI_BYTE, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
455 }
456 }
457
458 18 return local_data;
459 }
460
461 18 std::vector<std::uint8_t> MelnikIGaussBlockPartMPI::ComputeLocal(const BlockInfo &my_blk, int grid_rows, int grid_cols,
462 int rank, const std::vector<BlockInfo> &blocks,
463 const std::vector<std::uint8_t> &local_data) {
464
1/2
✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
18 std::vector<std::uint8_t> local_out;
465 if (my_blk.Empty()) {
466 return local_out;
467 }
468
469 18 std::vector<std::uint8_t> ext;
470 18 const int ext_w = my_blk.width + 2;
471
1/2
✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
18 FillExtendedWithClamp(local_data, my_blk, ext_w, ext);
472
1/2
✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
18 ExchangeHalos(my_blk, grid_rows, grid_cols, rank, blocks, ext);
473
1/2
✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
18 ApplyGaussianFromExtended(my_blk, ext, local_out);
474 return local_out;
475 }
476
477 18 std::vector<std::uint8_t> MelnikIGaussBlockPartMPI::GatherGlobal(int rank, int comm_size, int width, int height,
478 const std::vector<BlockInfo> &blocks,
479 const BlockInfo &my_blk,
480 const std::vector<std::uint8_t> &local_out) {
481
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
18 if (rank != 0) {
482 9 const int send_count = my_blk.Empty() ? 0 : (my_blk.width * my_blk.height);
483 const std::uint8_t *send_ptr = send_count > 0 ? local_out.data() : nullptr;
484 9 MPI_Send(send_ptr, send_count, MPI_BYTE, 0, 1, MPI_COMM_WORLD);
485 9 return {};
486 }
487
488
1/2
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
9 std::vector<std::uint8_t> global_out(static_cast<std::size_t>(width) * static_cast<std::size_t>(height), 0);
489
490 // Copy root block
491 if (!my_blk.Empty()) {
492
2/2
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 9 times.
39 for (int row = 0; row < my_blk.height; ++row) {
493 30 const std::size_t dst_off = Idx(my_blk.start_y + row, my_blk.start_x, width);
494 30 const std::size_t src_off = Idx(row, 0, my_blk.width);
495 std::ranges::copy(
496 local_out.begin() + static_cast<std::ptrdiff_t>(src_off),
497 local_out.begin() + static_cast<std::ptrdiff_t>(src_off + static_cast<std::size_t>(my_blk.width)),
498 global_out.begin() + static_cast<std::ptrdiff_t>(dst_off));
499 }
500 }
501
502
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
18 for (int src = 1; src < comm_size; ++src) {
503
1/2
✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
9 const auto &blk = blocks[static_cast<std::size_t>(src)];
504 if (blk.Empty()) {
505 MPI_Recv(nullptr, 0, MPI_BYTE, src, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
506 continue;
507 }
508
509 9 MPI_Datatype sub{};
510 9 const std::array<int, 2> sizes = {height, width};
511 9 const std::array<int, 2> subs = {blk.height, blk.width};
512 9 const std::array<int, 2> starts = {blk.start_y, blk.start_x};
513
1/2
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
9 MPI_Type_create_subarray(2, sizes.data(), subs.data(), starts.data(), MPI_ORDER_C, MPI_BYTE, &sub);
514
1/2
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
9 MPI_Type_commit(&sub);
515
1/2
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
9 MPI_Recv(global_out.data(), 1, sub, src, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
516
1/2
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
9 MPI_Type_free(&sub);
517 }
518
519 return global_out;
520 }
521
522 18 void MelnikIGaussBlockPartMPI::FinalizeOutput(int rank, int width, int height, std::vector<std::uint8_t> &global_out) {
523 18 const auto state = GetStateOfTesting();
524 18 const std::size_t total = static_cast<std::size_t>(width) * static_cast<std::size_t>(height);
525
1/2
✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
18 if (state == ppc::task::StateOfTesting::kFunc) {
526
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
18 if (rank != 0) {
527 9 global_out.assign(total, 0);
528 }
529
1/2
✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
18 if (total > 0) {
530 18 MPI_Bcast(global_out.data(), static_cast<int>(total), MPI_BYTE, 0, MPI_COMM_WORLD);
531 }
532 GetOutput() = std::move(global_out);
533 18 return;
534 }
535
536 if (rank == 0) {
537 GetOutput() = std::move(global_out);
538 } else {
539 GetOutput().clear();
540 }
541 }
542
543 18 bool MelnikIGaussBlockPartMPI::PostProcessingImpl() {
544 18 return true;
545 }
546
547 } // namespace melnik_i_gauss_block_part
548