GCC Code Coverage Report


Directory: ./
File: tasks/smyshlaev_a_gauss_filt/mpi/src/ops_mpi.cpp
Date: 2026-01-10 02:40:41
Exec Total Coverage
Lines: 196 209 93.8%
Functions: 11 12 91.7%
Branches: 94 146 64.4%

Line Branch Exec Source
1 #include "smyshlaev_a_gauss_filt/mpi/include/ops_mpi.hpp"
2
3 #include <mpi.h>
4
5 #include <algorithm>
6 #include <array>
7 #include <cmath>
8 #include <cstddef>
9 #include <cstdint>
10 #include <vector>
11
12 #include "smyshlaev_a_gauss_filt/common/include/common.hpp"
13 #include "smyshlaev_a_gauss_filt/seq/include/ops_seq.hpp"
14
15 namespace smyshlaev_a_gauss_filt {
16
17 namespace {
18 const std::vector<int> kErnel = {1, 2, 1, 2, 4, 2, 1, 2, 1};
19 const int kErnelSum = 16;
20
21 void FindOptimalGrid(int size, int &grid_rows, int &grid_cols) {
22 int best_diff = size;
23 8 grid_rows = 1;
24 8 grid_cols = size;
25
4/4
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 16 times.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 8 times.
48 for (int rows = 1; rows * rows <= size; ++rows) {
26
2/4
✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
✓ Branch 2 taken 8 times.
✗ Branch 3 not taken.
24 if (size % rows == 0) {
27 24 int cols = size / rows;
28 24 int diff = std::abs(cols - rows);
29
2/4
✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
✓ Branch 2 taken 8 times.
✗ Branch 3 not taken.
24 if (diff < best_diff) {
30 best_diff = diff;
31 8 grid_rows = rows;
32 8 grid_cols = cols;
33 }
34 }
35 }
36 }
37
38 16354 uint8_t ApplyGaussianFilter(const std::vector<uint8_t> &padded_data, int x, int y, int padded_width, int channels,
39 int channel) {
40 int sum = 0;
41
2/2
✓ Branch 0 taken 49062 times.
✓ Branch 1 taken 16354 times.
65416 for (int ky = -1; ky <= 1; ++ky) {
42
2/2
✓ Branch 0 taken 147186 times.
✓ Branch 1 taken 49062 times.
196248 for (int kx = -1; kx <= 1; ++kx) {
43 147186 int curr_x = x + kx;
44 147186 int curr_y = y + ky;
45 147186 int pixel = padded_data[(((curr_y * padded_width) + curr_x) * channels) + channel];
46 147186 int k_value = kErnel[((ky + 1) * 3) + (kx + 1)];
47 147186 sum += pixel * k_value;
48 }
49 }
50 16354 return static_cast<uint8_t>(sum / kErnelSum);
51 }
52
53 } // namespace
54
55
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 SmyshlaevAGaussFiltMPI::SmyshlaevAGaussFiltMPI(const InType &in) {
56 SetTypeOfTask(GetStaticTypeOfTask());
57 16 int rank = 0;
58
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
59
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
16 if (rank == 0) {
60 GetInput() = in;
61 }
62 16 }
63
64 16 bool SmyshlaevAGaussFiltMPI::ValidationImpl() {
65 16 int rank = 0;
66 16 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
67 16 int error_flag = 0;
68
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
16 if (rank == 0) {
69 const InType &input_img = GetInput();
70
4/8
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 8 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 8 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 8 times.
8 if (input_img.width <= 0 || input_img.height <= 0 || input_img.channels <= 0 || input_img.data.empty()) {
71 error_flag = 1;
72 }
73 }
74 16 MPI_Bcast(&error_flag, 1, MPI_INT, 0, MPI_COMM_WORLD);
75 16 return (error_flag == 0);
76 }
77
78 16 bool SmyshlaevAGaussFiltMPI::PreProcessingImpl() {
79 16 return true;
80 }
81
82 16 void SmyshlaevAGaussFiltMPI::BroadcastImageDimensions(int &width, int &height, int &channels) {
83 16 int rank = 0;
84 16 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
85
86 16 std::array<int, 3> dims = {0, 0, 0};
87
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
16 if (rank == 0) {
88 8 dims[0] = GetInput().width;
89 8 dims[1] = GetInput().height;
90 8 dims[2] = GetInput().channels;
91 }
92 16 MPI_Bcast(dims.data(), 3, MPI_INT, 0, MPI_COMM_WORLD);
93 16 width = dims[0];
94 16 height = dims[1];
95 16 channels = dims[2];
96 16 }
97
98 bool SmyshlaevAGaussFiltMPI::RunSequential() {
99 int rank = 0;
100 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
101 if (rank == 0) {
102 SmyshlaevAGaussFiltSEQ seq_task(GetInput());
103 if (seq_task.Validation()) {
104 seq_task.PreProcessing();
105 seq_task.Run();
106 seq_task.PostProcessing();
107 GetOutput() = seq_task.GetOutput();
108 }
109 }
110 return true;
111 }
112
113 16 void SmyshlaevAGaussFiltMPI::SetupDecomposition(DecompositionInfo &info, int width, int height, int channels) {
114 16 int rank = 0;
115 16 int size = 0;
116 16 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
117 16 MPI_Comm_size(MPI_COMM_WORLD, &size);
118
119
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
16 if (rank == 0) {
120 8 FindOptimalGrid(size, info.grid_rows, info.grid_cols);
121 }
122
123 16 MPI_Bcast(&info.grid_rows, 1, MPI_INT, 0, MPI_COMM_WORLD);
124 16 MPI_Bcast(&info.grid_cols, 1, MPI_INT, 0, MPI_COMM_WORLD);
125
126 16 info.blocks.resize(size);
127 16 info.sendcounts.resize(size);
128 16 info.displs.resize(size);
129
130
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
16 if (rank == 0) {
131 8 int block_height = (height + info.grid_rows - 1) / info.grid_rows;
132 8 int block_width = (width + info.grid_cols - 1) / info.grid_cols;
133
2/2
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 8 times.
24 for (int pdx = 0; pdx < size; ++pdx) {
134 16 int grid_row = pdx / info.grid_cols;
135 16 int grid_col = pdx % info.grid_cols;
136 16 int start_row = grid_row * block_height;
137 16 int start_col = grid_col * block_width;
138 16 int actual_block_height = std::min(block_height, height - start_row);
139 16 int actual_block_width = std::min(block_width, width - start_col);
140
141 int padded_top = 1;
142 int padded_bottom = 1;
143 int padded_left = 1;
144 int padded_right = 1;
145
146 16 info.blocks[pdx].start_row = start_row;
147 16 info.blocks[pdx].start_col = start_col;
148 16 info.blocks[pdx].block_height = actual_block_height;
149 16 info.blocks[pdx].block_width = actual_block_width;
150 16 info.blocks[pdx].padded_height = actual_block_height + padded_top + padded_bottom;
151 16 info.blocks[pdx].padded_width = actual_block_width + padded_left + padded_right;
152 16 info.blocks[pdx].count = info.blocks[pdx].padded_height * info.blocks[pdx].padded_width * channels;
153 16 info.sendcounts[pdx] = info.blocks[pdx].count;
154 }
155 8 info.displs[0] = 0;
156
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
16 for (int pdx = 1; pdx < size; ++pdx) {
157 8 info.displs[pdx] = info.displs[pdx - 1] + info.sendcounts[pdx - 1];
158 }
159 }
160
161 16 MPI_Bcast(info.sendcounts.data(), size, MPI_INT, 0, MPI_COMM_WORLD);
162 16 MPI_Bcast(info.displs.data(), size, MPI_INT, 0, MPI_COMM_WORLD);
163
164 16 std::vector<int> block_info_buffer(static_cast<size_t>(size) * 6);
165
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
16 if (rank == 0) {
166
2/2
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 8 times.
24 for (int pdx = 0; pdx < size; ++pdx) {
167 16 block_info_buffer[(pdx * 6) + 0] = info.blocks[pdx].start_row;
168 16 block_info_buffer[(pdx * 6) + 1] = info.blocks[pdx].start_col;
169 16 block_info_buffer[(pdx * 6) + 2] = info.blocks[pdx].block_height;
170 16 block_info_buffer[(pdx * 6) + 3] = info.blocks[pdx].block_width;
171 16 block_info_buffer[(pdx * 6) + 4] = info.blocks[pdx].padded_height;
172 16 block_info_buffer[(pdx * 6) + 5] = info.blocks[pdx].padded_width;
173 }
174 }
175
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 MPI_Bcast(block_info_buffer.data(), size * 6, MPI_INT, 0, MPI_COMM_WORLD);
176
177
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 16 times.
48 for (int pdx = 0; pdx < size; ++pdx) {
178 32 info.blocks[pdx].start_row = block_info_buffer[(pdx * 6) + 0];
179 32 info.blocks[pdx].start_col = block_info_buffer[(pdx * 6) + 1];
180 32 info.blocks[pdx].block_height = block_info_buffer[(pdx * 6) + 2];
181 32 info.blocks[pdx].block_width = block_info_buffer[(pdx * 6) + 3];
182 32 info.blocks[pdx].padded_height = block_info_buffer[(pdx * 6) + 4];
183 32 info.blocks[pdx].padded_width = block_info_buffer[(pdx * 6) + 5];
184 }
185 16 }
186
187
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 std::vector<uint8_t> SmyshlaevAGaussFiltMPI::PrepareScatterBuffer(const DecompositionInfo &info, int width, int height,
188 int channels) {
189 8 int size = static_cast<int>(info.sendcounts.size());
190 const auto &input_image = GetInput();
191
192 8 std::vector<uint8_t> scatter_buffer;
193
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 scatter_buffer.resize(info.displs[size - 1] + info.sendcounts[size - 1]);
194
195
2/2
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 8 times.
24 for (int pdx = 0; pdx < size; ++pdx) {
196 16 uint8_t *buffer_ptr = scatter_buffer.data() + info.displs[pdx];
197 const auto &block = info.blocks[pdx];
198 16 int src_y_start = block.start_row - 1;
199 16 int src_x_start = block.start_col - 1;
200
2/2
✓ Branch 0 taken 456 times.
✓ Branch 1 taken 16 times.
472 for (int idy = 0; idy < block.padded_height; ++idy) {
201
2/2
✓ Branch 0 taken 7646 times.
✓ Branch 1 taken 456 times.
8102 for (int idx = 0; idx < block.padded_width; ++idx) {
202 7646 int global_y = std::clamp(src_y_start + idy, 0, height - 1);
203 7646 int global_x = std::clamp(src_x_start + idx, 0, width - 1);
204
2/2
✓ Branch 0 taken 19970 times.
✓ Branch 1 taken 7646 times.
27616 for (int ch = 0; ch < channels; ++ch) {
205 19970 buffer_ptr[(((idy * block.padded_width) + idx) * channels) + ch] =
206 19970 input_image.data[(((global_y * width) + global_x) * channels) + ch];
207 }
208 }
209 }
210 }
211 8 return scatter_buffer;
212 }
213
214 16 std::vector<uint8_t> SmyshlaevAGaussFiltMPI::ProcessLocalBlock(const DecompositionInfo &info, int width, int height,
215 int channels) {
216 16 int rank = 0;
217 16 int size = 0;
218 16 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
219 16 MPI_Comm_size(MPI_COMM_WORLD, &size);
220
221 16 std::vector<uint8_t> scatter_buffer;
222
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
16 if (rank == 0) {
223
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
16 scatter_buffer = PrepareScatterBuffer(info, width, height, channels);
224 }
225
226
1/4
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
16 std::vector<uint8_t> local_block_data(info.sendcounts[rank]);
227
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 MPI_Scatterv(scatter_buffer.data(), info.sendcounts.data(), info.displs.data(), MPI_UNSIGNED_CHAR,
228
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 local_block_data.data(), info.sendcounts[rank], MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD);
229
230
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 const auto &my_block = info.blocks[rank];
231
232 int x_offset = 1;
233 int y_offset = 1;
234
235
1/4
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
16 std::vector<uint8_t> local_output_data(static_cast<size_t>(my_block.block_height) * my_block.block_width * channels);
236
2/2
✓ Branch 0 taken 424 times.
✓ Branch 1 taken 16 times.
440 for (int idy = 0; idy < my_block.block_height; ++idy) {
237
2/2
✓ Branch 0 taken 6326 times.
✓ Branch 1 taken 424 times.
6750 for (int idx = 0; idx < my_block.block_width; ++idx) {
238
2/2
✓ Branch 0 taken 16354 times.
✓ Branch 1 taken 6326 times.
22680 for (int ch = 0; ch < channels; ++ch) {
239 16354 local_output_data[((idy * my_block.block_width + idx) * channels) + ch] =
240 16354 ApplyGaussianFilter(local_block_data, idx + x_offset, idy + y_offset, my_block.padded_width, channels, ch);
241 }
242 }
243 }
244 16 return local_output_data;
245 }
246
247 16 void SmyshlaevAGaussFiltMPI::CollectResult(const std::vector<uint8_t> &local_result, const DecompositionInfo &info,
248 int width, int height, int channels) {
249 16 int rank = 0;
250 16 int size = 0;
251 16 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
252 16 MPI_Comm_size(MPI_COMM_WORLD, &size);
253
254 16 std::vector<int> recvcounts(size);
255
1/4
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
16 std::vector<int> recv_displs(size, 0);
256 16 std::vector<uint8_t> gathered_data;
257
258
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
16 if (rank == 0) {
259
2/2
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 8 times.
24 for (int pdx = 0; pdx < size; ++pdx) {
260 16 recvcounts[pdx] = info.blocks[pdx].block_height * info.blocks[pdx].block_width * channels;
261 }
262 8 recv_displs[0] = 0;
263
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
16 for (int pdx = 1; pdx < size; ++pdx) {
264 8 recv_displs[pdx] = recv_displs[pdx - 1] + recvcounts[pdx - 1];
265 }
266
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 gathered_data.resize(static_cast<size_t>(width) * height * channels);
267 }
268
269
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 MPI_Gatherv(local_result.data(), static_cast<int>(local_result.size()), MPI_UNSIGNED_CHAR, gathered_data.data(),
270 recvcounts.data(), recv_displs.data(), MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD);
271
272
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
16 if (rank == 0) {
273 OutType &output_image = GetOutput();
274 8 output_image.width = width;
275 8 output_image.height = height;
276 8 output_image.channels = channels;
277
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 output_image.data.resize(static_cast<size_t>(width) * height * channels);
278
2/2
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 8 times.
24 for (int pdx = 0; pdx < size; ++pdx) {
279 16 const uint8_t *src_ptr = gathered_data.data() + recv_displs[pdx];
280 const auto &block = info.blocks[pdx];
281
2/2
✓ Branch 0 taken 424 times.
✓ Branch 1 taken 16 times.
440 for (int idy = 0; idy < block.block_height; ++idy) {
282 424 int global_y = block.start_row + idy;
283
1/2
✓ Branch 0 taken 424 times.
✗ Branch 1 not taken.
424 uint8_t *dst_ptr = &output_image.data[(static_cast<size_t>(global_y) * width + block.start_col) * channels];
284
1/2
✓ Branch 0 taken 424 times.
✗ Branch 1 not taken.
424 std::copy_n(src_ptr + (static_cast<size_t>(idy) * block.block_width * channels), block.block_width * channels,
285 dst_ptr);
286 }
287 }
288 }
289 16 }
290
291 16 bool SmyshlaevAGaussFiltMPI::RunImpl() {
292 16 int rank = 0;
293 16 int size = 0;
294 16 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
295 16 MPI_Comm_size(MPI_COMM_WORLD, &size);
296
297 16 int img_width = 0;
298 16 int img_height = 0;
299 16 int img_channels = 0;
300 16 BroadcastImageDimensions(img_width, img_height, img_channels);
301
302 int grid_rows = 0;
303 int grid_cols = 0;
304 16 FindOptimalGrid(size, grid_rows, grid_cols);
305
306
2/4
✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 16 times.
16 if (img_height < grid_rows || img_width < grid_cols) {
307 RunSequential();
308 return true;
309 }
310
311 16 DecompositionInfo decomp_info;
312
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 SetupDecomposition(decomp_info, img_width, img_height, img_channels);
313
314
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 std::vector<uint8_t> local_result = ProcessLocalBlock(decomp_info, img_width, img_height, img_channels);
315
316
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 CollectResult(local_result, decomp_info, img_width, img_height, img_channels);
317
318 return true;
319 16 }
320
321 16 bool SmyshlaevAGaussFiltMPI::PostProcessingImpl() {
322 16 int rank = 0;
323 16 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
324 auto &output = GetOutput();
325 16 std::array<int, 3> dims = {0, 0, 0};
326
327
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
16 if (rank == 0) {
328 8 dims[0] = output.width;
329 8 dims[1] = output.height;
330 8 dims[2] = output.channels;
331 }
332 16 MPI_Bcast(dims.data(), 3, MPI_INT, 0, MPI_COMM_WORLD);
333
334
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
16 if (rank != 0) {
335 8 output.width = dims[0];
336 8 output.height = dims[1];
337 8 output.channels = dims[2];
338
3/6
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 8 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 8 times.
✗ Branch 5 not taken.
8 if (dims[0] > 0 && dims[1] > 0 && dims[2] > 0) {
339 8 output.data.resize(static_cast<size_t>(dims[0]) * dims[1] * dims[2]);
340 } else {
341 output.data.clear();
342 }
343 }
344
345
1/2
✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
16 if (!output.data.empty()) {
346 16 MPI_Bcast(output.data.data(), static_cast<int>(output.data.size()), MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD);
347 }
348
349 16 return true;
350 }
351
352 } // namespace smyshlaev_a_gauss_filt
353