GCC Code Coverage Report


Directory: ./
File: tasks/romanov_a_gauss_block/all/src/ops_all.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 156 157 99.4%
Functions: 14 14 100.0%
Branches: 102 134 76.1%

Line Branch Exec Source
1 #include "romanov_a_gauss_block/all/include/ops_all.hpp"
2
3 #include <mpi.h>
4
5 #include <algorithm>
6 #include <array>
7 #include <cstddef>
8 #include <cstdint>
9 #include <thread>
10 #include <utility>
11 #include <vector>
12
13 #include "romanov_a_gauss_block/common/include/common.hpp"
14 #include "util/include/util.hpp"
15
16 namespace romanov_a_gauss_block {
17
18 namespace {
19
20 constexpr int kBlockSize = 32;
21
22 struct Distribution {
23 std::vector<int> rows_per_proc;
24 std::vector<int> row_displs;
25 int halo_top{0};
26 int halo_bottom{0};
27 int buffer_height{0};
28 };
29
30 30084 int ApplyKernel(const std::vector<uint8_t> &img, int row, int col, int channel, int width, int buffer_height,
31 int halo_top, const std::array<std::array<int, 3>, 3> &kernel) {
32 int sum = 0;
33
2/2
✓ Branch 0 taken 90252 times.
✓ Branch 1 taken 30084 times.
120336 for (size_t kr = 0; kr < 3; ++kr) {
34
2/2
✓ Branch 0 taken 270756 times.
✓ Branch 1 taken 90252 times.
361008 for (size_t kc = 0; kc < 3; ++kc) {
35 270756 int nr_local = row + static_cast<int>(kr) - 1;
36 270756 int nc = col + static_cast<int>(kc) - 1;
37 270756 int buffer_row = nr_local + halo_top;
38
4/4
✓ Branch 0 taken 268686 times.
✓ Branch 1 taken 2070 times.
✓ Branch 2 taken 266748 times.
✓ Branch 3 taken 1938 times.
270756 if (buffer_row >= 0 && buffer_row < buffer_height && nc >= 0 && nc < width) {
39 266748 size_t idx = (((static_cast<size_t>(buffer_row) * width) + nc) * 3) + channel;
40 266748 sum += (static_cast<int>(img[idx]) * kernel.at(kr).at(kc));
41 }
42 }
43 }
44 30084 return sum;
45 }
46
47 9 void ProcessFullBlock(const std::vector<uint8_t> &input, std::vector<uint8_t> &output, int width, int buffer_height,
48 int halo_top, int start_row, int start_col) {
49 static constexpr std::array<std::array<int, 3>, 3> kKernel = {{{1, 2, 1}, {2, 4, 2}, {1, 2, 1}}};
50
51
2/2
✓ Branch 0 taken 288 times.
✓ Branch 1 taken 9 times.
297 for (int row = start_row; row < start_row + kBlockSize; ++row) {
52
2/2
✓ Branch 0 taken 9216 times.
✓ Branch 1 taken 288 times.
9504 for (int col = start_col; col < start_col + kBlockSize; ++col) {
53
2/2
✓ Branch 0 taken 27648 times.
✓ Branch 1 taken 9216 times.
36864 for (int channel = 0; channel < 3; ++channel) {
54 27648 int sum = ApplyKernel(input, row, col, channel, width, buffer_height, halo_top, kKernel);
55
1/2
✓ Branch 0 taken 27648 times.
✗ Branch 1 not taken.
27648 int result_value = (sum + 8) / 16;
56 result_value = std::clamp(result_value, 0, 255);
57 27648 auto idx = ((static_cast<size_t>(row) * width + col) * 3) + channel;
58 27648 output[idx] = static_cast<uint8_t>(result_value);
59 }
60 }
61 }
62 9 }
63
64 14 void ProcessPartBlock(const std::vector<uint8_t> &input, std::vector<uint8_t> &output, int width, int local_rows,
65 int buffer_height, int halo_top, int start_row, int start_col) {
66 static constexpr std::array<std::array<int, 3>, 3> kKernel = {{{1, 2, 1}, {2, 4, 2}, {1, 2, 1}}};
67
68 14 const int end_row = std::min(local_rows, start_row + kBlockSize);
69 14 const int end_col = std::min(width, start_col + kBlockSize);
70
71
2/2
✓ Branch 0 taken 125 times.
✓ Branch 1 taken 14 times.
139 for (int row = start_row; row < end_row; ++row) {
72
2/2
✓ Branch 0 taken 812 times.
✓ Branch 1 taken 125 times.
937 for (int col = start_col; col < end_col; ++col) {
73
2/2
✓ Branch 0 taken 2436 times.
✓ Branch 1 taken 812 times.
3248 for (int channel = 0; channel < 3; ++channel) {
74 2436 int sum = ApplyKernel(input, row, col, channel, width, buffer_height, halo_top, kKernel);
75
1/2
✓ Branch 0 taken 2436 times.
✗ Branch 1 not taken.
2436 int result_value = (sum + 8) / 16;
76 result_value = std::clamp(result_value, 0, 255);
77 2436 auto idx = ((static_cast<size_t>(row) * width + col) * 3) + channel;
78 2436 output[idx] = static_cast<uint8_t>(result_value);
79 }
80 }
81 }
82 14 }
83
84 16 Distribution BuildDistribution(int rank, int world_size, int height) {
85 16 const int total_block_rows = height / kBlockSize;
86 16 const int height_remainder = height % kBlockSize;
87
88 16 std::vector<int> block_rows_per_proc(world_size);
89 16 const int base_blocks = total_block_rows / world_size;
90 16 const int extra_blocks = total_block_rows % world_size;
91
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 16 times.
48 for (int proc = 0; proc < world_size; ++proc) {
92
2/2
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.
62 block_rows_per_proc[proc] = base_blocks + (proc < extra_blocks ? 1 : 0);
93 }
94
95 16 Distribution dist;
96
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 dist.rows_per_proc.resize(world_size);
97
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 dist.row_displs.resize(world_size);
98 int pixel_offset = 0;
99
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 16 times.
48 for (int proc = 0; proc < world_size; ++proc) {
100
2/2
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 16 times.
32 int rows = block_rows_per_proc[proc] * kBlockSize;
101
2/2
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 16 times.
32 if (proc == world_size - 1) {
102 16 rows += height_remainder;
103 }
104 32 dist.rows_per_proc[proc] = rows;
105 32 dist.row_displs[proc] = pixel_offset;
106 32 pixel_offset += rows;
107 }
108
109 // halo для текущего ранга
110
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 7 times.
16 if (dist.rows_per_proc[rank] > 0) {
111
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 1 times.
9 dist.halo_top = (dist.row_displs[rank] > 0) ? 1 : 0;
112
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 1 times.
17 dist.halo_bottom = (dist.row_displs[rank] + dist.rows_per_proc[rank] < height) ? 1 : 0;
113 }
114
1/2
✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
16 dist.buffer_height = dist.rows_per_proc[rank] + dist.halo_top + dist.halo_bottom;
115
116 16 return dist;
117 }
118
119 std::pair<int, int> HaloFor(int proc, const Distribution &dist, int height) {
120
2/2
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 14 times.
32 if (dist.rows_per_proc[proc] == 0) {
121 return {0, 0};
122 }
123
2/2
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 2 times.
18 int top = (dist.row_displs[proc] > 0) ? 1 : 0;
124
2/2
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 2 times.
18 int bot = (dist.row_displs[proc] + dist.rows_per_proc[proc] < height) ? 1 : 0;
125 return {top, bot};
126 }
127
128 16 void ScatterWithHalo(int rank, int world_size, int width, int height, const Distribution &dist,
129 const uint8_t *full_image, std::vector<uint8_t> &local_input) {
130 16 std::vector<int> scatter_counts(world_size);
131
1/4
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
16 std::vector<int> scatter_displs(world_size);
132
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 16 times.
48 for (int proc = 0; proc < world_size; ++proc) {
133 auto [proc_top, proc_bot] = HaloFor(proc, dist, height);
134 32 int proc_buffer_rows = dist.rows_per_proc[proc] + proc_top + proc_bot;
135 32 scatter_counts[proc] = proc_buffer_rows * width * 3;
136 32 scatter_displs[proc] = (dist.row_displs[proc] - proc_top) * width * 3;
137 }
138
139
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
16 const uint8_t *send_buf = (rank == 0) ? full_image : nullptr;
140
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 MPI_Scatterv(send_buf, scatter_counts.data(), scatter_displs.data(), MPI_UNSIGNED_CHAR, local_input.data(),
141 static_cast<int>(local_input.size()), MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD);
142 16 }
143
144 15 void ProcessThreadShare(int current_part, int num_threads, int local_block_rows, int num_col_blocks, int local_rows,
145 int height_remainder, bool is_last, bool width_has_remainder, int width, int buffer_height,
146 int halo_top, const std::vector<uint8_t> &local_input, std::vector<uint8_t> &local_output) {
147 15 const int start_col_tail = num_col_blocks * kBlockSize;
148 15 const int bottom_row_start = local_block_rows * kBlockSize;
149
150 15 int left_border_r = (local_block_rows * current_part) / num_threads;
151 15 int right_border_r = (local_block_rows * (current_part + 1)) / num_threads;
152
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 15 times.
18 for (int bi = left_border_r; bi < right_border_r; ++bi) {
153
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 3 times.
12 for (int bj = 0; bj < num_col_blocks; ++bj) {
154 9 ProcessFullBlock(local_input, local_output, width, buffer_height, halo_top, bi * kBlockSize, bj * kBlockSize);
155 }
156 }
157
158
1/2
✓ Branch 0 taken 15 times.
✗ Branch 1 not taken.
15 if (width_has_remainder) {
159
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 15 times.
18 for (int bi = left_border_r; bi < right_border_r; ++bi) {
160 3 ProcessPartBlock(local_input, local_output, width, local_rows, buffer_height, halo_top, bi * kBlockSize,
161 start_col_tail);
162 }
163 }
164
165
2/2
✓ Branch 0 taken 13 times.
✓ Branch 1 taken 2 times.
15 if (is_last && height_remainder > 0) {
166 13 int left_border_l = (num_col_blocks * current_part) / num_threads;
167 13 int right_border_l = (num_col_blocks * (current_part + 1)) / num_threads;
168
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 13 times.
16 for (int bj = left_border_l; bj < right_border_l; ++bj) {
169 3 ProcessPartBlock(local_input, local_output, width, local_rows, buffer_height, halo_top, bottom_row_start,
170 bj * kBlockSize);
171 }
172 }
173 15 }
174
175 16 void RunLocal(int rank, int world_size, int width, int height, const Distribution &dist,
176 const std::vector<uint8_t> &local_input, std::vector<uint8_t> &local_output) {
177
2/2
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 9 times.
16 const int local_rows = dist.rows_per_proc[rank];
178
2/2
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 9 times.
16 if (local_rows == 0) {
179 7 return;
180 }
181
182 9 const int total_block_rows = height / kBlockSize;
183 9 const int height_remainder = height % kBlockSize;
184 9 const int num_col_blocks = width / kBlockSize;
185 9 const bool width_has_remainder = (width % kBlockSize) != 0;
186 9 const int local_block_rows =
187
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 8 times.
9 (rank < total_block_rows % world_size) ? ((total_block_rows / world_size) + 1) : (total_block_rows / world_size);
188 9 const bool is_last = (rank == world_size - 1);
189
190 9 int num_threads = std::max(1, ppc::util::GetNumThreads());
191 9 num_threads = std::min(num_threads, local_rows);
192
193 9 std::vector<std::thread> threads;
194
1/2
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
9 threads.reserve(num_threads);
195
2/2
✓ Branch 0 taken 15 times.
✓ Branch 1 taken 9 times.
24 for (int tid = 0; tid < num_threads; ++tid) {
196
1/2
✓ Branch 1 taken 15 times.
✗ Branch 2 not taken.
15 threads.emplace_back([&, tid]() {
197 15 ProcessThreadShare(tid, num_threads, local_block_rows, num_col_blocks, local_rows, height_remainder, is_last,
198 15 width_has_remainder, width, dist.buffer_height, dist.halo_top, local_input, local_output);
199 15 });
200 }
201
2/2
✓ Branch 0 taken 15 times.
✓ Branch 1 taken 9 times.
24 for (auto &th : threads) {
202
1/2
✓ Branch 1 taken 15 times.
✗ Branch 2 not taken.
15 th.join();
203 }
204
205
3/4
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 1 times.
✓ Branch 2 taken 8 times.
✗ Branch 3 not taken.
9 if (is_last && height_remainder > 0) {
206 8 ProcessPartBlock(local_input, local_output, width, local_rows, dist.buffer_height, dist.halo_top,
207 8 local_block_rows * kBlockSize, num_col_blocks * kBlockSize);
208 }
209 9 }
210
211 16 void GatherAndBroadcast(int world_size, int width, int height, const Distribution &dist,
212 const std::vector<uint8_t> &local_output, std::vector<uint8_t> &result) {
213 16 std::vector<int> recv_counts(world_size);
214
1/4
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
16 std::vector<int> recv_displs(world_size);
215
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 16 times.
48 for (int proc = 0; proc < world_size; ++proc) {
216 32 recv_counts[proc] = dist.rows_per_proc[proc] * width * 3;
217 32 recv_displs[proc] = dist.row_displs[proc] * width * 3;
218 }
219
2/6
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 16 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
16 result.assign(static_cast<size_t>(height) * width * 3, 0);
220
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 MPI_Gatherv(local_output.data(), static_cast<int>(local_output.size()), MPI_UNSIGNED_CHAR, result.data(),
221 recv_counts.data(), recv_displs.data(), MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD);
222
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 MPI_Bcast(result.data(), static_cast<int>(result.size()), MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD);
223 16 }
224
225 } // namespace
226
227
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 RomanovAGaussBlockALL::RomanovAGaussBlockALL(const InType &in) {
228 SetTypeOfTask(GetStaticTypeOfTask());
229 16 int rank = 0;
230
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
231
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
16 if (rank == 0) {
232 GetInput() = in;
233 }
234 16 GetOutput() = std::vector<uint8_t>();
235 16 }
236
237 16 bool RomanovAGaussBlockALL::ValidationImpl() {
238 16 int rank = 0;
239 16 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
240
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
16 if (rank != 0) {
241 return true;
242 }
243 8 return std::get<0>(GetInput()) * std::get<1>(GetInput()) * 3 == static_cast<int>(std::get<2>(GetInput()).size());
244 }
245
246 16 bool RomanovAGaussBlockALL::PreProcessingImpl() {
247 16 return true;
248 }
249
250 16 bool RomanovAGaussBlockALL::RunImpl() {
251 16 int rank = 0;
252 16 int world_size = 1;
253 16 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
254 16 MPI_Comm_size(MPI_COMM_WORLD, &world_size);
255
256 16 std::array<int, 2> dims{};
257
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
16 if (rank == 0) {
258 8 dims[0] = std::get<0>(GetInput());
259 8 dims[1] = std::get<1>(GetInput());
260 }
261 16 MPI_Bcast(dims.data(), 2, MPI_INT, 0, MPI_COMM_WORLD);
262 16 const int width = dims[0];
263 16 const int height = dims[1];
264
265 16 const Distribution dist = BuildDistribution(rank, world_size, height);
266
267
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 std::vector<uint8_t> local_input(static_cast<size_t>(dist.buffer_height) * width * 3);
268
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
16 const uint8_t *full_image = (rank == 0) ? std::get<2>(GetInput()).data() : nullptr;
269
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 ScatterWithHalo(rank, world_size, width, height, dist, full_image, local_input);
270
271
1/4
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
16 std::vector<uint8_t> local_output(static_cast<size_t>(dist.rows_per_proc[rank]) * width * 3);
272
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 RunLocal(rank, world_size, width, height, dist, local_input, local_output);
273
274 16 std::vector<uint8_t> result;
275
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 GatherAndBroadcast(world_size, width, height, dist, local_output, result);
276 GetOutput() = std::move(result);
277 16 return true;
278 16 }
279
280 16 bool RomanovAGaussBlockALL::PostProcessingImpl() {
281 16 return true;
282 }
283
284 } // namespace romanov_a_gauss_block
285