GCC Code Coverage Report


Directory: ./
File: tasks/nikolaev_d_block_linear_image_filtering/all/src/ops_all.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 129 140 92.1%
Functions: 13 14 92.9%
Branches: 69 110 62.7%

Line Branch Exec Source
1 #include "nikolaev_d_block_linear_image_filtering/all/include/ops_all.hpp"
2
3 #include <mpi.h>
4 #include <tbb/blocked_range2d.h>
5 #include <tbb/parallel_for.h>
6
7 #include <algorithm>
8 #include <array>
9 #include <cstddef>
10 #include <cstdint>
11 #include <utility>
12 #include <vector>
13
14 #include "nikolaev_d_block_linear_image_filtering/common/include/common.hpp"
15
16 namespace nikolaev_d_block_linear_image_filtering {
17
18 namespace {
19
20 constexpr int kBlockSize = 32;
21
22 struct GridSlicing {
23 std::vector<int> rows_per_proc;
24 std::vector<int> row_displs;
25 int halo_top{0};
26 int halo_bottom{0};
27 int buffer_height{0};
28 };
29
30 54 int ApplyKernel(const std::vector<uint8_t> &img, int row, int col, int channel, int width, int height, int rank,
31 const GridSlicing &dist, const std::array<std::array<int, 3>, 3> &kernel) {
32 int sum = 0;
33 54 int input_start_row = dist.row_displs[rank] - dist.halo_top;
34
35
2/2
✓ Branch 0 taken 162 times.
✓ Branch 1 taken 54 times.
216 for (size_t kr = 0; kr < 3; ++kr) {
36 162 int target_global_row = dist.row_displs[rank] + row + (static_cast<int>(kr) - 1);
37 162 int clamped_global_row = std::clamp(target_global_row, 0, height - 1);
38 162 int buffer_row = clamped_global_row - input_start_row;
39
40
2/2
✓ Branch 0 taken 486 times.
✓ Branch 1 taken 162 times.
648 for (size_t kc = 0; kc < 3; ++kc) {
41 486 int target_col = col + (static_cast<int>(kc) - 1);
42 486 int clamped_col = std::clamp(target_col, 0, width - 1);
43
44 486 size_t idx = (((static_cast<size_t>(buffer_row) * width) + clamped_col) * 3) + channel;
45 486 sum += (static_cast<int>(img[idx]) * kernel.at(kr).at(kc));
46 }
47 }
48 54 return sum;
49 }
50
51 void ProcessFullBlock(const std::vector<uint8_t> &input, std::vector<uint8_t> &output, int width, int height, int rank,
52 const GridSlicing &dist, int start_row, int start_col) {
53 static constexpr std::array<std::array<int, 3>, 3> kKernel = {{{1, 2, 1}, {2, 4, 2}, {1, 2, 1}}};
54
55 for (int row = start_row; row < start_row + kBlockSize; ++row) {
56 for (int col = start_col; col < start_col + kBlockSize; ++col) {
57 for (int channel = 0; channel < 3; ++channel) {
58 int sum = ApplyKernel(input, row, col, channel, width, height, rank, dist, kKernel);
59 int result_value = (sum + 8) / 16;
60 result_value = std::clamp(result_value, 0, 255);
61 auto idx = ((static_cast<size_t>(row) * width + col) * 3) + channel;
62 output[idx] = static_cast<uint8_t>(result_value);
63 }
64 }
65 }
66 }
67
68 5 void ProcessPartBlock(const std::vector<uint8_t> &input, std::vector<uint8_t> &output, int width, int height,
69 int local_rows, int rank, const GridSlicing &dist, int start_row, int start_col) {
70 static constexpr std::array<std::array<int, 3>, 3> kKernel = {{{1, 2, 1}, {2, 4, 2}, {1, 2, 1}}};
71
72 5 const int end_row = std::min(local_rows, start_row + kBlockSize);
73 5 const int end_col = std::min(width, start_col + kBlockSize);
74
75
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 5 times.
13 for (int row = start_row; row < end_row; ++row) {
76
2/2
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 8 times.
26 for (int col = start_col; col < end_col; ++col) {
77
2/2
✓ Branch 0 taken 54 times.
✓ Branch 1 taken 18 times.
72 for (int channel = 0; channel < 3; ++channel) {
78 54 int sum = ApplyKernel(input, row, col, channel, width, height, rank, dist, kKernel);
79
1/2
✓ Branch 0 taken 54 times.
✗ Branch 1 not taken.
54 int result_value = (sum + 8) / 16;
80 result_value = std::clamp(result_value, 0, 255);
81 54 auto idx = ((static_cast<size_t>(row) * width + col) * 3) + channel;
82 54 output[idx] = static_cast<uint8_t>(result_value);
83 }
84 }
85 }
86 5 }
87
88 5 void ProcessGridRange(const tbb::blocked_range2d<int> &r, int local_block_rows, int num_col_blocks, int local_rows,
89 int width, int height, int rank, const GridSlicing &dist, const std::vector<uint8_t> &local_input,
90 std::vector<uint8_t> &local_output) {
91
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 for (int bi = r.rows().begin(); bi < r.rows().end(); ++bi) {
92
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 for (int bj = r.cols().begin(); bj < r.cols().end(); ++bj) {
93
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
5 if (bi < local_block_rows && bj < num_col_blocks) {
94 ProcessFullBlock(local_input, local_output, width, height, rank, dist, bi * kBlockSize, bj * kBlockSize);
95 } else {
96 5 ProcessPartBlock(local_input, local_output, width, height, local_rows, rank, dist, bi * kBlockSize,
97 bj * kBlockSize);
98 }
99 }
100 }
101 5 }
102
103 10 GridSlicing BuildGridSlicing(int rank, int world_size, int height) {
104 10 const int total_block_rows = height / kBlockSize;
105 10 const int height_remainder = height % kBlockSize;
106
107 10 std::vector<int> block_rows_per_proc(world_size);
108 10 const int base_blocks = total_block_rows / world_size;
109 10 const int extra_blocks = total_block_rows % world_size;
110
2/2
✓ Branch 0 taken 20 times.
✓ Branch 1 taken 10 times.
30 for (int proc = 0; proc < world_size; ++proc) {
111
1/2
✓ Branch 0 taken 20 times.
✗ Branch 1 not taken.
40 block_rows_per_proc[proc] = base_blocks + (proc < extra_blocks ? 1 : 0);
112 }
113
114 10 GridSlicing dist;
115
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 dist.rows_per_proc.resize(world_size);
116
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 dist.row_displs.resize(world_size);
117 int pixel_offset = 0;
118
2/2
✓ Branch 0 taken 20 times.
✓ Branch 1 taken 10 times.
30 for (int proc = 0; proc < world_size; ++proc) {
119
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
20 int rows = block_rows_per_proc[proc] * kBlockSize;
120
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
20 if (proc == world_size - 1) {
121 10 rows += height_remainder;
122 }
123 20 dist.rows_per_proc[proc] = rows;
124 20 dist.row_displs[proc] = pixel_offset;
125 20 pixel_offset += rows;
126 }
127
128
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 if (dist.rows_per_proc[rank] > 0) {
129
1/2
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
5 dist.halo_top = (dist.row_displs[rank] > 0) ? 1 : 0;
130
1/2
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
10 dist.halo_bottom = (dist.row_displs[rank] + dist.rows_per_proc[rank] < height) ? 1 : 0;
131 }
132
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
10 dist.buffer_height = dist.rows_per_proc[rank] + dist.halo_top + dist.halo_bottom;
133
134 10 return dist;
135 }
136
137 std::pair<int, int> HaloFor(int proc, const GridSlicing &dist, int height) {
138
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
20 if (dist.rows_per_proc[proc] == 0) {
139 return {0, 0};
140 }
141
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
10 int top = (dist.row_displs[proc] > 0) ? 1 : 0;
142
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
10 int bot = (dist.row_displs[proc] + dist.rows_per_proc[proc] < height) ? 1 : 0;
143 return {top, bot};
144 }
145
146 10 void ScatterWithHalo(int rank, int world_size, int width, int height, const GridSlicing &dist,
147 const uint8_t *full_image, std::vector<uint8_t> &local_input) {
148 10 std::vector<int> scatter_counts(world_size);
149
1/4
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
10 std::vector<int> scatter_displs(world_size);
150
2/2
✓ Branch 0 taken 20 times.
✓ Branch 1 taken 10 times.
30 for (int proc = 0; proc < world_size; ++proc) {
151 auto [proc_top, proc_bot] = HaloFor(proc, dist, height);
152 20 int proc_buffer_rows = dist.rows_per_proc[proc] + proc_top + proc_bot;
153 20 scatter_counts[proc] = proc_buffer_rows * width * 3;
154 20 scatter_displs[proc] = (dist.row_displs[proc] - proc_top) * width * 3;
155 }
156
157
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 const uint8_t *send_buf = (rank == 0) ? full_image : nullptr;
158
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 MPI_Scatterv(send_buf, scatter_counts.data(), scatter_displs.data(), MPI_UNSIGNED_CHAR, local_input.data(),
159 static_cast<int>(local_input.size()), MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD);
160 10 }
161
162 10 void RunLocalTBB(int rank, int world_size, int width, int height, const GridSlicing &dist,
163 const std::vector<uint8_t> &local_input, std::vector<uint8_t> &local_output) {
164
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 const int local_rows = dist.rows_per_proc[rank];
165
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 if (local_rows == 0) {
166 5 return;
167 }
168
169 5 const int total_block_rows = height / kBlockSize;
170 5 const int height_remainder = height % kBlockSize;
171 5 const int num_col_blocks = width / kBlockSize;
172 5 const bool width_has_remainder = (width % kBlockSize) != 0;
173
174 5 const int local_block_rows =
175
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
5 (rank < total_block_rows % world_size) ? ((total_block_rows / world_size) + 1) : (total_block_rows / world_size);
176 5 const bool is_last = (rank == world_size - 1);
177
178 5 const int total_local_bi = local_block_rows + static_cast<int>(is_last && height_remainder > 0);
179 5 const int total_local_bj = num_col_blocks + static_cast<int>(width_has_remainder);
180
181 5 tbb::parallel_for(tbb::blocked_range2d<int>(0, total_local_bi, 0, total_local_bj),
182 5 [&](const tbb::blocked_range2d<int> &r) {
183 5 ProcessGridRange(r, local_block_rows, num_col_blocks, local_rows, width, height, rank, dist, local_input,
184 local_output);
185 5 });
186 }
187
188 10 void GatherAndBroadcast(int world_size, int width, int height, const GridSlicing &dist,
189 const std::vector<uint8_t> &local_output, std::vector<uint8_t> &result) {
190 10 std::vector<int> recv_counts(world_size);
191
1/4
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
10 std::vector<int> recv_displs(world_size);
192
2/2
✓ Branch 0 taken 20 times.
✓ Branch 1 taken 10 times.
30 for (int proc = 0; proc < world_size; ++proc) {
193 20 recv_counts[proc] = dist.rows_per_proc[proc] * width * 3;
194 20 recv_displs[proc] = dist.row_displs[proc] * width * 3;
195 }
196
2/6
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 10 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
10 result.assign(static_cast<size_t>(height) * width * 3, 0);
197
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 MPI_Gatherv(local_output.data(), static_cast<int>(local_output.size()), MPI_UNSIGNED_CHAR, result.data(),
198 recv_counts.data(), recv_displs.data(), MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD);
199
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 MPI_Bcast(result.data(), static_cast<int>(result.size()), MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD);
200 10 }
201
202 } // namespace
203
204
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 NikolaevDBlockLinearImageFilteringALL::NikolaevDBlockLinearImageFilteringALL(const InType &in) {
205 SetTypeOfTask(GetStaticTypeOfTask());
206 10 int rank = 0;
207
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
208
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 if (rank == 0) {
209 GetInput() = in;
210 }
211 10 GetOutput() = std::vector<uint8_t>();
212 10 }
213
214 10 bool NikolaevDBlockLinearImageFilteringALL::ValidationImpl() {
215 10 int rank = 0;
216 10 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
217
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 if (rank != 0) {
218 return true;
219 }
220 5 return std::get<0>(GetInput()) * std::get<1>(GetInput()) * 3 == static_cast<int>(std::get<2>(GetInput()).size());
221 }
222
223 10 bool NikolaevDBlockLinearImageFilteringALL::PreProcessingImpl() {
224 10 return true;
225 }
226
227 10 bool NikolaevDBlockLinearImageFilteringALL::RunImpl() {
228 10 int rank = 0;
229 10 int world_size = 1;
230 10 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
231 10 MPI_Comm_size(MPI_COMM_WORLD, &world_size);
232
233 10 std::array<int, 2> dims{};
234
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 if (rank == 0) {
235 5 dims[0] = std::get<0>(GetInput());
236 5 dims[1] = std::get<1>(GetInput());
237 }
238 10 MPI_Bcast(dims.data(), 2, MPI_INT, 0, MPI_COMM_WORLD);
239 10 const int width = dims[0];
240 10 const int height = dims[1];
241
242 10 const GridSlicing dist = BuildGridSlicing(rank, world_size, height);
243
244
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 std::vector<uint8_t> local_input(static_cast<size_t>(dist.buffer_height) * width * 3);
245
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 const uint8_t *full_image = (rank == 0) ? std::get<2>(GetInput()).data() : nullptr;
246
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 ScatterWithHalo(rank, world_size, width, height, dist, full_image, local_input);
247
248
1/4
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
10 std::vector<uint8_t> local_output(static_cast<size_t>(dist.rows_per_proc[rank]) * width * 3);
249
250
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 RunLocalTBB(rank, world_size, width, height, dist, local_input, local_output);
251
252 10 std::vector<uint8_t> result;
253
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 GatherAndBroadcast(world_size, width, height, dist, local_output, result);
254 GetOutput() = std::move(result);
255 10 return true;
256 10 }
257
258 10 bool NikolaevDBlockLinearImageFilteringALL::PostProcessingImpl() {
259 10 return true;
260 }
261
262 } // namespace nikolaev_d_block_linear_image_filtering
263