GCC Code Coverage Report


Directory: ./
File: tasks/zenin_a_gauss_filter/mpi/src/ops_mpi.cpp
Date: 2026-01-10 02:40:41
Exec Total Coverage
Lines: 147 147 100.0%
Functions: 11 11 100.0%
Branches: 66 92 71.7%

Line Branch Exec Source
1 #include "zenin_a_gauss_filter/mpi/include/ops_mpi.hpp"
2
3 #include <mpi.h>
4
5 #include <algorithm>
6 #include <array>
7 #include <cstddef>
8 #include <cstdint>
9 #include <functional>
10 #include <utility>
11 #include <vector>
12
13 #include "zenin_a_gauss_filter/common/include/common.hpp"
14
15 namespace zenin_a_gauss_filter {
16
17 namespace {
18
19 constexpr int kHalo = 1;
20 constexpr int kTagExpanded = 200;
21 constexpr int kTagResult = 500;
22
23 struct BlockInfo {
24 int my_h = 0, my_w = 0;
25 int start_y = 0, start_x = 0;
26 };
27
28 std::size_t GlobalIdx(int gx, int gy, int chan, int width, int channels) {
29 61854 return ((static_cast<std::size_t>(gy) * width + gx) * channels) + static_cast<std::size_t>(chan);
30 }
31
32 int Clampi(int v, int lo, int hi) {
33 return std::max(lo, std::min(hi, v));
34 }
35
36 std::uint8_t Clampu8(int v) {
37 51776 return static_cast<std::uint8_t>(Clampi(v, 0, 255));
38 }
39
40 std::uint8_t GetLocal(const std::vector<std::uint8_t> &buf, int local_w_with_halo, int ch, int x, int y, int c) {
41 51776 const int idx = ((y * local_w_with_halo + x) * ch) + c;
42 51776 return buf[idx];
43 }
44
45 BlockInfo CalcBlock(int pr, int pc, int h, int w, int grid_r, int grid_c) {
46 176 const int base_h = h / grid_r;
47 176 const int base_w = w / grid_c;
48 176 const int extra_h = h % grid_r;
49 176 const int extra_w = w % grid_c;
50
51 BlockInfo b;
52 352 b.my_h = base_h + (pr < extra_h ? 1 : 0);
53
2/4
✓ Branch 0 taken 88 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 88 times.
✗ Branch 3 not taken.
176 b.my_w = base_w + (pc < extra_w ? 1 : 0);
54
55 88 b.start_y = (pr * base_h) + std::min(pr, extra_h);
56 176 b.start_x = (pc * base_w) + std::min(pc, extra_w);
57 return b;
58 }
59
60 88 void FillExpandedBlock(const zenin_a_gauss_filter::Image &img, const zenin_a_gauss_filter::BlockInfo &bb, int width,
61 int height, int channels, std::vector<std::uint8_t> *dst) {
62 88 const int hh = bb.my_h;
63 88 const int ww = bb.my_w;
64 88 const int dst_w = ww + (2 * kHalo);
65 88 const int dst_h = hh + (2 * kHalo);
66
67 88 dst->assign(static_cast<std::size_t>(dst_h) * dst_w * channels, 0);
68
69
2/2
✓ Branch 0 taken 1129 times.
✓ Branch 1 taken 88 times.
1217 for (int ly = -kHalo; ly < hh + kHalo; ++ly) {
70
2/2
✓ Branch 0 taken 39620 times.
✓ Branch 1 taken 1129 times.
40749 for (int lx = -kHalo; lx < ww + kHalo; ++lx) {
71 39620 int gy = bb.start_y + ly;
72 39620 int gx = bb.start_x + lx;
73
74
4/4
✓ Branch 0 taken 37312 times.
✓ Branch 1 taken 2308 times.
✓ Branch 2 taken 37375 times.
✓ Branch 3 taken 2245 times.
76932 gy = std::max(0, std::min(height - 1, gy));
75
4/4
✓ Branch 0 taken 37362 times.
✓ Branch 1 taken 2258 times.
✓ Branch 2 taken 37345 times.
✓ Branch 3 taken 2275 times.
76982 gx = std::max(0, std::min(width - 1, gx));
76
77 39620 const int dy = ly + kHalo;
78 39620 const int dx = lx + kHalo;
79
80
2/2
✓ Branch 0 taken 61854 times.
✓ Branch 1 taken 39620 times.
101474 for (int chan = 0; chan < channels; ++chan) {
81 61854 (*dst)[((dy * dst_w + dx) * channels) + chan] = img.pixels[GlobalIdx(gx, gy, chan, width, channels)];
82 }
83 }
84 }
85 88 }
86
87 88 void BuildOrRecvExpandedBlock(int rank, int proc_num, int grid_cols, int width, int height, int channels,
88 const zenin_a_gauss_filter::BlockInfo &my_block,
89 const std::function<zenin_a_gauss_filter::BlockInfo(int, int)> &calc_block,
90 const zenin_a_gauss_filter::Image *root_img, std::vector<std::uint8_t> *local_in) {
91
2/2
✓ Branch 0 taken 44 times.
✓ Branch 1 taken 44 times.
88 if (rank == 0) {
92 44 FillExpandedBlock(*root_img, my_block, width, height, channels, local_in);
93
94
2/2
✓ Branch 0 taken 44 times.
✓ Branch 1 taken 44 times.
88 for (int rnk = 1; rnk < proc_num; ++rnk) {
95 44 const int rpr = rnk / grid_cols;
96
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 44 times.
44 const int rpc = rnk % grid_cols;
97 44 const auto rb = calc_block(rpr, rpc);
98
99 44 std::vector<std::uint8_t> pack;
100
1/2
✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
44 FillExpandedBlock(*root_img, rb, width, height, channels, &pack);
101
1/2
✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
44 MPI_Send(pack.data(), static_cast<int>(pack.size()), MPI_UNSIGNED_CHAR, rnk, kTagExpanded, MPI_COMM_WORLD);
102 }
103 } else {
104 44 MPI_Recv(local_in->data(), static_cast<int>(local_in->size()), MPI_UNSIGNED_CHAR, 0, kTagExpanded, MPI_COMM_WORLD,
105 MPI_STATUS_IGNORE);
106 }
107 88 }
108
109 88 void ConvolveLocalBlock(const std::vector<std::uint8_t> &local_in, int lw, int my_w, int my_h, int channels,
110 std::vector<std::uint8_t> *local_out) {
111 constexpr int kKernelSum = 16;
112
113
2/2
✓ Branch 0 taken 953 times.
✓ Branch 1 taken 88 times.
1041 for (int yd = 0; yd < my_h; ++yd) {
114 953 const int ly = yd + kHalo;
115
2/2
✓ Branch 0 taken 33290 times.
✓ Branch 1 taken 953 times.
34243 for (int xd = 0; xd < my_w; ++xd) {
116 33290 const int lx = xd + kHalo;
117
2/2
✓ Branch 0 taken 51776 times.
✓ Branch 1 taken 33290 times.
85066 for (int chan = 0; chan < channels; ++chan) {
118 51776 const int v00 = static_cast<int>(zenin_a_gauss_filter::GetLocal(local_in, lw, channels, lx - 1, ly - 1, chan));
119 51776 const int v01 = static_cast<int>(zenin_a_gauss_filter::GetLocal(local_in, lw, channels, lx, ly - 1, chan));
120 51776 const int v02 = static_cast<int>(zenin_a_gauss_filter::GetLocal(local_in, lw, channels, lx + 1, ly - 1, chan));
121
122 51776 const int v10 = static_cast<int>(zenin_a_gauss_filter::GetLocal(local_in, lw, channels, lx - 1, ly, chan));
123 51776 const int v11 = static_cast<int>(zenin_a_gauss_filter::GetLocal(local_in, lw, channels, lx, ly, chan));
124 51776 const int v12 = static_cast<int>(zenin_a_gauss_filter::GetLocal(local_in, lw, channels, lx + 1, ly, chan));
125
126 51776 const int v20 = static_cast<int>(zenin_a_gauss_filter::GetLocal(local_in, lw, channels, lx - 1, ly + 1, chan));
127 51776 const int v21 = static_cast<int>(zenin_a_gauss_filter::GetLocal(local_in, lw, channels, lx, ly + 1, chan));
128 51776 const int v22 = static_cast<int>(zenin_a_gauss_filter::GetLocal(local_in, lw, channels, lx + 1, ly + 1, chan));
129
130 int sum = 0;
131 sum += v00 * 1;
132 51776 sum += v01 * 2;
133 51776 sum += v02 * 1;
134 51776 sum += v10 * 2;
135 51776 sum += v11 * 4;
136 51776 sum += v12 * 2;
137 51776 sum += v20 * 1;
138 51776 sum += v21 * 2;
139 51776 sum += v22 * 1;
140
141 51776 const int res = (sum + (kKernelSum / 2)) / kKernelSum;
142 51776 (*local_out)[((yd * my_w + xd) * channels) + chan] = zenin_a_gauss_filter::Clampu8(res);
143 }
144 }
145 }
146 88 }
147
148 88 void CopyBlockToImage(const BlockInfo &block, const std::vector<std::uint8_t> &src, int src_w, int width, int channels,
149 std::vector<std::uint8_t> *dst) {
150
2/2
✓ Branch 0 taken 953 times.
✓ Branch 1 taken 88 times.
1041 for (int yd = 0; yd < block.my_h; ++yd) {
151
2/2
✓ Branch 0 taken 33290 times.
✓ Branch 1 taken 953 times.
34243 for (int xd = 0; xd < block.my_w; ++xd) {
152 33290 const int gy = block.start_y + yd;
153 33290 const int gx = block.start_x + xd;
154
2/2
✓ Branch 0 taken 51776 times.
✓ Branch 1 taken 33290 times.
85066 for (int chan = 0; chan < channels; ++chan) {
155 51776 (*dst)[((gy * width + gx) * channels) + chan] = src[((yd * src_w + xd) * channels) + chan];
156 }
157 }
158 }
159 88 }
160
161 88 void GatherAndBroadcastResult(int rank, int proc_num, int grid_cols, int width, int channels, const BlockInfo &my_block,
162 const std::function<BlockInfo(int, int)> &calc_block,
163 const std::vector<std::uint8_t> &local_out, std::vector<std::uint8_t> *final_image) {
164
2/2
✓ Branch 0 taken 44 times.
✓ Branch 1 taken 44 times.
88 if (rank == 0) {
165 44 CopyBlockToImage(my_block, local_out, my_block.my_w, width, channels, final_image);
166
167
2/2
✓ Branch 0 taken 44 times.
✓ Branch 1 taken 44 times.
88 for (int src_rank = 1; src_rank < proc_num; ++src_rank) {
168 44 const int spr = src_rank / grid_cols;
169
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 44 times.
44 const int spc = src_rank % grid_cols;
170 44 const BlockInfo sb = calc_block(spr, spc);
171
172
1/2
✓ Branch 2 taken 44 times.
✗ Branch 3 not taken.
44 std::vector<std::uint8_t> recv(static_cast<std::size_t>(sb.my_h) * sb.my_w * channels);
173
1/2
✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
44 MPI_Recv(recv.data(), static_cast<int>(recv.size()), MPI_UNSIGNED_CHAR, src_rank, kTagResult, MPI_COMM_WORLD,
174 MPI_STATUS_IGNORE);
175
176 44 CopyBlockToImage(sb, recv, sb.my_w, width, channels, final_image);
177 }
178 } else {
179 44 MPI_Send(local_out.data(), static_cast<int>(local_out.size()), MPI_UNSIGNED_CHAR, 0, kTagResult, MPI_COMM_WORLD);
180 }
181
182 88 MPI_Bcast(final_image->data(), static_cast<int>(final_image->size()), MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD);
183 88 }
184
185 } // namespace
186
187
1/2
✓ Branch 1 taken 88 times.
✗ Branch 2 not taken.
88 ZeninAGaussFilterMPI::ZeninAGaussFilterMPI(const InType &in) {
188 SetTypeOfTask(GetStaticTypeOfTask());
189 GetInput() = in;
190 88 GetOutput() = OutType{};
191 88 }
192
193 88 bool ZeninAGaussFilterMPI::ValidationImpl() {
194 88 int rank = 0;
195 88 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
196
197 bool ok = true;
198
2/2
✓ Branch 0 taken 44 times.
✓ Branch 1 taken 44 times.
88 if (rank == 0) {
199 const auto &in = GetInput();
200 44 const std::size_t need = static_cast<std::size_t>(in.width) * in.height * in.channels;
201
202
4/8
✓ Branch 0 taken 44 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 44 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 44 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 44 times.
✗ Branch 7 not taken.
44 ok = (in.width > 0) && (in.height > 0) && (in.channels == 1 || in.channels == 3) && (in.pixels.size() == need);
203 }
204
205 88 int ok_int = ok ? 1 : 0;
206
207 88 MPI_Bcast(&ok_int, 1, MPI_INT, 0, MPI_COMM_WORLD);
208 88 return ok_int == 1;
209 }
210
211 88 bool ZeninAGaussFilterMPI::PreProcessingImpl() {
212 88 int rank = 0;
213 88 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
214 88 MPI_Comm_size(MPI_COMM_WORLD, &proc_num_);
215
216
2/2
✓ Branch 0 taken 44 times.
✓ Branch 1 taken 44 times.
88 if (rank == 0) {
217 const auto &in = GetInput();
218 44 width_ = in.width;
219 44 height_ = in.height;
220 44 channels_ = in.channels;
221 }
222
223 88 MPI_Bcast(&width_, 1, MPI_INT, 0, MPI_COMM_WORLD);
224 88 MPI_Bcast(&height_, 1, MPI_INT, 0, MPI_COMM_WORLD);
225 88 MPI_Bcast(&channels_, 1, MPI_INT, 0, MPI_COMM_WORLD);
226
227 88 std::array<int, 2> dims{0, 0};
228 88 MPI_Dims_create(proc_num_, 2, dims.data());
229 88 grid_rows_ = dims[0];
230 88 grid_cols_ = dims[1];
231
232 88 block_h_ = height_ / grid_rows_;
233 88 block_w_ = width_ / grid_cols_;
234 88 extra_h_ = height_ % grid_rows_;
235 88 extra_w_ = width_ % grid_cols_;
236
237
2/2
✓ Branch 0 taken 44 times.
✓ Branch 1 taken 44 times.
88 if (rank == 0) {
238 auto &out = GetOutput();
239 44 out.height = height_;
240 44 out.width = width_;
241 44 out.channels = channels_;
242 44 out.pixels.assign(static_cast<std::size_t>(width_) * height_ * channels_, 0);
243 }
244
245 88 return true;
246 }
247
248 88 bool ZeninAGaussFilterMPI::RunImpl() {
249 88 int rank = 0;
250 88 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
251
252 88 const int pr = rank / grid_cols_;
253 88 const int pc = rank % grid_cols_;
254
255
2/2
✓ Branch 0 taken 55 times.
✓ Branch 1 taken 33 times.
88 const BlockInfo my_block = CalcBlock(pr, pc, height_, width_, grid_rows_, grid_cols_);
256 const int my_h = my_block.my_h;
257 const int my_w = my_block.my_w;
258
259 88 const int lw = my_w + (2 * kHalo);
260 88 const int lh = my_h + (2 * kHalo);
261
262 88 std::vector<std::uint8_t> local_in(static_cast<std::size_t>(lh) * lw * channels_, 0);
263
1/4
✓ Branch 1 taken 88 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
88 std::vector<std::uint8_t> local_out(static_cast<std::size_t>(my_h) * my_w * channels_, 0);
264
265 88 auto calc_block = [&](int rpr, int rpc) -> BlockInfo {
266
1/2
✓ Branch 0 taken 88 times.
✗ Branch 1 not taken.
88 return CalcBlock(rpr, rpc, height_, width_, grid_rows_, grid_cols_);
267 };
268
269
2/2
✓ Branch 0 taken 44 times.
✓ Branch 1 taken 44 times.
88 if (rank == 0) {
270 const auto &img = GetInput();
271
1/2
✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
88 BuildOrRecvExpandedBlock(rank, proc_num_, grid_cols_, width_, height_, channels_, my_block, calc_block, &img,
272 &local_in);
273 } else {
274
1/2
✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
88 BuildOrRecvExpandedBlock(rank, proc_num_, grid_cols_, width_, height_, channels_, my_block, calc_block, nullptr,
275 &local_in);
276 }
277
278 88 ConvolveLocalBlock(local_in, lw, my_w, my_h, channels_, &local_out);
279
280
2/6
✓ Branch 1 taken 88 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 88 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
88 std::vector<std::uint8_t> final_image(static_cast<std::size_t>(width_) * height_ * channels_, 0);
281
1/4
✓ Branch 1 taken 88 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
88 GatherAndBroadcastResult(rank, proc_num_, grid_cols_, width_, channels_, my_block, calc_block, local_out,
282 &final_image);
283
284
2/2
✓ Branch 1 taken 85 times.
✓ Branch 2 taken 3 times.
176 GetOutput() = OutType{height_, width_, channels_, std::move(final_image)};
285 88 return true;
286 }
287
288 88 bool ZeninAGaussFilterMPI::PostProcessingImpl() {
289 88 return true;
290 }
291
292 } // namespace zenin_a_gauss_filter
293