GCC Code Coverage Report


Directory: ./
File: tasks/boltenkov_s_gaussian_kernel/all/src/ops_all.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 115 117 98.3%
Functions: 8 9 88.9%
Branches: 77 138 55.8%

Line Branch Exec Source
1 #include "boltenkov_s_gaussian_kernel/all/include/ops_all.hpp"
2
3 #include <mpi.h>
4 #include <omp.h>
5
6 #include <algorithm>
7 #include <array>
8 #include <climits>
9 #include <cstddef>
10 #include <vector>
11
12 #include "boltenkov_s_gaussian_kernel/common/include/common.hpp"
13 #include "util/include/util.hpp"
14
15 namespace boltenkov_s_gaussian_kernel {
16
17
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 BoltenkovSGaussianKernelALL::BoltenkovSGaussianKernelALL(const InType &in)
18
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 : kernel_{{{1, 2, 1}, {2, 4, 2}, {1, 2, 1}}} {
19 SetTypeOfTask(GetStaticTypeOfTask());
20 4 int rank = 0;
21
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
22
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 if (rank == 0) {
23 GetInput() = in;
24 } else {
25 2 GetInput() = InType();
26 }
27 4 GetOutput() = std::vector<std::vector<int>>();
28 4 }
29
30 4 bool BoltenkovSGaussianKernelALL::ValidationImpl() {
31 4 int rank = 0;
32 4 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
33
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 if (rank == 0) {
34 2 std::size_t n = std::get<0>(GetInput());
35
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 std::size_t m = std::get<1>(GetInput());
36 const auto &data = std::get<2>(GetInput());
37
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 if (data.size() != n) {
38 return false;
39 }
40
2/2
✓ Branch 0 taken 55 times.
✓ Branch 1 taken 2 times.
57 for (std::size_t i = 0; i < n; ++i) {
41
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 55 times.
55 if (data[i].size() != m) {
42 return false;
43 }
44 }
45 return true;
46 }
47 return true;
48 }
49
50 4 bool BoltenkovSGaussianKernelALL::PreProcessingImpl() {
51 4 int rank = 0;
52 4 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
53 4 auto n_size_t = std::get<0>(GetInput());
54 4 auto m_size_t = std::get<1>(GetInput());
55
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 if (n_size_t > INT_MAX || m_size_t > INT_MAX) {
56 return false;
57 }
58 4 int n_val = 0;
59 4 int m_val = 0;
60
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 if (rank == 0) {
61 2 n_val = static_cast<int>(n_size_t);
62 2 m_val = static_cast<int>(m_size_t);
63 }
64 4 MPI_Bcast(&n_val, 1, MPI_INT, 0, MPI_COMM_WORLD);
65 4 MPI_Bcast(&m_val, 1, MPI_INT, 0, MPI_COMM_WORLD);
66
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 if (n_val < 1e6 && n_val > 0) {
67 4 GetOutput().resize(static_cast<std::size_t>(n_val));
68 } else {
69 return false;
70 }
71
2/2
✓ Branch 0 taken 110 times.
✓ Branch 1 taken 4 times.
114 for (int i = 0; i < n_val; ++i) {
72
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 110 times.
110 if (m_val < 1e6 && m_val > 0) {
73 110 GetOutput()[i].resize(static_cast<std::size_t>(m_val));
74 } else {
75 return false;
76 }
77 }
78 return true;
79 }
80
81 bool BoltenkovSGaussianKernelALL::IsValidSize(int n, int m) {
82 return n > 0 && m > 0 && n < 1e6 && m < 1e6;
83 }
84
85 2 void BoltenkovSGaussianKernelALL::ComputeScatterParams(int n, int m, int size, int rows_per_proc,
86 std::vector<int> &send_counts, std::vector<int> &displs) {
87 2 send_counts.assign(size, 0);
88 2 displs.assign(size, 0);
89
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 2 times.
6 for (int i = 0; i < size; ++i) {
90 4 int s = i * rows_per_proc;
91
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 int e = std::min(s + rows_per_proc, n) - 1;
92
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 int rows = (s < n) ? (e - s + 1) : 0;
93
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (rows > 0) {
94 4 int h_first = std::max(0, s - 1);
95 4 int h_last = std::min(n - 1, e + 1);
96 4 int h_rows = h_last - h_first + 1;
97 4 send_counts[i] = h_rows * m;
98 4 displs[i] = h_first * m;
99 }
100 }
101 2 }
102
103 2 void BoltenkovSGaussianKernelALL::ComputeGatherDispls(int m, const std::vector<int> &gather_counts,
104 std::vector<int> &recv_counts, std::vector<int> &recv_displs) {
105 2 int size = static_cast<int>(gather_counts.size());
106 2 recv_counts.resize(size);
107 2 recv_displs.resize(size);
108
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 2 times.
6 for (int i = 0; i < size; ++i) {
109
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 recv_counts[i] = gather_counts[i] * m;
110
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 recv_displs[i] = (i == 0) ? 0 : recv_displs[i - 1] + recv_counts[i - 1];
111 }
112 2 }
113
114 4 std::vector<int> BoltenkovSGaussianKernelALL::ApplyGaussianFilterFlat(const std::vector<int> &local_halo_flat,
115 int halo_rows, int local_start_row,
116 int local_rows, int m,
117 const std::array<std::array<int, 3>, 3> &kernel,
118 int shift) {
119 4 const int tmp_rows = local_rows + 2;
120 4 const int tmp_cols = m + 2;
121 4 std::vector<int> tmp(static_cast<size_t>(tmp_rows) * static_cast<size_t>(tmp_cols), 0);
122
123 4 const int halo_first = std::max(0, local_start_row - 1);
124
125
2/2
✓ Branch 0 taken 63 times.
✓ Branch 1 taken 4 times.
67 for (int i = 0; i < tmp_rows; ++i) {
126 63 int global_row = local_start_row - 1 + i;
127
4/4
✓ Branch 0 taken 61 times.
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 59 times.
✓ Branch 3 taken 2 times.
63 if (global_row >= halo_first && global_row < halo_first + halo_rows) {
128 59 const int src_offset = (global_row - halo_first) * m;
129
1/2
✓ Branch 0 taken 59 times.
✗ Branch 1 not taken.
59 int *dst_row = &tmp[(static_cast<size_t>(i) * static_cast<size_t>(tmp_cols)) + 1];
130
1/2
✓ Branch 0 taken 59 times.
✗ Branch 1 not taken.
59 std::copy_n(&local_halo_flat[src_offset], m, dst_row);
131 }
132 }
133
134
1/4
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
4 std::vector<int> local_res(static_cast<size_t>(local_rows) * static_cast<size_t>(m), 0);
135
136
2/4
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 4 times.
✗ Branch 4 not taken.
4 #pragma omp parallel for num_threads(ppc::util::GetNumThreads()) default(none) \
137 shared(tmp, local_res, local_rows, m, kernel, shift, tmp_cols)
138 for (int i = 0; i < local_rows; ++i) {
139 const int *row0 = &tmp[static_cast<size_t>(i) * static_cast<size_t>(tmp_cols)];
140 const int *row1 = row0 + tmp_cols;
141 const int *row2 = row1 + tmp_cols;
142 int *out_row = &local_res[static_cast<size_t>(i) * static_cast<size_t>(m)];
143
144 const int k00 = kernel[0][0];
145 const int k01 = kernel[0][1];
146 const int k02 = kernel[0][2];
147 const int k10 = kernel[1][0];
148 const int k11 = kernel[1][1];
149 const int k12 = kernel[1][2];
150 const int k20 = kernel[2][0];
151 const int k21 = kernel[2][1];
152 const int k22 = kernel[2][2];
153
154 for (int j = 0; j < m; ++j) {
155 int val = (row0[j] * k00) + (row0[j + 1] * k01) + (row0[j + 2] * k02) + (row1[j] * k10) + (row1[j + 1] * k11) +
156 (row1[j + 2] * k12) + (row2[j] * k20) + (row2[j + 1] * k21) + (row2[j + 2] * k22);
157 out_row[j] = val >> shift;
158 }
159 }
160
161 4 return local_res;
162 }
163
164 4 bool BoltenkovSGaussianKernelALL::RunImpl() {
165 4 int rank = 0;
166 4 int size = 0;
167 4 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
168 4 MPI_Comm_size(MPI_COMM_WORLD, &size);
169
170 4 int n = static_cast<int>(GetOutput().size());
171
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 int m = static_cast<int>(GetOutput()[0].size());
172
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (!IsValidSize(n, m)) {
173 return false;
174 }
175
176 4 MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
177 4 MPI_Bcast(&m, 1, MPI_INT, 0, MPI_COMM_WORLD);
178
179 4 std::vector<int> data_flat(static_cast<size_t>(n) * static_cast<size_t>(m));
180
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 if (rank == 0) {
181 const auto &global_data = std::get<2>(GetInput());
182
2/2
✓ Branch 0 taken 55 times.
✓ Branch 1 taken 2 times.
57 for (int i = 0; i < n; ++i) {
183
1/2
✓ Branch 0 taken 55 times.
✗ Branch 1 not taken.
55 std::copy_n(global_data[i].data(), m, &data_flat[static_cast<size_t>(i) * static_cast<size_t>(m)]);
184 }
185 }
186
187 4 int rows_per_proc = (n + size - 1) / size;
188 4 int local_start = rank * rows_per_proc;
189 4 int local_rows = 0;
190
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (local_start < n) {
191 4 local_rows = std::min(rows_per_proc, n - local_start);
192 }
193
194
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 int halo_first = std::max(0, local_start - 1);
195
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 int halo_last = std::min(n - 1, local_start + local_rows);
196
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 int halo_rows = (local_rows > 0) ? (halo_last - halo_first + 1) : 0;
197
198
1/4
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
4 std::vector<int> send_counts(size, 0);
199
1/4
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
4 std::vector<int> displs(size, 0);
200
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 if (rank == 0) {
201
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 ComputeScatterParams(n, m, size, rows_per_proc, send_counts, displs);
202 }
203
204
2/6
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 4 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
4 std::vector<int> local_halo_flat(static_cast<size_t>(halo_rows) * static_cast<size_t>(m));
205
206
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 MPI_Scatterv(data_flat.data(), send_counts.data(), displs.data(), MPI_INT, local_halo_flat.data(),
207 static_cast<int>(local_halo_flat.size()), MPI_INT, 0, MPI_COMM_WORLD);
208
209 4 std::vector<int> local_res_flat;
210
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (local_rows > 0) {
211
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
8 local_res_flat = ApplyGaussianFilterFlat(local_halo_flat, halo_rows, local_start, local_rows, m, kernel_, shift_);
212 }
213
214
2/6
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 4 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
4 std::vector<int> gather_counts(size, 0);
215
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 MPI_Gather(&local_rows, 1, MPI_INT, gather_counts.data(), 1, MPI_INT, 0, MPI_COMM_WORLD);
216
217
1/4
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
4 std::vector<int> recv_counts(size, 0);
218
1/4
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
4 std::vector<int> recv_displs(size, 0);
219
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 if (rank == 0) {
220
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 ComputeGatherDispls(m, gather_counts, recv_counts, recv_displs);
221 }
222
223
2/6
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 4 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
4 std::vector<int> out_flat(static_cast<size_t>(n) * static_cast<size_t>(m));
224
225
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 MPI_Gatherv(local_res_flat.data(), static_cast<int>(local_res_flat.size()), MPI_INT, out_flat.data(),
226 recv_counts.data(), recv_displs.data(), MPI_INT, 0, MPI_COMM_WORLD);
227
228
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 MPI_Bcast(out_flat.data(), static_cast<int>(out_flat.size()), MPI_INT, 0, MPI_COMM_WORLD);
229
230 auto &output = GetOutput();
231
2/2
✓ Branch 0 taken 110 times.
✓ Branch 1 taken 4 times.
114 for (int i = 0; i < n; ++i) {
232
1/2
✓ Branch 0 taken 110 times.
✗ Branch 1 not taken.
110 std::copy_n(&out_flat[static_cast<size_t>(i) * static_cast<size_t>(m)], m, output[i].data());
233 }
234
235 return true;
236 }
237
238 4 bool BoltenkovSGaussianKernelALL::PostProcessingImpl() {
239 4 return true;
240 }
241
242 } // namespace boltenkov_s_gaussian_kernel
243