GCC Code Coverage Report


Directory: ./
File: tasks/kulik_a_mat_mul_double_ccs/all/src/ops_all.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 207 208 99.5%
Functions: 15 15 100.0%
Branches: 103 148 69.6%

Line Branch Exec Source
1 #include "kulik_a_mat_mul_double_ccs/all/include/ops_all.hpp"
2
3 #include <mpi.h>
4 #include <omp.h>
5
6 #include <algorithm>
7 #include <cstddef>
8 #include <limits>
9 #include <stdexcept>
10 #include <tuple>
11 #include <vector>
12
13 #include "kulik_a_mat_mul_double_ccs/common/include/common.hpp"
14 #include "util/include/util.hpp"
15
16 namespace kulik_a_mat_mul_double_ccs {
17
18 namespace {
19
20 constexpr int kTagMatrix = 1001;
21
22 16 inline int ToIntCount(size_t value) {
23
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
16 if (value > static_cast<size_t>(std::numeric_limits<int>::max())) {
24 throw std::runtime_error("MPI count overflow");
25 }
26 16 return static_cast<int>(value);
27 }
28
29 6 inline size_t EstimateColumnCost(const CCS &a, const CCS &b, size_t j) {
30 size_t cost = 0;
31
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 6 times.
15 for (size_t k = b.col_ind[j]; k < b.col_ind[j + 1]; ++k) {
32 9 const size_t a_col = b.row[k];
33 9 cost += a.col_ind[a_col + 1] - a.col_ind[a_col];
34 }
35 6 return cost;
36 }
37
38 1 inline std::vector<int> BuildBalancedStarts(const std::vector<size_t> &weights, int parts) {
39 1 if (parts <= 0) {
40 parts = 1;
41 }
42 1 const int n = static_cast<int>(weights.size());
43 1 std::vector<int> starts(static_cast<size_t>(parts) + 1, 0);
44
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (n == 0) {
45 return starts;
46 }
47
1/4
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
1 std::vector<size_t> prefix(weights.size() + 1, 0);
48
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 1 times.
7 for (size_t i = 0; i < weights.size(); ++i) {
49 6 prefix[i + 1] = prefix[i] + weights[i];
50 }
51 1 const size_t total = prefix.back();
52 int cur = 0;
53
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 for (int part = 1; part < parts; ++part) {
54 1 const size_t target = total * static_cast<size_t>(part) / static_cast<size_t>(parts);
55
3/4
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 1 times.
5 while (cur < n && prefix[static_cast<size_t>(cur)] < target) {
56 4 ++cur;
57 }
58 1 starts[static_cast<size_t>(part)] = cur;
59 }
60 1 starts[static_cast<size_t>(parts)] = n;
61
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 for (int part = 1; part <= parts; ++part) {
62
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 starts[part] = std::max(starts[part], starts[part - 1]);
63 }
64 return starts;
65 }
66
67 6 inline void MatMultPhase1(size_t j, const CCS &a, const CCS &b, std::vector<size_t> &was,
68 std::vector<size_t> &col_nnz) {
69 size_t count = 0;
70
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 6 times.
15 for (size_t k = b.col_ind[j]; k < b.col_ind[j + 1]; ++k) {
71 9 const size_t b_row = b.row[k];
72
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 9 times.
23 for (size_t zc = a.col_ind[b_row]; zc < a.col_ind[b_row + 1]; ++zc) {
73
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 2 times.
14 const size_t a_row = a.row[zc];
74
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 2 times.
14 if (was[a_row] != j) {
75 12 was[a_row] = j;
76 12 ++count;
77 }
78 }
79 }
80 6 col_nnz[j] = count;
81 6 }
82
83
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 4 times.
6 inline void MatMultPhase2(size_t j, const CCS &a, const CCS &b, CCS &c, size_t stamp, std::vector<size_t> &was,
84 std::vector<double> &accum, std::vector<size_t> &rows) {
85 rows.clear();
86
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 6 times.
15 for (size_t k = b.col_ind[j]; k < b.col_ind[j + 1]; ++k) {
87 9 const double b_val = b.value[k];
88 9 const size_t b_row = b.row[k];
89
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 9 times.
23 for (size_t zc = a.col_ind[b_row]; zc < a.col_ind[b_row + 1]; ++zc) {
90 14 const size_t a_row = a.row[zc];
91
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 2 times.
14 accum[a_row] += a.value[zc] * b_val;
92
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 2 times.
14 if (was[a_row] != stamp) {
93
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 9 times.
12 was[a_row] = stamp;
94 rows.push_back(a_row);
95 }
96 }
97 }
98 std::ranges::sort(rows);
99 6 size_t write = c.col_ind[j];
100
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 6 times.
18 for (const size_t i : rows) {
101 12 c.row[write] = i;
102 12 c.value[write] = accum[i];
103 12 accum[i] = 0.0;
104 12 ++write;
105 }
106 6 }
107
108 2 void BcastCCS(CCS &m, int root_rank = 0) {
109 2 int world_rank = 0;
110 2 MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
111
112 2 MPI_Bcast(&m.m, 1, MPI_INT, root_rank, MPI_COMM_WORLD);
113 2 MPI_Bcast(&m.n, 1, MPI_INT, root_rank, MPI_COMM_WORLD);
114
115 2 int nz = 0;
116
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 if (world_rank == root_rank) {
117 1 nz = static_cast<int>(m.value.size());
118 }
119 2 MPI_Bcast(&nz, 1, MPI_INT, root_rank, MPI_COMM_WORLD);
120
121
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 if (world_rank != root_rank) {
122 1 m.col_ind.resize(static_cast<size_t>(m.m) + 1);
123 1 m.row.resize(static_cast<size_t>(nz));
124 1 m.value.resize(static_cast<size_t>(nz));
125 }
126
127 2 MPI_Bcast(m.col_ind.data(), ToIntCount((static_cast<size_t>(m.m) + 1) * sizeof(size_t)), MPI_BYTE, root_rank,
128 MPI_COMM_WORLD);
129 2 MPI_Bcast(m.row.data(), ToIntCount(static_cast<size_t>(nz) * sizeof(size_t)), MPI_BYTE, root_rank, MPI_COMM_WORLD);
130 2 MPI_Bcast(m.value.data(), nz, MPI_DOUBLE, root_rank, MPI_COMM_WORLD);
131 2 }
132
133 2 void SendCCS(const CCS &m, int dest) {
134 2 MPI_Send(&m.m, 1, MPI_INT, dest, kTagMatrix, MPI_COMM_WORLD);
135 2 MPI_Send(&m.n, 1, MPI_INT, dest, kTagMatrix, MPI_COMM_WORLD);
136 2 const int nz = static_cast<int>(m.value.size());
137 2 MPI_Send(&nz, 1, MPI_INT, dest, kTagMatrix, MPI_COMM_WORLD);
138 2 MPI_Send(m.col_ind.data(), ToIntCount(m.col_ind.size() * sizeof(size_t)), MPI_BYTE, dest, kTagMatrix, MPI_COMM_WORLD);
139 2 MPI_Send(m.row.data(), ToIntCount(m.row.size() * sizeof(size_t)), MPI_BYTE, dest, kTagMatrix, MPI_COMM_WORLD);
140 2 MPI_Send(m.value.data(), nz, MPI_DOUBLE, dest, kTagMatrix, MPI_COMM_WORLD);
141 2 }
142
143 2 void RecvCCS(CCS &m, int src) {
144 MPI_Status st;
145 2 MPI_Recv(&m.m, 1, MPI_INT, src, kTagMatrix, MPI_COMM_WORLD, &st);
146 2 MPI_Recv(&m.n, 1, MPI_INT, src, kTagMatrix, MPI_COMM_WORLD, &st);
147 2 int nz = 0;
148 2 MPI_Recv(&nz, 1, MPI_INT, src, kTagMatrix, MPI_COMM_WORLD, &st);
149 2 m.col_ind.resize(static_cast<size_t>(m.m) + 1);
150 2 m.row.resize(static_cast<size_t>(nz));
151 2 m.value.resize(static_cast<size_t>(nz));
152 2 MPI_Recv(m.col_ind.data(), ToIntCount(m.col_ind.size() * sizeof(size_t)), MPI_BYTE, src, kTagMatrix, MPI_COMM_WORLD,
153 &st);
154 2 MPI_Recv(m.row.data(), ToIntCount(m.row.size() * sizeof(size_t)), MPI_BYTE, src, kTagMatrix, MPI_COMM_WORLD, &st);
155 2 MPI_Recv(m.value.data(), nz, MPI_DOUBLE, src, kTagMatrix, MPI_COMM_WORLD, &st);
156 2 }
157
158 2 void ScatterB(const CCS &b, CCS &b_local, const std::vector<int> &col_starts, int rank, int size) {
159
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 if (rank == 0) {
160
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 for (int proc = 0; proc < size; ++proc) {
161
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 const int jstart = col_starts[static_cast<size_t>(proc)];
162 2 const int jend = col_starts[static_cast<size_t>(proc) + 1];
163 2 const int local_cols = jend - jstart;
164
165 2 CCS tmp;
166 2 tmp.m = local_cols;
167 2 tmp.n = b.n;
168
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 tmp.col_ind.resize(static_cast<size_t>(local_cols) + 1);
169
170
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 const size_t nnz_start = b.col_ind[static_cast<size_t>(jstart)];
171
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 const size_t nnz_end = b.col_ind[static_cast<size_t>(jend)];
172
173
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 tmp.row.assign(b.row.begin() + static_cast<std::ptrdiff_t>(nnz_start),
174 b.row.begin() + static_cast<std::ptrdiff_t>(nnz_end));
175 2 tmp.value.assign(b.value.begin() + static_cast<std::ptrdiff_t>(nnz_start),
176 b.value.begin() + static_cast<std::ptrdiff_t>(nnz_end));
177
178
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2 times.
10 for (int j = 0; j <= local_cols; ++j) {
179 8 tmp.col_ind[static_cast<size_t>(j)] =
180 8 b.col_ind[static_cast<size_t>(jstart) + static_cast<size_t>(j)] - nnz_start;
181 }
182
183
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 if (proc == 0) {
184
1/2
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
1 b_local = tmp;
185 } else {
186
1/2
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
1 SendCCS(tmp, proc);
187 }
188 2 }
189 } else {
190 1 RecvCCS(b_local, 0);
191 }
192 2 }
193
194 2 void GatherC(CCS &c, const CCS &c_local, const std::vector<int> &col_starts, int rank, int size) {
195
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 if (rank != 0) {
196 1 SendCCS(c_local, 0);
197 1 return;
198 }
199
200
1/2
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
1 std::vector<CCS> parts(static_cast<size_t>(size));
201
1/2
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
1 parts[0] = c_local;
202
203 size_t total_nnz = c_local.value.size();
204
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 for (int proc = 1; proc < size; ++proc) {
205
1/2
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
1 RecvCCS(parts[static_cast<size_t>(proc)], proc);
206 1 total_nnz += parts[static_cast<size_t>(proc)].value.size();
207 }
208
209 1 c.m = col_starts.back();
210 1 c.n = c_local.n;
211
1/2
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
1 c.col_ind.assign(static_cast<size_t>(c.m) + 1, 0);
212
1/2
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
1 c.row.resize(total_nnz);
213
1/2
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
1 c.value.resize(total_nnz);
214
215 size_t nnz_offset = 0;
216
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 for (int proc = 0; proc < size; ++proc) {
217 2 const CCS &src = parts[static_cast<size_t>(proc)];
218 2 const int start = col_starts[static_cast<size_t>(proc)];
219 2 const size_t cols = src.m;
220
221
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2 times.
10 for (size_t j = 0; j <= cols; ++j) {
222 8 c.col_ind[static_cast<size_t>(start) + j] = nnz_offset + src.col_ind[j];
223 }
224
225 std::ranges::copy(src.row, c.row.begin() + static_cast<std::ptrdiff_t>(nnz_offset));
226 std::ranges::copy(src.value, c.value.begin() + static_cast<std::ptrdiff_t>(nnz_offset));
227 2 nnz_offset += src.value.size();
228 }
229
230 1 c.col_ind[static_cast<size_t>(c.m)] = total_nnz;
231 1 }
232
233 } // namespace
234
235
1/2
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
2 KulikAMatMulDoubleCcsALL::KulikAMatMulDoubleCcsALL(const InType &in) {
236 SetTypeOfTask(GetStaticTypeOfTask());
237
238 2 int world_rank = 0;
239
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
240
241
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 if (world_rank == 0) {
242 GetInput() = in;
243
1/2
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
1 GetOutput() = CCS();
244 } else {
245
1/2
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
2 GetInput() = std::make_tuple(CCS(), CCS());
246
1/2
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
1 GetOutput() = CCS();
247 }
248 2 }
249
250 2 bool KulikAMatMulDoubleCcsALL::ValidationImpl() {
251 2 int world_rank = 0;
252 2 MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
253
254
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 if (world_rank == 0) {
255 const auto &a = std::get<0>(GetInput());
256 const auto &b = std::get<1>(GetInput());
257 1 return (a.m == b.n);
258 }
259
260 return true;
261 }
262
263 2 bool KulikAMatMulDoubleCcsALL::PreProcessingImpl() {
264 2 return true;
265 }
266
267 2 bool KulikAMatMulDoubleCcsALL::RunImpl() {
268 2 int world_rank = 0;
269 2 int world_size = 0;
270 2 MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
271 2 MPI_Comm_size(MPI_COMM_WORLD, &world_size);
272
273 auto &a = std::get<0>(GetInput());
274 auto &b = std::get<1>(GetInput());
275 OutType &c = GetOutput();
276
277 2 std::vector<int> col_starts;
278
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 if (world_rank == 0) {
279
1/2
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
1 std::vector<size_t> weights(static_cast<size_t>(b.m), 0);
280
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 1 times.
7 for (size_t j = 0; j < static_cast<size_t>(b.m); ++j) {
281 6 weights[j] = EstimateColumnCost(a, b, j);
282 }
283
2/6
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
2 col_starts = BuildBalancedStarts(weights, world_size);
284 } else {
285
1/4
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
1 col_starts.resize(static_cast<size_t>(world_size) + 1, 0);
286 }
287
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 MPI_Bcast(col_starts.data(), world_size + 1, MPI_INT, 0, MPI_COMM_WORLD);
288
289
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 BcastCCS(a);
290
291 2 CCS local_b;
292
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 ScatterB(b, local_b, col_starts, world_rank, world_size);
293
294 2 const auto local_cols = static_cast<size_t>(local_b.m);
295
296 2 CCS local_c;
297 2 local_c.m = local_b.m;
298 2 local_c.n = a.n;
299
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 local_c.col_ind.assign(local_cols + 1, 0);
300
301
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 std::vector<size_t> col_nnz(local_cols, 0);
302
303
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 #pragma omp parallel num_threads(std::max(1, ppc::util::GetNumThreads())) default(none) \
304 shared(a, local_b, col_nnz, local_cols)
305 {
306 std::vector<size_t> was(static_cast<size_t>(a.n), std::numeric_limits<size_t>::max());
307 #pragma omp for schedule(static)
308 for (size_t j = 0; j < local_cols; ++j) {
309 MatMultPhase1(j, a, local_b, was, col_nnz);
310 }
311 }
312
313 size_t total_nz = 0;
314
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (size_t j = 0; j < local_cols; ++j) {
315 6 local_c.col_ind[j] = total_nz;
316 6 total_nz += col_nnz[j];
317 }
318 2 local_c.col_ind[local_cols] = total_nz;
319 2 local_c.nz = total_nz;
320
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 local_c.value.resize(total_nz);
321
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 local_c.row.resize(total_nz);
322
323
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 #pragma omp parallel num_threads(std::max(1, ppc::util::GetNumThreads())) default(none) \
324 shared(a, local_b, local_c, local_cols)
325 {
326 std::vector<size_t> was(static_cast<size_t>(a.n), std::numeric_limits<size_t>::max());
327 std::vector<double> accum(static_cast<size_t>(a.n), 0.0);
328 std::vector<size_t> rows;
329 #pragma omp for schedule(static)
330 for (size_t j = 0; j < local_cols; ++j) {
331 MatMultPhase2(j, a, local_b, local_c, local_cols + j, was, accum, rows);
332 }
333 }
334
335
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 GatherC(c, local_c, col_starts, world_rank, world_size);
336
337 2 return true;
338 2 }
339
340 2 bool KulikAMatMulDoubleCcsALL::PostProcessingImpl() {
341 2 int world_rank = 0;
342 2 MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
343
344 OutType &c = GetOutput();
345
346 2 int m = 0;
347 2 int n = 0;
348 2 int nz = 0;
349
350
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 if (world_rank == 0) {
351 1 m = static_cast<int>(c.m);
352 1 n = static_cast<int>(c.n);
353 1 nz = static_cast<int>(c.value.size());
354 }
355
356 2 MPI_Bcast(&m, 1, MPI_INT, 0, MPI_COMM_WORLD);
357 2 MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
358 2 MPI_Bcast(&nz, 1, MPI_INT, 0, MPI_COMM_WORLD);
359
360
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 if (world_rank != 0) {
361 1 c.m = m;
362 1 c.n = n;
363 1 c.col_ind.resize(static_cast<size_t>(m) + 1);
364 1 c.row.resize(static_cast<size_t>(nz));
365 1 c.value.resize(static_cast<size_t>(nz));
366 }
367
368 2 MPI_Bcast(c.col_ind.data(), ToIntCount((static_cast<size_t>(m) + 1) * sizeof(size_t)), MPI_BYTE, 0, MPI_COMM_WORLD);
369 2 MPI_Bcast(c.row.data(), ToIntCount(static_cast<size_t>(nz) * sizeof(size_t)), MPI_BYTE, 0, MPI_COMM_WORLD);
370 2 MPI_Bcast(c.value.data(), nz, MPI_DOUBLE, 0, MPI_COMM_WORLD);
371
372 2 return true;
373 }
374
375 } // namespace kulik_a_mat_mul_double_ccs
376