GCC Code Coverage Report


Directory: ./
File: tasks/kapanova_s_sparse_matrix_mult_ccs/all/src/ops_all.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 124 124 100.0%
Functions: 9 9 100.0%
Branches: 83 122 68.0%

Line Branch Exec Source
1 #include "kapanova_s_sparse_matrix_mult_ccs/all/include/ops_all.hpp"
2
3 #include <mpi.h>
4 #include <omp.h>
5
6 #include <algorithm>
7 #include <cstddef>
8 #include <cstdint>
9 #include <vector>
10
11 #include "kapanova_s_sparse_matrix_mult_ccs/common/include/common.hpp"
12
13 namespace kapanova_s_sparse_matrix_mult_ccs {
14
15
1/2
✓ Branch 2 taken 10 times.
✗ Branch 3 not taken.
10 KapanovaSSparseMatrixMultCCSALL::KapanovaSSparseMatrixMultCCSALL(const InType &in) {
16 SetTypeOfTask(GetStaticTypeOfTask());
17 GetInput() = in;
18 10 }
19
20 10 bool KapanovaSSparseMatrixMultCCSALL::ValidationImpl() {
21 const auto &a = std::get<0>(GetInput());
22 const auto &b = std::get<1>(GetInput());
23
3/6
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 10 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 10 times.
10 return (a.cols == b.rows && a.rows > 0 && a.cols > 0 && b.rows > 0 && b.cols > 0 &&
24
3/6
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 10 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 10 times.
20 a.col_ptrs.size() == static_cast<size_t>(a.cols + 1) && b.col_ptrs.size() == static_cast<size_t>(b.cols + 1));
25 }
26
27 10 bool KapanovaSSparseMatrixMultCCSALL::PreProcessingImpl() {
28 10 return true;
29 }
30 10 bool KapanovaSSparseMatrixMultCCSALL::PostProcessingImpl() {
31 10 return true;
32 }
33
34 namespace {
35
36 using MpiU64 = std::uint64_t;
37 MPI_Datatype k_mpi_u64 = MPI_UINT64_T;
38
39 5 std::vector<MpiU64> ComputeBalancedRanges(int total_cols, int num_procs, const CCSMatrix &a, const CCSMatrix &b) {
40 5 std::vector<MpiU64> ranges(static_cast<size_t>(num_procs) + 1, 0);
41
1/2
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
5 ranges[num_procs] = static_cast<MpiU64>(total_cols);
42
1/2
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
5 if (total_cols == 0) {
43 return ranges;
44 }
45
46
1/4
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
5 std::vector<int> cost(static_cast<size_t>(total_cols), 0);
47 size_t total_cost = 0;
48 5 #pragma omp parallel for reduction(+ : total_cost) schedule(guided) default(none) shared(a, b, cost, total_cols)
49 for (int col = 0; col < total_cols; ++col) {
50 int c = 0;
51 for (size_t k = b.col_ptrs[col]; k < b.col_ptrs[col + 1]; ++k) {
52 c += static_cast<int>(a.col_ptrs[b.row_indices[k] + 1] - a.col_ptrs[b.row_indices[k]]);
53 }
54 cost[static_cast<size_t>(col)] = c;
55 total_cost += static_cast<size_t>(c);
56 }
57
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 1 times.
5 if (total_cost == 0) {
58 return ranges;
59 }
60 4 size_t per = total_cost / static_cast<size_t>(num_procs);
61 size_t cur = 0;
62 size_t acc = 0;
63 auto total_cols_sz = static_cast<size_t>(total_cols);
64
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 for (int proc = 1; proc < num_procs; ++proc) {
65 4 size_t target = static_cast<size_t>(proc) * per;
66
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 4 times.
10 while (cur < total_cols_sz && acc < target) {
67 6 acc += static_cast<size_t>(cost[cur]);
68 6 ++cur;
69 }
70 4 ranges[static_cast<size_t>(proc)] = static_cast<MpiU64>(cur);
71 }
72 return ranges;
73 }
74
75 12 void ProcessColumn(size_t gcol, const CCSMatrix &a, const CCSMatrix &b, std::vector<MpiU64> &out_rows,
76 std::vector<MpiU64> &out_cols, std::vector<double> &out_vals, double *accum, char *used,
77 size_t *active, int &active_count) {
78
2/2
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 12 times.
30 for (size_t k = b.col_ptrs[gcol]; k < b.col_ptrs[gcol + 1]; ++k) {
79 18 size_t row_b = b.row_indices[k];
80 18 double vb = b.values[k];
81
2/2
✓ Branch 0 taken 22 times.
✓ Branch 1 taken 18 times.
40 for (size_t zc = a.col_ptrs[row_b]; zc < a.col_ptrs[row_b + 1]; ++zc) {
82 22 size_t i = a.row_indices[zc];
83 22 double va = a.values[zc];
84
2/2
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 6 times.
22 if (used[i] == 0) {
85 16 used[i] = 1;
86 16 active[active_count++] = i;
87 16 accum[i] = va * vb;
88 } else {
89 6 accum[i] += va * vb;
90 }
91 }
92 }
93
2/2
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 12 times.
28 for (int idx = 0; idx < active_count; ++idx) {
94 16 size_t i = active[static_cast<size_t>(idx)];
95
1/2
✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
16 if (accum[i] != 0.0) {
96 16 out_rows.push_back(static_cast<MpiU64>(i));
97
2/2
✓ Branch 1 taken 1 times.
✓ Branch 2 taken 15 times.
16 out_cols.push_back(static_cast<MpiU64>(gcol));
98 out_vals.push_back(accum[i]);
99 }
100 16 used[i] = 0;
101 16 accum[i] = 0.0;
102 }
103 12 }
104
105 void ComputeLocalColumns(size_t start, size_t local_cols, const CCSMatrix &a, const CCSMatrix &b,
106 std::vector<MpiU64> &send_rows, std::vector<MpiU64> &send_cols,
107 std::vector<double> &send_vals) {
108 9 #pragma omp parallel default(none) shared(a, b, start, local_cols, send_rows, send_cols, send_vals)
109 {
110 std::vector<double> accum(a.rows, 0.0);
111 std::vector<char> used(static_cast<size_t>(a.rows), 0);
112 std::vector<size_t> active(static_cast<size_t>(a.rows));
113 std::vector<MpiU64> thr_rows;
114 std::vector<MpiU64> thr_cols;
115 std::vector<double> thr_vals;
116 int active_count = 0;
117
118 #pragma omp for schedule(guided, 32) nowait
119 for (size_t j = 0; j < local_cols; ++j) {
120 active_count = 0;
121 ProcessColumn(start + j, a, b, thr_rows, thr_cols, thr_vals, accum.data(), used.data(), active.data(),
122 active_count);
123 }
124 #pragma omp critical
125 {
126 send_rows.insert(send_rows.end(), thr_rows.begin(), thr_rows.end());
127 send_cols.insert(send_cols.end(), thr_cols.begin(), thr_cols.end());
128 send_vals.insert(send_vals.end(), thr_vals.begin(), thr_vals.end());
129 }
130 }
131 9 }
132
133 5 void BuildCcsOnRoot(OutType &c, int total, std::vector<MpiU64> &recv_rows, std::vector<MpiU64> &recv_cols,
134 std::vector<double> &recv_vals) {
135 5 c.nnz = static_cast<size_t>(total);
136 5 c.col_ptrs.assign(static_cast<size_t>(c.cols) + 1, 0);
137 5 c.row_indices.resize(static_cast<size_t>(total));
138 5 c.values.resize(static_cast<size_t>(total));
139
140
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 1 times.
5 if (total > 0) {
141 auto total_sz = static_cast<size_t>(total);
142 4 std::vector<size_t> idx(total_sz);
143
2/2
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 4 times.
20 for (size_t i = 0; i < total_sz; ++i) {
144 16 idx[i] = i;
145 }
146 std::ranges::sort(idx, [&](size_t a_idx, size_t b_idx) {
147
2/2
✓ Branch 0 taken 17 times.
✓ Branch 1 taken 8 times.
25 auto lc = recv_cols[a_idx];
148 25 auto rc = recv_cols[b_idx];
149
2/2
✓ Branch 0 taken 17 times.
✓ Branch 1 taken 8 times.
25 if (lc != rc) {
150 17 return lc < rc;
151 }
152 8 return recv_rows[a_idx] < recv_rows[b_idx];
153 });
154
2/2
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 4 times.
20 for (size_t i = 0; i < total_sz; ++i) {
155 16 size_t src = idx[i];
156 16 c.row_indices[i] = static_cast<size_t>(recv_rows[src]);
157 16 c.values[i] = recv_vals[src];
158 16 c.col_ptrs[recv_cols[src] + 1]++;
159 }
160
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 4 times.
14 for (size_t j = 0; j < static_cast<size_t>(c.cols); ++j) {
161 10 c.col_ptrs[j + 1] += c.col_ptrs[j];
162 }
163 }
164 5 }
165
166 10 void BroadcastResult(OutType &c, int rank, MPI_Datatype mpi_type) {
167 10 auto nnz_b = static_cast<MpiU64>(c.nnz);
168 10 auto cols_b = static_cast<MpiU64>(c.cols);
169 10 MPI_Bcast(&nnz_b, 1, mpi_type, 0, MPI_COMM_WORLD);
170 10 MPI_Bcast(&cols_b, 1, mpi_type, 0, MPI_COMM_WORLD);
171
172 10 auto nnz_i = static_cast<int>(nnz_b);
173 10 auto cols_i = static_cast<int>(cols_b);
174
175
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 if (rank != 0) {
176 5 c.nnz = static_cast<size_t>(nnz_b);
177 5 c.cols = cols_i;
178 5 c.col_ptrs.resize(static_cast<size_t>(cols_i) + 1);
179 5 c.row_indices.resize(static_cast<size_t>(nnz_i));
180 5 c.values.resize(static_cast<size_t>(nnz_i));
181 }
182
183 10 MPI_Bcast(c.col_ptrs.data(), cols_i + 1, mpi_type, 0, MPI_COMM_WORLD);
184 10 MPI_Bcast(c.row_indices.data(), nnz_i, mpi_type, 0, MPI_COMM_WORLD);
185 10 MPI_Bcast(c.values.data(), nnz_i, MPI_DOUBLE, 0, MPI_COMM_WORLD);
186 10 }
187
188 } // namespace
189
190 10 bool KapanovaSSparseMatrixMultCCSALL::RunImpl() {
191 const auto &a = std::get<0>(GetInput());
192 const auto &b = std::get<1>(GetInput());
193 auto &c = GetOutput();
194 10 c.rows = a.rows;
195 10 c.cols = b.cols;
196
197 10 int rank = 0;
198 10 int size = 1;
199 10 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
200 10 MPI_Comm_size(MPI_COMM_WORLD, &size);
201
202 10 auto r_sz = static_cast<size_t>(size);
203 10 std::vector<MpiU64> ranges(r_sz + 1, 0);
204
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 if (rank == 0) {
205
1/4
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
10 ranges = ComputeBalancedRanges(static_cast<int>(c.cols), size, a, b);
206 }
207
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 MPI_Bcast(ranges.data(), size + 1, k_mpi_u64, 0, MPI_COMM_WORLD);
208
209
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 1 times.
10 auto start = static_cast<size_t>(ranges[rank]);
210 10 auto local_cols = static_cast<size_t>(ranges[rank + 1]) - start;
211
212 10 std::vector<MpiU64> send_rows;
213 10 std::vector<MpiU64> send_cols;
214 10 std::vector<double> send_vals;
215
216
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 1 times.
10 if (local_cols > 0) {
217 ComputeLocalColumns(start, local_cols, a, b, send_rows, send_cols, send_vals);
218 }
219
220 10 int local_nnz = static_cast<int>(send_rows.size());
221
2/6
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 10 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
10 std::vector<int> counts(r_sz, 0);
222
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 MPI_Gather(&local_nnz, 1, MPI_INT, counts.data(), 1, MPI_INT, 0, MPI_COMM_WORLD);
223
224
1/4
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
10 std::vector<int> displs(r_sz, 0);
225 int total = 0;
226
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 if (rank == 0) {
227
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 5 times.
15 for (size_t i = 0; i < r_sz; ++i) {
228 10 displs[i] = total;
229 10 total += counts[i];
230 }
231 }
232
233
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 auto total_sz = static_cast<size_t>(total);
234 auto safe_sz = std::max(total_sz, static_cast<size_t>(1));
235
3/6
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 3 taken 10 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
15 std::vector<MpiU64> recv_rows(rank == 0 ? safe_sz : 0);
236
3/6
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 3 taken 10 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
15 std::vector<MpiU64> recv_cols(rank == 0 ? safe_sz : 0);
237
5/8
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 3 taken 10 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 8 times.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
15 std::vector<double> recv_vals(rank == 0 ? safe_sz : 0);
238
239
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2 times.
10 const MpiU64 *rp = send_rows.empty() ? nullptr : send_rows.data();
240
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2 times.
10 const MpiU64 *cp = send_cols.empty() ? nullptr : send_cols.data();
241
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2 times.
10 const double *vp = send_vals.empty() ? nullptr : send_vals.data();
242
243
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 MPI_Gatherv(rp, local_nnz, k_mpi_u64, recv_rows.data(), counts.data(), displs.data(), k_mpi_u64, 0, MPI_COMM_WORLD);
244
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 MPI_Gatherv(cp, local_nnz, k_mpi_u64, recv_cols.data(), counts.data(), displs.data(), k_mpi_u64, 0, MPI_COMM_WORLD);
245
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 MPI_Gatherv(vp, local_nnz, MPI_DOUBLE, recv_vals.data(), counts.data(), displs.data(), MPI_DOUBLE, 0, MPI_COMM_WORLD);
246
247
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 if (rank == 0) {
248
1/2
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
5 BuildCcsOnRoot(c, total, recv_rows, recv_cols, recv_vals);
249 }
250
251
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 BroadcastResult(c, rank, k_mpi_u64);
252
253 10 return true;
254 }
255
256 } // namespace kapanova_s_sparse_matrix_mult_ccs
257