GCC Code Coverage Report


Directory: ./
File: tasks/sabutay_sparse_complex_ccs_mult_all/all/src/ops_all.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 123 171 71.9%
Functions: 13 15 86.7%
Branches: 79 160 49.4%

Line Branch Exec Source
1 #include "sabutay_sparse_complex_ccs_mult_all/all/include/ops_all.hpp"
2
3 #include <mpi.h>
4 #include <omp.h>
5
6 #include <algorithm>
7 #include <atomic>
8 #include <cmath>
9 #include <complex>
10 #include <cstddef>
11 #include <thread>
12 #include <utility>
13 #include <vector>
14
15 #include "oneapi/tbb/blocked_range.h"
16 #include "oneapi/tbb/parallel_for.h"
17 #include "sabutay_sparse_complex_ccs_mult_all/common/include/common.hpp"
18 #include "task/include/task.hpp"
19 #include "util/include/util.hpp"
20
21 namespace sabutay_sparse_complex_ccs_mult_all {
22 namespace {
23
24 constexpr double kDropMagnitude = (1e-14);
25
26 12 auto IsValidStructure(const CCS &matrix) -> bool {
27
2/4
✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 12 times.
12 if (matrix.row_count < 0 || matrix.col_count < 0) {
28 return false;
29 }
30
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.
12 if (matrix.col_start.size() != (static_cast<std::size_t>(matrix.col_count) + 1U)) {
31 return false;
32 }
33
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.
12 if (matrix.row_index.size() != matrix.nz.size()) {
34 return false;
35 }
36
2/4
✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 12 times.
12 if (matrix.col_start.empty() || matrix.col_start.front() != 0) {
37 return false;
38 }
39
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.
12 if (!std::cmp_equal(matrix.col_start.back(), matrix.nz.size())) {
40 return false;
41 }
42
2/2
✓ Branch 0 taken 26 times.
✓ Branch 1 taken 12 times.
38 for (int j = 0; j < matrix.col_count; ++j) {
43
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 26 times.
26 const auto col_idx = static_cast<std::size_t>(j);
44
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 26 times.
26 if (matrix.col_start[col_idx] > matrix.col_start[col_idx + 1U]) {
45 return false;
46 }
47 }
48
2/4
✗ Branch 0 not taken.
✓ Branch 1 taken 36 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 36 times.
36 return std::ranges::all_of(matrix.row_index, [&matrix](int row) { return row >= 0 && row < matrix.row_count; });
49 }
50
51 56 void BuildColumnFromRight(const CCS &left, const CCS &right, int column_index,
52 std::vector<std::pair<int, std::complex<double>>> &buffer) {
53
2/2
✓ Branch 0 taken 23 times.
✓ Branch 1 taken 33 times.
56 const int right_begin = right.col_start[static_cast<std::size_t>(column_index)];
54
2/2
✓ Branch 0 taken 23 times.
✓ Branch 1 taken 33 times.
56 const int right_end = right.col_start[static_cast<std::size_t>(column_index) + 1U];
55 buffer.clear();
56
57
2/2
✓ Branch 0 taken 80 times.
✓ Branch 1 taken 56 times.
136 for (int right_pos = right_begin; right_pos < right_end; ++right_pos) {
58 80 const int inner = right.row_index[static_cast<std::size_t>(right_pos)];
59 80 const std::complex<double> scalar = right.nz[static_cast<std::size_t>(right_pos)];
60 80 const int left_begin = left.col_start[static_cast<std::size_t>(inner)];
61 80 const int left_end = left.col_start[static_cast<std::size_t>(inner) + 1U];
62
2/2
✓ Branch 0 taken 104 times.
✓ Branch 1 taken 80 times.
184 for (int left_pos = left_begin; left_pos < left_end; ++left_pos) {
63 104 const int row = left.row_index[static_cast<std::size_t>(left_pos)];
64 104 buffer.emplace_back(row, left.nz[static_cast<std::size_t>(left_pos)] * scalar);
65 }
66 }
67 56 }
68
69 56 void SortBufferByRow(std::vector<std::pair<int, std::complex<double>>> &buffer) {
70 std::ranges::sort(buffer, {}, &std::pair<int, std::complex<double>>::first);
71 56 }
72
73
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 35 times.
35 void CoalesceSortedPairs(const std::vector<std::pair<int, std::complex<double>>> &row_sorted, CCS &out,
74 double drop_magnitude) {
75
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 35 times.
35 if (row_sorted.empty()) {
76 return;
77 }
78 35 int active_row = row_sorted[0].first;
79 35 std::complex<double> running = row_sorted[0].second;
80
2/2
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 35 times.
65 for (std::size_t idx = 1; idx < row_sorted.size(); ++idx) {
81 30 const int r = row_sorted[idx].first;
82
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 20 times.
30 if (r == active_row) {
83 running += row_sorted[idx].second;
84 } else {
85
1/2
✓ Branch 0 taken 20 times.
✗ Branch 1 not taken.
20 if (std::abs(running) > drop_magnitude) {
86
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
20 out.row_index.push_back(active_row);
87
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
20 out.nz.push_back(running);
88 }
89 20 active_row = r;
90 20 running = row_sorted[idx].second;
91 }
92 }
93
1/2
✓ Branch 0 taken 35 times.
✗ Branch 1 not taken.
35 if (std::abs(running) > drop_magnitude) {
94
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 25 times.
35 out.row_index.push_back(active_row);
95
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 25 times.
35 out.nz.push_back(running);
96 }
97 }
98
99
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 21 times.
21 void CoalesceBufferToColumn(const std::vector<std::pair<int, std::complex<double>>> &buffer,
100 std::vector<std::pair<int, std::complex<double>>> &column, double drop_magnitude) {
101
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 21 times.
21 if (buffer.empty()) {
102 column.clear();
103 return;
104 }
105
106 column.clear();
107 21 column.reserve(buffer.size());
108 21 int active_row = buffer[0].first;
109 21 std::complex<double> running = buffer[0].second;
110
2/2
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 21 times.
39 for (std::size_t idx = 1; idx < buffer.size(); ++idx) {
111 const auto &entry = buffer[idx];
112
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 12 times.
18 if (entry.first == active_row) {
113 running += entry.second;
114 } else {
115
1/2
✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
12 if (std::abs(running) > drop_magnitude) {
116 12 column.emplace_back(active_row, running);
117 }
118 12 active_row = entry.first;
119 12 running = entry.second;
120 }
121 }
122
1/2
✓ Branch 0 taken 21 times.
✗ Branch 1 not taken.
21 if (std::abs(running) > drop_magnitude) {
123 21 column.emplace_back(active_row, running);
124 }
125 }
126
127 15 void SpmmAbc(const CCS &left, const CCS &right, CCS &out, double drop_magnitude) {
128 15 out.row_count = left.row_count;
129 15 out.col_count = right.col_count;
130
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 15 times.
15 out.col_start.assign(static_cast<std::size_t>(out.col_count) + 1U, 0);
131 out.row_index.clear();
132 out.nz.clear();
133
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 15 times.
15 if (out.col_count == 0) {
134 return;
135 }
136
137 15 std::vector<std::pair<int, std::complex<double>>> buffer;
138
1/2
✓ Branch 1 taken 15 times.
✗ Branch 2 not taken.
15 buffer.reserve(128U);
139
140
2/2
✓ Branch 0 taken 35 times.
✓ Branch 1 taken 15 times.
50 for (int j = 0; j < right.col_count; ++j) {
141
1/2
✓ Branch 1 taken 35 times.
✗ Branch 2 not taken.
35 BuildColumnFromRight(left, right, j, buffer);
142
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 35 times.
35 if (buffer.empty()) {
143 out.col_start[static_cast<std::size_t>(j) + 1U] = static_cast<int>(out.nz.size());
144 continue;
145 }
146 35 SortBufferByRow(buffer);
147
1/2
✓ Branch 1 taken 35 times.
✗ Branch 2 not taken.
35 CoalesceSortedPairs(buffer, out, drop_magnitude);
148 35 out.col_start[static_cast<std::size_t>(j) + 1U] = static_cast<int>(out.nz.size());
149 }
150 }
151
152 9 void BuildProductMatrixOmp(const CCS &left, const CCS &right, CCS &out) {
153 9 out.row_count = left.row_count;
154 9 out.col_count = right.col_count;
155
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 9 times.
9 out.col_start.assign(static_cast<std::size_t>(out.col_count) + 1U, 0);
156 out.row_index.clear();
157 out.nz.clear();
158
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 9 times.
9 if (out.col_count == 0) {
159 return;
160 }
161
162 9 std::vector<std::vector<std::pair<int, std::complex<double>>>> columns(static_cast<std::size_t>(out.col_count));
163 9 #pragma omp parallel default(none) shared(left, right, columns)
164 {
165 std::vector<std::pair<int, std::complex<double>>> buffer;
166 buffer.reserve(128U);
167
168 #pragma omp for schedule(static)
169 for (int j = 0; j < right.col_count; ++j) {
170 auto &column = columns[static_cast<std::size_t>(j)];
171 BuildColumnFromRight(left, right, j, buffer);
172 if (!buffer.empty()) {
173 SortBufferByRow(buffer);
174 }
175 CoalesceBufferToColumn(buffer, column, kDropMagnitude);
176 }
177 }
178
179
2/2
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 9 times.
30 for (int j = 0; j < out.col_count; ++j) {
180 21 const int col_size = static_cast<int>(columns[static_cast<std::size_t>(j)].size());
181 21 out.col_start[static_cast<std::size_t>(j) + 1U] = out.col_start[static_cast<std::size_t>(j)] + col_size;
182 }
183
184 9 const auto nnz = static_cast<std::size_t>(out.col_start.back());
185
1/2
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
9 out.row_index.resize(nnz);
186
1/2
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
9 out.nz.resize(nnz);
187
188
2/2
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 9 times.
30 for (int j = 0; j < out.col_count; ++j) {
189 21 const int start = out.col_start[static_cast<std::size_t>(j)];
190 const auto &column = columns[static_cast<std::size_t>(j)];
191
2/2
✓ Branch 0 taken 33 times.
✓ Branch 1 taken 21 times.
54 for (std::size_t k = 0; k < column.size(); ++k) {
192 33 const int dst = start + static_cast<int>(k);
193 33 out.row_index[static_cast<std::size_t>(dst)] = column[k].first;
194 33 out.nz[static_cast<std::size_t>(dst)] = column[k].second;
195 }
196 }
197 9 }
198
199 void BuildProductMatrixTbb(const CCS &left, const CCS &right, CCS &out) {
200 out.row_count = left.row_count;
201 out.col_count = right.col_count;
202 out.col_start.assign(static_cast<std::size_t>(out.col_count) + 1U, 0);
203 out.row_index.clear();
204 out.nz.clear();
205 if (out.col_count == 0) {
206 return;
207 }
208
209 std::vector<std::vector<int>> local_row_index(static_cast<std::size_t>(right.col_count));
210 std::vector<std::vector<std::complex<double>>> local_nz(static_cast<std::size_t>(right.col_count));
211 std::vector<int> local_sizes(static_cast<std::size_t>(right.col_count), 0);
212
213 oneapi::tbb::parallel_for(oneapi::tbb::blocked_range<int>(0, right.col_count),
214 [&](const oneapi::tbb::blocked_range<int> &range) {
215 std::vector<std::pair<int, std::complex<double>>> buffer;
216 buffer.reserve(128U);
217 for (int jcol = range.begin(); jcol < range.end(); ++jcol) {
218 BuildColumnFromRight(left, right, jcol, buffer);
219 if (!buffer.empty()) {
220 SortBufferByRow(buffer);
221 CCS tmp;
222 CoalesceSortedPairs(buffer, tmp, kDropMagnitude);
223 local_row_index[static_cast<std::size_t>(jcol)] = std::move(tmp.row_index);
224 local_nz[static_cast<std::size_t>(jcol)] = std::move(tmp.nz);
225 } else {
226 local_row_index[static_cast<std::size_t>(jcol)].clear();
227 local_nz[static_cast<std::size_t>(jcol)].clear();
228 }
229 local_sizes[static_cast<std::size_t>(jcol)] = static_cast<int>(local_nz[static_cast<std::size_t>(jcol)].size());
230 }
231 });
232
233 for (int jcol = 0; jcol < right.col_count; ++jcol) {
234 out.col_start[static_cast<std::size_t>(jcol) + 1U] =
235 out.col_start[static_cast<std::size_t>(jcol)] + local_sizes[static_cast<std::size_t>(jcol)];
236 }
237
238 out.row_index.reserve(static_cast<std::size_t>(out.col_start[static_cast<std::size_t>(out.col_count)]));
239 out.nz.reserve(static_cast<std::size_t>(out.col_start[static_cast<std::size_t>(out.col_count)]));
240
241 for (int jcol = 0; jcol < right.col_count; ++jcol) {
242 const auto idx = static_cast<std::size_t>(jcol);
243 out.row_index.insert(out.row_index.end(), local_row_index[idx].begin(), local_row_index[idx].end());
244 out.nz.insert(out.nz.end(), local_nz[idx].begin(), local_nz[idx].end());
245 }
246 }
247 } // namespace
248
249
1/2
✓ Branch 2 taken 6 times.
✗ Branch 3 not taken.
6 SabutaySparseComplexCcsMultAll::SabutaySparseComplexCcsMultAll(const InType &in) {
250 SetTypeOfTask(GetStaticTypeOfTask());
251 GetInput() = in;
252
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 GetOutput() = CCS();
253 6 }
254
255 24 void SabutaySparseComplexCcsMultAll::BuildProductMatrix(const CCS &left, const CCS &right, CCS &out,
256 ppc::task::TypeOfTask backend) {
257
2/4
✓ Branch 0 taken 15 times.
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
24 switch (backend) {
258 15 case ppc::task::TypeOfTask::kSEQ:
259 case ppc::task::TypeOfTask::kSTL:
260 15 SpmmAbc(left, right, out, kDropMagnitude);
261 15 return;
262 9 case ppc::task::TypeOfTask::kOMP:
263 9 BuildProductMatrixOmp(left, right, out);
264 9 return;
265 case ppc::task::TypeOfTask::kTBB:
266 BuildProductMatrixTbb(left, right, out);
267 return;
268 case ppc::task::TypeOfTask::kALL:
269 case ppc::task::TypeOfTask::kMPI:
270 case ppc::task::TypeOfTask::kUnknown:
271 default:
272 SpmmAbc(left, right, out, kDropMagnitude);
273 return;
274 }
275 }
276
277
1/2
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
6 bool SabutaySparseComplexCcsMultAll::ValidationImpl() {
278 const CCS &left = std::get<0>(GetInput());
279 const CCS &right = std::get<1>(GetInput());
280
2/4
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 6 times.
✗ Branch 3 not taken.
6 return left.col_count == right.row_count && IsValidStructure(left) && IsValidStructure(right);
281 }
282
283 6 bool SabutaySparseComplexCcsMultAll::PreProcessingImpl() {
284 6 return true;
285 }
286
287 6 bool SabutaySparseComplexCcsMultAll::RunImpl() {
288 const CCS &left = std::get<0>(GetInput());
289 const CCS &right = std::get<1>(GetInput());
290 6 const int num_threads = ppc::util::GetNumThreads();
291 6 int rank = 0;
292 6 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
293
294 ppc::task::TypeOfTask backend = ppc::task::TypeOfTask::kSEQ;
295
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 if (rank % 4 == 1) {
296 backend = ppc::task::TypeOfTask::kOMP;
297
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 } else if (rank % 4 == 2) {
298 backend = ppc::task::TypeOfTask::kSTL;
299
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 } else if (rank % 4 == 3) {
300 backend = ppc::task::TypeOfTask::kTBB;
301 }
302 6 BuildProductMatrix(left, right, GetOutput(), backend);
303
304
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 if (rank == 0) {
305 3 std::atomic<int> counter(0);
306 3 #pragma omp parallel default(none) shared(counter) num_threads(ppc::util::GetNumThreads())
307 counter++;
308 }
309
310 {
311 6 std::vector<std::thread> threads(num_threads);
312 6 std::atomic<int> counter(0);
313
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 6 times.
18 for (int i = 0; i < num_threads; i++) {
314
2/4
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 12 times.
12 threads[i] = std::thread([&]() { counter++; });
315
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 threads[i].join();
316 }
317 6 }
318
319 {
320 6 std::atomic<int> counter(0);
321 18 tbb::parallel_for(0, ppc::util::GetNumThreads(), [&](int /*i*/) { counter++; });
322 }
323
324 6 MPI_Barrier(MPI_COMM_WORLD);
325 6 return true;
326 }
327
328 6 bool SabutaySparseComplexCcsMultAll::PostProcessingImpl() {
329 6 return true;
330 }
331
332 } // namespace sabutay_sparse_complex_ccs_mult_all
333