GCC Code Coverage Report


Directory: ./
File: tasks/kurpiakov_a_sp_comp_mat_mul/all/src/ops_all.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 117 117 100.0%
Functions: 9 9 100.0%
Branches: 95 162 58.6%

Line Branch Exec Source
1 #include "kurpiakov_a_sp_comp_mat_mul/all/include/ops_all.hpp"
2
3 #include <mpi.h>
4
5 #include <algorithm>
6 #include <atomic>
7 #include <cstddef>
8 #include <thread>
9 #include <utility>
10 #include <vector>
11
12 #include "kurpiakov_a_sp_comp_mat_mul/common/include/common.hpp"
13 #include "util/include/util.hpp"
14
15 namespace kurpiakov_a_sp_comp_mat_mul {
16
17 namespace {
18
19 40 bool ValidateCSR(const SparseMatrix &m) {
20
2/4
✓ Branch 0 taken 40 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 40 times.
40 if (m.rows <= 0 || m.cols <= 0) {
21 return false;
22 }
23
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 40 times.
40 if (static_cast<int>(m.row_ptr.size()) != m.rows + 1) {
24 return false;
25 }
26
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 40 times.
40 if (m.row_ptr[0] != 0) {
27 return false;
28 }
29
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 40 times.
40 if (std::cmp_not_equal(m.values.size(), m.row_ptr[m.rows])) {
30 return false;
31 }
32
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 40 times.
40 if (m.col_indices.size() != m.values.size()) {
33 return false;
34 }
35
2/2
✓ Branch 0 taken 78 times.
✓ Branch 1 taken 40 times.
118 for (int i = 0; i < m.rows; ++i) {
36
2/2
✓ Branch 0 taken 80 times.
✓ Branch 1 taken 78 times.
158 for (int j = m.row_ptr[i]; j < m.row_ptr[i + 1]; ++j) {
37
2/4
✗ Branch 0 not taken.
✓ Branch 1 taken 80 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 80 times.
80 if (m.col_indices[j] < 0 || m.col_indices[j] >= m.cols) {
38 return false;
39 }
40 }
41 }
42 return true;
43 }
44
45 std::pair<int, int> GetRowRange(int total_rows, int rank, int size) {
46 20 const int begin = (total_rows * rank) / size;
47 20 const int end = (total_rows * (rank + 1)) / size;
48 return {begin, end};
49 }
50
51
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 18 times.
18 void MultiplySingleRow(const SparseMatrix &a, const SparseMatrix &b, int row_idx, std::vector<ComplexD> &row_acc,
52 std::vector<char> &row_used, std::vector<int> &used_cols, std::vector<ComplexD> &out_values,
53 std::vector<int> &out_cols) {
54 used_cols.clear();
55
56
2/2
✓ Branch 0 taken 20 times.
✓ Branch 1 taken 18 times.
38 for (int ja = a.row_ptr[row_idx]; ja < a.row_ptr[row_idx + 1]; ++ja) {
57 20 const int ka = a.col_indices[ja];
58 const ComplexD &a_val = a.values[ja];
59
60
2/2
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 20 times.
44 for (int jb = b.row_ptr[ka]; jb < b.row_ptr[ka + 1]; ++jb) {
61
2/2
✓ Branch 0 taken 17 times.
✓ Branch 1 taken 7 times.
24 const int cb = b.col_indices[jb];
62 const ComplexD &b_val = b.values[jb];
63
64
2/2
✓ Branch 0 taken 17 times.
✓ Branch 1 taken 7 times.
24 if (row_used[cb] == 0) {
65
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 17 times.
17 row_used[cb] = 1;
66
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 17 times.
17 row_acc[cb] = ComplexD();
67 used_cols.push_back(cb);
68 }
69
70 row_acc[cb] += a_val * b_val;
71 }
72 }
73
74 std::ranges::sort(used_cols);
75
76 out_values.clear();
77 out_cols.clear();
78 18 out_values.reserve(used_cols.size());
79 18 out_cols.reserve(used_cols.size());
80
81
2/2
✓ Branch 0 taken 17 times.
✓ Branch 1 taken 18 times.
35 for (int col : used_cols) {
82
1/2
✓ Branch 0 taken 17 times.
✗ Branch 1 not taken.
17 out_values.push_back(row_acc[col]);
83 out_cols.push_back(col);
84 17 row_used[col] = 0;
85 }
86 18 }
87
88 20 void ComputeLocalRowsThreads(const SparseMatrix &a, const SparseMatrix &b, int row_begin, int row_end,
89 std::vector<std::vector<ComplexD>> &local_values,
90 std::vector<std::vector<int>> &local_cols) {
91 20 const int local_rows = row_end - row_begin;
92
2/2
✓ Branch 1 taken 19 times.
✓ Branch 2 taken 1 times.
20 const int requested_threads = ppc::util::GetNumThreads();
93
2/2
✓ Branch 0 taken 19 times.
✓ Branch 1 taken 1 times.
20 const int max_threads = std::max(1, local_rows);
94
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 19 times.
20 const int num_threads = std::max(1, std::min(requested_threads, max_threads));
95
96
1/2
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
20 std::atomic<int> next_row(row_begin);
97 20 std::vector<std::thread> workers;
98
1/2
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
20 workers.reserve(num_threads);
99
100
2/2
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 20 times.
41 for (int tid = 0; tid < num_threads; ++tid) {
101
1/2
✓ Branch 1 taken 21 times.
✗ Branch 2 not taken.
21 workers.emplace_back([&]() {
102 21 std::vector<ComplexD> row_acc(b.cols);
103
1/4
✓ Branch 1 taken 21 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
21 std::vector<char> row_used(b.cols, 0);
104 21 std::vector<int> used_cols;
105
106 while (true) {
107
2/2
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 21 times.
39 const int row = next_row.fetch_add(1, std::memory_order_relaxed);
108
2/2
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 21 times.
39 if (row >= row_end) {
109 break;
110 }
111
112 18 const int local_idx = row - row_begin;
113
1/2
✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
18 MultiplySingleRow(a, b, row, row_acc, row_used, used_cols, local_values[local_idx], local_cols[local_idx]);
114 }
115 21 });
116 }
117
118
2/2
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 20 times.
41 for (auto &worker : workers) {
119
1/2
✓ Branch 1 taken 21 times.
✗ Branch 2 not taken.
21 worker.join();
120 }
121 20 }
122
123 } // namespace
124
125
1/2
✓ Branch 2 taken 20 times.
✗ Branch 3 not taken.
20 KurpiakovACRSMatMulALL::KurpiakovACRSMatMulALL(const InType &in) {
126 SetTypeOfTask(GetStaticTypeOfTask());
127 GetInput() = in;
128
1/2
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
20 GetOutput() = SparseMatrix();
129 20 }
130
131
1/2
✓ Branch 0 taken 20 times.
✗ Branch 1 not taken.
20 bool KurpiakovACRSMatMulALL::ValidationImpl() {
132 const auto &[a, b] = GetInput();
133
134
2/4
✓ Branch 0 taken 20 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 20 times.
✗ Branch 3 not taken.
20 if (!ValidateCSR(a) || !ValidateCSR(b)) {
135 return false;
136 }
137
138 20 return a.cols == b.rows;
139 }
140
141 20 bool KurpiakovACRSMatMulALL::PreProcessingImpl() {
142 20 return true;
143 }
144
145 20 bool KurpiakovACRSMatMulALL::RunImpl() {
146 const auto &[a, b] = GetInput();
147 20 const int rows = a.rows;
148 20 const int cols = b.cols;
149
150 20 int rank = 0;
151 20 int world_size = 1;
152 20 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
153 20 MPI_Comm_size(MPI_COMM_WORLD, &world_size);
154
155 20 const auto [row_begin, row_end] = GetRowRange(rows, rank, world_size);
156 20 const int local_rows = row_end - row_begin;
157
158 20 std::vector<std::vector<ComplexD>> local_values(local_rows);
159
1/2
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
20 std::vector<std::vector<int>> local_cols(local_rows);
160
161
1/2
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
20 ComputeLocalRowsThreads(a, b, row_begin, row_end, local_values, local_cols);
162
163
1/2
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
20 std::vector<int> local_row_nnz(rows, 0);
164
2/2
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 20 times.
38 for (int local_i = 0; local_i < local_rows; ++local_i) {
165 18 local_row_nnz[row_begin + local_i] = static_cast<int>(local_values[local_i].size());
166 }
167
168
2/6
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 20 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
20 std::vector<int> global_row_nnz(rows, 0);
169
1/2
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
20 MPI_Allreduce(local_row_nnz.data(), global_row_nnz.data(), rows, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
170
171
1/4
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
20 std::vector<int> global_row_ptr(rows + 1, 0);
172
2/2
✓ Branch 0 taken 36 times.
✓ Branch 1 taken 20 times.
56 for (int i = 0; i < rows; ++i) {
173 36 global_row_ptr[i + 1] = global_row_ptr[i] + global_row_nnz[i];
174 }
175
176 20 const int total_nnz = global_row_ptr[rows];
177 20 const int local_nnz = global_row_ptr[row_end] - global_row_ptr[row_begin];
178
179
1/4
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
20 std::vector<double> local_re(local_nnz);
180
1/4
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
20 std::vector<double> local_im(local_nnz);
181
1/4
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
20 std::vector<int> local_col_indices(local_nnz);
182
183 int pos = 0;
184
2/2
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 20 times.
38 for (int local_i = 0; local_i < local_rows; ++local_i) {
185 18 const auto &vals = local_values[local_i];
186 const auto &cols_row = local_cols[local_i];
187
188
2/2
✓ Branch 0 taken 17 times.
✓ Branch 1 taken 18 times.
35 for (size_t j = 0; j < vals.size(); ++j) {
189 17 local_re[pos] = vals[j].re;
190 17 local_im[pos] = vals[j].im;
191 17 local_col_indices[pos] = cols_row[j];
192 17 ++pos;
193 }
194 }
195
196
2/6
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 20 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
20 std::vector<int> recv_counts(world_size, 0);
197
1/2
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
20 MPI_Allgather(&local_nnz, 1, MPI_INT, recv_counts.data(), 1, MPI_INT, MPI_COMM_WORLD);
198
199
1/4
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
20 std::vector<int> recv_displs(world_size, 0);
200
2/2
✓ Branch 0 taken 20 times.
✓ Branch 1 taken 20 times.
40 for (int rec = 1; rec < world_size; ++rec) {
201 20 recv_displs[rec] = recv_displs[rec - 1] + recv_counts[rec - 1];
202 }
203
204 20 std::vector<double> global_re;
205 20 std::vector<double> global_im;
206 20 std::vector<int> global_col_indices;
207
208
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
20 if (rank == 0) {
209
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 global_re.resize(total_nnz);
210
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 global_im.resize(total_nnz);
211
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 global_col_indices.resize(total_nnz);
212 }
213
214
1/2
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
20 MPI_Gatherv(local_re.data(), local_nnz, MPI_DOUBLE, global_re.data(), recv_counts.data(), recv_displs.data(),
215 MPI_DOUBLE, 0, MPI_COMM_WORLD);
216
1/2
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
20 MPI_Gatherv(local_im.data(), local_nnz, MPI_DOUBLE, global_im.data(), recv_counts.data(), recv_displs.data(),
217 MPI_DOUBLE, 0, MPI_COMM_WORLD);
218
1/2
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
20 MPI_Gatherv(local_col_indices.data(), local_nnz, MPI_INT, global_col_indices.data(), recv_counts.data(),
219 recv_displs.data(), MPI_INT, 0, MPI_COMM_WORLD);
220
221
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
20 if (rank != 0) {
222
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 global_re.resize(total_nnz);
223
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 global_im.resize(total_nnz);
224
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 global_col_indices.resize(total_nnz);
225 }
226
227
1/2
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
20 MPI_Bcast(global_re.data(), total_nnz, MPI_DOUBLE, 0, MPI_COMM_WORLD);
228
1/2
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
20 MPI_Bcast(global_im.data(), total_nnz, MPI_DOUBLE, 0, MPI_COMM_WORLD);
229
1/2
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
20 MPI_Bcast(global_col_indices.data(), total_nnz, MPI_INT, 0, MPI_COMM_WORLD);
230
231
1/2
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
20 SparseMatrix result(rows, cols);
232 result.row_ptr = std::move(global_row_ptr);
233 result.col_indices = std::move(global_col_indices);
234
1/2
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
20 result.values.resize(static_cast<size_t>(total_nnz));
235
236
2/2
✓ Branch 0 taken 34 times.
✓ Branch 1 taken 20 times.
54 for (int i = 0; i < total_nnz; ++i) {
237 34 result.values[static_cast<size_t>(i)] = ComplexD(global_re[i], global_im[i]);
238 }
239
240 20 GetOutput() = std::move(result);
241 20 return true;
242 40 }
243
244 20 bool KurpiakovACRSMatMulALL::PostProcessingImpl() {
245 20 return true;
246 }
247
248 } // namespace kurpiakov_a_sp_comp_mat_mul
249