GCC Code Coverage Report


Directory: ./
File: tasks/ermakov_a_spar_mat_mult/stl/src/ops_stl.cpp
Date: 2026-05-11 08:26:31
Exec Total Coverage
Lines: 115 116 99.1%
Functions: 13 13 100.0%
Branches: 81 124 65.3%

Line Branch Exec Source
1 #include "ermakov_a_spar_mat_mult/stl/include/ops_stl.hpp"
2
3 #include <algorithm>
4 #include <complex>
5 #include <cstddef>
6 #include <numeric>
7 #include <thread>
8 #include <vector>
9
10 #include "ermakov_a_spar_mat_mult/common/include/common.hpp"
11 #include "util/include/util.hpp"
12
13 namespace ermakov_a_spar_mat_mult {
14
15 namespace {
16
17 constexpr std::complex<double> kZero{0.0, 0.0};
18 constexpr std::size_t kMinWorkPerThread = 4096;
19
20 } // namespace
21
22
1/2
✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
32 ErmakovASparMatMultSTL::ErmakovASparMatMultSTL(const InType &in) {
23 SetTypeOfTask(GetStaticTypeOfTask());
24 GetInput() = in;
25 32 }
26
27 64 bool ErmakovASparMatMultSTL::ValidateMatrix(const MatrixCRS &m) {
28
2/4
✓ Branch 0 taken 64 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 64 times.
64 if (m.rows < 0 || m.cols < 0) {
29 return false;
30 }
31
32
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 64 times.
64 if (m.row_ptr.size() != static_cast<std::size_t>(m.rows) + 1) {
33 return false;
34 }
35
36
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 64 times.
64 if (m.values.size() != m.col_index.size()) {
37 return false;
38 }
39
40
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 64 times.
64 if (m.row_ptr.empty()) {
41 return false;
42 }
43
44 64 const int nnz = static_cast<int>(m.values.size());
45
2/4
✗ Branch 0 not taken.
✓ Branch 1 taken 64 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 64 times.
64 if (m.row_ptr.front() != 0 || m.row_ptr.back() != nnz) {
46 return false;
47 }
48
49
2/2
✓ Branch 0 taken 1008 times.
✓ Branch 1 taken 64 times.
1072 for (int i = 0; i < m.rows; ++i) {
50
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1008 times.
1008 if (m.row_ptr[i] > m.row_ptr[i + 1]) {
51 return false;
52 }
53 }
54
55
2/2
✓ Branch 0 taken 11384 times.
✓ Branch 1 taken 64 times.
11448 for (int k = 0; k < nnz; ++k) {
56
2/4
✗ Branch 0 not taken.
✓ Branch 1 taken 11384 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 11384 times.
11384 if (m.col_index[k] < 0 || m.col_index[k] >= m.cols) {
57 return false;
58 }
59 }
60
61 return true;
62 }
63
64 32 bool ErmakovASparMatMultSTL::ValidationImpl() {
65 32 const auto &a = GetInput().A;
66 32 const auto &b = GetInput().B;
67
68
2/4
✓ Branch 0 taken 32 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 32 times.
✗ Branch 3 not taken.
32 return a.cols == b.rows && ValidateMatrix(a) && ValidateMatrix(b);
69 }
70
71 32 bool ErmakovASparMatMultSTL::PreProcessingImpl() {
72 32 a_ = GetInput().A;
73 32 b_ = GetInput().B;
74
75 32 c_.rows = a_.rows;
76
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 32 times.
32 c_.cols = b_.cols;
77 c_.values.clear();
78 c_.col_index.clear();
79 32 c_.row_ptr.assign(static_cast<std::size_t>(c_.rows) + 1, 0);
80
81 32 return true;
82 }
83
84 32 int ErmakovASparMatMultSTL::ResolveThreadCount(int row_count, std::size_t total_work) {
85
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 24 times.
32 if (row_count <= 1 || total_work < kMinWorkPerThread) {
86 return 1;
87 }
88
89 8 const int hw_threads = std::max(1, ppc::util::GetNumThreads());
90
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
8 const std::size_t work_limited_threads = std::max<std::size_t>(1, total_work / kMinWorkPerThread);
91 16 return std::max(1, std::min({hw_threads, row_count, static_cast<int>(work_limited_threads)}));
92 }
93
94 32 std::vector<int> ErmakovASparMatMultSTL::BuildRowCosts(const MatrixCRS &a, const MatrixCRS &b) {
95 32 std::vector<int> row_costs(static_cast<std::size_t>(a.rows), 1);
96
97
2/2
✓ Branch 0 taken 504 times.
✓ Branch 1 taken 32 times.
536 for (int i = 0; i < a.rows; ++i) {
98 504 int cost = 0;
99
2/2
✓ Branch 0 taken 5632 times.
✓ Branch 1 taken 504 times.
6136 for (int ak = a.row_ptr[i]; ak < a.row_ptr[i + 1]; ++ak) {
100 5632 const int b_row = a.col_index[ak];
101 5632 cost += b.row_ptr[b_row + 1] - b.row_ptr[b_row];
102 }
103 504 row_costs[static_cast<std::size_t>(i)] = std::max(1, cost);
104 }
105
106 32 return row_costs;
107 }
108
109 6 std::vector<int> ErmakovASparMatMultSTL::BuildThreadBoundaries(const std::vector<int> &row_costs, int thread_count) {
110 const int safe_thread_count = std::max(1, thread_count);
111
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 std::vector<int> boundaries(static_cast<std::size_t>(safe_thread_count) + 1, 0);
112
1/2
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
6 boundaries[static_cast<std::size_t>(safe_thread_count)] = static_cast<int>(row_costs.size());
113
114
2/4
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 6 times.
✗ Branch 3 not taken.
6 if (safe_thread_count <= 1 || row_costs.empty()) {
115 return boundaries;
116 }
117
118 const std::size_t total_work = std::accumulate(row_costs.begin(), row_costs.end(), std::size_t{0});
119 6 const std::size_t target_chunk = std::max<std::size_t>(1, total_work / static_cast<std::size_t>(safe_thread_count));
120
121 std::size_t accumulated = 0;
122 std::size_t next_target = target_chunk;
123 int boundary_index = 1;
124 const auto row_count = row_costs.size();
125
126
2/2
✓ Branch 0 taken 112 times.
✓ Branch 1 taken 6 times.
118 for (std::size_t row = 0; row < row_count && boundary_index < safe_thread_count; ++row) {
127 112 accumulated += static_cast<std::size_t>(row_costs[row]);
128
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 102 times.
112 if (accumulated >= next_target) {
129 10 boundaries[static_cast<std::size_t>(boundary_index)] = static_cast<int>(row) + 1;
130 10 ++boundary_index;
131 10 next_target = target_chunk * static_cast<std::size_t>(boundary_index);
132 }
133 }
134
135
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 for (; boundary_index < safe_thread_count; ++boundary_index) {
136 boundaries[static_cast<std::size_t>(boundary_index)] = boundaries[static_cast<std::size_t>(boundary_index) - 1];
137 }
138
139 return boundaries;
140 }
141
142
1/2
✓ Branch 0 taken 42 times.
✗ Branch 1 not taken.
42 void ErmakovASparMatMultSTL::ResetWorkspace(Workspace &workspace, int cols) {
143
1/2
✓ Branch 0 taken 42 times.
✗ Branch 1 not taken.
42 if (workspace.accum.size() != static_cast<std::size_t>(cols)) {
144 42 workspace.accum.assign(static_cast<std::size_t>(cols), kZero);
145
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 42 times.
42 workspace.marks.assign(static_cast<std::size_t>(cols), -1);
146 workspace.touched_cols.clear();
147 42 workspace.touched_cols.reserve(256);
148 }
149 42 }
150
151
2/2
✓ Branch 0 taken 406 times.
✓ Branch 1 taken 98 times.
504 void ErmakovASparMatMultSTL::MultiplyRow(int row_index, Workspace &workspace, RowData &row_data) const {
152 workspace.touched_cols.clear();
153
154
2/2
✓ Branch 0 taken 5632 times.
✓ Branch 1 taken 504 times.
6136 for (int ak = a_.row_ptr[row_index]; ak < a_.row_ptr[row_index + 1]; ++ak) {
155 5632 const int b_row = a_.col_index[ak];
156 5632 const auto a_value = a_.values[ak];
157
158
2/2
✓ Branch 0 taken 108312 times.
✓ Branch 1 taken 5632 times.
113944 for (int bk = b_.row_ptr[b_row]; bk < b_.row_ptr[b_row + 1]; ++bk) {
159
2/2
✓ Branch 0 taken 8640 times.
✓ Branch 1 taken 99672 times.
108312 const int col = b_.col_index[bk];
160 108312 const auto product = a_value * b_.values[bk];
161
162
2/2
✓ Branch 0 taken 8640 times.
✓ Branch 1 taken 99672 times.
108312 if (workspace.marks[static_cast<std::size_t>(col)] != row_index) {
163
1/2
✓ Branch 0 taken 8640 times.
✗ Branch 1 not taken.
8640 workspace.marks[static_cast<std::size_t>(col)] = row_index;
164 8640 workspace.accum[static_cast<std::size_t>(col)] = product;
165
1/2
✓ Branch 0 taken 8640 times.
✗ Branch 1 not taken.
8640 workspace.touched_cols.push_back(col);
166 } else {
167 workspace.accum[static_cast<std::size_t>(col)] += product;
168 }
169 }
170 }
171
172 std::ranges::sort(workspace.touched_cols);
173
174 row_data.cols.clear();
175 row_data.vals.clear();
176 504 row_data.cols.reserve(workspace.touched_cols.size());
177 504 row_data.vals.reserve(workspace.touched_cols.size());
178
179
2/2
✓ Branch 0 taken 8640 times.
✓ Branch 1 taken 504 times.
9144 for (int col : workspace.touched_cols) {
180
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8640 times.
8640 const auto &value = workspace.accum[static_cast<std::size_t>(col)];
181 if (value != kZero) {
182 row_data.cols.push_back(col);
183 row_data.vals.push_back(value);
184 }
185 }
186 504 }
187
188 32 void ErmakovASparMatMultSTL::FinalizeResult(const std::vector<RowData> &rows_data) {
189 int total_nnz = 0;
190
191
2/2
✓ Branch 0 taken 504 times.
✓ Branch 1 taken 32 times.
536 for (int i = 0; i < c_.rows; ++i) {
192 504 c_.row_ptr[static_cast<std::size_t>(i)] = total_nnz;
193 504 total_nnz += static_cast<int>(rows_data[static_cast<std::size_t>(i)].vals.size());
194 }
195
196 32 c_.row_ptr[static_cast<std::size_t>(c_.rows)] = total_nnz;
197 32 c_.values.resize(static_cast<std::size_t>(total_nnz));
198 32 c_.col_index.resize(static_cast<std::size_t>(total_nnz));
199
200
2/2
✓ Branch 0 taken 504 times.
✓ Branch 1 taken 32 times.
536 for (int i = 0; i < c_.rows; ++i) {
201 504 const auto &row = rows_data[static_cast<std::size_t>(i)];
202 504 const auto offset = static_cast<std::size_t>(c_.row_ptr[static_cast<std::size_t>(i)]);
203
204 std::ranges::copy(row.cols, c_.col_index.begin() + static_cast<std::ptrdiff_t>(offset));
205 std::ranges::copy(row.vals, c_.values.begin() + static_cast<std::ptrdiff_t>(offset));
206 }
207 32 }
208
209 32 bool ErmakovASparMatMultSTL::RunImpl() {
210
1/2
✓ Branch 0 taken 32 times.
✗ Branch 1 not taken.
32 if (a_.cols != b_.rows) {
211 return false;
212 }
213
214 c_.values.clear();
215 c_.col_index.clear();
216 std::ranges::fill(c_.row_ptr, 0);
217
218
2/4
✓ Branch 0 taken 32 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 32 times.
32 if (a_.rows == 0 || b_.cols == 0) {
219 return true;
220 }
221
222 32 const std::vector<int> row_costs = BuildRowCosts(a_, b_);
223 const std::size_t total_work = std::accumulate(row_costs.begin(), row_costs.end(), std::size_t{0});
224
1/2
✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
32 const int thread_count = ResolveThreadCount(a_.rows, total_work);
225
226
1/4
✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
32 std::vector<RowData> rows_data(static_cast<std::size_t>(a_.rows));
227
228
2/2
✓ Branch 0 taken 26 times.
✓ Branch 1 taken 6 times.
32 if (thread_count == 1) {
229 26 Workspace workspace;
230
1/2
✓ Branch 1 taken 26 times.
✗ Branch 2 not taken.
26 ResetWorkspace(workspace, b_.cols);
231
2/2
✓ Branch 0 taken 324 times.
✓ Branch 1 taken 26 times.
350 for (int row = 0; row < a_.rows; ++row) {
232
1/2
✓ Branch 1 taken 324 times.
✗ Branch 2 not taken.
324 MultiplyRow(row, workspace, rows_data[static_cast<std::size_t>(row)]);
233 }
234
1/2
✓ Branch 1 taken 26 times.
✗ Branch 2 not taken.
26 FinalizeResult(rows_data);
235 return true;
236 26 }
237
238
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 const std::vector<int> boundaries = BuildThreadBoundaries(row_costs, thread_count);
239 6 std::vector<std::thread> workers;
240
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 workers.reserve(static_cast<std::size_t>(thread_count));
241
242
2/2
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 6 times.
22 for (int thread_id = 0; thread_id < thread_count; ++thread_id) {
243
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 workers.emplace_back([&, thread_id] {
244 16 Workspace workspace;
245
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 ResetWorkspace(workspace, b_.cols);
246
247 16 const int row_begin = boundaries[static_cast<std::size_t>(thread_id)];
248 16 const int row_end = boundaries[static_cast<std::size_t>(thread_id) + 1];
249
250
2/2
✓ Branch 0 taken 180 times.
✓ Branch 1 taken 16 times.
196 for (int row = row_begin; row < row_end; ++row) {
251
1/2
✓ Branch 1 taken 180 times.
✗ Branch 2 not taken.
180 MultiplyRow(row, workspace, rows_data[static_cast<std::size_t>(row)]);
252 }
253 16 });
254 }
255
256
2/2
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 6 times.
22 for (auto &worker : workers) {
257
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 worker.join();
258 }
259
260
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 FinalizeResult(rows_data);
261 return true;
262 38 }
263
264 32 bool ErmakovASparMatMultSTL::PostProcessingImpl() {
265 32 GetOutput() = c_;
266 32 return true;
267 }
268
269 } // namespace ermakov_a_spar_mat_mult
270