GCC Code Coverage Report


Directory: ./
File: tasks/guseva_crs/all/include/multiplier_all.hpp
Date: 2026-05-11 08:26:31
Exec Total Coverage
Lines: 144 172 83.7%
Functions: 8 9 88.9%
Branches: 78 142 54.9%

Line Branch Exec Source
1 #pragma once
2 #include <mpi.h>
3 #include <omp.h>
4
5 #include <algorithm>
6 #include <cmath>
7 #include <cstddef>
8 #include <cstring>
9 #include <stdexcept>
10 #include <utility>
11 #include <vector>
12
13 #include "guseva_crs/common/include/common.hpp"
14 #include "guseva_crs/common/include/multiplier.hpp"
15
16 namespace guseva_crs {
17
18 12 class MultiplierAll : public Multiplier {
19 static void PerformCalculation(std::size_t ind3, std::size_t ind4, const CRS &a, const CRS &bt, double &sum,
20 const std::vector<int> &temp) {
21
2/2
✓ Branch 0 taken 5071 times.
✓ Branch 1 taken 2009 times.
7080 for (std::size_t k = ind3; k < ind4; k++) {
22 5071 std::size_t bcol = bt.cols[k];
23 5071 int aind = temp[bcol];
24
25
2/2
✓ Branch 0 taken 705 times.
✓ Branch 1 taken 4366 times.
5071 if (aind != -1) {
26 705 sum += a.values[aind] * bt.values[k];
27 }
28 }
29 }
30
31 97 static void ComputeLocalRow(std::size_t global_i, std::size_t n, const CRS &a, const CRS &bt,
32 std::vector<std::size_t> &columns, std::vector<double> &values, std::size_t &row_nnz) {
33 97 std::vector<int> temp(n, -1);
34
35 97 std::size_t ind1 = a.row_ptrs[global_i];
36 97 std::size_t ind2 = a.row_ptrs[global_i + 1];
37
2/2
✓ Branch 0 taken 293 times.
✓ Branch 1 taken 97 times.
390 for (std::size_t j = ind1; j < ind2; j++) {
38 293 std::size_t col = a.cols[j];
39 293 temp[col] = static_cast<int>(j);
40 }
41
42
2/2
✓ Branch 0 taken 2009 times.
✓ Branch 1 taken 97 times.
2106 for (std::size_t j = 0; j < n; j++) {
43 2009 double sum = 0;
44 2009 std::size_t ind3 = bt.row_ptrs[j];
45 2009 std::size_t ind4 = bt.row_ptrs[j + 1];
46
47 PerformCalculation(ind3, ind4, a, bt, sum, temp);
48
49
2/2
✓ Branch 0 taken 511 times.
✓ Branch 1 taken 1498 times.
2009 if (std::fabs(sum) > kZERO) {
50 columns.push_back(j);
51 values.push_back(sum);
52 511 row_nnz++;
53 }
54 }
55 97 }
56
57 static void ComputeLocalResults(std::size_t start_row, std::size_t local_nrows, std::size_t n, const CRS &a,
58 const CRS &bt, std::vector<std::vector<std::size_t>> &local_columns,
59 std::vector<std::vector<double>> &local_values,
60 std::vector<std::size_t> &local_row_index) {
61 12 #pragma omp parallel for default(none) \
62 shared(n, a, bt, local_columns, local_values, local_row_index, start_row, local_nrows)
63 for (std::size_t local_i = 0; local_i < local_nrows; local_i++) {
64 std::size_t global_i = start_row + local_i;
65 ComputeLocalRow(global_i, n, a, bt, local_columns[local_i], local_values[local_i], local_row_index[local_i]);
66 }
67 }
68
69 12 static void FlattenLocalData(const std::vector<std::vector<std::size_t>> &local_columns,
70 const std::vector<std::vector<double>> &local_values,
71 std::vector<std::size_t> &flat_columns, std::vector<double> &flat_values,
72 std::vector<int> &row_sizes) {
73
2/2
✓ Branch 0 taken 97 times.
✓ Branch 1 taken 12 times.
109 for (std::size_t i = 0; i < local_columns.size(); i++) {
74 97 row_sizes[i] = static_cast<int>(local_columns[i].size());
75 97 flat_columns.insert(flat_columns.end(), local_columns[i].begin(), local_columns[i].end());
76 97 flat_values.insert(flat_values.end(), local_values[i].begin(), local_values[i].end());
77 }
78 12 }
79
80 struct ProcessData {
81 std::size_t start_row{};
82 std::size_t local_nrows{};
83 std::vector<int> row_sizes;
84 std::vector<std::size_t> flat_columns;
85 std::vector<double> flat_values;
86 };
87
88 6 static ProcessData ReceiveProcessData(int source, std::size_t p_start_row, std::size_t p_local_nrows) {
89 6 ProcessData data;
90 6 data.start_row = p_start_row;
91 6 data.local_nrows = p_local_nrows;
92
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 data.row_sizes.resize(p_local_nrows);
93
94
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 MPI_Recv(data.row_sizes.data(), static_cast<int>(p_local_nrows), MPI_INT, source, 0, MPI_COMM_WORLD,
95 MPI_STATUS_IGNORE);
96
97 int total_nz = 0;
98
2/2
✓ Branch 0 taken 46 times.
✓ Branch 1 taken 6 times.
52 for (std::size_t i = 0; i < p_local_nrows; i++) {
99 46 total_nz += data.row_sizes[i];
100 }
101
102
1/2
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
6 if (total_nz > 0) {
103
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 data.flat_columns.resize(total_nz);
104
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 data.flat_values.resize(total_nz);
105
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 MPI_Recv(data.flat_columns.data(), total_nz, MPI_UNSIGNED_LONG, source, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
106
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 MPI_Recv(data.flat_values.data(), total_nz, MPI_DOUBLE, source, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
107 }
108
109 6 return data;
110 }
111
112 12 static void AssembleResultFromProcess(const ProcessData &data, std::vector<std::vector<std::size_t>> &columns,
113 std::vector<std::vector<double>> &values) {
114 std::size_t offset = 0;
115
2/2
✓ Branch 0 taken 97 times.
✓ Branch 1 taken 12 times.
109 for (std::size_t local_i = 0; local_i < data.local_nrows; local_i++) {
116
2/2
✓ Branch 0 taken 85 times.
✓ Branch 1 taken 12 times.
97 std::size_t global_row = data.start_row + local_i;
117 97 int row_size = data.row_sizes[local_i];
118
119
2/2
✓ Branch 0 taken 85 times.
✓ Branch 1 taken 12 times.
97 if (row_size > 0) {
120 85 columns[global_row].resize(row_size);
121 85 values[global_row].resize(row_size);
122
123
2/2
✓ Branch 0 taken 511 times.
✓ Branch 1 taken 85 times.
596 for (int j = 0; j < row_size; j++) {
124 511 columns[global_row][j] = data.flat_columns[offset + j];
125 511 values[global_row][j] = data.flat_values[offset + j];
126 }
127 85 offset += static_cast<std::size_t>(row_size);
128 }
129 }
130 12 }
131
132 6 static CRS BuildFinalMatrix(std::size_t n, std::vector<std::vector<std::size_t>> &columns,
133 std::vector<std::vector<double>> &values) {
134 6 CRS result;
135
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 result.row_ptrs.resize(n + 1, 0);
136
137 std::size_t nz = 0;
138
2/2
✓ Branch 0 taken 97 times.
✓ Branch 1 taken 6 times.
103 for (std::size_t i = 0; i < n; i++) {
139 97 result.row_ptrs[i] = nz;
140 97 nz += columns[i].size();
141 }
142 6 result.row_ptrs[n] = nz;
143
144
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 result.cols.reserve(nz);
145
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 result.values.reserve(nz);
146
2/2
✓ Branch 0 taken 97 times.
✓ Branch 1 taken 6 times.
103 for (std::size_t i = 0; i < n; i++) {
147
1/2
✓ Branch 1 taken 97 times.
✗ Branch 2 not taken.
97 result.cols.insert(result.cols.end(), columns[i].begin(), columns[i].end());
148 97 result.values.insert(result.values.end(), values[i].begin(), values[i].end());
149 }
150
151 6 result.nz = nz;
152 6 result.ncols = n;
153 6 result.nrows = n;
154
155 6 return result;
156 }
157
158 6 static void SendLocalData(int dest, const std::vector<int> &row_sizes, const std::vector<std::size_t> &flat_columns,
159 const std::vector<double> &flat_values) {
160 6 std::vector<int> row_sizes_copy = row_sizes;
161
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 MPI_Send(row_sizes_copy.data(), static_cast<int>(row_sizes_copy.size()), MPI_INT, dest, 0, MPI_COMM_WORLD);
162
163
1/2
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
6 if (!flat_columns.empty()) {
164
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 std::vector<std::size_t> flat_columns_copy = flat_columns;
165
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 std::vector<double> flat_values_copy = flat_values;
166
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 MPI_Send(flat_columns_copy.data(), static_cast<int>(flat_columns_copy.size()), MPI_UNSIGNED_LONG, dest, 1,
167 MPI_COMM_WORLD);
168
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 MPI_Send(flat_values_copy.data(), static_cast<int>(flat_values_copy.size()), MPI_DOUBLE, dest, 2, MPI_COMM_WORLD);
169 }
170 6 }
171
172 public:
173 12 [[nodiscard]] CRS Multiply(const CRS &a, const CRS &b) const override {
174 12 int rank = -1;
175 12 int num_procs = -1;
176 12 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
177 12 MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
178
179
2/4
✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 12 times.
12 if ((rank < 0) || (num_procs < 0)) {
180 throw std::runtime_error("MPI rank or world size is incorrect");
181 }
182
183 12 std::size_t n = a.nrows;
184
185
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.
12 if (num_procs == 1) {
186 return MultiplySerial(a, b);
187 }
188
189 12 std::size_t rows_per_proc = n / static_cast<std::size_t>(num_procs);
190 12 std::size_t remainder = n % static_cast<std::size_t>(num_procs);
191 std::size_t start_row =
192
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 7 times.
12 (static_cast<std::size_t>(rank) * rows_per_proc) + std::min(static_cast<std::size_t>(rank), remainder);
193 12 std::size_t local_nrows = rows_per_proc + (std::cmp_less(rank, remainder) ? 1 : 0);
194
195 12 auto bt = this->Transpose(b);
196
197
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 std::vector<std::vector<std::size_t>> local_columns(local_nrows);
198
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 std::vector<std::vector<double>> local_values(local_nrows);
199
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 std::vector<std::size_t> local_row_index(local_nrows, 0);
200
201 ComputeLocalResults(start_row, local_nrows, n, a, bt, local_columns, local_values, local_row_index);
202
203 12 std::vector<std::size_t> flat_columns;
204 12 std::vector<double> flat_values;
205
1/4
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
12 std::vector<int> row_sizes(local_nrows);
206
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 FlattenLocalData(local_columns, local_values, flat_columns, flat_values, row_sizes);
207
208 12 CRS result;
209
210
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 if (rank == 0) {
211
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 std::vector<std::vector<std::size_t>> columns(n);
212
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 std::vector<std::vector<double>> values(n);
213
214
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 6 times.
18 for (int pp = 0; pp < num_procs; pp++) {
215 std::size_t p_start_row =
216
2/2
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 5 times.
12 (static_cast<std::size_t>(pp) * rows_per_proc) + std::min(static_cast<std::size_t>(pp), remainder);
217 12 std::size_t p_local_nrows = rows_per_proc + (std::cmp_less(pp, remainder) ? 1 : 0);
218
219
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 if (pp == 0) {
220 6 ProcessData self_data;
221 6 self_data.start_row = p_start_row;
222 6 self_data.local_nrows = p_local_nrows;
223
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 self_data.row_sizes = row_sizes;
224
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 self_data.flat_columns = flat_columns;
225
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 self_data.flat_values = flat_values;
226
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 AssembleResultFromProcess(self_data, columns, values);
227 6 } else {
228
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 ProcessData received_data = ReceiveProcessData(pp, p_start_row, p_local_nrows);
229
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 AssembleResultFromProcess(received_data, columns, values);
230 6 }
231 }
232
233
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 result = BuildFinalMatrix(n, columns, values);
234 6 } else {
235
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 SendLocalData(0, row_sizes, flat_columns, flat_values);
236 }
237
238
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 BroadcastResult(result, rank);
239
240 return result;
241 24 }
242
243 [[nodiscard]] CRS MultiplySerial(const CRS &a, const CRS &b) const {
244 std::size_t n = a.nrows;
245 auto bt = this->Transpose(b);
246
247 std::vector<std::vector<std::size_t>> columns(n);
248 std::vector<std::vector<double>> values(n);
249 std::vector<std::size_t> row_index(n + 1, 0);
250
251 #pragma omp parallel for default(none) shared(n, a, bt, columns, values, row_index)
252 for (std::size_t i = 0; i < n; i++) {
253 std::vector<int> temp(n, -1);
254
255 std::size_t ind1 = a.row_ptrs[i];
256 std::size_t ind2 = a.row_ptrs[i + 1];
257 for (std::size_t j = ind1; j < ind2; j++) {
258 std::size_t col = a.cols[j];
259 temp[col] = static_cast<int>(j);
260 }
261
262 for (std::size_t j = 0; j < n; j++) {
263 double sum = 0;
264 std::size_t ind3 = bt.row_ptrs[j];
265 std::size_t ind4 = bt.row_ptrs[j + 1];
266
267 PerformCalculation(ind3, ind4, a, bt, sum, temp);
268
269 if (std::fabs(sum) > kZERO) {
270 columns[i].push_back(j);
271 values[i].push_back(sum);
272 row_index[i]++;
273 }
274 }
275 }
276
277 std::size_t nz = 0;
278 for (std::size_t i = 0; i < n; i++) {
279 std::size_t tmp = row_index[i];
280 row_index[i] = nz;
281 nz += tmp;
282 }
283 row_index[n] = nz;
284
285 CRS result;
286 result.cols.reserve(nz);
287 result.values.reserve(nz);
288 for (std::size_t i = 0; i < n; i++) {
289 result.cols.insert(result.cols.end(), columns[i].begin(), columns[i].end());
290 result.values.insert(result.values.end(), values[i].begin(), values[i].end());
291 }
292 result.row_ptrs = row_index;
293 result.nz = nz;
294 result.ncols = n;
295 result.nrows = n;
296
297 return result;
298 }
299
300 12 static void BroadcastResult(CRS &result, int rank) {
301 12 std::size_t nrows = result.nrows;
302 12 std::size_t ncols = result.ncols;
303 12 std::size_t nz = result.nz;
304
305 12 MPI_Bcast(&nrows, 1, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD);
306 12 MPI_Bcast(&ncols, 1, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD);
307 12 MPI_Bcast(&nz, 1, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD);
308
309
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 if (rank != 0) {
310 6 result.nrows = nrows;
311 6 result.ncols = ncols;
312 6 result.nz = nz;
313
1/2
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
6 if (nrows > 0) {
314 6 result.row_ptrs.resize(nrows + 1);
315 }
316
1/2
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
6 if (nz > 0) {
317 6 result.cols.resize(nz);
318 6 result.values.resize(nz);
319 }
320 }
321
322
1/2
✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
12 if (nrows > 0) {
323 12 MPI_Bcast(result.row_ptrs.data(), static_cast<int>(nrows + 1), MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD);
324 }
325
326
1/2
✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
12 if (nz > 0) {
327 12 MPI_Bcast(result.cols.data(), static_cast<int>(nz), MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD);
328 12 MPI_Bcast(result.values.data(), static_cast<int>(nz), MPI_DOUBLE, 0, MPI_COMM_WORLD);
329 }
330 12 }
331 };
332
333 } // namespace guseva_crs
334