GCC Code Coverage Report


Directory: ./
File: tasks/volkov_a_sparse_mat_mul_ccs/all/src/ops_all.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 132 132 100.0%
Functions: 12 12 100.0%
Branches: 62 84 73.8%

Line Branch Exec Source
1 #include "volkov_a_sparse_mat_mul_ccs/all/include/ops_all.hpp"
2
3 #include <mpi.h>
4 #include <omp.h>
5
6 #include <algorithm>
7 #include <cmath>
8 #include <tuple>
9 #include <utility>
10 #include <vector>
11
12 #include "volkov_a_sparse_mat_mul_ccs/common/include/common.hpp"
13
14 namespace volkov_a_sparse_mat_mul_ccs {
15
16 namespace {
17
18 template <typename Matrix>
19 30 void BroadcastMatrix(Matrix &mat, int root, MPI_Comm comm) {
20 30 int rank = 0;
21 30 MPI_Comm_rank(comm, &rank);
22
23 30 MPI_Bcast(&mat.rows_count, 1, MPI_INT, root, comm);
24 30 MPI_Bcast(&mat.cols_count, 1, MPI_INT, root, comm);
25 30 MPI_Bcast(&mat.non_zeros, 1, MPI_INT, root, comm);
26
27
2/2
✓ Branch 0 taken 15 times.
✓ Branch 1 taken 15 times.
30 if (rank != root) {
28 15 mat.col_ptrs.resize(mat.cols_count + 1);
29 15 mat.row_indices.resize(mat.non_zeros);
30 15 mat.values.resize(mat.non_zeros);
31 }
32
33 30 MPI_Bcast(mat.col_ptrs.data(), mat.cols_count + 1, MPI_INT, root, comm);
34
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 2 times.
30 if (mat.non_zeros > 0) {
35 28 MPI_Bcast(mat.row_indices.data(), mat.non_zeros, MPI_INT, root, comm);
36 28 MPI_Bcast(mat.values.data(), mat.non_zeros, MPI_DOUBLE, root, comm);
37 }
38 30 }
39
40 template <typename MatrixType>
41 9 void ProcessColumnOmp(int col_idx, const MatrixType &matrix_a, const MatrixType &matrix_b,
42 std::vector<double> &col_accumulator, std::vector<int> &local_row_indices,
43 std::vector<double> &local_values) {
44 9 int b_start = matrix_b.col_ptrs[col_idx];
45 9 int b_end = matrix_b.col_ptrs[col_idx + 1];
46
47
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 9 times.
21 for (int k = b_start; k < b_end; ++k) {
48 12 int b_row = matrix_b.row_indices[k];
49 12 double b_val = matrix_b.values[k];
50
51 12 int a_start = matrix_a.col_ptrs[b_row];
52 12 int a_end = matrix_a.col_ptrs[b_row + 1];
53
54
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 12 times.
26 for (int idx = a_start; idx < a_end; ++idx) {
55 14 int a_row = matrix_a.row_indices[idx];
56 14 double a_val = matrix_a.values[idx];
57 14 col_accumulator[a_row] += a_val * b_val;
58 }
59 }
60
61
2/2
✓ Branch 0 taken 20 times.
✓ Branch 1 taken 9 times.
29 for (int i = 0; i < matrix_a.rows_count; ++i) {
62
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 8 times.
20 if (std::abs(col_accumulator[i]) > 1e-10) {
63 local_row_indices.push_back(i);
64 local_values.push_back(col_accumulator[i]);
65 }
66 20 col_accumulator[i] = 0.0;
67 }
68 9 }
69
70 5 void FlattenLocalData(int my_cols_count, const std::vector<std::vector<int>> &local_row_indices,
71 const std::vector<std::vector<double>> &local_values, std::vector<int> &my_col_sizes, int &my_nnz,
72 std::vector<int> &my_rows_flat, std::vector<double> &my_vals_flat) {
73 5 my_nnz = 0;
74 5 my_col_sizes.resize(my_cols_count);
75
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 5 times.
9 for (int i = 0; i < my_cols_count; ++i) {
76 4 my_col_sizes[i] = static_cast<int>(local_row_indices[i].size());
77 4 my_nnz += my_col_sizes[i];
78 }
79
80 5 my_rows_flat.resize(my_nnz);
81 5 my_vals_flat.resize(my_nnz);
82 int offset = 0;
83
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 5 times.
9 for (int i = 0; i < my_cols_count; ++i) {
84 4 std::copy(local_row_indices[i].begin(), local_row_indices[i].end(), my_rows_flat.begin() + offset);
85 4 std::copy(local_values[i].begin(), local_values[i].end(), my_vals_flat.begin() + offset);
86 4 offset += my_col_sizes[i];
87 }
88 5 }
89
90 5 void SendResultsToRoot(int my_cols_count, const std::vector<int> &my_col_sizes, int my_nnz,
91 const std::vector<int> &my_rows_flat, const std::vector<double> &my_vals_flat) {
92
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 1 times.
5 if (my_cols_count > 0) {
93 4 MPI_Send(my_col_sizes.data(), my_cols_count, MPI_INT, 0, 0, MPI_COMM_WORLD);
94
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
4 if (my_nnz > 0) {
95 3 MPI_Send(my_rows_flat.data(), my_nnz, MPI_INT, 0, 1, MPI_COMM_WORLD);
96 3 MPI_Send(my_vals_flat.data(), my_nnz, MPI_DOUBLE, 0, 2, MPI_COMM_WORLD);
97 }
98 }
99 5 }
100
101 5 void ReceiveWorkerResults(int world_size, int chunk, int rem, std::vector<std::vector<int>> &all_row_indices,
102 std::vector<std::vector<double>> &all_values) {
103
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 for (int proc = 1; proc < world_size; ++proc) {
104
1/2
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
5 int proc_start_col = (proc * chunk) + std::min(proc, rem);
105
1/2
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
5 int proc_end_col = proc_start_col + chunk + (proc < rem ? 1 : 0);
106 5 int proc_cols_count = proc_end_col - proc_start_col;
107
108
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
5 if (proc_cols_count <= 0) {
109 1 continue;
110 }
111
112
1/2
✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.
4 std::vector<int> proc_col_sizes(proc_cols_count);
113
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 MPI_Recv(proc_col_sizes.data(), proc_cols_count, MPI_INT, proc, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
114
115 int proc_nnz = 0;
116
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 for (int s : proc_col_sizes) {
117 4 proc_nnz += s;
118 }
119
120
1/4
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
4 std::vector<int> proc_rows(proc_nnz);
121
1/4
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
4 std::vector<double> proc_vals(proc_nnz);
122
123
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
4 if (proc_nnz > 0) {
124
1/2
✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
3 MPI_Recv(proc_rows.data(), proc_nnz, MPI_INT, proc, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
125
1/2
✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
3 MPI_Recv(proc_vals.data(), proc_nnz, MPI_DOUBLE, proc, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
126 }
127
128 int curr_offset = 0;
129
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 for (int i = 0; i < proc_cols_count; ++i) {
130
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 all_row_indices[proc_start_col + i].resize(proc_col_sizes[i]);
131
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 all_values[proc_start_col + i].resize(proc_col_sizes[i]);
132
133 4 std::copy(proc_rows.begin() + curr_offset, proc_rows.begin() + curr_offset + proc_col_sizes[i],
134 all_row_indices[proc_start_col + i].begin());
135 4 std::copy(proc_vals.begin() + curr_offset, proc_vals.begin() + curr_offset + proc_col_sizes[i],
136 all_values[proc_start_col + i].begin());
137
138 4 curr_offset += proc_col_sizes[i];
139 }
140 }
141 5 }
142
143 template <typename MatrixType>
144 5 void AssembleFinalMatrix(int total_cols, const std::vector<std::vector<int>> &all_row_indices,
145 const std::vector<std::vector<double>> &all_values, MatrixType &matrix_c) {
146 5 matrix_c.col_ptrs.assign(total_cols + 1, 0);
147
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 5 times.
14 for (int j = 0; j < total_cols; ++j) {
148 9 matrix_c.col_ptrs[j + 1] = matrix_c.col_ptrs[j] + static_cast<int>(all_row_indices[j].size());
149 }
150
151 5 matrix_c.non_zeros = matrix_c.col_ptrs.back();
152 5 matrix_c.row_indices.resize(matrix_c.non_zeros);
153 5 matrix_c.values.resize(matrix_c.non_zeros);
154
155 5 #pragma omp parallel for schedule(static) default(none) shared(total_cols, matrix_c, all_row_indices, all_values)
156 for (int j = 0; j < total_cols; ++j) {
157 int offset_c = matrix_c.col_ptrs[j];
158 int size = static_cast<int>(all_row_indices[j].size());
159 for (int k = 0; k < size; ++k) {
160 matrix_c.row_indices[offset_c + k] = all_row_indices[j][k];
161 matrix_c.values[offset_c + k] = all_values[j][k];
162 }
163 }
164 5 }
165
166 template <typename MatrixType>
167 5 void GatherResultsToRoot(int total_cols, int world_size, int chunk, int rem, int start_col, int my_cols_count,
168 std::vector<std::vector<int>> &local_row_indices,
169 std::vector<std::vector<double>> &local_values, MatrixType &matrix_c) {
170 5 std::vector<std::vector<int>> all_row_indices(total_cols);
171
1/2
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
5 std::vector<std::vector<double>> all_values(total_cols);
172
173
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 for (int i = 0; i < my_cols_count; ++i) {
174 5 all_row_indices[start_col + i] = std::move(local_row_indices[i]);
175 all_values[start_col + i] = std::move(local_values[i]);
176 }
177
178
1/2
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
5 ReceiveWorkerResults(world_size, chunk, rem, all_row_indices, all_values);
179
180
1/2
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
5 AssembleFinalMatrix(total_cols, all_row_indices, all_values, matrix_c);
181 5 }
182
183 } // namespace
184
185
1/2
✓ Branch 2 taken 10 times.
✗ Branch 3 not taken.
10 VolkovASparseMatMulCcsAll::VolkovASparseMatMulCcsAll(const InType &in) {
186 SetTypeOfTask(GetStaticTypeOfTask());
187 GetInput() = in;
188 10 }
189
190 10 bool VolkovASparseMatMulCcsAll::ValidationImpl() {
191 10 int world_rank = 0;
192 10 MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
193
194
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 if (world_rank == 0) {
195 const auto &matrix_a = std::get<0>(GetInput());
196 const auto &matrix_b = std::get<1>(GetInput());
197 5 return (matrix_a.cols_count == matrix_b.rows_count);
198 }
199 return true;
200 }
201
202 10 bool VolkovASparseMatMulCcsAll::PreProcessingImpl() {
203 10 return true;
204 }
205
206 10 bool VolkovASparseMatMulCcsAll::RunImpl() {
207 10 int world_rank = 0;
208 10 int world_size = 0;
209 10 MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
210 10 MPI_Comm_size(MPI_COMM_WORLD, &world_size);
211
212 auto &matrix_a = std::get<0>(GetInput());
213 auto &matrix_b = std::get<1>(GetInput());
214 auto &matrix_c = GetOutput();
215
216 10 BroadcastMatrix(matrix_a, 0, MPI_COMM_WORLD);
217 10 BroadcastMatrix(matrix_b, 0, MPI_COMM_WORLD);
218
219
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 if (world_rank == 0) {
220 5 matrix_c.rows_count = matrix_a.rows_count;
221 5 matrix_c.cols_count = matrix_b.cols_count;
222 }
223
224 10 int total_cols = matrix_b.cols_count;
225 10 int chunk = total_cols / world_size;
226 10 int rem = total_cols % world_size;
227
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 1 times.
10 int start_col = (world_rank * chunk) + std::min(world_rank, rem);
228
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 1 times.
10 int end_col = start_col + chunk + (world_rank < rem ? 1 : 0);
229 10 int my_cols_count = end_col - start_col;
230
231 10 std::vector<std::vector<int>> local_row_indices(my_cols_count);
232
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 std::vector<std::vector<double>> local_values(my_cols_count);
233
234 10 #pragma omp parallel default(none) shared(matrix_a, matrix_b, start_col, end_col, local_row_indices, local_values)
235 {
236 std::vector<double> col_accumulator(matrix_a.rows_count, 0.0);
237 #pragma omp for schedule(dynamic)
238 for (int j = start_col; j < end_col; ++j) {
239 int local_j = j - start_col;
240 ProcessColumnOmp(j, matrix_a, matrix_b, col_accumulator, local_row_indices[local_j], local_values[local_j]);
241 }
242 }
243
244
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 if (world_rank == 0) {
245
1/2
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
5 GatherResultsToRoot(total_cols, world_size, chunk, rem, start_col, my_cols_count, local_row_indices, local_values,
246 matrix_c);
247 } else {
248 5 int my_nnz = 0;
249 5 std::vector<int> my_col_sizes;
250 5 std::vector<int> my_rows_flat;
251 5 std::vector<double> my_vals_flat;
252
253
1/2
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
5 FlattenLocalData(my_cols_count, local_row_indices, local_values, my_col_sizes, my_nnz, my_rows_flat, my_vals_flat);
254
1/2
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
5 SendResultsToRoot(my_cols_count, my_col_sizes, my_nnz, my_rows_flat, my_vals_flat);
255 }
256
257 10 return true;
258 10 }
259
260 10 bool VolkovASparseMatMulCcsAll::PostProcessingImpl() {
261 auto &matrix_c = GetOutput();
262 10 BroadcastMatrix(matrix_c, 0, MPI_COMM_WORLD);
263 10 return true;
264 }
265
266 } // namespace volkov_a_sparse_mat_mul_ccs
267