GCC Code Coverage Report


Directory: ./
File: tasks/lobanov_d_multi_matrix_crs/all/src/ops_all.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 0 145 0.0%
Functions: 0 10 0.0%
Branches: 0 140 0.0%

Line Branch Exec Source
1 #include "lobanov_d_multi_matrix_crs/all/include/ops_all.hpp"
2
3 #include <mpi.h>
4 #include <omp.h>
5
6 #include <algorithm>
7 #include <array>
8 #include <cmath>
9 #include <cstddef>
10 #include <vector>
11
12 #include "lobanov_d_multi_matrix_crs/common/include/common.hpp"
13 #include "util/include/util.hpp"
14
15 namespace lobanov_d_multi_matrix_crs {
16
17 void LobanovMultyMatrixALL::SortIndices(std::vector<int> &vec) {
18 std::ranges::sort(vec);
19 }
20
21 LobanovMultyMatrixALL::LobanovMultyMatrixALL(const InType &in) {
22 SetTypeOfTask(GetStaticTypeOfTask());
23 int rank = 0;
24 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
25 if (rank == 0) {
26 GetInput() = in;
27 }
28 }
29
30 bool LobanovMultyMatrixALL::ValidationImpl() {
31 int rank = 0;
32 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
33 int valid_flag = 0;
34 if (rank == 0) {
35 const auto &input = GetInput();
36 const auto &mat_a = input.first;
37 const auto &mat_b = input.second;
38 if (mat_a.column_count == mat_b.row_count && mat_a.row_count > 0 && mat_b.column_count > 0) {
39 valid_flag = 1;
40 }
41 }
42 MPI_Bcast(&valid_flag, 1, MPI_INT, 0, MPI_COMM_WORLD);
43 return valid_flag == 1;
44 }
45
46 bool LobanovMultyMatrixALL::PreProcessingImpl() {
47 return true;
48 }
49
50 void LobanovMultyMatrixALL::DistributeSparseMatrix(CompressedRowMatrix &mat, int root, int rows, int cols) {
51 int rank = 0;
52 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
53 std::array<int, 2> sizes = {0, 0};
54 if (rank == root) {
55 sizes[0] = static_cast<int>(mat.row_pointer_data.size());
56 sizes[1] = static_cast<int>(mat.value_data.size());
57 }
58 MPI_Bcast(sizes.data(), 2, MPI_INT, root, MPI_COMM_WORLD);
59 if (rank != root) {
60 mat.row_pointer_data.resize(sizes[0]);
61 mat.value_data.resize(sizes[1]);
62 mat.column_index_data.resize(sizes[1]);
63 }
64 if (sizes[0] > 0) {
65 MPI_Bcast(mat.row_pointer_data.data(), sizes[0], MPI_INT, root, MPI_COMM_WORLD);
66 }
67 if (sizes[1] > 0) {
68 MPI_Bcast(mat.value_data.data(), sizes[1], MPI_DOUBLE, root, MPI_COMM_WORLD);
69 MPI_Bcast(mat.column_index_data.data(), sizes[1], MPI_INT, root, MPI_COMM_WORLD);
70 }
71 mat.row_count = rows;
72 mat.column_count = cols;
73 mat.non_zero_count = static_cast<int>(mat.value_data.size());
74 }
75
76 CompressedRowMatrix LobanovMultyMatrixALL::TransposeSparseMatrix(const CompressedRowMatrix &src) {
77 CompressedRowMatrix dst;
78 dst.row_count = src.column_count;
79 dst.column_count = src.row_count;
80 dst.row_pointer_data.assign(dst.row_count + 1, 0);
81
82 for (int col : src.column_index_data) {
83 dst.row_pointer_data[col + 1]++;
84 }
85 for (int i = 0; i < dst.row_count; ++i) {
86 dst.row_pointer_data[i + 1] += dst.row_pointer_data[i];
87 }
88 dst.value_data.resize(src.value_data.size());
89 dst.column_index_data.resize(src.column_index_data.size());
90 dst.non_zero_count = src.non_zero_count;
91
92 std::vector<int> cursor = dst.row_pointer_data;
93 for (int i = 0; i < src.row_count; ++i) {
94 for (int j = src.row_pointer_data[i]; j < src.row_pointer_data[i + 1]; ++j) {
95 int col = src.column_index_data[j];
96 int pos = cursor[col];
97 cursor[col]++;
98 dst.value_data[pos] = src.value_data[j];
99 dst.column_index_data[pos] = i;
100 }
101 }
102 return dst;
103 }
104
105 void LobanovMultyMatrixALL::ComputeLocalProduct(const CompressedRowMatrix &a, const CompressedRowMatrix &b_tr,
106 int start_row, int local_rows, std::vector<int> &row_nnz_counts,
107 std::vector<double> &packed_vals, std::vector<int> &packed_cols) {
108 if (local_rows <= 0) {
109 return;
110 }
111 int result_cols = b_tr.row_count;
112
113 std::vector<std::vector<double>> row_vals(local_rows);
114 std::vector<std::vector<int>> row_cols(local_rows);
115
116 #pragma omp parallel default(none) shared(a, b_tr, start_row, local_rows, row_vals, row_cols, row_nnz_counts, \
117 result_cols) num_threads(ppc::util::GetNumThreads())
118 {
119 std::vector<int> marker(result_cols, -1);
120 std::vector<double> accumulator(result_cols, 0.0);
121 std::vector<int> active_columns;
122
123 #pragma omp for schedule(dynamic)
124 for (int i = 0; i < local_rows; ++i) {
125 int global_row = start_row + i;
126 active_columns.clear();
127
128 for (int idx = a.row_pointer_data[global_row]; idx < a.row_pointer_data[global_row + 1]; ++idx) {
129 int col_a = a.column_index_data[idx];
130 double val_a = a.value_data[idx];
131
132 for (int j = b_tr.row_pointer_data[col_a]; j < b_tr.row_pointer_data[col_a + 1]; ++j) {
133 int col_res = b_tr.column_index_data[j];
134 double contrib = val_a * b_tr.value_data[j];
135 if (marker[col_res] != i) {
136 marker[col_res] = i;
137 active_columns.push_back(col_res);
138 accumulator[col_res] = contrib;
139 } else {
140 accumulator[col_res] += contrib;
141 }
142 }
143 }
144
145 row_nnz_counts[i] = static_cast<int>(active_columns.size());
146 SortIndices(active_columns);
147 for (int col : active_columns) {
148 row_vals[i].push_back(accumulator[col]);
149 row_cols[i].push_back(col);
150 accumulator[col] = 0.0;
151 }
152 }
153 }
154
155 for (int i = 0; i < local_rows; ++i) {
156 packed_vals.insert(packed_vals.end(), row_vals[i].begin(), row_vals[i].end());
157 packed_cols.insert(packed_cols.end(), row_cols[i].begin(), row_cols[i].end());
158 }
159 }
160
161 void LobanovMultyMatrixALL::MergeLocalResults(int rank, int comm_size, int total_rows, int result_cols, int local_rows,
162 CompressedRowMatrix &result_mat, const std::vector<int> &row_nnz_counts,
163 const std::vector<double> &packed_vals,
164 const std::vector<int> &packed_cols) {
165 int local_total_nnz = static_cast<int>(packed_vals.size());
166 std::vector<int> global_nnz_counts(comm_size);
167 std::vector<int> global_row_counts(comm_size);
168
169 MPI_Gather(&local_total_nnz, 1, MPI_INT, global_nnz_counts.data(), 1, MPI_INT, 0, MPI_COMM_WORLD);
170 MPI_Gather(&local_rows, 1, MPI_INT, global_row_counts.data(), 1, MPI_INT, 0, MPI_COMM_WORLD);
171
172 // Создаем неконстантные копии для MPI (mpi не меняет данные)
173 std::vector<double> packed_vals_copy = packed_vals;
174 std::vector<int> packed_cols_copy = packed_cols;
175 std::vector<int> row_nnz_counts_copy = row_nnz_counts;
176
177 if (rank == 0) {
178 result_mat.row_count = total_rows;
179 result_mat.column_count = result_cols;
180
181 std::vector<int> nnz_offsets(comm_size, 0);
182 std::vector<int> row_offsets(comm_size, 0);
183 int total_nnz = 0;
184 for (int proc = 0; proc < comm_size; ++proc) {
185 nnz_offsets[proc] = total_nnz;
186 total_nnz += global_nnz_counts[proc];
187 if (proc > 0) {
188 row_offsets[proc] = row_offsets[proc - 1] + global_row_counts[proc - 1];
189 }
190 }
191
192 result_mat.value_data.resize(total_nnz);
193 result_mat.column_index_data.resize(total_nnz);
194 result_mat.row_pointer_data.assign(static_cast<size_t>(total_rows) + 1, 0);
195 result_mat.non_zero_count = total_nnz;
196
197 MPI_Gatherv(packed_vals_copy.data(), local_total_nnz, MPI_DOUBLE, result_mat.value_data.data(),
198 global_nnz_counts.data(), nnz_offsets.data(), MPI_DOUBLE, 0, MPI_COMM_WORLD);
199 MPI_Gatherv(packed_cols_copy.data(), local_total_nnz, MPI_INT, result_mat.column_index_data.data(),
200 global_nnz_counts.data(), nnz_offsets.data(), MPI_INT, 0, MPI_COMM_WORLD);
201
202 std::vector<int> all_row_nnz(total_rows);
203 MPI_Gatherv(row_nnz_counts_copy.data(), local_rows, MPI_INT, all_row_nnz.data(), global_row_counts.data(),
204 row_offsets.data(), MPI_INT, 0, MPI_COMM_WORLD);
205 for (int i = 0; i < total_rows; ++i) {
206 result_mat.row_pointer_data[i + 1] = result_mat.row_pointer_data[i] + all_row_nnz[i];
207 }
208 } else {
209 MPI_Gatherv(packed_vals_copy.data(), local_total_nnz, MPI_DOUBLE, nullptr, nullptr, nullptr, MPI_DOUBLE, 0,
210 MPI_COMM_WORLD);
211 MPI_Gatherv(packed_cols_copy.data(), local_total_nnz, MPI_INT, nullptr, nullptr, nullptr, MPI_INT, 0,
212 MPI_COMM_WORLD);
213 MPI_Gatherv(row_nnz_counts_copy.data(), local_rows, MPI_INT, nullptr, nullptr, nullptr, MPI_INT, 0, MPI_COMM_WORLD);
214 }
215
216 DistributeSparseMatrix(result_mat, 0, total_rows, result_cols);
217 }
218
219 bool LobanovMultyMatrixALL::RunImpl() {
220 int comm_size = 0;
221 int rank = 0;
222 MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
223 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
224
225 std::array<int, 3> dimensions = {0, 0, 0};
226 if (rank == 0) {
227 const auto &mat_a = GetInput().first;
228 const auto &mat_b = GetInput().second;
229 dimensions[0] = mat_a.row_count;
230 dimensions[1] = mat_a.column_count;
231 dimensions[2] = mat_b.column_count;
232 }
233 MPI_Bcast(dimensions.data(), 3, MPI_INT, 0, MPI_COMM_WORLD);
234 int a_rows = dimensions[0];
235 int a_cols = dimensions[1];
236 int b_cols = dimensions[2];
237
238 CompressedRowMatrix matrix_a;
239 CompressedRowMatrix matrix_b;
240 if (rank == 0) {
241 matrix_a = GetInput().first;
242 matrix_b = GetInput().second;
243 }
244 DistributeSparseMatrix(matrix_a, 0, a_rows, a_cols);
245 DistributeSparseMatrix(matrix_b, 0, a_cols, b_cols);
246
247 CompressedRowMatrix matrix_b_transposed = TransposeSparseMatrix(matrix_b);
248
249 int base_chunk = a_rows / comm_size;
250 int remainder = a_rows % comm_size;
251 int start_row = (rank * base_chunk) + std::min(rank, remainder);
252 int local_rows = base_chunk + (rank < remainder ? 1 : 0);
253
254 std::vector<int> local_row_nnz(local_rows, 0);
255 std::vector<double> flat_values;
256 std::vector<int> flat_columns;
257
258 ComputeLocalProduct(matrix_a, matrix_b_transposed, start_row, local_rows, local_row_nnz, flat_values, flat_columns);
259
260 CompressedRowMatrix result_matrix;
261 MergeLocalResults(rank, comm_size, a_rows, b_cols, local_rows, result_matrix, local_row_nnz, flat_values,
262 flat_columns);
263
264 if (rank == 0) {
265 GetOutput() = result_matrix;
266 }
267 return true;
268 }
269
270 bool LobanovMultyMatrixALL::PostProcessingImpl() {
271 return true;
272 }
273
274 } // namespace lobanov_d_multi_matrix_crs
275