| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "lobanov_d_multi_matrix_crs/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | #include <omp.h> | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <array> | ||
| 8 | #include <cmath> | ||
| 9 | #include <cstddef> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "lobanov_d_multi_matrix_crs/common/include/common.hpp" | ||
| 13 | #include "util/include/util.hpp" | ||
| 14 | |||
| 15 | namespace lobanov_d_multi_matrix_crs { | ||
| 16 | |||
| 17 | ✗ | void LobanovMultyMatrixALL::SortIndices(std::vector<int> &vec) { | |
| 18 | std::ranges::sort(vec); | ||
| 19 | ✗ | } | |
| 20 | |||
| 21 | ✗ | LobanovMultyMatrixALL::LobanovMultyMatrixALL(const InType &in) { | |
| 22 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 23 | ✗ | int rank = 0; | |
| 24 | ✗ | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 25 | ✗ | if (rank == 0) { | |
| 26 | GetInput() = in; | ||
| 27 | } | ||
| 28 | ✗ | } | |
| 29 | |||
| 30 | ✗ | bool LobanovMultyMatrixALL::ValidationImpl() { | |
| 31 | ✗ | int rank = 0; | |
| 32 | ✗ | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 33 | ✗ | int valid_flag = 0; | |
| 34 | ✗ | if (rank == 0) { | |
| 35 | const auto &input = GetInput(); | ||
| 36 | const auto &mat_a = input.first; | ||
| 37 | const auto &mat_b = input.second; | ||
| 38 | ✗ | if (mat_a.column_count == mat_b.row_count && mat_a.row_count > 0 && mat_b.column_count > 0) { | |
| 39 | ✗ | valid_flag = 1; | |
| 40 | } | ||
| 41 | } | ||
| 42 | ✗ | MPI_Bcast(&valid_flag, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 43 | ✗ | return valid_flag == 1; | |
| 44 | } | ||
| 45 | |||
| 46 | ✗ | bool LobanovMultyMatrixALL::PreProcessingImpl() { | |
| 47 | ✗ | return true; | |
| 48 | } | ||
| 49 | |||
| 50 | ✗ | void LobanovMultyMatrixALL::DistributeSparseMatrix(CompressedRowMatrix &mat, int root, int rows, int cols) { | |
| 51 | ✗ | int rank = 0; | |
| 52 | ✗ | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 53 | ✗ | std::array<int, 2> sizes = {0, 0}; | |
| 54 | ✗ | if (rank == root) { | |
| 55 | ✗ | sizes[0] = static_cast<int>(mat.row_pointer_data.size()); | |
| 56 | ✗ | sizes[1] = static_cast<int>(mat.value_data.size()); | |
| 57 | } | ||
| 58 | ✗ | MPI_Bcast(sizes.data(), 2, MPI_INT, root, MPI_COMM_WORLD); | |
| 59 | ✗ | if (rank != root) { | |
| 60 | ✗ | mat.row_pointer_data.resize(sizes[0]); | |
| 61 | ✗ | mat.value_data.resize(sizes[1]); | |
| 62 | ✗ | mat.column_index_data.resize(sizes[1]); | |
| 63 | } | ||
| 64 | ✗ | if (sizes[0] > 0) { | |
| 65 | ✗ | MPI_Bcast(mat.row_pointer_data.data(), sizes[0], MPI_INT, root, MPI_COMM_WORLD); | |
| 66 | } | ||
| 67 | ✗ | if (sizes[1] > 0) { | |
| 68 | ✗ | MPI_Bcast(mat.value_data.data(), sizes[1], MPI_DOUBLE, root, MPI_COMM_WORLD); | |
| 69 | ✗ | MPI_Bcast(mat.column_index_data.data(), sizes[1], MPI_INT, root, MPI_COMM_WORLD); | |
| 70 | } | ||
| 71 | ✗ | mat.row_count = rows; | |
| 72 | ✗ | mat.column_count = cols; | |
| 73 | ✗ | mat.non_zero_count = static_cast<int>(mat.value_data.size()); | |
| 74 | ✗ | } | |
| 75 | |||
| 76 | ✗ | CompressedRowMatrix LobanovMultyMatrixALL::TransposeSparseMatrix(const CompressedRowMatrix &src) { | |
| 77 | ✗ | CompressedRowMatrix dst; | |
| 78 | ✗ | dst.row_count = src.column_count; | |
| 79 | ✗ | dst.column_count = src.row_count; | |
| 80 | ✗ | dst.row_pointer_data.assign(dst.row_count + 1, 0); | |
| 81 | |||
| 82 | ✗ | for (int col : src.column_index_data) { | |
| 83 | ✗ | dst.row_pointer_data[col + 1]++; | |
| 84 | } | ||
| 85 | ✗ | for (int i = 0; i < dst.row_count; ++i) { | |
| 86 | ✗ | dst.row_pointer_data[i + 1] += dst.row_pointer_data[i]; | |
| 87 | } | ||
| 88 | ✗ | dst.value_data.resize(src.value_data.size()); | |
| 89 | ✗ | dst.column_index_data.resize(src.column_index_data.size()); | |
| 90 | ✗ | dst.non_zero_count = src.non_zero_count; | |
| 91 | |||
| 92 | ✗ | std::vector<int> cursor = dst.row_pointer_data; | |
| 93 | ✗ | for (int i = 0; i < src.row_count; ++i) { | |
| 94 | ✗ | for (int j = src.row_pointer_data[i]; j < src.row_pointer_data[i + 1]; ++j) { | |
| 95 | ✗ | int col = src.column_index_data[j]; | |
| 96 | ✗ | int pos = cursor[col]; | |
| 97 | ✗ | cursor[col]++; | |
| 98 | ✗ | dst.value_data[pos] = src.value_data[j]; | |
| 99 | ✗ | dst.column_index_data[pos] = i; | |
| 100 | } | ||
| 101 | } | ||
| 102 | ✗ | return dst; | |
| 103 | ✗ | } | |
| 104 | |||
| 105 | ✗ | void LobanovMultyMatrixALL::ComputeLocalProduct(const CompressedRowMatrix &a, const CompressedRowMatrix &b_tr, | |
| 106 | int start_row, int local_rows, std::vector<int> &row_nnz_counts, | ||
| 107 | std::vector<double> &packed_vals, std::vector<int> &packed_cols) { | ||
| 108 | ✗ | if (local_rows <= 0) { | |
| 109 | ✗ | return; | |
| 110 | } | ||
| 111 | ✗ | int result_cols = b_tr.row_count; | |
| 112 | |||
| 113 | ✗ | std::vector<std::vector<double>> row_vals(local_rows); | |
| 114 | ✗ | std::vector<std::vector<int>> row_cols(local_rows); | |
| 115 | |||
| 116 | ✗ | #pragma omp parallel default(none) shared(a, b_tr, start_row, local_rows, row_vals, row_cols, row_nnz_counts, \ | |
| 117 | ✗ | result_cols) num_threads(ppc::util::GetNumThreads()) | |
| 118 | { | ||
| 119 | std::vector<int> marker(result_cols, -1); | ||
| 120 | std::vector<double> accumulator(result_cols, 0.0); | ||
| 121 | std::vector<int> active_columns; | ||
| 122 | |||
| 123 | #pragma omp for schedule(dynamic) | ||
| 124 | for (int i = 0; i < local_rows; ++i) { | ||
| 125 | int global_row = start_row + i; | ||
| 126 | active_columns.clear(); | ||
| 127 | |||
| 128 | for (int idx = a.row_pointer_data[global_row]; idx < a.row_pointer_data[global_row + 1]; ++idx) { | ||
| 129 | int col_a = a.column_index_data[idx]; | ||
| 130 | double val_a = a.value_data[idx]; | ||
| 131 | |||
| 132 | for (int j = b_tr.row_pointer_data[col_a]; j < b_tr.row_pointer_data[col_a + 1]; ++j) { | ||
| 133 | int col_res = b_tr.column_index_data[j]; | ||
| 134 | double contrib = val_a * b_tr.value_data[j]; | ||
| 135 | if (marker[col_res] != i) { | ||
| 136 | marker[col_res] = i; | ||
| 137 | active_columns.push_back(col_res); | ||
| 138 | accumulator[col_res] = contrib; | ||
| 139 | } else { | ||
| 140 | accumulator[col_res] += contrib; | ||
| 141 | } | ||
| 142 | } | ||
| 143 | } | ||
| 144 | |||
| 145 | row_nnz_counts[i] = static_cast<int>(active_columns.size()); | ||
| 146 | SortIndices(active_columns); | ||
| 147 | for (int col : active_columns) { | ||
| 148 | row_vals[i].push_back(accumulator[col]); | ||
| 149 | row_cols[i].push_back(col); | ||
| 150 | accumulator[col] = 0.0; | ||
| 151 | } | ||
| 152 | } | ||
| 153 | } | ||
| 154 | |||
| 155 | ✗ | for (int i = 0; i < local_rows; ++i) { | |
| 156 | ✗ | packed_vals.insert(packed_vals.end(), row_vals[i].begin(), row_vals[i].end()); | |
| 157 | ✗ | packed_cols.insert(packed_cols.end(), row_cols[i].begin(), row_cols[i].end()); | |
| 158 | } | ||
| 159 | ✗ | } | |
| 160 | |||
| 161 | ✗ | void LobanovMultyMatrixALL::MergeLocalResults(int rank, int comm_size, int total_rows, int result_cols, int local_rows, | |
| 162 | CompressedRowMatrix &result_mat, const std::vector<int> &row_nnz_counts, | ||
| 163 | const std::vector<double> &packed_vals, | ||
| 164 | const std::vector<int> &packed_cols) { | ||
| 165 | ✗ | int local_total_nnz = static_cast<int>(packed_vals.size()); | |
| 166 | ✗ | std::vector<int> global_nnz_counts(comm_size); | |
| 167 | ✗ | std::vector<int> global_row_counts(comm_size); | |
| 168 | |||
| 169 | ✗ | MPI_Gather(&local_total_nnz, 1, MPI_INT, global_nnz_counts.data(), 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 170 | ✗ | MPI_Gather(&local_rows, 1, MPI_INT, global_row_counts.data(), 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 171 | |||
| 172 | // Создаем неконстантные копии для MPI (mpi не меняет данные) | ||
| 173 | ✗ | std::vector<double> packed_vals_copy = packed_vals; | |
| 174 | ✗ | std::vector<int> packed_cols_copy = packed_cols; | |
| 175 | ✗ | std::vector<int> row_nnz_counts_copy = row_nnz_counts; | |
| 176 | |||
| 177 | ✗ | if (rank == 0) { | |
| 178 | ✗ | result_mat.row_count = total_rows; | |
| 179 | ✗ | result_mat.column_count = result_cols; | |
| 180 | |||
| 181 | ✗ | std::vector<int> nnz_offsets(comm_size, 0); | |
| 182 | ✗ | std::vector<int> row_offsets(comm_size, 0); | |
| 183 | int total_nnz = 0; | ||
| 184 | ✗ | for (int proc = 0; proc < comm_size; ++proc) { | |
| 185 | ✗ | nnz_offsets[proc] = total_nnz; | |
| 186 | ✗ | total_nnz += global_nnz_counts[proc]; | |
| 187 | ✗ | if (proc > 0) { | |
| 188 | ✗ | row_offsets[proc] = row_offsets[proc - 1] + global_row_counts[proc - 1]; | |
| 189 | } | ||
| 190 | } | ||
| 191 | |||
| 192 | ✗ | result_mat.value_data.resize(total_nnz); | |
| 193 | ✗ | result_mat.column_index_data.resize(total_nnz); | |
| 194 | ✗ | result_mat.row_pointer_data.assign(static_cast<size_t>(total_rows) + 1, 0); | |
| 195 | ✗ | result_mat.non_zero_count = total_nnz; | |
| 196 | |||
| 197 | ✗ | MPI_Gatherv(packed_vals_copy.data(), local_total_nnz, MPI_DOUBLE, result_mat.value_data.data(), | |
| 198 | global_nnz_counts.data(), nnz_offsets.data(), MPI_DOUBLE, 0, MPI_COMM_WORLD); | ||
| 199 | ✗ | MPI_Gatherv(packed_cols_copy.data(), local_total_nnz, MPI_INT, result_mat.column_index_data.data(), | |
| 200 | global_nnz_counts.data(), nnz_offsets.data(), MPI_INT, 0, MPI_COMM_WORLD); | ||
| 201 | |||
| 202 | ✗ | std::vector<int> all_row_nnz(total_rows); | |
| 203 | ✗ | MPI_Gatherv(row_nnz_counts_copy.data(), local_rows, MPI_INT, all_row_nnz.data(), global_row_counts.data(), | |
| 204 | row_offsets.data(), MPI_INT, 0, MPI_COMM_WORLD); | ||
| 205 | ✗ | for (int i = 0; i < total_rows; ++i) { | |
| 206 | ✗ | result_mat.row_pointer_data[i + 1] = result_mat.row_pointer_data[i] + all_row_nnz[i]; | |
| 207 | } | ||
| 208 | } else { | ||
| 209 | ✗ | MPI_Gatherv(packed_vals_copy.data(), local_total_nnz, MPI_DOUBLE, nullptr, nullptr, nullptr, MPI_DOUBLE, 0, | |
| 210 | MPI_COMM_WORLD); | ||
| 211 | ✗ | MPI_Gatherv(packed_cols_copy.data(), local_total_nnz, MPI_INT, nullptr, nullptr, nullptr, MPI_INT, 0, | |
| 212 | MPI_COMM_WORLD); | ||
| 213 | ✗ | MPI_Gatherv(row_nnz_counts_copy.data(), local_rows, MPI_INT, nullptr, nullptr, nullptr, MPI_INT, 0, MPI_COMM_WORLD); | |
| 214 | } | ||
| 215 | |||
| 216 | ✗ | DistributeSparseMatrix(result_mat, 0, total_rows, result_cols); | |
| 217 | ✗ | } | |
| 218 | |||
| 219 | ✗ | bool LobanovMultyMatrixALL::RunImpl() { | |
| 220 | ✗ | int comm_size = 0; | |
| 221 | ✗ | int rank = 0; | |
| 222 | ✗ | MPI_Comm_size(MPI_COMM_WORLD, &comm_size); | |
| 223 | ✗ | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 224 | |||
| 225 | ✗ | std::array<int, 3> dimensions = {0, 0, 0}; | |
| 226 | ✗ | if (rank == 0) { | |
| 227 | const auto &mat_a = GetInput().first; | ||
| 228 | const auto &mat_b = GetInput().second; | ||
| 229 | ✗ | dimensions[0] = mat_a.row_count; | |
| 230 | ✗ | dimensions[1] = mat_a.column_count; | |
| 231 | ✗ | dimensions[2] = mat_b.column_count; | |
| 232 | } | ||
| 233 | ✗ | MPI_Bcast(dimensions.data(), 3, MPI_INT, 0, MPI_COMM_WORLD); | |
| 234 | ✗ | int a_rows = dimensions[0]; | |
| 235 | ✗ | int a_cols = dimensions[1]; | |
| 236 | ✗ | int b_cols = dimensions[2]; | |
| 237 | |||
| 238 | ✗ | CompressedRowMatrix matrix_a; | |
| 239 | ✗ | CompressedRowMatrix matrix_b; | |
| 240 | ✗ | if (rank == 0) { | |
| 241 | ✗ | matrix_a = GetInput().first; | |
| 242 | ✗ | matrix_b = GetInput().second; | |
| 243 | } | ||
| 244 | ✗ | DistributeSparseMatrix(matrix_a, 0, a_rows, a_cols); | |
| 245 | ✗ | DistributeSparseMatrix(matrix_b, 0, a_cols, b_cols); | |
| 246 | |||
| 247 | ✗ | CompressedRowMatrix matrix_b_transposed = TransposeSparseMatrix(matrix_b); | |
| 248 | |||
| 249 | ✗ | int base_chunk = a_rows / comm_size; | |
| 250 | ✗ | int remainder = a_rows % comm_size; | |
| 251 | ✗ | int start_row = (rank * base_chunk) + std::min(rank, remainder); | |
| 252 | ✗ | int local_rows = base_chunk + (rank < remainder ? 1 : 0); | |
| 253 | |||
| 254 | ✗ | std::vector<int> local_row_nnz(local_rows, 0); | |
| 255 | ✗ | std::vector<double> flat_values; | |
| 256 | ✗ | std::vector<int> flat_columns; | |
| 257 | |||
| 258 | ✗ | ComputeLocalProduct(matrix_a, matrix_b_transposed, start_row, local_rows, local_row_nnz, flat_values, flat_columns); | |
| 259 | |||
| 260 | ✗ | CompressedRowMatrix result_matrix; | |
| 261 | ✗ | MergeLocalResults(rank, comm_size, a_rows, b_cols, local_rows, result_matrix, local_row_nnz, flat_values, | |
| 262 | flat_columns); | ||
| 263 | |||
| 264 | ✗ | if (rank == 0) { | |
| 265 | ✗ | GetOutput() = result_matrix; | |
| 266 | } | ||
| 267 | ✗ | return true; | |
| 268 | ✗ | } | |
| 269 | |||
| 270 | ✗ | bool LobanovMultyMatrixALL::PostProcessingImpl() { | |
| 271 | ✗ | return true; | |
| 272 | } | ||
| 273 | |||
| 274 | } // namespace lobanov_d_multi_matrix_crs | ||
| 275 |