| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "agafonov_i_matrix_ccs_seq/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | #include <tbb/blocked_range.h> | ||
| 5 | #include <tbb/parallel_for.h> | ||
| 6 | |||
| 7 | #include <algorithm> | ||
| 8 | #include <cmath> | ||
| 9 | #include <cstddef> | ||
| 10 | #include <utility> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include "agafonov_i_matrix_ccs_seq/common/include/common.hpp" | ||
| 14 | |||
| 15 | namespace agafonov_i_matrix_ccs_seq { | ||
| 16 | |||
| 17 | namespace { | ||
| 18 | |||
| 19 | ✗ | void AssembleFinalMatrix(InType::first_type &c, size_t total_cols, size_t chunk, size_t rem, int size, | |
| 20 | const std::vector<int> &all_col_nnz, const std::vector<int> &displs, | ||
| 21 | const std::vector<double> &all_vals, const std::vector<int> &all_rind) { | ||
| 22 | ✗ | std::vector<size_t> proc_col_start(size); | |
| 23 | ✗ | std::vector<size_t> proc_col_end(size); | |
| 24 | ✗ | for (int proc_idx = 0; proc_idx < size; ++proc_idx) { | |
| 25 | ✗ | proc_col_start[proc_idx] = (static_cast<size_t>(proc_idx) * chunk) + std::min(static_cast<size_t>(proc_idx), rem); | |
| 26 | ✗ | proc_col_end[proc_idx] = proc_col_start[proc_idx] + chunk + (std::cmp_less(proc_idx, rem) ? 1 : 0); | |
| 27 | } | ||
| 28 | |||
| 29 | ✗ | std::vector<std::vector<double>> col_vals(total_cols); | |
| 30 | ✗ | std::vector<std::vector<int>> col_rind(total_cols); | |
| 31 | ✗ | for (int proc_idx = 0; proc_idx < size; ++proc_idx) { | |
| 32 | ✗ | int offset = displs[proc_idx]; | |
| 33 | ✗ | for (size_t j = proc_col_start[proc_idx]; j < proc_col_end[proc_idx]; ++j) { | |
| 34 | ✗ | int n = all_col_nnz[j]; | |
| 35 | ✗ | col_vals[j].assign(all_vals.begin() + offset, all_vals.begin() + offset + n); | |
| 36 | ✗ | col_rind[j].assign(all_rind.begin() + offset, all_rind.begin() + offset + n); | |
| 37 | ✗ | offset += n; | |
| 38 | } | ||
| 39 | } | ||
| 40 | |||
| 41 | int cur_nnz = 0; | ||
| 42 | ✗ | for (size_t j = 0; j < total_cols; ++j) { | |
| 43 | ✗ | c.col_ptrs[j] = cur_nnz; | |
| 44 | ✗ | c.vals.insert(c.vals.end(), col_vals[j].begin(), col_vals[j].end()); | |
| 45 | ✗ | c.row_inds.insert(c.row_inds.end(), col_rind[j].begin(), col_rind[j].end()); | |
| 46 | ✗ | cur_nnz += static_cast<int>(col_vals[j].size()); | |
| 47 | } | ||
| 48 | ✗ | c.col_ptrs[total_cols] = cur_nnz; | |
| 49 | ✗ | c.nnz = cur_nnz; | |
| 50 | ✗ | } | |
| 51 | |||
| 52 | } // namespace | ||
| 53 | |||
| 54 | ✗ | AgafonovIMatrixCCSALL::AgafonovIMatrixCCSALL(const InType &in) { | |
| 55 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 56 | GetInput() = in; | ||
| 57 | ✗ | } | |
| 58 | |||
| 59 | ✗ | bool AgafonovIMatrixCCSALL::ValidationImpl() { | |
| 60 | const auto &left = GetInput().first; | ||
| 61 | const auto &right = GetInput().second; | ||
| 62 | ✗ | return (left.cols_num == right.rows_num) && (left.col_ptrs.size() == left.cols_num + 1) && | |
| 63 | ✗ | (right.col_ptrs.size() == right.cols_num + 1); | |
| 64 | } | ||
| 65 | |||
| 66 | ✗ | bool AgafonovIMatrixCCSALL::PreProcessingImpl() { | |
| 67 | GetOutput().vals.clear(); | ||
| 68 | GetOutput().row_inds.clear(); | ||
| 69 | ✗ | return true; | |
| 70 | } | ||
| 71 | |||
| 72 | ✗ | void AgafonovIMatrixCCSALL::ProcessColumn(size_t j, const InType::first_type &a, const InType::second_type &b, | |
| 73 | std::vector<double> &accumulator, std::vector<size_t> &active_rows, | ||
| 74 | std::vector<bool> &row_mask, std::vector<double> &local_v, | ||
| 75 | std::vector<int> &local_r) { | ||
| 76 | ✗ | const auto b_col_start = static_cast<size_t>(b.col_ptrs[j]); | |
| 77 | ✗ | const auto b_col_end = static_cast<size_t>(b.col_ptrs[j + 1]); | |
| 78 | ✗ | if (b_col_start == b_col_end) { | |
| 79 | return; | ||
| 80 | } | ||
| 81 | |||
| 82 | ✗ | for (size_t kb = b_col_start; kb < b_col_end; ++kb) { | |
| 83 | ✗ | const auto k = static_cast<size_t>(b.row_inds[kb]); | |
| 84 | ✗ | const double v_b = b.vals[kb]; | |
| 85 | ✗ | const auto a_col_start = static_cast<size_t>(a.col_ptrs[k]); | |
| 86 | ✗ | const auto a_col_end = static_cast<size_t>(a.col_ptrs[k + 1]); | |
| 87 | |||
| 88 | ✗ | for (size_t ka = a_col_start; ka < a_col_end; ++ka) { | |
| 89 | ✗ | const auto i = static_cast<size_t>(a.row_inds[ka]); | |
| 90 | ✗ | if (!row_mask[i]) { | |
| 91 | row_mask[i] = true; | ||
| 92 | active_rows.push_back(i); | ||
| 93 | } | ||
| 94 | ✗ | accumulator[i] += a.vals[ka] * v_b; | |
| 95 | } | ||
| 96 | } | ||
| 97 | |||
| 98 | std::ranges::sort(active_rows); | ||
| 99 | |||
| 100 | ✗ | for (const auto row_idx : active_rows) { | |
| 101 | ✗ | if (std::abs(accumulator[row_idx]) > 1e-15) { | |
| 102 | local_v.push_back(accumulator[row_idx]); | ||
| 103 | ✗ | local_r.push_back(static_cast<int>(row_idx)); | |
| 104 | } | ||
| 105 | row_mask[row_idx] = false; | ||
| 106 | ✗ | accumulator[row_idx] = 0.0; | |
| 107 | } | ||
| 108 | active_rows.clear(); | ||
| 109 | } | ||
| 110 | |||
| 111 | ✗ | bool AgafonovIMatrixCCSALL::RunImpl() { | |
| 112 | ✗ | const auto &a = GetInput().first; | |
| 113 | ✗ | const auto &b = GetInput().second; | |
| 114 | auto &c = GetOutput(); | ||
| 115 | |||
| 116 | ✗ | int rank = 0; | |
| 117 | ✗ | int size = 1; | |
| 118 | ✗ | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 119 | ✗ | MPI_Comm_size(MPI_COMM_WORLD, &size); | |
| 120 | |||
| 121 | ✗ | c.rows_num = a.rows_num; | |
| 122 | ✗ | c.cols_num = b.cols_num; | |
| 123 | ✗ | c.col_ptrs.assign(c.cols_num + 1, 0); | |
| 124 | |||
| 125 | ✗ | const size_t total_cols = b.cols_num; | |
| 126 | ✗ | const size_t chunk = total_cols / static_cast<size_t>(size); | |
| 127 | ✗ | const size_t rem = total_cols % static_cast<size_t>(size); | |
| 128 | ✗ | const size_t col_start = (static_cast<size_t>(rank) * chunk) + std::min(static_cast<size_t>(rank), rem); | |
| 129 | ✗ | const size_t col_end = col_start + chunk + (std::cmp_less(rank, rem) ? 1 : 0); | |
| 130 | |||
| 131 | ✗ | std::vector<std::vector<double>> local_vals(total_cols); | |
| 132 | ✗ | std::vector<std::vector<int>> local_rows(total_cols); | |
| 133 | |||
| 134 | ✗ | tbb::parallel_for(tbb::blocked_range<size_t>(col_start, col_end), [&](const tbb::blocked_range<size_t> &range) { | |
| 135 | ✗ | std::vector<double> accumulator(a.rows_num, 0.0); | |
| 136 | ✗ | std::vector<size_t> active_rows; | |
| 137 | ✗ | std::vector<bool> row_mask(a.rows_num, false); | |
| 138 | |||
| 139 | ✗ | for (size_t j = range.begin(); j < range.end(); ++j) { | |
| 140 | ✗ | ProcessColumn(j, a, b, accumulator, active_rows, row_mask, local_vals[j], local_rows[j]); | |
| 141 | } | ||
| 142 | ✗ | }); | |
| 143 | |||
| 144 | ✗ | std::vector<int> my_col_nnz(total_cols, 0); | |
| 145 | ✗ | for (size_t j = col_start; j < col_end; ++j) { | |
| 146 | ✗ | my_col_nnz[j] = static_cast<int>(local_vals[j].size()); | |
| 147 | } | ||
| 148 | |||
| 149 | ✗ | std::vector<int> all_col_nnz; | |
| 150 | ✗ | if (rank == 0) { | |
| 151 | ✗ | all_col_nnz.resize(total_cols, 0); | |
| 152 | } | ||
| 153 | |||
| 154 | ✗ | MPI_Reduce(my_col_nnz.data(), all_col_nnz.data(), static_cast<int>(total_cols), MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); | |
| 155 | |||
| 156 | ✗ | std::vector<double> my_vals; | |
| 157 | ✗ | std::vector<int> my_rind; | |
| 158 | ✗ | for (size_t j = col_start; j < col_end; ++j) { | |
| 159 | ✗ | my_vals.insert(my_vals.end(), local_vals[j].begin(), local_vals[j].end()); | |
| 160 | ✗ | my_rind.insert(my_rind.end(), local_rows[j].begin(), local_rows[j].end()); | |
| 161 | } | ||
| 162 | |||
| 163 | ✗ | int my_count = static_cast<int>(my_vals.size()); | |
| 164 | ✗ | std::vector<int> counts(size); | |
| 165 | ✗ | MPI_Gather(&my_count, 1, MPI_INT, counts.data(), 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 166 | |||
| 167 | ✗ | std::vector<int> displs(size, 0); | |
| 168 | int total_nnz = 0; | ||
| 169 | ✗ | if (rank == 0) { | |
| 170 | ✗ | for (int i = 0; i < size; ++i) { | |
| 171 | ✗ | displs[i] = total_nnz; | |
| 172 | ✗ | total_nnz += counts[i]; | |
| 173 | } | ||
| 174 | } | ||
| 175 | |||
| 176 | ✗ | std::vector<double> all_vals; | |
| 177 | ✗ | std::vector<int> all_rind; | |
| 178 | ✗ | if (rank == 0) { | |
| 179 | ✗ | all_vals.resize(total_nnz); | |
| 180 | ✗ | all_rind.resize(total_nnz); | |
| 181 | } | ||
| 182 | |||
| 183 | ✗ | MPI_Gatherv(my_vals.data(), my_count, MPI_DOUBLE, all_vals.data(), counts.data(), displs.data(), MPI_DOUBLE, 0, | |
| 184 | MPI_COMM_WORLD); | ||
| 185 | ✗ | MPI_Gatherv(my_rind.data(), my_count, MPI_INT, all_rind.data(), counts.data(), displs.data(), MPI_INT, 0, | |
| 186 | MPI_COMM_WORLD); | ||
| 187 | |||
| 188 | ✗ | if (rank == 0) { | |
| 189 | ✗ | AssembleFinalMatrix(c, total_cols, chunk, rem, size, all_col_nnz, displs, all_vals, all_rind); | |
| 190 | } | ||
| 191 | |||
| 192 | ✗ | MPI_Bcast(&c.rows_num, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 193 | ✗ | MPI_Bcast(&c.cols_num, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 194 | ✗ | MPI_Bcast(&c.nnz, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 195 | |||
| 196 | ✗ | if (rank != 0) { | |
| 197 | ✗ | c.col_ptrs.resize(c.cols_num + 1); | |
| 198 | ✗ | c.vals.resize(c.nnz); | |
| 199 | ✗ | c.row_inds.resize(c.nnz); | |
| 200 | } | ||
| 201 | |||
| 202 | ✗ | MPI_Bcast(c.col_ptrs.data(), static_cast<int>(c.col_ptrs.size()), MPI_INT, 0, MPI_COMM_WORLD); | |
| 203 | |||
| 204 | ✗ | if (c.nnz > 0) { | |
| 205 | ✗ | MPI_Bcast(c.vals.data(), static_cast<int>(c.nnz), MPI_DOUBLE, 0, MPI_COMM_WORLD); | |
| 206 | ✗ | MPI_Bcast(c.row_inds.data(), static_cast<int>(c.nnz), MPI_INT, 0, MPI_COMM_WORLD); | |
| 207 | } | ||
| 208 | ✗ | return true; | |
| 209 | ✗ | } | |
| 210 | |||
| 211 | ✗ | bool AgafonovIMatrixCCSALL::PostProcessingImpl() { | |
| 212 | ✗ | return true; | |
| 213 | } | ||
| 214 | |||
| 215 | } // namespace agafonov_i_matrix_ccs_seq | ||
| 216 |