| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "romanov_m_matrix_ccs/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | #include <tbb/parallel_for.h> | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <array> | ||
| 8 | #include <cmath> | ||
| 9 | #include <cstddef> | ||
| 10 | #include <cstdint> | ||
| 11 | #include <utility> | ||
| 12 | #include <vector> | ||
| 13 | |||
| 14 | #include "romanov_m_matrix_ccs/common/include/common.hpp" | ||
| 15 | |||
| 16 | namespace romanov_m_matrix_ccs { | ||
| 17 | |||
| 18 | namespace { | ||
| 19 | |||
| 20 | 36 | void BroadcastSizeTVector(std::vector<size_t> &data, size_t size, int rank) { | |
| 21 | 36 | std::vector<uint64_t> buffer; | |
| 22 |
2/2✓ Branch 0 taken 18 times.
✓ Branch 1 taken 18 times.
|
36 | if (rank == 0) { |
| 23 | 18 | buffer.assign(data.begin(), data.end()); | |
| 24 | } else { | ||
| 25 |
1/2✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
|
18 | buffer.resize(size); |
| 26 | } | ||
| 27 | |||
| 28 |
1/2✓ Branch 1 taken 36 times.
✗ Branch 2 not taken.
|
36 | MPI_Bcast(static_cast<void *>(buffer.data()), static_cast<int>(size), MPI_UINT64_T, 0, MPI_COMM_WORLD); |
| 29 | |||
| 30 |
2/2✓ Branch 0 taken 18 times.
✓ Branch 1 taken 18 times.
|
36 | if (rank != 0) { |
| 31 | 18 | data.assign(buffer.begin(), buffer.end()); | |
| 32 | } | ||
| 33 | 36 | } | |
| 34 | |||
| 35 | 1 | void SendSizeTVector(const std::vector<size_t> &data) { | |
| 36 |
1/2✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
|
1 | std::vector<uint64_t> buffer(data.begin(), data.end()); |
| 37 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | MPI_Send(static_cast<const void *>(buffer.data()), static_cast<int>(buffer.size()), MPI_UINT64_T, 0, 2, |
| 38 | MPI_COMM_WORLD); | ||
| 39 | 1 | } | |
| 40 | |||
| 41 | 1 | void RecvSizeTVector(std::vector<size_t> &data, int nnz, int proc) { | |
| 42 |
1/2✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
|
1 | std::vector<uint64_t> buffer(static_cast<size_t>(nnz)); |
| 43 |
1/2✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
|
1 | MPI_Recv(static_cast<void *>(buffer.data()), nnz, MPI_UINT64_T, proc, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE); |
| 44 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | data.assign(buffer.begin(), buffer.end()); |
| 45 | 1 | } | |
| 46 | |||
| 47 | } // namespace | ||
| 48 | |||
| 49 |
1/2✓ Branch 2 taken 6 times.
✗ Branch 3 not taken.
|
6 | RomanovMMatrixCCSALL::RomanovMMatrixCCSALL(const InType &in) { |
| 50 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 51 | GetInput() = in; | ||
| 52 | 6 | } | |
| 53 | |||
| 54 | 6 | bool RomanovMMatrixCCSALL::ValidationImpl() { | |
| 55 | 6 | int rank = 0; | |
| 56 | 6 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 57 | 6 | int res = 0; | |
| 58 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
|
6 | if (rank == 0) { |
| 59 | const auto &left = GetInput().first; | ||
| 60 | const auto &right = GetInput().second; | ||
| 61 |
2/4✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 3 times.
|
3 | res = (left.cols_num == right.rows_num && left.cols_num > 0) ? 1 : 0; |
| 62 | } | ||
| 63 | 6 | MPI_Bcast(&res, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 64 | 6 | return res == 1; | |
| 65 | } | ||
| 66 | |||
| 67 | 6 | bool RomanovMMatrixCCSALL::PreProcessingImpl() { | |
| 68 | 6 | return true; | |
| 69 | } | ||
| 70 | |||
| 71 | 5 | void RomanovMMatrixCCSALL::MultiplyColumn(size_t col_index, const MatrixCCS &a, const MatrixCCS &b, | |
| 72 | std::vector<double> &temp_v, std::vector<size_t> &temp_r) { | ||
| 73 | 5 | std::vector<double> accumulator(a.rows_num, 0.0); | |
| 74 |
1/4✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
5 | std::vector<bool> row_mask(a.rows_num, false); |
| 75 | 5 | std::vector<size_t> active_rows; | |
| 76 | |||
| 77 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | for (size_t kb = b.col_ptrs[col_index]; kb < b.col_ptrs[col_index + 1]; ++kb) { |
| 78 | 5 | size_t k = b.row_inds[kb]; | |
| 79 | 5 | double v_b = b.vals[kb]; | |
| 80 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | for (size_t ka = a.col_ptrs[k]; ka < a.col_ptrs[k + 1]; ++ka) { |
| 81 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | size_t r_idx = a.row_inds[ka]; |
| 82 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 2 times.
|
5 | if (!row_mask[r_idx]) { |
| 83 | row_mask[r_idx] = true; | ||
| 84 | active_rows.push_back(r_idx); | ||
| 85 | } | ||
| 86 | 5 | accumulator[r_idx] += a.vals[ka] * v_b; | |
| 87 | } | ||
| 88 | } | ||
| 89 | |||
| 90 | std::ranges::sort(active_rows); | ||
| 91 |
3/4✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 5 times.
|
8 | for (size_t r_idx : active_rows) { |
| 92 |
1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
|
3 | if (std::abs(accumulator[r_idx]) > 1e-12) { |
| 93 | temp_v.push_back(accumulator[r_idx]); | ||
| 94 | temp_r.push_back(r_idx); | ||
| 95 | } | ||
| 96 | } | ||
| 97 | 5 | } | |
| 98 | |||
| 99 | 6 | void RomanovMMatrixCCSALL::SyncMatrixData(int rank, MatrixCCS &a, MatrixCCS &b) { | |
| 100 | 6 | std::array<uint64_t, 3> dims{}; | |
| 101 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
|
6 | if (rank == 0) { |
| 102 | 3 | dims[0] = static_cast<uint64_t>(a.rows_num); | |
| 103 | 3 | dims[1] = static_cast<uint64_t>(a.cols_num); | |
| 104 | 3 | dims[2] = static_cast<uint64_t>(b.cols_num); | |
| 105 | } | ||
| 106 | 6 | MPI_Bcast(static_cast<void *>(dims.data()), 3, MPI_UINT64_T, 0, MPI_COMM_WORLD); | |
| 107 | |||
| 108 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
|
6 | if (rank != 0) { |
| 109 | 3 | a.rows_num = static_cast<size_t>(dims[0]); | |
| 110 | 3 | a.cols_num = static_cast<size_t>(dims[1]); | |
| 111 | 3 | b.rows_num = static_cast<size_t>(dims[1]); | |
| 112 | 3 | b.cols_num = static_cast<size_t>(dims[2]); | |
| 113 | 3 | a.col_ptrs.resize(a.cols_num + 1); | |
| 114 | 3 | b.col_ptrs.resize(b.cols_num + 1); | |
| 115 | } | ||
| 116 | |||
| 117 | 6 | BroadcastSizeTVector(a.col_ptrs, a.cols_num + 1, rank); | |
| 118 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
|
6 | size_t a_nnz = (rank == 0) ? a.vals.size() : a.col_ptrs[a.cols_num]; |
| 119 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
|
6 | if (rank != 0) { |
| 120 | 3 | a.row_inds.resize(a_nnz); | |
| 121 | 3 | a.vals.resize(a_nnz); | |
| 122 | } | ||
| 123 | 6 | BroadcastSizeTVector(a.row_inds, a_nnz, rank); | |
| 124 |
1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
|
6 | if (a_nnz > 0) { |
| 125 | 6 | MPI_Bcast(static_cast<void *>(a.vals.data()), static_cast<int>(a_nnz), MPI_DOUBLE, 0, MPI_COMM_WORLD); | |
| 126 | } | ||
| 127 | |||
| 128 | 6 | BroadcastSizeTVector(b.col_ptrs, b.cols_num + 1, rank); | |
| 129 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
|
6 | size_t b_nnz = (rank == 0) ? b.vals.size() : b.col_ptrs[b.cols_num]; |
| 130 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
|
6 | if (rank != 0) { |
| 131 | 3 | b.row_inds.resize(b_nnz); | |
| 132 | 3 | b.vals.resize(b_nnz); | |
| 133 | } | ||
| 134 | 6 | BroadcastSizeTVector(b.row_inds, b_nnz, rank); | |
| 135 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 2 times.
|
6 | if (b_nnz > 0) { |
| 136 | 4 | MPI_Bcast(static_cast<void *>(b.vals.data()), static_cast<int>(b_nnz), MPI_DOUBLE, 0, MPI_COMM_WORLD); | |
| 137 | } | ||
| 138 | 6 | } | |
| 139 | |||
| 140 | 3 | void RomanovMMatrixCCSALL::MasterCollect(int size, int chunk, int remainder, std::vector<std::vector<double>> &all_v, | |
| 141 | std::vector<std::vector<size_t>> &all_r) { | ||
| 142 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
|
6 | for (int proc_idx = 1; proc_idx < size; ++proc_idx) { |
| 143 |
1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
|
3 | int p_start = (proc_idx * chunk) + std::min(proc_idx, remainder); |
| 144 |
1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
|
3 | int p_cols = chunk + ((proc_idx < remainder) ? 1 : 0); |
| 145 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 3 times.
|
5 | for (int i = 0; i < p_cols; ++i) { |
| 146 | 2 | int nnz = 0; | |
| 147 | 2 | MPI_Recv(&nnz, 1, MPI_INT, proc_idx, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); | |
| 148 | 2 | auto target_idx = static_cast<size_t>(p_start) + i; | |
| 149 | 2 | all_v[target_idx].resize(static_cast<size_t>(nnz)); | |
| 150 | 2 | all_r[target_idx].resize(static_cast<size_t>(nnz)); | |
| 151 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | if (nnz > 0) { |
| 152 | 1 | MPI_Recv(static_cast<void *>(all_v[target_idx].data()), nnz, MPI_DOUBLE, proc_idx, 1, MPI_COMM_WORLD, | |
| 153 | MPI_STATUS_IGNORE); | ||
| 154 | 1 | RecvSizeTVector(all_r[target_idx], nnz, proc_idx); | |
| 155 | } | ||
| 156 | } | ||
| 157 | } | ||
| 158 | 3 | } | |
| 159 | |||
| 160 | 3 | void RomanovMMatrixCCSALL::WorkerSend(int local_count, std::vector<std::vector<double>> &local_v, | |
| 161 | std::vector<std::vector<size_t>> &local_r) { | ||
| 162 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 3 times.
|
5 | for (int i = 0; i < local_count; ++i) { |
| 163 | 2 | auto idx = static_cast<size_t>(i); | |
| 164 | 2 | int nnz = static_cast<int>(local_v[idx].size()); | |
| 165 | 2 | MPI_Send(&nnz, 1, MPI_INT, 0, 0, MPI_COMM_WORLD); | |
| 166 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | if (nnz > 0) { |
| 167 | 1 | MPI_Send(static_cast<const void *>(local_v[idx].data()), nnz, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD); | |
| 168 | 1 | SendSizeTVector(local_r[idx]); | |
| 169 | } | ||
| 170 | } | ||
| 171 | 3 | } | |
| 172 | |||
| 173 | 6 | bool RomanovMMatrixCCSALL::RunImpl() { | |
| 174 | 6 | int rank = 0; | |
| 175 | 6 | int size = 0; | |
| 176 | 6 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 177 | 6 | MPI_Comm_size(MPI_COMM_WORLD, &size); | |
| 178 | |||
| 179 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
|
6 | MatrixCCS a_mat = (rank == 0) ? GetInput().first : MatrixCCS(); |
| 180 |
3/4✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
✓ Branch 3 taken 3 times.
✗ Branch 4 not taken.
|
6 | MatrixCCS b_mat = (rank == 0) ? GetInput().second : MatrixCCS(); |
| 181 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | SyncMatrixData(rank, a_mat, b_mat); |
| 182 | |||
| 183 | 6 | int total_cols = static_cast<int>(b_mat.cols_num); | |
| 184 | 6 | int chunk = total_cols / size; | |
| 185 | 6 | int remainder = total_cols % size; | |
| 186 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 1 times.
|
6 | int start_col = (rank * chunk) + std::min(rank, remainder); |
| 187 |
3/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 1 times.
✓ Branch 3 taken 6 times.
✗ Branch 4 not taken.
|
11 | int end_col = start_col + (chunk + ((rank < remainder) ? 1 : 0)); |
| 188 | int local_count = std::max(0, end_col - start_col); | ||
| 189 | |||
| 190 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | std::vector<std::vector<double>> local_v(static_cast<size_t>(local_count)); |
| 191 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | std::vector<std::vector<size_t>> local_r(static_cast<size_t>(local_count)); |
| 192 | |||
| 193 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 1 times.
|
6 | if (local_count > 0) { |
| 194 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | tbb::parallel_for(0, local_count, [&](int i) { |
| 195 | 5 | MultiplyColumn(static_cast<size_t>(start_col) + i, a_mat, b_mat, local_v[static_cast<size_t>(i)], | |
| 196 | 5 | local_r[static_cast<size_t>(i)]); | |
| 197 | 5 | }); | |
| 198 | } | ||
| 199 | |||
| 200 | auto &c_mat = GetOutput(); | ||
| 201 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
|
6 | if (rank == 0) { |
| 202 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | std::vector<std::vector<double>> all_v(static_cast<size_t>(total_cols)); |
| 203 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | std::vector<std::vector<size_t>> all_r(static_cast<size_t>(total_cols)); |
| 204 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
|
6 | for (int i = 0; i < local_count; ++i) { |
| 205 | 3 | auto global_idx = static_cast<size_t>(start_col) + i; | |
| 206 | auto l_idx = static_cast<size_t>(i); | ||
| 207 | all_v[global_idx] = std::move(local_v[l_idx]); | ||
| 208 | all_r[global_idx] = std::move(local_r[l_idx]); | ||
| 209 | } | ||
| 210 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | MasterCollect(size, chunk, remainder, all_v, all_r); |
| 211 | |||
| 212 | 3 | c_mat.rows_num = a_mat.rows_num; | |
| 213 | 3 | c_mat.cols_num = static_cast<size_t>(total_cols); | |
| 214 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | c_mat.col_ptrs.assign(c_mat.cols_num + 1, 0); |
| 215 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 3 times.
|
8 | for (size_t j = 0; j < c_mat.cols_num; ++j) { |
| 216 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | c_mat.col_ptrs[j + 1] = c_mat.col_ptrs[j] + all_v[j].size(); |
| 217 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | c_mat.vals.insert(c_mat.vals.end(), all_v[j].begin(), all_v[j].end()); |
| 218 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | c_mat.row_inds.insert(c_mat.row_inds.end(), all_r[j].begin(), all_r[j].end()); |
| 219 | } | ||
| 220 | 3 | c_mat.nnz = c_mat.vals.size(); | |
| 221 | 3 | } else { | |
| 222 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | WorkerSend(local_count, local_v, local_r); |
| 223 | } | ||
| 224 | |||
| 225 | 6 | std::array<uint64_t, 3> final_dims{}; | |
| 226 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
|
6 | if (rank == 0) { |
| 227 | 3 | final_dims[0] = static_cast<uint64_t>(c_mat.rows_num); | |
| 228 | 3 | final_dims[1] = static_cast<uint64_t>(c_mat.cols_num); | |
| 229 | 3 | final_dims[2] = static_cast<uint64_t>(c_mat.nnz); | |
| 230 | } | ||
| 231 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | MPI_Bcast(static_cast<void *>(final_dims.data()), 3, MPI_UINT64_T, 0, MPI_COMM_WORLD); |
| 232 | |||
| 233 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
|
6 | if (rank != 0) { |
| 234 | 3 | c_mat.rows_num = static_cast<size_t>(final_dims[0]); | |
| 235 | 3 | c_mat.cols_num = static_cast<size_t>(final_dims[1]); | |
| 236 | 3 | c_mat.nnz = static_cast<size_t>(final_dims[2]); | |
| 237 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | c_mat.col_ptrs.resize(c_mat.cols_num + 1); |
| 238 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | c_mat.row_inds.resize(c_mat.nnz); |
| 239 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | c_mat.vals.resize(c_mat.nnz); |
| 240 | } | ||
| 241 | |||
| 242 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | BroadcastSizeTVector(c_mat.col_ptrs, c_mat.cols_num + 1, rank); |
| 243 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | BroadcastSizeTVector(c_mat.row_inds, c_mat.nnz, rank); |
| 244 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 2 times.
|
6 | if (c_mat.nnz > 0) { |
| 245 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | MPI_Bcast(static_cast<void *>(c_mat.vals.data()), static_cast<int>(c_mat.nnz), MPI_DOUBLE, 0, MPI_COMM_WORLD); |
| 246 | } | ||
| 247 | |||
| 248 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | MPI_Barrier(MPI_COMM_WORLD); |
| 249 | 6 | return true; | |
| 250 | 6 | } | |
| 251 | |||
| 252 | 6 | bool RomanovMMatrixCCSALL::PostProcessingImpl() { | |
| 253 | 6 | return true; | |
| 254 | } | ||
| 255 | |||
| 256 | } // namespace romanov_m_matrix_ccs | ||
| 257 |