| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "melnik_i_matrix_mult_ribbon/mpi/include/ops_mpi.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <functional> | ||
| 9 | #include <numeric> | ||
| 10 | #include <ranges> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include "melnik_i_matrix_mult_ribbon/common/include/common.hpp" | ||
| 14 | |||
| 15 | namespace melnik_i_matrix_mult_ribbon { | ||
| 16 | |||
| 17 |
1/2✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
|
18 | MelnikIMatrixMultRibbonMPI::MelnikIMatrixMultRibbonMPI(const InType &in) { |
| 18 |
1/2✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
|
18 | MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank_); |
| 19 |
1/2✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
|
18 | MPI_Comm_size(MPI_COMM_WORLD, &proc_num_); |
| 20 | |||
| 21 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 22 | 18 | GetOutput() = std::vector<std::vector<double>>(); | |
| 23 | |||
| 24 |
2/4✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
18 | GetInput() = (proc_rank_ == 0) ? in : InType{}; |
| 25 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
|
18 | if (proc_rank_ == 0) { |
| 26 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | matrix_A_ = std::get<0>(GetInput()); |
| 27 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | matrix_B_ = std::get<1>(GetInput()); |
| 28 | } | ||
| 29 | 18 | } | |
| 30 | |||
| 31 | 18 | bool MelnikIMatrixMultRibbonMPI::ValidationImpl() { | |
| 32 |
3/4✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
✓ Branch 3 taken 9 times.
✗ Branch 4 not taken.
|
18 | bool is_valid = (proc_rank_ == 0) ? ValidateOnRoot() : true; |
| 33 | |||
| 34 | 18 | int valid_flag = is_valid ? 1 : 0; | |
| 35 | 18 | MPI_Bcast(&valid_flag, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 36 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 18 times.
|
18 | if (valid_flag == 0) { |
| 37 | ✗ | rows_a_ = cols_a_ = rows_b_ = cols_b_ = 0; | |
| 38 | ✗ | return false; | |
| 39 | } | ||
| 40 | |||
| 41 | 18 | std::array<int, 4> dims = {static_cast<int>(rows_a_), static_cast<int>(rows_b_), static_cast<int>(cols_a_), | |
| 42 | 18 | static_cast<int>(cols_b_)}; | |
| 43 | 18 | MPI_Bcast(dims.data(), 4, MPI_INT, 0, MPI_COMM_WORLD); | |
| 44 | 18 | rows_a_ = static_cast<std::size_t>(dims[0]); | |
| 45 | 18 | rows_b_ = static_cast<std::size_t>(dims[1]); | |
| 46 | 18 | cols_a_ = static_cast<std::size_t>(dims[2]); | |
| 47 | 18 | cols_b_ = static_cast<std::size_t>(dims[3]); | |
| 48 | |||
| 49 | 18 | return GetOutput().empty(); | |
| 50 | } | ||
| 51 | |||
| 52 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 18 times.
|
18 | bool MelnikIMatrixMultRibbonMPI::PreProcessingImpl() { |
| 53 | GetOutput().clear(); | ||
| 54 | |||
| 55 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
|
18 | if (proc_rank_ == 0) { |
| 56 | const auto &[matrix_a, matrix_b] = GetInput(); | ||
| 57 | 9 | rows_a_ = matrix_a.size(); | |
| 58 | 9 | rows_b_ = matrix_b.size(); | |
| 59 | 9 | cols_a_ = matrix_a.front().size(); | |
| 60 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 9 times.
|
9 | cols_b_ = matrix_b.front().size(); |
| 61 | |||
| 62 | flat_a_.clear(); | ||
| 63 | 9 | flat_a_.reserve(rows_a_ * cols_a_); | |
| 64 |
2/2✓ Branch 0 taken 44 times.
✓ Branch 1 taken 9 times.
|
53 | for (const auto &row : matrix_a) { |
| 65 | 44 | flat_a_.insert(flat_a_.end(), row.begin(), row.end()); | |
| 66 | } | ||
| 67 | |||
| 68 | 9 | flat_b_transposed_.assign(cols_b_ * cols_a_, 0.0); | |
| 69 |
2/2✓ Branch 0 taken 46 times.
✓ Branch 1 taken 9 times.
|
55 | for (std::size_t row_idx = 0; row_idx < cols_a_; ++row_idx) { |
| 70 |
2/2✓ Branch 0 taken 256 times.
✓ Branch 1 taken 46 times.
|
302 | for (std::size_t col_idx = 0; col_idx < cols_b_; ++col_idx) { |
| 71 | 256 | flat_b_transposed_[(col_idx * cols_a_) + row_idx] = matrix_b[row_idx][col_idx]; | |
| 72 | } | ||
| 73 | } | ||
| 74 | } | ||
| 75 | 18 | return true; | |
| 76 | } | ||
| 77 | |||
| 78 | 18 | bool MelnikIMatrixMultRibbonMPI::RunImpl() { | |
| 79 |
3/6✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 18 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 18 times.
✗ Branch 5 not taken.
|
18 | if (rows_a_ == 0 || cols_a_ == 0 || cols_b_ == 0) { |
| 80 | return false; | ||
| 81 | } | ||
| 82 | |||
| 83 | 18 | ShareSizes(); | |
| 84 | 18 | ShareMatrixB(); | |
| 85 | |||
| 86 | 18 | std::vector<double> local_a; | |
| 87 | 18 | std::vector<int> rows_per_rank; | |
| 88 |
1/2✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
|
18 | const std::size_t local_rows = ScatterRows(local_a, rows_per_rank); |
| 89 | |||
| 90 |
3/6✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 16 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
|
18 | std::vector<double> local_c(local_rows * cols_b_, 0.0); |
| 91 |
3/4✓ Branch 0 taken 16 times.
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 16 times.
✗ Branch 3 not taken.
|
18 | if (!local_a.empty() && local_rows > 0) { |
| 92 | 16 | MultiplyLocal(local_a, local_c); | |
| 93 | } | ||
| 94 | |||
| 95 |
1/2✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
|
18 | GatherAndDistribute(rows_per_rank, local_c); |
| 96 | return true; | ||
| 97 | } | ||
| 98 | |||
| 99 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 18 times.
|
18 | bool MelnikIMatrixMultRibbonMPI::PostProcessingImpl() { |
| 100 | auto &output = GetOutput(); | ||
| 101 | output.clear(); | ||
| 102 | 18 | output.resize(rows_a_); | |
| 103 |
2/2✓ Branch 0 taken 88 times.
✓ Branch 1 taken 18 times.
|
106 | for (std::size_t i = 0; i < rows_a_; ++i) { |
| 104 | 88 | output[i].assign(cols_b_, 0.0); | |
| 105 | } | ||
| 106 | |||
| 107 |
1/2✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
|
18 | const std::size_t total = rows_a_ * cols_b_; |
| 108 |
1/2✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
|
18 | if (flat_c_.size() >= total) { |
| 109 |
2/2✓ Branch 0 taken 508 times.
✓ Branch 1 taken 18 times.
|
526 | for (std::size_t idx = 0; idx < total; ++idx) { |
| 110 | 508 | const std::size_t r = idx / cols_b_; | |
| 111 | 508 | const std::size_t c = idx % cols_b_; | |
| 112 | 508 | output[r][c] = flat_c_[idx]; | |
| 113 | } | ||
| 114 | } | ||
| 115 | |||
| 116 | 18 | return true; | |
| 117 | } | ||
| 118 | |||
| 119 |
1/2✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
|
9 | bool MelnikIMatrixMultRibbonMPI::ValidateOnRoot() { |
| 120 | const auto &matrix_a = std::get<0>(GetInput()); | ||
| 121 | const auto &matrix_b = std::get<1>(GetInput()); | ||
| 122 | |||
| 123 |
2/4✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 9 times.
✗ Branch 3 not taken.
|
9 | if (matrix_a.empty() || matrix_b.empty()) { |
| 124 | return false; | ||
| 125 | } | ||
| 126 | |||
| 127 | const std::size_t width_a = matrix_a.front().size(); | ||
| 128 | const std::size_t width_b = matrix_b.front().size(); | ||
| 129 | const std::size_t height_b = matrix_b.size(); | ||
| 130 | |||
| 131 |
2/4✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 9 times.
✗ Branch 3 not taken.
|
9 | if (width_a == 0 || width_b == 0 || width_a != height_b) { |
| 132 | return false; | ||
| 133 | } | ||
| 134 | |||
| 135 |
1/2✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
|
9 | if (!HasUniformRowWidth(matrix_a, width_a)) { |
| 136 | return false; | ||
| 137 | } | ||
| 138 |
1/2✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
|
9 | if (!HasUniformRowWidth(matrix_b, width_b)) { |
| 139 | return false; | ||
| 140 | } | ||
| 141 | |||
| 142 | 9 | rows_a_ = matrix_a.size(); | |
| 143 | 9 | cols_a_ = width_a; | |
| 144 | 9 | rows_b_ = height_b; | |
| 145 | 9 | cols_b_ = width_b; | |
| 146 | 9 | return true; | |
| 147 | } | ||
| 148 | |||
| 149 | ✗ | bool MelnikIMatrixMultRibbonMPI::HasUniformRowWidth(const std::vector<std::vector<double>> &matrix, | |
| 150 | std::size_t expected_width) { | ||
| 151 | return std::ranges::all_of(matrix, | ||
| 152 | ✗ | [expected_width](const std::vector<double> &row) { return row.size() == expected_width; }); | |
| 153 | } | ||
| 154 | |||
| 155 | 18 | void MelnikIMatrixMultRibbonMPI::ShareSizes() { | |
| 156 | 18 | std::array<int, 4> sizes = {static_cast<int>(rows_a_), static_cast<int>(rows_b_), static_cast<int>(cols_a_), | |
| 157 | 18 | static_cast<int>(cols_b_)}; | |
| 158 | 18 | MPI_Bcast(sizes.data(), 4, MPI_INT, 0, MPI_COMM_WORLD); | |
| 159 | 18 | rows_a_ = static_cast<std::size_t>(sizes[0]); | |
| 160 | 18 | rows_b_ = static_cast<std::size_t>(sizes[1]); | |
| 161 | 18 | cols_a_ = static_cast<std::size_t>(sizes[2]); | |
| 162 | 18 | cols_b_ = static_cast<std::size_t>(sizes[3]); | |
| 163 | 18 | } | |
| 164 | |||
| 165 | 18 | void MelnikIMatrixMultRibbonMPI::ShareMatrixB() { | |
| 166 | 18 | const std::size_t total = cols_b_ * cols_a_; | |
| 167 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
|
18 | if (proc_rank_ != 0) { |
| 168 | 9 | flat_b_transposed_.assign(total, 0.0); | |
| 169 | } | ||
| 170 | 18 | MPI_Bcast(flat_b_transposed_.data(), static_cast<int>(total), MPI_DOUBLE, 0, MPI_COMM_WORLD); | |
| 171 | 18 | } | |
| 172 | |||
| 173 | 18 | std::size_t MelnikIMatrixMultRibbonMPI::ScatterRows(std::vector<double> &local_a, std::vector<int> &rows_per_rank) { | |
| 174 | 18 | rows_per_rank.resize(proc_num_); | |
| 175 | 18 | const int base = static_cast<int>(rows_a_ / static_cast<std::size_t>(proc_num_)); | |
| 176 | 18 | const int remainder = static_cast<int>(rows_a_ % static_cast<std::size_t>(proc_num_)); | |
| 177 |
2/2✓ Branch 0 taken 36 times.
✓ Branch 1 taken 18 times.
|
54 | for (int i = 0; i < proc_num_; ++i) { |
| 178 |
2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 4 times.
|
68 | rows_per_rank[i] = base + (i < remainder ? 1 : 0); |
| 179 | } | ||
| 180 | |||
| 181 | 18 | std::vector<int> counts(proc_num_); | |
| 182 |
1/4✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
18 | std::vector<int> displs(proc_num_); |
| 183 |
2/2✓ Branch 0 taken 36 times.
✓ Branch 1 taken 18 times.
|
54 | for (int i = 0, offset = 0; i < proc_num_; ++i) { |
| 184 | 36 | counts[i] = rows_per_rank[i] * static_cast<int>(cols_a_); | |
| 185 | 36 | displs[i] = offset; | |
| 186 | 36 | offset += counts[i]; | |
| 187 | } | ||
| 188 | |||
| 189 |
1/2✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
|
18 | const int local_count = rows_per_rank[proc_rank_] * static_cast<int>(cols_a_); |
| 190 |
3/6✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 9 times.
✓ Branch 4 taken 9 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
|
18 | local_a.assign(static_cast<std::size_t>(local_count), 0.0); |
| 191 | |||
| 192 |
3/4✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
✓ Branch 3 taken 18 times.
✗ Branch 4 not taken.
|
27 | MPI_Scatterv(proc_rank_ == 0 ? flat_a_.data() : nullptr, counts.data(), displs.data(), MPI_DOUBLE, local_a.data(), |
| 193 | local_count, MPI_DOUBLE, 0, MPI_COMM_WORLD); | ||
| 194 | |||
| 195 |
1/2✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
|
36 | return static_cast<std::size_t>(rows_per_rank[proc_rank_]); |
| 196 | } | ||
| 197 | |||
| 198 | 16 | void MelnikIMatrixMultRibbonMPI::MultiplyLocal(const std::vector<double> &local_a, std::vector<double> &local_c) const { | |
| 199 | using DiffT = std::ptrdiff_t; | ||
| 200 |
2/2✓ Branch 0 taken 44 times.
✓ Branch 1 taken 16 times.
|
60 | for (std::size_t row_idx = 0; row_idx < local_c.size() / cols_b_; ++row_idx) { |
| 201 | 44 | const auto a_begin = local_a.begin() + static_cast<DiffT>(row_idx * cols_a_); | |
| 202 | const auto a_end = a_begin + static_cast<DiffT>(cols_a_); | ||
| 203 |
2/2✓ Branch 0 taken 254 times.
✓ Branch 1 taken 44 times.
|
298 | for (std::size_t col_idx = 0; col_idx < cols_b_; ++col_idx) { |
| 204 | 254 | const auto b_begin = flat_b_transposed_.begin() + static_cast<DiffT>(col_idx * cols_a_); | |
| 205 | 254 | local_c[(row_idx * cols_b_) + col_idx] = | |
| 206 | 254 | std::transform_reduce(a_begin, a_end, b_begin, 0.0, std::plus<>(), std::multiplies<>()); | |
| 207 | } | ||
| 208 | } | ||
| 209 | 16 | } | |
| 210 | |||
| 211 | 18 | void MelnikIMatrixMultRibbonMPI::GatherAndDistribute(const std::vector<int> &rows_per_rank, | |
| 212 | const std::vector<double> &local_c) { | ||
| 213 | 18 | std::vector<int> counts(proc_num_); | |
| 214 |
1/4✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
18 | std::vector<int> displs(proc_num_); |
| 215 |
2/2✓ Branch 0 taken 36 times.
✓ Branch 1 taken 18 times.
|
54 | for (int i = 0, offset = 0; i < proc_num_; ++i) { |
| 216 | 36 | counts[i] = rows_per_rank[i] * static_cast<int>(cols_b_); | |
| 217 | 36 | displs[i] = offset; | |
| 218 | 36 | offset += counts[i]; | |
| 219 | } | ||
| 220 | |||
| 221 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
|
18 | if (proc_rank_ == 0) { |
| 222 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | flat_c_.assign(rows_a_ * cols_b_, 0.0); |
| 223 | } else { | ||
| 224 | flat_c_.clear(); | ||
| 225 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 9 times.
|
9 | flat_c_.shrink_to_fit(); |
| 226 |
1/4✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
9 | flat_c_.assign(rows_a_ * cols_b_, 0.0); |
| 227 | } | ||
| 228 | |||
| 229 |
3/4✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
✓ Branch 3 taken 18 times.
✗ Branch 4 not taken.
|
27 | MPI_Gatherv(local_c.data(), counts[proc_rank_], MPI_DOUBLE, proc_rank_ == 0 ? flat_c_.data() : nullptr, counts.data(), |
| 230 | displs.data(), MPI_DOUBLE, 0, MPI_COMM_WORLD); | ||
| 231 | |||
| 232 | // Broadcast assembled result so PostProcessing can run uniformly | ||
| 233 |
1/2✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
|
18 | MPI_Bcast(flat_c_.data(), static_cast<int>(rows_a_ * cols_b_), MPI_DOUBLE, 0, MPI_COMM_WORLD); |
| 234 | 18 | } | |
| 235 | |||
| 236 | } // namespace melnik_i_matrix_mult_ribbon | ||
| 237 |