| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "olesnitskiy_v_striped_matrix_multiplication/mpi/include/ops_mpi.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <cstddef> | ||
| 6 | #include <tuple> | ||
| 7 | #include <utility> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "olesnitskiy_v_striped_matrix_multiplication/common/include/common.hpp" | ||
| 11 | |||
| 12 | namespace olesnitskiy_v_striped_matrix_multiplication { | ||
| 13 | |||
| 14 |
1/2✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.
|
30 | OlesnitskiyVStripedMatrixMultiplicationMPI::OlesnitskiyVStripedMatrixMultiplicationMPI(const InType &in) { |
| 15 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 16 | GetInput() = in; | ||
| 17 | 30 | GetOutput() = {0UL, 0UL, std::vector<double>()}; | |
| 18 |
1/2✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.
|
30 | MPI_Comm_rank(MPI_COMM_WORLD, &rank_); |
| 19 |
1/2✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.
|
30 | MPI_Comm_size(MPI_COMM_WORLD, &world_size_); |
| 20 | 30 | } | |
| 21 | |||
| 22 | 28 | std::vector<int> OlesnitskiyVStripedMatrixMultiplicationMPI::CalculateCounts(int total, int num_parts) { | |
| 23 | 28 | std::vector<int> counts(num_parts, 0); | |
| 24 | 28 | int base = total / num_parts; | |
| 25 | 28 | int remainder = total % num_parts; | |
| 26 | |||
| 27 |
2/2✓ Branch 0 taken 56 times.
✓ Branch 1 taken 28 times.
|
84 | for (int i = 0; i < num_parts; ++i) { |
| 28 |
2/2✓ Branch 0 taken 44 times.
✓ Branch 1 taken 12 times.
|
100 | counts[i] = base + (i < remainder ? 1 : 0); |
| 29 | } | ||
| 30 | |||
| 31 | 28 | return counts; | |
| 32 | } | ||
| 33 | |||
| 34 | 28 | std::vector<int> OlesnitskiyVStripedMatrixMultiplicationMPI::CalculateDisplacements(const std::vector<int> &counts) { | |
| 35 | 28 | std::vector<int> displs(counts.size(), 0); | |
| 36 |
2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 28 times.
|
56 | for (size_t i = 1; i < counts.size(); ++i) { |
| 37 | 28 | displs[i] = displs[i - 1] + counts[i - 1]; | |
| 38 | } | ||
| 39 | 28 | return displs; | |
| 40 | } | ||
| 41 | |||
| 42 | 30 | bool OlesnitskiyVStripedMatrixMultiplicationMPI::ValidationImpl() { | |
| 43 | const auto &[rows_a, cols_a, data_a, rows_b, cols_b, data_b] = GetInput(); | ||
| 44 | 30 | rows_a_ = rows_a; | |
| 45 | 30 | cols_a_ = cols_a; | |
| 46 | 30 | data_a_ = data_a; | |
| 47 | 30 | rows_b_ = rows_b; | |
| 48 | 30 | cols_b_ = cols_b; | |
| 49 | 30 | data_b_ = data_b; | |
| 50 |
4/8✓ Branch 0 taken 30 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 30 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 30 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 30 times.
✗ Branch 7 not taken.
|
30 | if (rows_a == 0 || cols_a == 0 || rows_b == 0 || cols_b == 0) { |
| 51 | return false; | ||
| 52 | } | ||
| 53 | |||
| 54 |
2/4✓ Branch 0 taken 30 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 30 times.
✗ Branch 3 not taken.
|
30 | if (data_a.size() != rows_a * cols_a || data_b.size() != rows_b * cols_b) { |
| 55 | return false; | ||
| 56 | } | ||
| 57 | |||
| 58 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 30 times.
|
30 | if (cols_a != rows_b) { |
| 59 | ✗ | return false; | |
| 60 | } | ||
| 61 | return true; | ||
| 62 | } | ||
| 63 | |||
| 64 | 30 | bool OlesnitskiyVStripedMatrixMultiplicationMPI::PreProcessingImpl() { | |
| 65 | 30 | rows_c_ = rows_a_; | |
| 66 | 30 | cols_c_ = cols_b_; | |
| 67 | 30 | return true; | |
| 68 | } | ||
| 69 | |||
| 70 | 28 | bool OlesnitskiyVStripedMatrixMultiplicationMPI::PrepareScatterData() { | |
| 71 | 28 | auto row_counts = CalculateCounts(static_cast<int>(rows_a_), world_size_); | |
| 72 |
1/2✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
|
28 | auto row_displs = CalculateDisplacements(row_counts); |
| 73 |
1/2✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
|
28 | rows_a_local_ = row_counts[rank_]; |
| 74 | |||
| 75 |
1/4✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
28 | std::vector<int> sendcounts_a(world_size_); |
| 76 |
1/4✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
28 | std::vector<int> displs_a(world_size_); |
| 77 |
2/2✓ Branch 0 taken 56 times.
✓ Branch 1 taken 28 times.
|
84 | for (int i = 0; i < world_size_; ++i) { |
| 78 | 56 | sendcounts_a[i] = row_counts[i] * static_cast<int>(cols_a_); | |
| 79 | 56 | displs_a[i] = row_displs[i] * static_cast<int>(cols_a_); | |
| 80 | } | ||
| 81 | |||
| 82 | 28 | sendcounts_a_ = std::move(sendcounts_a); | |
| 83 | 28 | displs_a_ = std::move(displs_a); | |
| 84 | 28 | row_counts_ = std::move(row_counts); | |
| 85 | 28 | row_displs_ = std::move(row_displs); | |
| 86 | |||
| 87 | 28 | return true; | |
| 88 | } | ||
| 89 | |||
| 90 | 28 | bool OlesnitskiyVStripedMatrixMultiplicationMPI::ScatterData() { | |
| 91 |
1/2✓ Branch 0 taken 28 times.
✗ Branch 1 not taken.
|
28 | if (std::cmp_less(rows_a_, world_size_)) { |
| 92 | return false; | ||
| 93 | } | ||
| 94 | |||
| 95 | 28 | if (!PrepareScatterData()) { | |
| 96 | return false; | ||
| 97 | } | ||
| 98 | |||
| 99 |
2/4✓ Branch 0 taken 28 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 28 times.
✗ Branch 3 not taken.
|
28 | if (rows_a_local_ > 0 && cols_a_ > 0) { |
| 100 | 28 | local_a_.resize(static_cast<size_t>(rows_a_local_) * cols_a_); | |
| 101 | } else { | ||
| 102 | local_a_.clear(); | ||
| 103 | } | ||
| 104 | |||
| 105 |
1/2✓ Branch 0 taken 28 times.
✗ Branch 1 not taken.
|
28 | if (!local_a_.empty()) { |
| 106 | 28 | MPI_Scatterv(data_a_.data(), sendcounts_a_.data(), displs_a_.data(), MPI_DOUBLE, local_a_.data(), | |
| 107 | 28 | sendcounts_a_[rank_], MPI_DOUBLE, 0, MPI_COMM_WORLD); | |
| 108 | } else { | ||
| 109 | ✗ | MPI_Scatterv(data_a_.data(), sendcounts_a_.data(), displs_a_.data(), MPI_DOUBLE, nullptr, 0, MPI_DOUBLE, 0, | |
| 110 | MPI_COMM_WORLD); | ||
| 111 | } | ||
| 112 | |||
| 113 | return true; | ||
| 114 | } | ||
| 115 | |||
| 116 | 28 | bool OlesnitskiyVStripedMatrixMultiplicationMPI::BroadcastMatrixB() { | |
| 117 |
2/4✓ Branch 0 taken 28 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 28 times.
✗ Branch 3 not taken.
|
28 | if (rows_b_ > 0 && cols_b_ > 0) { |
| 118 | 28 | local_b_.resize(rows_b_ * cols_b_); | |
| 119 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 14 times.
|
28 | if (rank_ == 0) { |
| 120 | 14 | local_b_ = data_b_; | |
| 121 | } | ||
| 122 | 28 | MPI_Bcast(local_b_.data(), static_cast<int>(local_b_.size()), MPI_DOUBLE, 0, MPI_COMM_WORLD); | |
| 123 | } else { | ||
| 124 | local_b_.clear(); | ||
| 125 | } | ||
| 126 | 28 | return true; | |
| 127 | } | ||
| 128 | |||
| 129 | 28 | void OlesnitskiyVStripedMatrixMultiplicationMPI::MultiplyRow(size_t row_start, size_t row_end) { | |
| 130 |
2/2✓ Branch 0 taken 78 times.
✓ Branch 1 taken 28 times.
|
106 | for (size_t local_row = row_start; local_row < row_end; ++local_row) { |
| 131 |
2/2✓ Branch 0 taken 559 times.
✓ Branch 1 taken 78 times.
|
637 | for (size_t col = 0; col < cols_c_; ++col) { |
| 132 | double sum = 0.0; | ||
| 133 |
2/2✓ Branch 0 taken 4887 times.
✓ Branch 1 taken 559 times.
|
5446 | for (size_t k = 0; k < cols_a_; ++k) { |
| 134 | 4887 | sum += local_a_[(local_row * cols_a_) + k] * local_b_[(k * cols_b_) + col]; | |
| 135 | } | ||
| 136 | 559 | local_c_[(local_row * cols_c_) + col] = sum; | |
| 137 | } | ||
| 138 | } | ||
| 139 | 28 | } | |
| 140 | |||
| 141 | 28 | bool OlesnitskiyVStripedMatrixMultiplicationMPI::ComputeLocalC() { | |
| 142 |
3/6✓ Branch 0 taken 28 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 28 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 28 times.
✗ Branch 5 not taken.
|
28 | if (rows_a_local_ > 0 && cols_c_ > 0 && cols_a_ > 0) { |
| 143 | 28 | local_c_.resize(static_cast<size_t>(rows_a_local_) * cols_c_, 0.0); | |
| 144 | 28 | MultiplyRow(0, static_cast<size_t>(rows_a_local_)); | |
| 145 | } else { | ||
| 146 | local_c_.clear(); | ||
| 147 | } | ||
| 148 | 28 | return true; | |
| 149 | } | ||
| 150 | |||
| 151 | 28 | bool OlesnitskiyVStripedMatrixMultiplicationMPI::PrepareGatherData() { | |
| 152 | 28 | recvcounts_c_.resize(world_size_); | |
| 153 | 28 | displs_c_.resize(world_size_); | |
| 154 |
2/2✓ Branch 0 taken 56 times.
✓ Branch 1 taken 28 times.
|
84 | for (int i = 0; i < world_size_; ++i) { |
| 155 | 56 | recvcounts_c_[i] = row_counts_[i] * static_cast<int>(cols_c_); | |
| 156 | 56 | displs_c_[i] = row_displs_[i] * static_cast<int>(cols_c_); | |
| 157 | } | ||
| 158 | 28 | return true; | |
| 159 | } | ||
| 160 | |||
| 161 | 28 | bool OlesnitskiyVStripedMatrixMultiplicationMPI::GatherResults() { | |
| 162 | 28 | const size_t result_size = rows_c_ * cols_c_; | |
| 163 | |||
| 164 | 28 | if (!PrepareGatherData()) { | |
| 165 | return false; | ||
| 166 | } | ||
| 167 | |||
| 168 |
3/4✓ Branch 0 taken 14 times.
✓ Branch 1 taken 14 times.
✓ Branch 2 taken 14 times.
✗ Branch 3 not taken.
|
28 | if (rank_ == 0 && result_size > 0) { |
| 169 | 14 | result_c_.resize(result_size, 0.0); | |
| 170 | } | ||
| 171 | |||
| 172 |
1/2✓ Branch 0 taken 28 times.
✗ Branch 1 not taken.
|
28 | if (!local_c_.empty()) { |
| 173 | 28 | MPI_Gatherv(local_c_.data(), recvcounts_c_[rank_], MPI_DOUBLE, | |
| 174 |
3/4✓ Branch 0 taken 14 times.
✓ Branch 1 taken 14 times.
✓ Branch 2 taken 14 times.
✗ Branch 3 not taken.
|
28 | rank_ == 0 && result_size > 0 ? result_c_.data() : nullptr, recvcounts_c_.data(), displs_c_.data(), |
| 175 | MPI_DOUBLE, 0, MPI_COMM_WORLD); | ||
| 176 | } else { | ||
| 177 | ✗ | MPI_Gatherv(nullptr, 0, MPI_DOUBLE, rank_ == 0 && result_size > 0 ? result_c_.data() : nullptr, | |
| 178 | recvcounts_c_.data(), displs_c_.data(), MPI_DOUBLE, 0, MPI_COMM_WORLD); | ||
| 179 | } | ||
| 180 | |||
| 181 | return true; | ||
| 182 | } | ||
| 183 | |||
| 184 | 15 | bool OlesnitskiyVStripedMatrixMultiplicationMPI::BroadcastResultsFromRoot() { | |
| 185 | 15 | int result_rows = static_cast<int>(rows_c_); | |
| 186 | 15 | int result_cols = static_cast<int>(cols_c_); | |
| 187 | 15 | MPI_Bcast(&result_rows, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 188 | 15 | MPI_Bcast(&result_cols, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 189 | |||
| 190 |
3/6✓ Branch 0 taken 15 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 15 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 15 times.
✗ Branch 5 not taken.
|
15 | if (result_rows > 0 && result_cols > 0 && !result_c_.empty()) { |
| 191 | 15 | MPI_Bcast(result_c_.data(), static_cast<int>(result_c_.size()), MPI_DOUBLE, 0, MPI_COMM_WORLD); | |
| 192 | } | ||
| 193 | |||
| 194 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 15 times.
|
15 | if (result_c_.empty()) { |
| 195 | ✗ | GetOutput() = {0UL, 0UL, std::vector<double>()}; | |
| 196 | } else { | ||
| 197 | 30 | GetOutput() = {rows_c_, cols_c_, result_c_}; | |
| 198 | } | ||
| 199 | |||
| 200 | 15 | return true; | |
| 201 | } | ||
| 202 | |||
| 203 | 15 | bool OlesnitskiyVStripedMatrixMultiplicationMPI::ReceiveResultsFromRoot() { | |
| 204 | 15 | int result_rows = 0; | |
| 205 | 15 | int result_cols = 0; | |
| 206 | 15 | MPI_Bcast(&result_rows, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 207 | 15 | MPI_Bcast(&result_cols, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 208 | |||
| 209 |
2/4✓ Branch 0 taken 15 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 15 times.
✗ Branch 3 not taken.
|
15 | if (result_rows > 0 && result_cols > 0) { |
| 210 |
1/2✓ Branch 2 taken 15 times.
✗ Branch 3 not taken.
|
15 | std::vector<double> received_result(static_cast<size_t>(result_rows) * static_cast<size_t>(result_cols)); |
| 211 |
1/2✓ Branch 1 taken 15 times.
✗ Branch 2 not taken.
|
15 | MPI_Bcast(received_result.data(), static_cast<int>(received_result.size()), MPI_DOUBLE, 0, MPI_COMM_WORLD); |
| 212 | 30 | GetOutput() = {static_cast<size_t>(result_rows), static_cast<size_t>(result_cols), std::move(received_result)}; | |
| 213 | 15 | } else { | |
| 214 | ✗ | GetOutput() = {0UL, 0UL, std::vector<double>()}; | |
| 215 | } | ||
| 216 | |||
| 217 | 15 | return true; | |
| 218 | } | ||
| 219 | |||
| 220 | ✗ | bool OlesnitskiyVStripedMatrixMultiplicationMPI::BroadcastResults() { | |
| 221 |
2/4✓ Branch 0 taken 14 times.
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
28 | if (rank_ == 0) { |
| 222 | 14 | return BroadcastResultsFromRoot(); | |
| 223 | } | ||
| 224 | |||
| 225 | 14 | return ReceiveResultsFromRoot(); | |
| 226 | } | ||
| 227 | |||
| 228 | ✗ | bool OlesnitskiyVStripedMatrixMultiplicationMPI::SetOutput() { | |
| 229 | ✗ | if (rank_ == 0) { | |
| 230 | ✗ | if (result_c_.empty()) { | |
| 231 | ✗ | GetOutput() = {0UL, 0UL, std::vector<double>()}; | |
| 232 | } else { | ||
| 233 | ✗ | GetOutput() = {rows_c_, cols_c_, result_c_}; | |
| 234 | } | ||
| 235 | } | ||
| 236 | ✗ | return true; | |
| 237 | } | ||
| 238 | |||
| 239 | 30 | bool OlesnitskiyVStripedMatrixMultiplicationMPI::RunImpl() { | |
| 240 |
1/2✓ Branch 0 taken 30 times.
✗ Branch 1 not taken.
|
30 | if (std::cmp_less(rows_a_, world_size_)) { |
| 241 | 2 | return RunOnSingleProcess(); | |
| 242 | } | ||
| 243 | |||
| 244 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 28 times.
|
28 | if (!ScatterData()) { |
| 245 | return false; | ||
| 246 | } | ||
| 247 | |||
| 248 | 28 | if (!BroadcastMatrixB()) { | |
| 249 | return false; | ||
| 250 | } | ||
| 251 | |||
| 252 | 28 | if (!ComputeLocalC()) { | |
| 253 | return false; | ||
| 254 | } | ||
| 255 | |||
| 256 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 28 times.
|
28 | if (!GatherResults()) { |
| 257 | return false; | ||
| 258 | } | ||
| 259 | |||
| 260 | if (!BroadcastResults()) { | ||
| 261 | return false; | ||
| 262 | } | ||
| 263 | |||
| 264 | 28 | MPI_Barrier(MPI_COMM_WORLD); | |
| 265 | 28 | return true; | |
| 266 | } | ||
| 267 | |||
| 268 | 1 | void OlesnitskiyVStripedMatrixMultiplicationMPI::MultiplySingleProcessMatrix() { | |
| 269 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | for (size_t i = 0; i < rows_a_; ++i) { |
| 270 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | for (size_t j = 0; j < cols_b_; ++j) { |
| 271 | double sum = 0.0; | ||
| 272 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | for (size_t k = 0; k < cols_a_; ++k) { |
| 273 | 1 | sum += data_a_[(i * cols_a_) + k] * data_b_[(k * cols_b_) + j]; | |
| 274 | } | ||
| 275 | 1 | result_c_[(i * cols_c_) + j] = sum; | |
| 276 | } | ||
| 277 | } | ||
| 278 | 1 | } | |
| 279 | |||
| 280 | 1 | bool OlesnitskiyVStripedMatrixMultiplicationMPI::ComputeSingleProcess() { | |
| 281 | 1 | const size_t result_size = rows_c_ * cols_c_; | |
| 282 | |||
| 283 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (result_size > 0) { |
| 284 | 1 | result_c_.resize(result_size, 0.0); | |
| 285 | 1 | MultiplySingleProcessMatrix(); | |
| 286 | } else { | ||
| 287 | result_c_.clear(); | ||
| 288 | } | ||
| 289 | |||
| 290 | 1 | return true; | |
| 291 | } | ||
| 292 | |||
| 293 | 2 | bool OlesnitskiyVStripedMatrixMultiplicationMPI::RunOnSingleProcess() { | |
| 294 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | if (rank_ == 0) { |
| 295 | 1 | if (!ComputeSingleProcess()) { | |
| 296 | return false; | ||
| 297 | } | ||
| 298 | |||
| 299 | 1 | if (!BroadcastResultsFromRoot()) { | |
| 300 | return false; | ||
| 301 | } | ||
| 302 | } else { | ||
| 303 | 1 | if (!ReceiveResultsFromRoot()) { | |
| 304 | return false; | ||
| 305 | } | ||
| 306 | } | ||
| 307 | |||
| 308 | 2 | MPI_Barrier(MPI_COMM_WORLD); | |
| 309 | return true; | ||
| 310 | } | ||
| 311 | |||
| 312 | 30 | bool OlesnitskiyVStripedMatrixMultiplicationMPI::PostProcessingImpl() { | |
| 313 | 30 | return true; | |
| 314 | } | ||
| 315 | |||
| 316 | } // namespace olesnitskiy_v_striped_matrix_multiplication | ||
| 317 |