| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | | | #include "Terekhov_D_Horizontal_matrix_vector/mpi/include/ops_mpi.hpp" |
| 2 | | | |
| 3 | | | #include <mpi.h> |
| 4 | | | |
| 5 | | | #include <array> |
| 6 | | | #include <cmath> |
| 7 | | | #include <cstddef> |
| 8 | | | #include <vector> |
| 9 | | | |
| 10 | | | #include "Terekhov_D_Horizontal_matrix_vector/common/include/common.hpp" |
| 11 | | | |
| 12 | | | namespace terekhov_d_horizontal_matrix_vector { |
| 13 | | | |
| 14 | | 128 | TerekhovDHorizontalMatrixVectorMPI::TerekhovDHorizontalMatrixVectorMPI(const InType &in) { |
| 15 | | | SetTypeOfTask(GetStaticTypeOfTask()); |
| 16 | | 128 | GetOutput() = std::vector<double>(); |
| 17 | | | |
| 18 | | 128 | int rank = 0; |
| 19 | 1/2 ✓ Branch 1 taken 128 times. ✗ Branch 2 not taken. | 128 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); |
| 20 | | | |
| 21 | 2/2 ✓ Branch 0 taken 64 times. ✓ Branch 1 taken 64 times. | 128 | if (rank == 0) { |
| 22 | 1/2 ✓ Branch 1 taken 64 times. ✗ Branch 2 not taken. | 64 | matrix_A_ = in.first; |
| 23 | 1/2 ✓ Branch 1 taken 64 times. ✗ Branch 2 not taken. | 64 | vector_B_ = in.second; |
| 24 | | | } |
| 25 | | 128 | } |
| 26 | | | |
| 27 | | 128 | bool TerekhovDHorizontalMatrixVectorMPI::ValidationImpl() { |
| 28 | | 128 | int mpi_initialized = 0; |
| 29 | | 128 | MPI_Initialized(&mpi_initialized); |
| 30 | | | |
| 31 | 1/2 ✓ Branch 0 taken 128 times. ✗ Branch 1 not taken. | 128 | if (mpi_initialized == 0) { |
| 32 | | | return false; |
| 33 | | | } |
| 34 | | | |
| 35 | | 128 | int size = 1; |
| 36 | | 128 | MPI_Comm_size(MPI_COMM_WORLD, &size); |
| 37 | | 128 | return size >= 1; |
| 38 | | | } |
| 39 | | | |
| 40 | | 128 | bool TerekhovDHorizontalMatrixVectorMPI::PreProcessingImpl() { |
| 41 | | 128 | int rank = 0; |
| 42 | | 128 | int size = 1; |
| 43 | | 128 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); |
| 44 | | 128 | MPI_Comm_size(MPI_COMM_WORLD, &size); |
| 45 | | | |
| 46 | | 128 | rank_ = rank; |
| 47 | | 128 | world_size_ = size; |
| 48 | | 128 | GetOutput() = std::vector<double>(); |
| 49 | | | |
| 50 | | 128 | return true; |
| 51 | | | } |
| 52 | | | |
| 53 | | 128 | bool TerekhovDHorizontalMatrixVectorMPI::RunImpl() { |
| 54 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 128 times. | 128 | if (world_size_ == 1) { |
| 55 | | ✗ | return RunSequential(); |
| 56 | | | } |
| 57 | | | |
| 58 | | 128 | int rows_a = 0; |
| 59 | | 128 | int cols_a = 0; |
| 60 | | 128 | int vector_size = 0; |
| 61 | | | |
| 62 | 1/2 ✓ Branch 1 taken 128 times. ✗ Branch 2 not taken. | 128 | if (!PrepareAndValidateSizes(rows_a, cols_a, vector_size)) { |
| 63 | | | return true; |
| 64 | | | } |
| 65 | | | |
| 66 | | 128 | std::vector<double> vector_flat(static_cast<size_t>(vector_size)); |
| 67 | 1/2 ✓ Branch 1 taken 128 times. ✗ Branch 2 not taken. | 128 | PrepareAndBroadcastVector(vector_flat, vector_size); |
| 68 | | | |
| 69 | | 128 | std::vector<int> my_row_indices; |
| 70 | | 128 | std::vector<double> local_a_flat; |
| 71 | | 128 | int local_rows = 0; |
| 72 | 1/2 ✓ Branch 1 taken 128 times. ✗ Branch 2 not taken. | 128 | DistributeMatrixAData(my_row_indices, local_a_flat, local_rows, rows_a, cols_a); |
| 73 | | | |
| 74 | 1/4 ✓ Branch 1 taken 128 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken. | 128 | std::vector<double> local_result_flat(static_cast<size_t>(local_rows), 0.0); |
| 75 | | 128 | ComputeLocalMultiplication(local_a_flat, vector_flat, local_result_flat, local_rows, cols_a); |
| 76 | | | |
| 77 | | 128 | std::vector<double> final_result_flat; |
| 78 | 1/2 ✓ Branch 1 taken 128 times. ✗ Branch 2 not taken. | 128 | GatherResults(final_result_flat, my_row_indices, local_result_flat, local_rows, rows_a); |
| 79 | | | |
| 80 | 1/2 ✓ Branch 1 taken 128 times. ✗ Branch 2 not taken. | 128 | GetOutput() = final_result_flat; |
| 81 | | | |
| 82 | | | return true; |
| 83 | | | } |
| 84 | | | |
| 85 | | ✗ | bool TerekhovDHorizontalMatrixVectorMPI::RunSequential() { |
| 86 | | ✗ | if (rank_ != 0) { |
| 87 | | | return true; |
| 88 | | | } |
| 89 | | | |
| 90 | | | const auto &matrix_a = matrix_A_; |
| 91 | | | const auto &vector_b = vector_B_; |
| 92 | | | |
| 93 | | ✗ | if (matrix_a.empty() \|\| vector_b.empty()) { |
| 94 | | ✗ | GetOutput() = std::vector<double>(); |
| 95 | | ✗ | return true; |
| 96 | | | } |
| 97 | | | |
| 98 | | | size_t rows_a = matrix_a.size(); |
| 99 | | | size_t cols_a = matrix_a[0].size(); |
| 100 | | | |
| 101 | | | auto &output = GetOutput(); |
| 102 | | ✗ | output = std::vector<double>(rows_a, 0.0); |
| 103 | | | |
| 104 | | ✗ | for (size_t i = 0; i < rows_a; ++i) { |
| 105 | | ✗ | for (size_t j = 0; j < cols_a; ++j) { |
| 106 | | ✗ | output[i] += matrix_a[i][j] * vector_b[j]; |
| 107 | | | } |
| 108 | | | } |
| 109 | | | |
| 110 | | | return true; |
| 111 | | | } |
| 112 | | | |
| 113 | | 128 | bool TerekhovDHorizontalMatrixVectorMPI::PrepareAndValidateSizes(int &rows_a, int &cols_a, int &vector_size) { |
| 114 | 2/2 ✓ Branch 0 taken 64 times. ✓ Branch 1 taken 64 times. | 128 | if (rank_ == 0) { |
| 115 | | 64 | rows_a = static_cast<int>(matrix_A_.size()); |
| 116 | 1/2 ✓ Branch 0 taken 64 times. ✗ Branch 1 not taken. | 64 | cols_a = rows_a > 0 ? static_cast<int>(matrix_A_[0].size()) : 0; |
| 117 | | 64 | vector_size = static_cast<int>(vector_B_.size()); |
| 118 | | | } |
| 119 | | | |
| 120 | | 128 | std::array<int, 3> sizes = {rows_a, cols_a, vector_size}; |
| 121 | | 128 | MPI_Bcast(sizes.data(), 3, MPI_INT, 0, MPI_COMM_WORLD); |
| 122 | | | |
| 123 | | 128 | rows_a = sizes[0]; |
| 124 | | 128 | cols_a = sizes[1]; |
| 125 | | 128 | vector_size = sizes[2]; |
| 126 | | | |
| 127 | 3/6 ✓ Branch 0 taken 128 times. ✗ Branch 1 not taken. ✓ Branch 2 taken 128 times. ✗ Branch 3 not taken. ✗ Branch 4 not taken. ✓ Branch 5 taken 128 times. | 128 | if (cols_a != vector_size \|\| rows_a == 0 \|\| cols_a == 0 \|\| vector_size == 0) { |
| 128 | | ✗ | GetOutput() = std::vector<double>(); |
| 129 | | ✗ | return false; |
| 130 | | | } |
| 131 | | | |
| 132 | | | return true; |
| 133 | | | } |
| 134 | | | |
| 135 | | 128 | void TerekhovDHorizontalMatrixVectorMPI::PrepareAndBroadcastVector(std::vector<double> &vector_flat, int vector_size) { |
| 136 | 2/2 ✓ Branch 0 taken 64 times. ✓ Branch 1 taken 64 times. | 128 | if (rank_ == 0) { |
| 137 | 2/2 ✓ Branch 0 taken 128 times. ✓ Branch 1 taken 64 times. | 192 | for (int i = 0; i < vector_size; ++i) { |
| 138 | | 128 | vector_flat[static_cast<size_t>(i)] = vector_B_[i]; |
| 139 | | | } |
| 140 | | | } |
| 141 | | | |
| 142 | | 128 | MPI_Bcast(vector_flat.data(), vector_size, MPI_DOUBLE, 0, MPI_COMM_WORLD); |
| 143 | | 128 | } |
| 144 | | | |
| 145 | | 64 | void TerekhovDHorizontalMatrixVectorMPI::FillLocalAFlat(const std::vector<int> &my_row_indices, |
| 146 | | | std::vector<double> &local_a_flat, int cols_a) { |
| 147 | 2/2 ✓ Branch 0 taken 80 times. ✓ Branch 1 taken 64 times. | 144 | for (size_t idx = 0; idx < my_row_indices.size(); ++idx) { |
| 148 | | 80 | int global_row = my_row_indices[idx]; |
| 149 | 2/2 ✓ Branch 0 taken 162 times. ✓ Branch 1 taken 80 times. | 242 | for (int j = 0; j < cols_a; ++j) { |
| 150 | | 162 | local_a_flat[(idx * static_cast<size_t>(cols_a)) + static_cast<size_t>(j)] = matrix_A_[global_row][j]; |
| 151 | | | } |
| 152 | | | } |
| 153 | | 64 | } |
| 154 | | | |
| 155 | | 64 | void TerekhovDHorizontalMatrixVectorMPI::SendRowsToProcess(int dest, const std::vector<int> &dest_rows, int cols_a) { |
| 156 | | 64 | int dest_row_count = static_cast<int>(dest_rows.size()); |
| 157 | | 64 | MPI_Send(&dest_row_count, 1, MPI_INT, dest, 0, MPI_COMM_WORLD); |
| 158 | | | |
| 159 | 2/2 ✓ Branch 0 taken 50 times. ✓ Branch 1 taken 14 times. | 64 | if (dest_row_count > 0) { |
| 160 | | 50 | std::vector<int> rows_copy = dest_rows; |
| 161 | 1/2 ✓ Branch 1 taken 50 times. ✗ Branch 2 not taken. | 50 | MPI_Send(rows_copy.data(), dest_row_count, MPI_INT, dest, 1, MPI_COMM_WORLD); |
| 162 | | | |
| 163 | 1/4 ✓ Branch 1 taken 50 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken. | 50 | std::vector<double> buffer(static_cast<size_t>(dest_row_count) * static_cast<size_t>(cols_a)); |
| 164 | 2/2 ✓ Branch 0 taken 50 times. ✓ Branch 1 taken 50 times. | 100 | for (int idx = 0; idx < dest_row_count; ++idx) { |
| 165 | | 50 | int global_row = dest_rows[idx]; |
| 166 | 2/2 ✓ Branch 0 taken 106 times. ✓ Branch 1 taken 50 times. | 156 | for (int j = 0; j < cols_a; ++j) { |
| 167 | | 106 | buffer[(static_cast<size_t>(idx) * static_cast<size_t>(cols_a)) + static_cast<size_t>(j)] = |
| 168 | | 106 | matrix_A_[global_row][j]; |
| 169 | | | } |
| 170 | | | } |
| 171 | | | |
| 172 | 1/2 ✓ Branch 1 taken 50 times. ✗ Branch 2 not taken. | 50 | MPI_Send(buffer.data(), dest_row_count * cols_a, MPI_DOUBLE, dest, 2, MPI_COMM_WORLD); |
| 173 | | | } |
| 174 | | 64 | } |
| 175 | | | |
| 176 | | 256 | std::vector<int> TerekhovDHorizontalMatrixVectorMPI::GetRowsForProcess(int process_rank, int rows_a) const { |
| 177 | | 256 | std::vector<int> rows; |
| 178 | 2/2 ✓ Branch 0 taken 520 times. ✓ Branch 1 taken 256 times. | 776 | for (int i = 0; i < rows_a; ++i) { |
| 179 | 2/2 ✓ Branch 0 taken 230 times. ✓ Branch 1 taken 290 times. | 520 | if (i % world_size_ == process_rank) { |
| 180 | | | rows.push_back(i); |
| 181 | | | } |
| 182 | | | } |
| 183 | | 256 | return rows; |
| 184 | | | } |
| 185 | | | |
| 186 | | 64 | void TerekhovDHorizontalMatrixVectorMPI::ReceiveRowsFromRoot(int &local_rows, std::vector<int> &my_row_indices, |
| 187 | | | std::vector<double> &local_a_flat, int cols_a) { |
| 188 | | 64 | MPI_Recv(&local_rows, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); |
| 189 | | | |
| 190 | 2/2 ✓ Branch 0 taken 50 times. ✓ Branch 1 taken 14 times. | 64 | if (local_rows > 0) { |
| 191 | | 50 | my_row_indices.resize(static_cast<size_t>(local_rows)); |
| 192 | | 50 | local_a_flat.resize(static_cast<size_t>(local_rows) * static_cast<size_t>(cols_a)); |
| 193 | | | |
| 194 | | 50 | MPI_Recv(my_row_indices.data(), local_rows, MPI_INT, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); |
| 195 | | 50 | MPI_Recv(local_a_flat.data(), local_rows * cols_a, MPI_DOUBLE, 0, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE); |
| 196 | | | } |
| 197 | | 64 | } |
| 198 | | | |
| 199 | | 128 | void TerekhovDHorizontalMatrixVectorMPI::DistributeMatrixAData(std::vector<int> &my_row_indices, |
| 200 | | | std::vector<double> &local_a_flat, int &local_rows, |
| 201 | | | int rows_a, int cols_a) { |
| 202 | 2/2 ✓ Branch 0 taken 98 times. ✓ Branch 1 taken 30 times. | 128 | local_rows = (rows_a / world_size_) + (rank_ < (rows_a % world_size_) ? 1 : 0); |
| 203 | | | |
| 204 | 1/2 ✗ Branch 1 not taken. ✓ Branch 2 taken 128 times. | 256 | my_row_indices = GetRowsForProcess(rank_, rows_a); |
| 205 | | | |
| 206 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 128 times. | 128 | if (my_row_indices.size() != static_cast<size_t>(local_rows)) { |
| 207 | | ✗ | local_rows = static_cast<int>(my_row_indices.size()); |
| 208 | | | } |
| 209 | | | |
| 210 | | 128 | local_a_flat.resize(static_cast<size_t>(local_rows) * static_cast<size_t>(cols_a)); |
| 211 | | | |
| 212 | 2/2 ✓ Branch 0 taken 64 times. ✓ Branch 1 taken 64 times. | 128 | if (rank_ == 0) { |
| 213 | | 64 | FillLocalAFlat(my_row_indices, local_a_flat, cols_a); |
| 214 | | | |
| 215 | 2/2 ✓ Branch 0 taken 64 times. ✓ Branch 1 taken 64 times. | 128 | for (int dest = 1; dest < world_size_; ++dest) { |
| 216 | | 64 | std::vector<int> dest_rows = GetRowsForProcess(dest, rows_a); |
| 217 | 1/2 ✓ Branch 1 taken 64 times. ✗ Branch 2 not taken. | 64 | SendRowsToProcess(dest, dest_rows, cols_a); |
| 218 | | | } |
| 219 | | | } else { |
| 220 | | 64 | ReceiveRowsFromRoot(local_rows, my_row_indices, local_a_flat, cols_a); |
| 221 | | | } |
| 222 | | 128 | } |
| 223 | | | |
| 224 | | ✗ | void TerekhovDHorizontalMatrixVectorMPI::CollectLocalResults(const std::vector<int> &my_row_indices, |
| 225 | | | const std::vector<double> &local_result_flat, |
| 226 | | | std::vector<double> &final_result_flat) { |
| 227 | 2/4 ✓ Branch 0 taken 80 times. ✓ Branch 1 taken 64 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. | 144 | for (size_t idx = 0; idx < my_row_indices.size(); ++idx) { |
| 228 | | 80 | int global_row = my_row_indices[idx]; |
| 229 | | 80 | final_result_flat[static_cast<size_t>(global_row)] = local_result_flat[idx]; |
| 230 | | | } |
| 231 | | ✗ | } |
| 232 | | | |
| 233 | | 64 | void TerekhovDHorizontalMatrixVectorMPI::ReceiveResultsFromProcess(int src, |
| 234 | | | std::vector<double> &final_result_flat) const { |
| 235 | | 64 | int rows_a = static_cast<int>(final_result_flat.size()); |
| 236 | | 64 | std::vector<int> src_rows = GetRowsForProcess(src, rows_a); |
| 237 | | 64 | int src_row_count = static_cast<int>(src_rows.size()); |
| 238 | | | |
| 239 | 2/2 ✓ Branch 0 taken 50 times. ✓ Branch 1 taken 14 times. | 64 | if (src_row_count > 0) { |
| 240 | 2/6 ✓ Branch 1 taken 50 times. ✗ Branch 2 not taken. ✓ Branch 4 taken 50 times. ✗ Branch 5 not taken. ✗ Branch 6 not taken. ✗ Branch 7 not taken. | 50 | std::vector<double> buffer(static_cast<size_t>(src_row_count)); |
| 241 | 1/2 ✓ Branch 1 taken 50 times. ✗ Branch 2 not taken. | 50 | MPI_Recv(buffer.data(), src_row_count, MPI_DOUBLE, src, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE); |
| 242 | | | |
| 243 | 2/2 ✓ Branch 0 taken 50 times. ✓ Branch 1 taken 50 times. | 100 | for (int idx = 0; idx < src_row_count; ++idx) { |
| 244 | | 50 | int global_row = src_rows[idx]; |
| 245 | | 50 | final_result_flat[static_cast<size_t>(global_row)] = buffer[static_cast<size_t>(idx)]; |
| 246 | | | } |
| 247 | | | } |
| 248 | | 64 | } |
| 249 | | | |
| 250 | | 64 | void TerekhovDHorizontalMatrixVectorMPI::SendLocalResults(const std::vector<double> &local_result_flat, |
| 251 | | | int local_rows) { |
| 252 | 2/2 ✓ Branch 0 taken 50 times. ✓ Branch 1 taken 14 times. | 64 | if (local_rows > 0) { |
| 253 | | 50 | std::vector<double> data_copy = local_result_flat; |
| 254 | 1/2 ✓ Branch 1 taken 50 times. ✗ Branch 2 not taken. | 50 | MPI_Send(data_copy.data(), local_rows, MPI_DOUBLE, 0, 3, MPI_COMM_WORLD); |
| 255 | | | } |
| 256 | | 64 | } |
| 257 | | | |
| 258 | | 128 | void TerekhovDHorizontalMatrixVectorMPI::GatherResults(std::vector<double> &final_result_flat, |
| 259 | | | const std::vector<int> &my_row_indices, |
| 260 | | | const std::vector<double> &local_result_flat, int local_rows, |
| 261 | | | int rows_a) const { |
| 262 | 2/2 ✓ Branch 0 taken 64 times. ✓ Branch 1 taken 64 times. | 128 | if (rank_ == 0) { |
| 263 | | 64 | final_result_flat.resize(static_cast<size_t>(rows_a), 0.0); |
| 264 | | | |
| 265 | | | CollectLocalResults(my_row_indices, local_result_flat, final_result_flat); |
| 266 | | | |
| 267 | 2/2 ✓ Branch 0 taken 64 times. ✓ Branch 1 taken 64 times. | 128 | for (int src = 1; src < world_size_; ++src) { |
| 268 | | 64 | ReceiveResultsFromProcess(src, final_result_flat); |
| 269 | | | } |
| 270 | | | |
| 271 | | 64 | MPI_Bcast(final_result_flat.data(), rows_a, MPI_DOUBLE, 0, MPI_COMM_WORLD); |
| 272 | | | } else { |
| 273 | | 64 | SendLocalResults(local_result_flat, local_rows); |
| 274 | | | |
| 275 | | 64 | final_result_flat.resize(static_cast<size_t>(rows_a)); |
| 276 | | 64 | MPI_Bcast(final_result_flat.data(), rows_a, MPI_DOUBLE, 0, MPI_COMM_WORLD); |
| 277 | | | } |
| 278 | | 128 | } |
| 279 | | | |
| 280 | | 128 | void TerekhovDHorizontalMatrixVectorMPI::ComputeLocalMultiplication(const std::vector<double> &local_a_flat, |
| 281 | | | const std::vector<double> &vector_flat, |
| 282 | | | std::vector<double> &local_result_flat, |
| 283 | | | int local_rows, int cols_a) { |
| 284 | 2/2 ✓ Branch 0 taken 130 times. ✓ Branch 1 taken 128 times. | 258 | for (int i = 0; i < local_rows; ++i) { |
| 285 | | 130 | const double *a_row = &local_a_flat[static_cast<size_t>(i) * static_cast<size_t>(cols_a)]; |
| 286 | | | double sum = 0.0; |
| 287 | | | |
| 288 | 2/2 ✓ Branch 0 taken 268 times. ✓ Branch 1 taken 130 times. | 398 | for (int j = 0; j < cols_a; ++j) { |
| 289 | | 268 | sum += a_row[j] * vector_flat[static_cast<size_t>(j)]; |
| 290 | | | } |
| 291 | | | |
| 292 | | 130 | local_result_flat[static_cast<size_t>(i)] = sum; |
| 293 | | | } |
| 294 | | 128 | } |
| 295 | | | |
| 296 | | 128 | bool TerekhovDHorizontalMatrixVectorMPI::PostProcessingImpl() { |
| 297 | | 128 | return true; |
| 298 | | | } |
| 299 | | | |
| 300 | | | } // namespace terekhov_d_horizontal_matrix_vector |
| 301 | | | |
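
The code covered above uses a cyclic (round-robin) row split: `GetRowsForProcess` assigns global row `i` to process `i % world_size_`, and `DistributeMatrixAData` predicts each rank's share as `rows_a / world_size_` plus one extra row when `rank_ < rows_a % world_size_`. A minimal standalone sketch of that invariant, illustrative only and not part of the instrumented source (`RowsForProcess` and the sizes below are local stand-ins):

```cpp
#include <cassert>
#include <initializer_list>
#include <vector>

// Stand-in for GetRowsForProcess: cyclic assignment of row i to rank i % world_size.
std::vector<int> RowsForProcess(int rank, int rows_a, int world_size) {
  std::vector<int> rows;
  for (int i = 0; i < rows_a; ++i) {
    if (i % world_size == rank) {
      rows.push_back(i);
    }
  }
  return rows;
}

int main() {
  const int world_size = 2;  // matches the two-process runs in this report
  for (int rows_a : {0, 1, 3, 5}) {
    for (int rank = 0; rank < world_size; ++rank) {
      // Closed-form count used by DistributeMatrixAData.
      int expected = (rows_a / world_size) + ((rank < (rows_a % world_size)) ? 1 : 0);
      assert(static_cast<int>(RowsForProcess(rank, rows_a, world_size).size()) == expected);
    }
  }
  return 0;
}
```

Because the two counts always agree, the `✗ Branch 0 not taken` on line 206 and the unexecuted fallback on line 207 are expected in this report, not a coverage gap.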