| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "nikitin_a_fox_algorithm/mpi/include/ops_mpi.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <cmath> | ||
| 6 | #include <cstddef> | ||
| 7 | #include <utility> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "nikitin_a_fox_algorithm/common/include/common.hpp" | ||
| 11 | |||
| 12 | namespace nikitin_a_fox_algorithm { | ||
| 13 | |||
| 14 |
1/2✓ Branch 1 taken 60 times.
✗ Branch 2 not taken.
|
60 | NikitinAFoxAlgorithmMPI::NikitinAFoxAlgorithmMPI(const InType &in) { |
| 15 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 16 | GetInput() = in; | ||
| 17 | 60 | } | |
| 18 | |||
| 19 |
1/2✓ Branch 0 taken 30 times.
✗ Branch 1 not taken.
|
30 | bool NikitinAFoxAlgorithmMPI::ValidateMatricesOnRoot(const std::vector<std::vector<double>> &matrix_a, |
| 20 | const std::vector<std::vector<double>> &matrix_b) { | ||
| 21 |
2/4✓ Branch 0 taken 30 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 30 times.
|
30 | if (matrix_a.empty() || matrix_b.empty()) { |
| 22 | return false; | ||
| 23 | } | ||
| 24 | |||
| 25 | 30 | const auto n = static_cast<int>(matrix_a.size()); | |
| 26 |
2/2✓ Branch 0 taken 647 times.
✓ Branch 1 taken 30 times.
|
677 | for (int i = 0; i < n; ++i) { |
| 27 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 647 times.
|
647 | if (matrix_a[i].size() != static_cast<std::size_t>(n)) { |
| 28 | return false; | ||
| 29 | } | ||
| 30 | } | ||
| 31 | |||
| 32 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 30 times.
|
30 | if (matrix_b.size() != static_cast<std::size_t>(n)) { |
| 33 | return false; | ||
| 34 | } | ||
| 35 | |||
| 36 |
2/2✓ Branch 0 taken 647 times.
✓ Branch 1 taken 30 times.
|
677 | for (int i = 0; i < n; ++i) { |
| 37 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 647 times.
|
647 | if (matrix_b[i].size() != static_cast<std::size_t>(n)) { |
| 38 | return false; | ||
| 39 | } | ||
| 40 | } | ||
| 41 | |||
| 42 | return true; | ||
| 43 | } | ||
| 44 | |||
| 45 | 60 | bool NikitinAFoxAlgorithmMPI::ValidationImpl() { | |
| 46 | 60 | int rank = 0; | |
| 47 | 60 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 48 | |||
| 49 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 30 times.
|
60 | if (rank == 0) { |
| 50 | 30 | const auto &matrix_a = GetInput().first; | |
| 51 | 30 | const auto &matrix_b = GetInput().second; | |
| 52 | 30 | return ValidateMatricesOnRoot(matrix_a, matrix_b); | |
| 53 | } | ||
| 54 | |||
| 55 | return true; | ||
| 56 | } | ||
| 57 | |||
| 58 | 60 | bool NikitinAFoxAlgorithmMPI::PreProcessingImpl() { | |
| 59 | 60 | return true; | |
| 60 | } | ||
| 61 | |||
| 62 | ✗ | void NikitinAFoxAlgorithmMPI::DistributeMatrixB(int n, std::vector<double> &local_b) { | |
| 63 | const auto &matrix_b = GetInput().second; | ||
| 64 | // Заполняем локальный буфер матрицей B | ||
| 65 |
2/4✓ Branch 0 taken 647 times.
✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
677 | for (int i = 0; i < n; ++i) { |
| 66 | 647 | const auto i_offset = static_cast<std::size_t>(i) * static_cast<std::size_t>(n); | |
| 67 |
2/4✓ Branch 0 taken 45775 times.
✓ Branch 1 taken 647 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
46422 | for (int j = 0; j < n; ++j) { |
| 68 | 45775 | local_b[i_offset + static_cast<std::size_t>(j)] = matrix_b[i][j]; | |
| 69 | } | ||
| 70 | } | ||
| 71 | ✗ | } | |
| 72 | |||
| 73 | 30 | void NikitinAFoxAlgorithmMPI::SendMatrixAToProcess(int dest, int rows_per_proc, int remainder, int n, | |
| 74 | int &current_row) { | ||
| 75 | const auto &matrix_a = GetInput().first; | ||
| 76 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 30 times.
|
30 | const int dest_rows = (dest < remainder) ? (rows_per_proc + 1) : rows_per_proc; |
| 77 | 30 | const auto dest_elements = static_cast<std::size_t>(dest_rows) * static_cast<std::size_t>(n); | |
| 78 | 30 | std::vector<double> send_buffer(dest_elements); | |
| 79 | |||
| 80 |
2/2✓ Branch 0 taken 318 times.
✓ Branch 1 taken 30 times.
|
348 | for (int i = 0; i < dest_rows; ++i) { |
| 81 | 318 | const auto i_offset = static_cast<std::size_t>(i) * static_cast<std::size_t>(n); | |
| 82 |
2/2✓ Branch 0 taken 22789 times.
✓ Branch 1 taken 318 times.
|
23107 | for (int j = 0; j < n; ++j) { |
| 83 | 22789 | send_buffer[i_offset + static_cast<std::size_t>(j)] = matrix_a[current_row][j]; | |
| 84 | } | ||
| 85 | 318 | current_row++; | |
| 86 | } | ||
| 87 | |||
| 88 |
1/2✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.
|
30 | MPI_Send(send_buffer.data(), static_cast<int>(dest_elements), MPI_DOUBLE, dest, 0, MPI_COMM_WORLD); |
| 89 | 30 | } | |
| 90 | |||
| 91 | 60 | void NikitinAFoxAlgorithmMPI::DistributeMatrixA(int rank, int size, int n, int local_rows, | |
| 92 | std::vector<double> &local_a) { | ||
| 93 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 30 times.
|
60 | if (rank == 0) { |
| 94 | const auto &matrix_a = GetInput().first; | ||
| 95 | |||
| 96 | // Сначала копируем строки для процесса 0 | ||
| 97 | 30 | int current_row = 0; | |
| 98 |
2/2✓ Branch 0 taken 329 times.
✓ Branch 1 taken 30 times.
|
359 | for (int i = 0; i < local_rows; ++i) { |
| 99 | 329 | const auto i_offset = static_cast<std::size_t>(i) * static_cast<std::size_t>(n); | |
| 100 |
2/2✓ Branch 0 taken 22986 times.
✓ Branch 1 taken 329 times.
|
23315 | for (int j = 0; j < n; ++j) { |
| 101 | 22986 | local_a[i_offset + static_cast<std::size_t>(j)] = matrix_a[current_row][j]; | |
| 102 | } | ||
| 103 | 329 | current_row++; | |
| 104 | } | ||
| 105 | |||
| 106 | // Отправляем строки остальным процессам | ||
| 107 | 30 | const int rows_per_proc = n / size; | |
| 108 | 30 | const int remainder = n % size; | |
| 109 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 30 times.
|
60 | for (int dest = 1; dest < size; ++dest) { |
| 110 | 30 | SendMatrixAToProcess(dest, rows_per_proc, remainder, n, current_row); | |
| 111 | } | ||
| 112 | } else { | ||
| 113 | // Получаем свои строки от процесса 0 | ||
| 114 | 30 | const auto local_elements = static_cast<std::size_t>(local_rows) * static_cast<std::size_t>(n); | |
| 115 | 30 | MPI_Recv(local_a.data(), static_cast<int>(local_elements), MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); | |
| 116 | } | ||
| 117 | 60 | } | |
| 118 | |||
| 119 | 60 | void NikitinAFoxAlgorithmMPI::LocalMatrixMultiply(int n, int local_rows, const std::vector<double> &local_a, | |
| 120 | const std::vector<double> &local_b, std::vector<double> &local_c) { | ||
| 121 |
2/2✓ Branch 0 taken 647 times.
✓ Branch 1 taken 60 times.
|
707 | for (int i = 0; i < local_rows; ++i) { |
| 122 | 647 | const auto i_idx = static_cast<std::size_t>(i) * static_cast<std::size_t>(n); | |
| 123 |
2/2✓ Branch 0 taken 45775 times.
✓ Branch 1 taken 647 times.
|
46422 | for (int j = 0; j < n; ++j) { |
| 124 | double sum = 0.0; | ||
| 125 |
2/2✓ Branch 0 taken 4501589 times.
✓ Branch 1 taken 45775 times.
|
4547364 | for (int k = 0; k < n; ++k) { |
| 126 | 4501589 | const auto k_idx = static_cast<std::size_t>(k) * static_cast<std::size_t>(n); | |
| 127 | 4501589 | sum += local_a[i_idx + static_cast<std::size_t>(k)] * local_b[k_idx + static_cast<std::size_t>(j)]; | |
| 128 | } | ||
| 129 | 45775 | local_c[i_idx + static_cast<std::size_t>(j)] = sum; | |
| 130 | } | ||
| 131 | } | ||
| 132 | 60 | } | |
| 133 | |||
| 134 | ✗ | void NikitinAFoxAlgorithmMPI::CopyLocalResultsToOutput(int local_rows, int n, const std::vector<double> &local_c) { | |
| 135 | int current_row = 0; | ||
| 136 |
2/4✓ Branch 0 taken 329 times.
✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
359 | for (int i = 0; i < local_rows; ++i) { |
| 137 | 329 | const auto i_idx = static_cast<std::size_t>(i) * static_cast<std::size_t>(n); | |
| 138 |
2/4✓ Branch 0 taken 22986 times.
✓ Branch 1 taken 329 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
23315 | for (int j = 0; j < n; ++j) { |
| 139 | 22986 | GetOutput()[current_row][j] = local_c[i_idx + static_cast<std::size_t>(j)]; | |
| 140 | } | ||
| 141 | 329 | current_row++; | |
| 142 | } | ||
| 143 | ✗ | } | |
| 144 | |||
| 145 | 30 | void NikitinAFoxAlgorithmMPI::ReceiveResultsFromProcess(int src, int rows_per_proc, int remainder, int n, | |
| 146 | int &current_row) { | ||
| 147 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 30 times.
|
30 | const int src_rows = (src < remainder) ? (rows_per_proc + 1) : rows_per_proc; |
| 148 | 30 | const auto src_elements = static_cast<std::size_t>(src_rows) * static_cast<std::size_t>(n); | |
| 149 | 30 | std::vector<double> recv_buffer(src_elements); | |
| 150 | |||
| 151 |
1/2✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.
|
30 | MPI_Recv(recv_buffer.data(), static_cast<int>(src_elements), MPI_DOUBLE, src, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); |
| 152 | |||
| 153 |
2/2✓ Branch 0 taken 318 times.
✓ Branch 1 taken 30 times.
|
348 | for (int i = 0; i < src_rows; ++i) { |
| 154 | 318 | const auto i_idx = static_cast<std::size_t>(i) * static_cast<std::size_t>(n); | |
| 155 |
2/2✓ Branch 0 taken 22789 times.
✓ Branch 1 taken 318 times.
|
23107 | for (int j = 0; j < n; ++j) { |
| 156 | 22789 | GetOutput()[current_row][j] = recv_buffer[i_idx + static_cast<std::size_t>(j)]; | |
| 157 | } | ||
| 158 | 318 | current_row++; | |
| 159 | } | ||
| 160 | 30 | } | |
| 161 | |||
| 162 | 60 | void NikitinAFoxAlgorithmMPI::GatherResults(int rank, int size, int n, int rows_per_proc, int remainder, int local_rows, | |
| 163 | const std::vector<double> &local_c) { | ||
| 164 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 30 times.
|
60 | if (rank == 0) { |
| 165 | // Создаем результирующую матрицу | ||
| 166 |
1/2✓ Branch 2 taken 30 times.
✗ Branch 3 not taken.
|
60 | GetOutput() = std::vector<std::vector<double>>(n, std::vector<double>(n, 0.0)); |
| 167 | |||
| 168 | // Копируем свои результаты | ||
| 169 | CopyLocalResultsToOutput(local_rows, n, local_c); | ||
| 170 | |||
| 171 | // Получаем результаты от других процессов | ||
| 172 | 30 | int current_row = local_rows; | |
| 173 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 30 times.
|
60 | for (int src = 1; src < size; ++src) { |
| 174 | 30 | ReceiveResultsFromProcess(src, rows_per_proc, remainder, n, current_row); | |
| 175 | } | ||
| 176 | } else { | ||
| 177 | // Отправляем свои результаты процессу 0 | ||
| 178 | 30 | const auto local_elements = static_cast<std::size_t>(local_rows) * static_cast<std::size_t>(n); | |
| 179 | 30 | MPI_Send(local_c.data(), static_cast<int>(local_elements), MPI_DOUBLE, 0, 1, MPI_COMM_WORLD); | |
| 180 | } | ||
| 181 | 60 | } | |
| 182 | |||
| 183 | 30 | void NikitinAFoxAlgorithmMPI::CreateAndSendFlatResult(int n) { | |
| 184 | 30 | const auto total_elements = static_cast<std::size_t>(n) * static_cast<std::size_t>(n); | |
| 185 | 30 | std::vector<double> flat_result(total_elements); | |
| 186 | |||
| 187 |
2/2✓ Branch 0 taken 647 times.
✓ Branch 1 taken 30 times.
|
677 | for (int i = 0; i < n; ++i) { |
| 188 | 647 | const auto i_offset = static_cast<std::size_t>(i) * static_cast<std::size_t>(n); | |
| 189 |
2/2✓ Branch 0 taken 45775 times.
✓ Branch 1 taken 647 times.
|
46422 | for (int j = 0; j < n; ++j) { |
| 190 | 45775 | flat_result[i_offset + static_cast<std::size_t>(j)] = GetOutput()[i][j]; | |
| 191 | } | ||
| 192 | } | ||
| 193 | |||
| 194 |
1/2✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.
|
30 | MPI_Bcast(flat_result.data(), static_cast<int>(total_elements), MPI_DOUBLE, 0, MPI_COMM_WORLD); |
| 195 | |||
| 196 |
2/2✓ Branch 0 taken 647 times.
✓ Branch 1 taken 30 times.
|
677 | for (int i = 0; i < n; ++i) { |
| 197 | 647 | const auto i_offset = static_cast<std::size_t>(i) * static_cast<std::size_t>(n); | |
| 198 |
2/2✓ Branch 0 taken 45775 times.
✓ Branch 1 taken 647 times.
|
46422 | for (int j = 0; j < n; ++j) { |
| 199 | 45775 | GetOutput()[i][j] = flat_result[i_offset + static_cast<std::size_t>(j)]; | |
| 200 | } | ||
| 201 | } | ||
| 202 | 30 | } | |
| 203 | |||
| 204 | 30 | void NikitinAFoxAlgorithmMPI::ReceiveFlatResultAndCreateMatrix(int n) { | |
| 205 | 30 | const auto total_elements = static_cast<std::size_t>(n) * static_cast<std::size_t>(n); | |
| 206 |
1/2✓ Branch 2 taken 30 times.
✗ Branch 3 not taken.
|
30 | GetOutput() = std::vector<std::vector<double>>(n, std::vector<double>(n)); |
| 207 | |||
| 208 | 30 | std::vector<double> flat_result(total_elements); | |
| 209 |
1/2✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.
|
30 | MPI_Bcast(flat_result.data(), static_cast<int>(total_elements), MPI_DOUBLE, 0, MPI_COMM_WORLD); |
| 210 | |||
| 211 |
2/2✓ Branch 0 taken 647 times.
✓ Branch 1 taken 30 times.
|
677 | for (int i = 0; i < n; ++i) { |
| 212 | 647 | const auto i_offset = static_cast<std::size_t>(i) * static_cast<std::size_t>(n); | |
| 213 |
2/2✓ Branch 0 taken 45775 times.
✓ Branch 1 taken 647 times.
|
46422 | for (int j = 0; j < n; ++j) { |
| 214 | 45775 | GetOutput()[i][j] = flat_result[i_offset + static_cast<std::size_t>(j)]; | |
| 215 | } | ||
| 216 | } | ||
| 217 | 30 | } | |
| 218 | |||
| 219 | ✗ | void NikitinAFoxAlgorithmMPI::BroadcastResultToAll(int rank, int n) { | |
| 220 | ✗ | if (rank == 0) { | |
| 221 |
1/2✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.
|
30 | CreateAndSendFlatResult(n); |
| 222 | } else { | ||
| 223 |
1/2✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.
|
30 | ReceiveFlatResultAndCreateMatrix(n); |
| 224 | } | ||
| 225 | ✗ | } | |
| 226 | |||
| 227 | 60 | int NikitinAFoxAlgorithmMPI::GetMatrixSize(int rank) { | |
| 228 | 60 | int n = 0; | |
| 229 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 30 times.
|
60 | if (rank == 0) { |
| 230 | 30 | n = static_cast<int>(GetInput().first.size()); | |
| 231 | } | ||
| 232 | 60 | MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 233 | 60 | return n; | |
| 234 | } | ||
| 235 | |||
| 236 | ✗ | std::pair<int, int> NikitinAFoxAlgorithmMPI::CalculateRowDistribution(int n, int size, int rank) { | |
| 237 | 60 | const int rows_per_proc = n / size; | |
| 238 | 60 | const int remainder = n % size; | |
| 239 |
0/2✗ Branch 0 not taken.
✗ Branch 1 not taken.
|
11 | const int local_rows = (rank < remainder) ? (rows_per_proc + 1) : rows_per_proc; |
| 240 | ✗ | return {rows_per_proc, local_rows}; | |
| 241 | } | ||
| 242 | |||
| 243 | 60 | bool NikitinAFoxAlgorithmMPI::RunImpl() { | |
| 244 | 60 | int rank = 0; | |
| 245 | 60 | int size = 0; | |
| 246 | 60 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 247 | 60 | MPI_Comm_size(MPI_COMM_WORLD, &size); | |
| 248 | |||
| 249 | // Получаем размер матрицы | ||
| 250 | 60 | const int n = GetMatrixSize(rank); | |
| 251 | |||
| 252 |
1/2✓ Branch 0 taken 60 times.
✗ Branch 1 not taken.
|
60 | if (n == 0) { |
| 253 | return false; | ||
| 254 | } | ||
| 255 | |||
| 256 | // Вычисляем распределение строк | ||
| 257 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 49 times.
|
60 | const auto [rows_per_proc, local_rows] = CalculateRowDistribution(n, size, rank); |
| 258 | 60 | const auto local_elements = static_cast<std::size_t>(local_rows) * static_cast<std::size_t>(n); | |
| 259 | |||
| 260 | // 1. Создаем и распределяем матрицу B всем процессам | ||
| 261 | 60 | const auto total_elements = static_cast<std::size_t>(n) * static_cast<std::size_t>(n); | |
| 262 | 60 | std::vector<double> local_b(total_elements); | |
| 263 | |||
| 264 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 30 times.
|
60 | if (rank == 0) { |
| 265 | DistributeMatrixB(n, local_b); | ||
| 266 | } | ||
| 267 |
1/2✓ Branch 1 taken 60 times.
✗ Branch 2 not taken.
|
60 | MPI_Bcast(local_b.data(), static_cast<int>(total_elements), MPI_DOUBLE, 0, MPI_COMM_WORLD); |
| 268 | |||
| 269 | // 2. Распределяем матрицу A по строкам | ||
| 270 |
1/4✓ Branch 1 taken 60 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
60 | std::vector<double> local_a(local_elements); |
| 271 |
1/2✓ Branch 1 taken 60 times.
✗ Branch 2 not taken.
|
60 | DistributeMatrixA(rank, size, n, local_rows, local_a); |
| 272 | |||
| 273 | // 3. Локальное умножение | ||
| 274 |
1/4✓ Branch 1 taken 60 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
60 | std::vector<double> local_c(local_elements, 0.0); |
| 275 | 60 | LocalMatrixMultiply(n, local_rows, local_a, local_b, local_c); | |
| 276 | |||
| 277 | // 4. Сбор результатов на процессе 0 | ||
| 278 | 60 | const int remainder = n % size; | |
| 279 |
1/2✓ Branch 1 taken 60 times.
✗ Branch 2 not taken.
|
60 | GatherResults(rank, size, n, rows_per_proc, remainder, local_rows, local_c); |
| 280 | |||
| 281 | // 5. Рассылаем результат всем процессам (для проверки в тестах) | ||
| 282 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 30 times.
|
60 | BroadcastResultToAll(rank, n); |
| 283 | |||
| 284 | return true; | ||
| 285 | } | ||
| 286 | |||
| 287 | 60 | bool NikitinAFoxAlgorithmMPI::PostProcessingImpl() { | |
| 288 | 60 | int rank = 0; | |
| 289 | 60 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 290 | |||
| 291 | const auto &matrix_c = GetOutput(); | ||
| 292 | 60 | return !matrix_c.empty(); | |
| 293 | } | ||
| 294 | |||
| 295 | } // namespace nikitin_a_fox_algorithm | ||
| 296 |