| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "khruev_a_gauss_jordan/mpi/include/ops_mpi.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cmath> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "khruev_a_gauss_jordan/common/include/common.hpp" | ||
| 11 | |||
| 12 | namespace khruev_a_gauss_jordan { | ||
| 13 | |||
| 14 | ✗ | int KhruevAGaussJordanMPI::GetGlobalIdx(int local_k, int rank, int size) const { | |
| 15 | ✗ | int rows = n_ / size; | |
| 16 | ✗ | int rem = n_ % size; | |
| 17 |
6/8✓ Branch 0 taken 19 times.
✓ Branch 1 taken 21 times.
✓ Branch 2 taken 19 times.
✓ Branch 3 taken 21 times.
✓ Branch 4 taken 13 times.
✓ Branch 5 taken 9 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
|
102 | if (rank < rem) { |
| 18 | 51 | return (rank * (rows + 1)) + local_k; | |
| 19 | } | ||
| 20 | 51 | return (rem * (rows + 1)) + ((rank - rem) * rows) + local_k; | |
| 21 | } | ||
| 22 | |||
| 23 | ✗ | KhruevAGaussJordanMPI::RowPos KhruevAGaussJordanMPI::GetRowOwner(int global_i, int size) const { | |
| 24 | 64 | int rows = n_ / size; | |
| 25 | 64 | int rem = n_ % size; | |
| 26 | |||
| 27 | ✗ | if (global_i < rem * (rows + 1)) { | |
| 28 | 28 | return RowPos{.rank = global_i / (rows + 1), .local_idx = global_i % (rows + 1)}; | |
| 29 | } | ||
| 30 | |||
| 31 | int base = rem * (rows + 1); | ||
| 32 | 36 | return RowPos{.rank = rem + ((global_i - base) / rows), .local_idx = (global_i - base) % rows}; | |
| 33 | } | ||
| 34 | |||
| 35 | 16 | int KhruevAGaussJordanMPI::FindLocalPivotIdx(int col, int rank, int size) const { | |
| 36 | 16 | int rows = n_ / size; | |
| 37 | 16 | int rem = n_ % size; | |
| 38 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 9 times.
|
16 | int my_rows = (rank < rem) ? (rows + 1) : rows; |
| 39 | |||
| 40 | int best = -1; | ||
| 41 | double best_val = -1.0; | ||
| 42 | |||
| 43 |
2/2✓ Branch 0 taken 22 times.
✓ Branch 1 taken 16 times.
|
38 | for (int k = 0; k < my_rows; ++k) { |
| 44 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 19 times.
|
22 | if (GetGlobalIdx(k, rank, size) >= col) { |
| 45 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 16 times.
|
19 | double v = std::fabs(local_data_[(k * m_) + col]); |
| 46 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 16 times.
|
19 | if (v > best_val) { |
| 47 | best_val = v; | ||
| 48 | best = k; | ||
| 49 | } | ||
| 50 | } | ||
| 51 | } | ||
| 52 | 16 | return best; | |
| 53 | } | ||
| 54 | |||
| 55 | 32 | KhruevAGaussJordanMPI::PivotPos KhruevAGaussJordanMPI::FindPivot(int col, int rank, int size) { | |
| 56 | 32 | PivotPos local{.val = -1.0, .rank = rank}; | |
| 57 | |||
| 58 | 32 | int rows = n_ / size; | |
| 59 | 32 | int rem = n_ % size; | |
| 60 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 22 times.
|
32 | int my_rows = (rank < rem) ? (rows + 1) : rows; |
| 61 | |||
| 62 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 32 times.
|
72 | for (int k = 0; k < my_rows; ++k) { |
| 63 |
2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 12 times.
|
40 | if (GetGlobalIdx(k, rank, size) >= col) { |
| 64 | 28 | double v = std::fabs(local_data_[(k * m_) + col]); | |
| 65 | 28 | local.val = std::max(local.val, v); | |
| 66 | } | ||
| 67 | } | ||
| 68 | |||
| 69 | 32 | PivotPos global{}; | |
| 70 | 32 | MPI_Allreduce(&local, &global, 1, MPI_DOUBLE_INT, MPI_MAXLOC, MPI_COMM_WORLD); | |
| 71 | 32 | return global; | |
| 72 | } | ||
| 73 | |||
| 74 | ✗ | void KhruevAGaussJordanMPI::SwapLocalRows(int a, int b) { | |
| 75 | ✗ | if (a == b) { | |
| 76 | return; | ||
| 77 | } | ||
| 78 | ✗ | auto off_a = static_cast<std::ptrdiff_t>(a) * m_; | |
| 79 | ✗ | auto off_b = static_cast<std::ptrdiff_t>(b) * m_; | |
| 80 | std::swap_ranges(local_data_.begin() + off_a, local_data_.begin() + off_a + m_, local_data_.begin() + off_b); | ||
| 81 | } | ||
| 82 | |||
| 83 | 2 | void KhruevAGaussJordanMPI::SwapRemoteRows(int my_idx, int other_rank) { | |
| 84 | 2 | std::vector<double> tmp(m_); | |
| 85 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | auto off = static_cast<std::ptrdiff_t>(my_idx) * m_; |
| 86 | |||
| 87 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | MPI_Sendrecv(local_data_.data() + off, m_, MPI_DOUBLE, other_rank, 0, tmp.data(), m_, MPI_DOUBLE, other_rank, 0, |
| 88 | MPI_COMM_WORLD, MPI_STATUS_IGNORE); | ||
| 89 | |||
| 90 | std::ranges::copy(tmp, local_data_.begin() + off); | ||
| 91 | 2 | } | |
| 92 | |||
| 93 | 32 | void KhruevAGaussJordanMPI::NormalizePivotRow(int i, const RowPos &pivot, std::vector<double> &pivot_row, int rank) { | |
| 94 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 16 times.
|
32 | if (rank != pivot.rank) { |
| 95 | return; | ||
| 96 | } | ||
| 97 | |||
| 98 |
1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
|
16 | auto off = static_cast<std::ptrdiff_t>(pivot.local_idx) * m_; |
| 99 | 16 | double *row = local_data_.data() + off; | |
| 100 | 16 | double div = row[i]; | |
| 101 | |||
| 102 |
1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
|
16 | if (std::fabs(div) > kEps) { |
| 103 |
2/2✓ Branch 0 taken 44 times.
✓ Branch 1 taken 16 times.
|
60 | for (int j = i; j < m_; ++j) { |
| 104 | 44 | row[j] /= div; | |
| 105 | } | ||
| 106 | } | ||
| 107 |
1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
|
16 | std::ranges::copy(row, row + m_, pivot_row.begin()); |
| 108 | } | ||
| 109 | |||
| 110 | 32 | void KhruevAGaussJordanMPI::ApplyElimination(int i, const std::vector<double> &pivot_row, int rank, int size) { | |
| 111 | 32 | int rows = n_ / size; | |
| 112 | 32 | int rem = n_ % size; | |
| 113 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 22 times.
|
32 | int my_rows = (rank < rem) ? (rows + 1) : rows; |
| 114 | |||
| 115 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 32 times.
|
72 | for (int k = 0; k < my_rows; ++k) { |
| 116 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 24 times.
|
40 | if (GetGlobalIdx(k, rank, size) == i) { |
| 117 | 16 | continue; | |
| 118 | } | ||
| 119 | |||
| 120 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 17 times.
|
24 | auto off = static_cast<std::ptrdiff_t>(k) * m_; |
| 121 | 24 | double *row = local_data_.data() + off; | |
| 122 | 24 | double factor = row[i]; | |
| 123 | |||
| 124 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 17 times.
|
24 | if (std::fabs(factor) > kEps) { |
| 125 |
2/2✓ Branch 0 taken 17 times.
✓ Branch 1 taken 7 times.
|
24 | for (int j = i; j < m_; ++j) { |
| 126 | 17 | row[j] -= factor * pivot_row[j]; | |
| 127 | } | ||
| 128 | } | ||
| 129 | } | ||
| 130 | 32 | } | |
| 131 | |||
| 132 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 18 times.
|
32 | void KhruevAGaussJordanMPI::SwapRows(int i, int pivot_rank, int rank, int size) { |
| 133 | RowPos target = GetRowOwner(i, size); | ||
| 134 | |||
| 135 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 16 times.
|
32 | int local_pivot = (rank == pivot_rank) ? FindLocalPivotIdx(i, rank, size) : -1; |
| 136 | |||
| 137 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.
|
32 | if (pivot_rank == target.rank) { |
| 138 |
3/4✓ Branch 0 taken 15 times.
✓ Branch 1 taken 15 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 15 times.
|
30 | if ((rank == target.rank) && (local_pivot != target.local_idx)) { |
| 139 | ✗ | SwapLocalRows(local_pivot, target.local_idx); | |
| 140 | } | ||
| 141 | return; | ||
| 142 | } | ||
| 143 | |||
| 144 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | if (rank == pivot_rank) { |
| 145 | 1 | SwapRemoteRows(local_pivot, target.rank); | |
| 146 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | } else if (rank == target.rank) { |
| 147 | 1 | SwapRemoteRows(target.local_idx, pivot_rank); | |
| 148 | } | ||
| 149 | } | ||
| 150 | |||
| 151 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 18 times.
|
32 | void KhruevAGaussJordanMPI::Eliminate(int i, int rank, int size) { |
| 152 | 32 | RowPos pivot = GetRowOwner(i, size); | |
| 153 | 32 | std::vector<double> pivot_row(m_); | |
| 154 | |||
| 155 | 32 | NormalizePivotRow(i, pivot, pivot_row, rank); | |
| 156 |
1/2✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
|
32 | MPI_Bcast(pivot_row.data(), m_, MPI_DOUBLE, pivot.rank, MPI_COMM_WORLD); |
| 157 | 32 | ApplyElimination(i, pivot_row, rank, size); | |
| 158 | 32 | } | |
| 159 | |||
| 160 |
1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
|
14 | KhruevAGaussJordanMPI::KhruevAGaussJordanMPI(const InType &in) { |
| 161 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 162 | 14 | int rank = 0; | |
| 163 |
1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
|
14 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); |
| 164 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
|
14 | if (rank == 0) { |
| 165 |
1/2✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
|
7 | GetInput() = in; |
| 166 | } | ||
| 167 | 14 | } | |
| 168 | |||
| 169 | 14 | bool KhruevAGaussJordanMPI::ValidationImpl() { | |
| 170 | 14 | int rank = 0; | |
| 171 | 14 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 172 | |||
| 173 | 14 | int ok = 1; | |
| 174 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
|
14 | if (rank == 0) { |
| 175 |
2/4✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 7 times.
|
7 | ok = (!GetInput().empty() && !GetInput()[0].empty()) ? 1 : 0; |
| 176 | } | ||
| 177 | 14 | MPI_Bcast(&ok, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 178 | 14 | return ok == 1; | |
| 179 | } | ||
| 180 | |||
| 181 | 14 | bool KhruevAGaussJordanMPI::PreProcessingImpl() { | |
| 182 | 14 | int rank = 0; | |
| 183 | 14 | int size = 0; | |
| 184 | 14 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 185 | 14 | MPI_Comm_size(MPI_COMM_WORLD, &size); | |
| 186 | |||
| 187 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
|
14 | if (rank == 0) { |
| 188 | 7 | n_ = static_cast<int>(GetInput().size()); | |
| 189 | 7 | m_ = static_cast<int>(GetInput()[0].size()); | |
| 190 | } | ||
| 191 | 14 | MPI_Bcast(&n_, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 192 | 14 | MPI_Bcast(&m_, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 193 | |||
| 194 | 14 | int rows = n_ / size; | |
| 195 | 14 | int rem = n_ % size; | |
| 196 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 10 times.
|
14 | int my_rows = (rank < rem) ? (rows + 1) : rows; |
| 197 | |||
| 198 | 14 | local_data_.assign(static_cast<size_t>(my_rows) * m_, 0.0); | |
| 199 | |||
| 200 | 14 | std::vector<int> sendcounts(size); | |
| 201 |
1/4✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
14 | std::vector<int> displs(size); |
| 202 | int offset = 0; | ||
| 203 |
2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 14 times.
|
42 | for (int i = 0; i < size; ++i) { |
| 204 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 20 times.
|
28 | sendcounts[i] = ((i < rem) ? (rows + 1) : rows) * m_; |
| 205 | 28 | displs[i] = offset; | |
| 206 | 28 | offset += sendcounts[i]; | |
| 207 | } | ||
| 208 | |||
| 209 | 14 | std::vector<double> flat; | |
| 210 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
|
14 | if (rank == 0) { |
| 211 |
1/2✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
|
7 | flat.resize(static_cast<size_t>(n_) * m_); |
| 212 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 7 times.
|
23 | for (int i = 0; i < n_; ++i) { |
| 213 |
1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
|
16 | std::ranges::copy(GetInput()[i], flat.begin() + static_cast<std::ptrdiff_t>(i) * m_); |
| 214 | } | ||
| 215 | } | ||
| 216 | |||
| 217 |
1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
|
14 | MPI_Scatterv(flat.data(), sendcounts.data(), displs.data(), MPI_DOUBLE, local_data_.data(), my_rows * m_, MPI_DOUBLE, |
| 218 | 0, MPI_COMM_WORLD); | ||
| 219 | 14 | return true; | |
| 220 | } | ||
| 221 | |||
| 222 | 14 | bool KhruevAGaussJordanMPI::RunImpl() { | |
| 223 | 14 | int rank = 0; | |
| 224 | 14 | int size = 0; | |
| 225 | 14 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 226 | 14 | MPI_Comm_size(MPI_COMM_WORLD, &size); | |
| 227 | |||
| 228 |
2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 14 times.
|
46 | for (int i = 0; i < n_; ++i) { |
| 229 | 32 | PivotPos piv = FindPivot(i, rank, size); | |
| 230 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 32 times.
|
32 | if (piv.val < kEps) { |
| 231 | ✗ | continue; | |
| 232 | } | ||
| 233 | 32 | SwapRows(i, piv.rank, rank, size); | |
| 234 | 32 | Eliminate(i, rank, size); | |
| 235 | } | ||
| 236 | 14 | return true; | |
| 237 | } | ||
| 238 | |||
| 239 | 14 | bool KhruevAGaussJordanMPI::PostProcessingImpl() { | |
| 240 | 14 | int rank = 0; | |
| 241 | 14 | int size = 0; | |
| 242 | 14 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 243 | 14 | MPI_Comm_size(MPI_COMM_WORLD, &size); | |
| 244 | |||
| 245 | 14 | int rows = n_ / size; | |
| 246 | 14 | int rem = n_ % size; | |
| 247 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 10 times.
|
14 | int my_rows = (rank < rem) ? (rows + 1) : rows; |
| 248 | |||
| 249 | 14 | std::vector<double> full; | |
| 250 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
|
14 | if (rank == 0) { |
| 251 |
1/2✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
|
7 | full.resize(static_cast<size_t>(n_) * m_); |
| 252 | } | ||
| 253 | |||
| 254 |
1/4✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
14 | std::vector<int> recvcounts(size); |
| 255 |
1/4✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
14 | std::vector<int> displs(size); |
| 256 | int offset = 0; | ||
| 257 |
2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 14 times.
|
42 | for (int i = 0; i < size; ++i) { |
| 258 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 20 times.
|
28 | recvcounts[i] = ((i < rem) ? (rows + 1) : rows) * m_; |
| 259 | 28 | displs[i] = offset; | |
| 260 | 28 | offset += recvcounts[i]; | |
| 261 | } | ||
| 262 | |||
| 263 |
1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
|
14 | MPI_Gatherv(local_data_.data(), my_rows * m_, MPI_DOUBLE, full.data(), recvcounts.data(), displs.data(), MPI_DOUBLE, |
| 264 | 0, MPI_COMM_WORLD); | ||
| 265 | |||
| 266 |
1/4✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
14 | OutType out(n_); |
| 267 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
|
14 | if (rank == 0) { |
| 268 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 7 times.
|
23 | for (int i = 0; i < n_; ++i) { |
| 269 | 16 | out[i] = full[(i * m_) + (m_ - 1)]; | |
| 270 | } | ||
| 271 | } | ||
| 272 | |||
| 273 |
1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
|
14 | MPI_Bcast(out.data(), n_, MPI_DOUBLE, 0, MPI_COMM_WORLD); |
| 274 |
1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
|
14 | GetOutput() = out; |
| 275 | 14 | return true; | |
| 276 | } | ||
| 277 | |||
| 278 | } // namespace khruev_a_gauss_jordan | ||
| 279 |