| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "melnik_i_gauss_block_part/mpi/include/ops_mpi.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cmath> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <cstdint> | ||
| 10 | #include <limits> | ||
| 11 | #include <ranges> | ||
| 12 | #include <utility> | ||
| 13 | #include <vector> | ||
| 14 | |||
| 15 | #include "melnik_i_gauss_block_part/common/include/common.hpp" | ||
| 16 | #include "task/include/task.hpp" | ||
| 17 | |||
| 18 | namespace melnik_i_gauss_block_part { | ||
| 19 | |||
| 20 | namespace { | ||
| 21 | |||
| 22 | inline std::size_t Idx(int y, int x, int width) { | ||
| 23 | 153 | return (static_cast<std::size_t>(y) * static_cast<std::size_t>(width)) + static_cast<std::size_t>(x); | |
| 24 | } | ||
| 25 | |||
| 26 | inline std::size_t ExtIdx(int y, int x, int ext_w) { | ||
| 27 |
4/6✓ Branch 0 taken 11 times.
✓ Branch 1 taken 7 times.
✓ Branch 3 taken 18 times.
✗ Branch 4 not taken.
✓ Branch 6 taken 18 times.
✗ Branch 7 not taken.
|
224 | return (static_cast<std::size_t>(y) * static_cast<std::size_t>(ext_w)) + static_cast<std::size_t>(x); |
| 28 | } | ||
| 29 | |||
| 30 | } // namespace | ||
| 31 | |||
| 32 | namespace { | ||
| 33 | |||
| 34 | inline std::uint8_t SelectCornerValue(bool prefer_first, std::uint8_t first, bool prefer_second, std::uint8_t second, | ||
| 35 | std::uint8_t fallback) { | ||
| 36 | 72 | if (prefer_first) { | |
| 37 | return first; | ||
| 38 | } | ||
| 39 |
8/8✓ Branch 0 taken 2 times.
✓ Branch 1 taken 9 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 9 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 9 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 9 times.
|
44 | if (prefer_second) { |
| 40 | 8 | return second; | |
| 41 | } | ||
| 42 | return fallback; | ||
| 43 | } | ||
| 44 | |||
| 45 | } // namespace | ||
| 46 | |||
| 47 | 18 | MelnikIGaussBlockPartMPI::Neighbours MelnikIGaussBlockPartMPI::ComputeNeighbours( | |
| 48 | const BlockInfo &blk, int grid_rows, int grid_cols, int rank, const std::vector<BlockInfo> &all_blocks) { | ||
| 49 |
1/2✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
|
18 | Neighbours nbh; |
| 50 | if (blk.Empty()) { | ||
| 51 | return nbh; | ||
| 52 | } | ||
| 53 | |||
| 54 | 18 | const int pr = rank / grid_cols; | |
| 55 | 18 | const int pc = rank % grid_cols; | |
| 56 | |||
| 57 | 144 | auto get_rank = [&](int npr, int npc) -> int { | |
| 58 |
8/8✓ Branch 0 taken 96 times.
✓ Branch 1 taken 48 times.
✓ Branch 2 taken 48 times.
✓ Branch 3 taken 48 times.
✓ Branch 4 taken 33 times.
✓ Branch 5 taken 15 times.
✓ Branch 6 taken 18 times.
✓ Branch 7 taken 15 times.
|
144 | if (npr < 0 || npr >= grid_rows || npc < 0 || npc >= grid_cols) { |
| 59 | return MPI_PROC_NULL; | ||
| 60 | } | ||
| 61 | 18 | const int neighbour = (npr * grid_cols) + npc; | |
| 62 |
1/2✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
|
18 | const int blocks_size = static_cast<int>(all_blocks.size()); |
| 63 |
1/2✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
|
18 | if (neighbour < 0 || neighbour >= blocks_size) { |
| 64 | return MPI_PROC_NULL; | ||
| 65 | } | ||
| 66 |
1/2✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
|
18 | if (all_blocks[static_cast<std::size_t>(neighbour)].Empty()) { |
| 67 | ✗ | return MPI_PROC_NULL; | |
| 68 | } | ||
| 69 | return neighbour; | ||
| 70 | 18 | }; | |
| 71 | |||
| 72 | 18 | nbh.up = get_rank(pr - 1, pc); | |
| 73 | 18 | nbh.down = get_rank(pr + 1, pc); | |
| 74 | 18 | nbh.left = get_rank(pr, pc - 1); | |
| 75 | 18 | nbh.right = get_rank(pr, pc + 1); | |
| 76 | 18 | nbh.up_left = get_rank(pr - 1, pc - 1); | |
| 77 | 18 | nbh.up_right = get_rank(pr - 1, pc + 1); | |
| 78 | 18 | nbh.down_left = get_rank(pr + 1, pc - 1); | |
| 79 | 18 | nbh.down_right = get_rank(pr + 1, pc + 1); | |
| 80 | 18 | return nbh; | |
| 81 | } | ||
| 82 | |||
| 83 | 18 | void MelnikIGaussBlockPartMPI::ExchangeRowHalos(const BlockInfo &blk, const Neighbours &nbh, int ext_w, | |
| 84 | std::vector<std::uint8_t> &ext) { | ||
| 85 | 18 | std::vector<std::uint8_t> recv_row(static_cast<std::size_t>(blk.width), 0); | |
| 86 | |||
| 87 | 18 | MPI_Sendrecv(ext.data() + ExtIdx(1, 1, ext_w), blk.width, MPI_BYTE, nbh.up, 10, recv_row.data(), blk.width, MPI_BYTE, | |
| 88 |
1/2✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
|
18 | nbh.up, 11, MPI_COMM_WORLD, MPI_STATUS_IGNORE); |
| 89 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 16 times.
|
18 | if (nbh.up != MPI_PROC_NULL) { |
| 90 | std::ranges::copy(recv_row, ext.begin() + static_cast<std::ptrdiff_t>(ExtIdx(0, 1, ext_w))); | ||
| 91 | } | ||
| 92 | |||
| 93 | 18 | MPI_Sendrecv(ext.data() + ExtIdx(blk.height, 1, ext_w), blk.width, MPI_BYTE, nbh.down, 11, recv_row.data(), blk.width, | |
| 94 |
1/2✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
|
18 | MPI_BYTE, nbh.down, 10, MPI_COMM_WORLD, MPI_STATUS_IGNORE); |
| 95 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 16 times.
|
18 | if (nbh.down != MPI_PROC_NULL) { |
| 96 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
2 | std::ranges::copy(recv_row, ext.begin() + static_cast<std::ptrdiff_t>(ExtIdx(blk.height + 1, 1, ext_w))); |
| 97 | } | ||
| 98 | 18 | } | |
| 99 | |||
| 100 | 18 | void MelnikIGaussBlockPartMPI::ExchangeColHalos(const BlockInfo &blk, const Neighbours &nbh, int ext_w, | |
| 101 | std::vector<std::uint8_t> &ext) { | ||
| 102 | 18 | std::vector<std::uint8_t> send_col(static_cast<std::size_t>(blk.height), 0); | |
| 103 |
1/4✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
18 | std::vector<std::uint8_t> recv_col(static_cast<std::size_t>(blk.height), 0); |
| 104 | |||
| 105 |
2/2✓ Branch 0 taken 59 times.
✓ Branch 1 taken 18 times.
|
77 | for (int row = 0; row < blk.height; ++row) { |
| 106 | 59 | send_col[static_cast<std::size_t>(row)] = ext[ExtIdx(row + 1, 1, ext_w)]; | |
| 107 | } | ||
| 108 |
1/2✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
|
18 | MPI_Sendrecv(send_col.data(), blk.height, MPI_BYTE, nbh.left, 20, recv_col.data(), blk.height, MPI_BYTE, nbh.left, 21, |
| 109 | MPI_COMM_WORLD, MPI_STATUS_IGNORE); | ||
| 110 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 11 times.
|
18 | if (nbh.left != MPI_PROC_NULL) { |
| 111 |
2/2✓ Branch 0 taken 22 times.
✓ Branch 1 taken 7 times.
|
29 | for (int row = 0; row < blk.height; ++row) { |
| 112 | 22 | ext[ExtIdx(row + 1, 0, ext_w)] = recv_col[static_cast<std::size_t>(row)]; | |
| 113 | } | ||
| 114 | } | ||
| 115 | |||
| 116 |
2/2✓ Branch 0 taken 59 times.
✓ Branch 1 taken 18 times.
|
77 | for (int row = 0; row < blk.height; ++row) { |
| 117 | 59 | send_col[static_cast<std::size_t>(row)] = ext[ExtIdx(row + 1, blk.width, ext_w)]; | |
| 118 | } | ||
| 119 |
1/2✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
|
18 | MPI_Sendrecv(send_col.data(), blk.height, MPI_BYTE, nbh.right, 21, recv_col.data(), blk.height, MPI_BYTE, nbh.right, |
| 120 | 20, MPI_COMM_WORLD, MPI_STATUS_IGNORE); | ||
| 121 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 11 times.
|
18 | if (nbh.right != MPI_PROC_NULL) { |
| 122 |
2/2✓ Branch 0 taken 22 times.
✓ Branch 1 taken 7 times.
|
29 | for (int row = 0; row < blk.height; ++row) { |
| 123 | 22 | ext[ExtIdx(row + 1, blk.width + 1, ext_w)] = recv_col[static_cast<std::size_t>(row)]; | |
| 124 | } | ||
| 125 | } | ||
| 126 | 18 | } | |
| 127 | |||
| 128 | 18 | void MelnikIGaussBlockPartMPI::ExchangeCornerHalos(const BlockInfo &blk, const Neighbours &nbh, int ext_w, | |
| 129 | std::vector<std::uint8_t> &ext) { | ||
| 130 | 18 | std::uint8_t send_val = 0; | |
| 131 | 18 | std::uint8_t recv_val = 0; | |
| 132 | |||
| 133 | // - up_left <-> down_right : tags (30, 31) | ||
| 134 | // - up_right <-> down_left : tags (32, 33) | ||
| 135 | 18 | send_val = ext[ExtIdx(1, 1, ext_w)]; | |
| 136 | 18 | MPI_Sendrecv(&send_val, 1, MPI_BYTE, nbh.up_left, 30, &recv_val, 1, MPI_BYTE, nbh.up_left, 31, MPI_COMM_WORLD, | |
| 137 | MPI_STATUS_IGNORE); | ||
| 138 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 18 times.
|
18 | if (nbh.up_left != MPI_PROC_NULL) { |
| 139 | ✗ | ext[ExtIdx(0, 0, ext_w)] = recv_val; | |
| 140 | } | ||
| 141 | |||
| 142 | 18 | send_val = ext[ExtIdx(1, blk.width, ext_w)]; | |
| 143 | 18 | MPI_Sendrecv(&send_val, 1, MPI_BYTE, nbh.up_right, 32, &recv_val, 1, MPI_BYTE, nbh.up_right, 33, MPI_COMM_WORLD, | |
| 144 | MPI_STATUS_IGNORE); | ||
| 145 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 18 times.
|
18 | if (nbh.up_right != MPI_PROC_NULL) { |
| 146 | ✗ | ext[ExtIdx(0, blk.width + 1, ext_w)] = recv_val; | |
| 147 | } | ||
| 148 | |||
| 149 | 18 | send_val = ext[ExtIdx(blk.height, 1, ext_w)]; | |
| 150 | 18 | MPI_Sendrecv(&send_val, 1, MPI_BYTE, nbh.down_left, 33, &recv_val, 1, MPI_BYTE, nbh.down_left, 32, MPI_COMM_WORLD, | |
| 151 | MPI_STATUS_IGNORE); | ||
| 152 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 18 times.
|
18 | if (nbh.down_left != MPI_PROC_NULL) { |
| 153 | ✗ | ext[ExtIdx(blk.height + 1, 0, ext_w)] = recv_val; | |
| 154 | } | ||
| 155 | |||
| 156 | 18 | send_val = ext[ExtIdx(blk.height, blk.width, ext_w)]; | |
| 157 | 18 | MPI_Sendrecv(&send_val, 1, MPI_BYTE, nbh.down_right, 31, &recv_val, 1, MPI_BYTE, nbh.down_right, 30, MPI_COMM_WORLD, | |
| 158 | MPI_STATUS_IGNORE); | ||
| 159 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 18 times.
|
18 | if (nbh.down_right != MPI_PROC_NULL) { |
| 160 | ✗ | ext[ExtIdx(blk.height + 1, blk.width + 1, ext_w)] = recv_val; | |
| 161 | } | ||
| 162 | 18 | } | |
| 163 | |||
| 164 | 18 | void MelnikIGaussBlockPartMPI::FixCornersWithoutDiagonal(const BlockInfo &blk, const Neighbours &nbh, int ext_w, | |
| 165 | std::vector<std::uint8_t> &ext) { | ||
| 166 | 18 | const bool has_up = nbh.up != MPI_PROC_NULL; | |
| 167 | 18 | const bool has_down = nbh.down != MPI_PROC_NULL; | |
| 168 | 18 | const bool has_left = nbh.left != MPI_PROC_NULL; | |
| 169 | 18 | const bool has_right = nbh.right != MPI_PROC_NULL; | |
| 170 | |||
| 171 |
1/2✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
|
18 | if (nbh.up_left == MPI_PROC_NULL) { |
| 172 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 7 times.
|
29 | ext[ExtIdx(0, 0, ext_w)] = SelectCornerValue(has_left, ext[ExtIdx(1, 0, ext_w)], has_up, ext[ExtIdx(0, 1, ext_w)], |
| 173 | ext[ExtIdx(1, 1, ext_w)]); | ||
| 174 | } | ||
| 175 | |||
| 176 |
1/2✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
|
18 | if (nbh.up_right == MPI_PROC_NULL) { |
| 177 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 7 times.
|
18 | const int col = blk.width + 1; |
| 178 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 7 times.
|
29 | ext[ExtIdx(0, col, ext_w)] = SelectCornerValue(has_right, ext[ExtIdx(1, col, ext_w)], has_up, |
| 179 | ext[ExtIdx(0, blk.width, ext_w)], ext[ExtIdx(1, blk.width, ext_w)]); | ||
| 180 | } | ||
| 181 | |||
| 182 |
1/2✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
|
18 | if (nbh.down_left == MPI_PROC_NULL) { |
| 183 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 7 times.
|
18 | const int row = blk.height + 1; |
| 184 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 7 times.
|
29 | ext[ExtIdx(row, 0, ext_w)] = SelectCornerValue(has_left, ext[ExtIdx(blk.height, 0, ext_w)], has_down, |
| 185 | ext[ExtIdx(row, 1, ext_w)], ext[ExtIdx(blk.height, 1, ext_w)]); | ||
| 186 | } | ||
| 187 | |||
| 188 |
1/2✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
|
18 | if (nbh.down_right == MPI_PROC_NULL) { |
| 189 | 18 | const int row = blk.height + 1; | |
| 190 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 7 times.
|
18 | const int col = blk.width + 1; |
| 191 | 18 | ext[ExtIdx(row, col, ext_w)] = | |
| 192 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 7 times.
|
18 | SelectCornerValue(has_right, ext[ExtIdx(blk.height, col, ext_w)], has_down, ext[ExtIdx(row, blk.width, ext_w)], |
| 193 | ext[ExtIdx(blk.height, blk.width, ext_w)]); | ||
| 194 | } | ||
| 195 | 18 | } | |
| 196 | |||
| 197 |
1/2✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
|
18 | MelnikIGaussBlockPartMPI::MelnikIGaussBlockPartMPI(const InType &in) { |
| 198 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 199 | GetInput() = in; | ||
| 200 | GetOutput().clear(); | ||
| 201 | 18 | } | |
| 202 | |||
| 203 | 18 | bool MelnikIGaussBlockPartMPI::ValidationImpl() { | |
| 204 | 18 | int rank = 0; | |
| 205 | 18 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 206 | |||
| 207 | 18 | int width = 0; | |
| 208 | 18 | int height = 0; | |
| 209 | 18 | int valid_flag = 0; | |
| 210 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
|
18 | if (rank == 0) { |
| 211 | const auto &[data, w, h] = GetInput(); | ||
| 212 | 9 | width = w; | |
| 213 | 9 | height = h; | |
| 214 | const std::size_t expected = | ||
| 215 |
1/2✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
|
9 | (width > 0 && height > 0) ? (static_cast<std::size_t>(width) * static_cast<std::size_t>(height)) : 0U; |
| 216 |
3/6✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 9 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 9 times.
|
9 | valid_flag = (width > 0 && height > 0 && data.size() == expected) ? 1 : 0; |
| 217 | } | ||
| 218 | |||
| 219 | 18 | MPI_Bcast(&width, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 220 | 18 | MPI_Bcast(&height, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 221 | 18 | MPI_Bcast(&valid_flag, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 222 | 18 | return valid_flag == 1; | |
| 223 | } | ||
| 224 | |||
| 225 | 18 | bool MelnikIGaussBlockPartMPI::PreProcessingImpl() { | |
| 226 | // Enforce "only rank 0 owns full input data": other ranks can drop the buffer to save memory. | ||
| 227 | 18 | int rank = 0; | |
| 228 | 18 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 229 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
|
18 | if (rank != 0) { |
| 230 | auto &in = GetInput(); | ||
| 231 | std::get<0>(in).clear(); | ||
| 232 | std::get<0>(in).shrink_to_fit(); | ||
| 233 | } | ||
| 234 | 18 | return true; | |
| 235 | } | ||
| 236 | |||
| 237 | ✗ | int MelnikIGaussBlockPartMPI::ClampInt(int v, int low, int high) { | |
| 238 | ✗ | return std::max(low, std::min(v, high)); | |
| 239 | } | ||
| 240 | |||
| 241 | 18 | std::pair<int, int> MelnikIGaussBlockPartMPI::ComputeProcessGrid(int comm_size, int width, int height) { | |
| 242 | // Choose factorization close to square and roughly matching aspect ratio. | ||
| 243 | int best_r = 1; | ||
| 244 | int best_c = comm_size; | ||
| 245 | double best_cost = std::numeric_limits<double>::infinity(); | ||
| 246 | |||
| 247 | 18 | const double aspect = static_cast<double>(height) / static_cast<double>(width); | |
| 248 | |||
| 249 |
2/2✓ Branch 0 taken 36 times.
✓ Branch 1 taken 18 times.
|
54 | for (int rows = 1; rows <= comm_size; ++rows) { |
| 250 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 36 times.
|
36 | if (comm_size % rows != 0) { |
| 251 | ✗ | continue; | |
| 252 | } | ||
| 253 | 36 | const int cols = comm_size / rows; | |
| 254 | 36 | const double grid_aspect = static_cast<double>(rows) / static_cast<double>(cols); | |
| 255 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 22 times.
|
36 | const double cost = std::abs(grid_aspect - aspect) + (0.01 * std::abs(rows - cols)); |
| 256 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 22 times.
|
36 | if (cost < best_cost) { |
| 257 | best_cost = cost; | ||
| 258 | best_r = rows; | ||
| 259 | best_c = cols; | ||
| 260 | } | ||
| 261 | } | ||
| 262 | |||
| 263 | 18 | return {best_r, best_c}; | |
| 264 | } | ||
| 265 | |||
| 266 | ✗ | MelnikIGaussBlockPartMPI::BlockInfo MelnikIGaussBlockPartMPI::ComputeBlockInfoByCoords(int pr, int pc, int grid_rows, | |
| 267 | int grid_cols, int width, | ||
| 268 | int height) { | ||
| 269 | 36 | const int base_w = width / grid_cols; | |
| 270 | 36 | const int rem_w = width % grid_cols; | |
| 271 | 36 | const int base_h = height / grid_rows; | |
| 272 | 36 | const int rem_h = height % grid_rows; | |
| 273 | |||
| 274 | ✗ | const int local_w = base_w + (pc < rem_w ? 1 : 0); | |
| 275 |
2/4✓ Branch 0 taken 34 times.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
36 | const int local_h = base_h + (pr < rem_h ? 1 : 0); |
| 276 | |||
| 277 | 36 | const int start_x = (pc * base_w) + std::min(pc, rem_w); | |
| 278 | 36 | const int start_y = (pr * base_h) + std::min(pr, rem_h); | |
| 279 | |||
| 280 | ✗ | return BlockInfo{.start_x = start_x, .start_y = start_y, .width = local_w, .height = local_h}; | |
| 281 | } | ||
| 282 | |||
| 283 | 36 | MelnikIGaussBlockPartMPI::BlockInfo MelnikIGaussBlockPartMPI::ComputeBlockInfo(int rank, int grid_rows, int grid_cols, | |
| 284 | int width, int height) { | ||
| 285 | 36 | const int pr = rank / grid_cols; | |
| 286 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 6 times.
|
36 | const int pc = rank % grid_cols; |
| 287 | 36 | return ComputeBlockInfoByCoords(pr, pc, grid_rows, grid_cols, width, height); | |
| 288 | } | ||
| 289 | |||
| 290 | 18 | void MelnikIGaussBlockPartMPI::FillExtendedWithClamp(const std::vector<std::uint8_t> &local, const BlockInfo &blk, | |
| 291 | int ext_w, std::vector<std::uint8_t> &ext) { | ||
| 292 | 18 | const int ext_h = blk.height + 2; | |
| 293 | 18 | ext.assign(static_cast<std::size_t>(ext_w) * static_cast<std::size_t>(ext_h), 0); | |
| 294 | |||
| 295 | // Interior | ||
| 296 |
2/2✓ Branch 0 taken 59 times.
✓ Branch 1 taken 18 times.
|
77 | for (int row = 0; row < blk.height; ++row) { |
| 297 |
2/2✓ Branch 0 taken 153 times.
✓ Branch 1 taken 59 times.
|
212 | for (int col = 0; col < blk.width; ++col) { |
| 298 | 153 | ext[ExtIdx(row + 1, col + 1, ext_w)] = local[Idx(row, col, blk.width)]; | |
| 299 | } | ||
| 300 | } | ||
| 301 | |||
| 302 | // Clamp borders based on own interior | ||
| 303 |
2/2✓ Branch 0 taken 49 times.
✓ Branch 1 taken 18 times.
|
67 | for (int col = 1; col <= blk.width; ++col) { |
| 304 | 49 | ext[ExtIdx(0, col, ext_w)] = ext[ExtIdx(1, col, ext_w)]; | |
| 305 | 49 | ext[ExtIdx(blk.height + 1, col, ext_w)] = ext[ExtIdx(blk.height, col, ext_w)]; | |
| 306 | } | ||
| 307 |
2/2✓ Branch 0 taken 59 times.
✓ Branch 1 taken 18 times.
|
77 | for (int row = 1; row <= blk.height; ++row) { |
| 308 | 59 | ext[ExtIdx(row, 0, ext_w)] = ext[ExtIdx(row, 1, ext_w)]; | |
| 309 | 59 | ext[ExtIdx(row, blk.width + 1, ext_w)] = ext[ExtIdx(row, blk.width, ext_w)]; | |
| 310 | } | ||
| 311 | 18 | ext[ExtIdx(0, 0, ext_w)] = ext[ExtIdx(1, 1, ext_w)]; | |
| 312 | 18 | ext[ExtIdx(0, blk.width + 1, ext_w)] = ext[ExtIdx(1, blk.width, ext_w)]; | |
| 313 | 18 | ext[ExtIdx(blk.height + 1, 0, ext_w)] = ext[ExtIdx(blk.height, 1, ext_w)]; | |
| 314 | 18 | ext[ExtIdx(blk.height + 1, blk.width + 1, ext_w)] = ext[ExtIdx(blk.height, blk.width, ext_w)]; | |
| 315 | 18 | } | |
| 316 | |||
| 317 |
1/2✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
|
18 | void MelnikIGaussBlockPartMPI::ExchangeHalos(const BlockInfo &blk, int grid_rows, int grid_cols, int rank, |
| 318 | const std::vector<BlockInfo> &all_blocks, std::vector<std::uint8_t> &ext) { | ||
| 319 | if (blk.Empty()) { | ||
| 320 | ✗ | return; | |
| 321 | } | ||
| 322 | 18 | const Neighbours nbh = ComputeNeighbours(blk, grid_rows, grid_cols, rank, all_blocks); | |
| 323 | 18 | const int ext_w = blk.width + 2; | |
| 324 | |||
| 325 | 18 | ExchangeRowHalos(blk, nbh, ext_w, ext); | |
| 326 | 18 | ExchangeColHalos(blk, nbh, ext_w, ext); | |
| 327 | 18 | ExchangeCornerHalos(blk, nbh, ext_w, ext); | |
| 328 | 18 | FixCornersWithoutDiagonal(blk, nbh, ext_w, ext); | |
| 329 | } | ||
| 330 | |||
| 331 |
1/2✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
|
18 | void MelnikIGaussBlockPartMPI::ApplyGaussianFromExtended(const BlockInfo &blk, const std::vector<std::uint8_t> &ext, |
| 332 | std::vector<std::uint8_t> &local_out) { | ||
| 333 | static constexpr std::array<int, 9> kKernel = {1, 2, 1, 2, 4, 2, 1, 2, 1}; | ||
| 334 | static constexpr int kSum = 16; | ||
| 335 | |||
| 336 | if (blk.Empty()) { | ||
| 337 | local_out.clear(); | ||
| 338 | ✗ | return; | |
| 339 | } | ||
| 340 | |||
| 341 | 18 | const int ext_w = blk.width + 2; | |
| 342 | 18 | local_out.resize(static_cast<std::size_t>(blk.width) * static_cast<std::size_t>(blk.height)); | |
| 343 | |||
| 344 |
2/2✓ Branch 0 taken 59 times.
✓ Branch 1 taken 18 times.
|
77 | for (int row = 0; row < blk.height; ++row) { |
| 345 |
2/2✓ Branch 0 taken 153 times.
✓ Branch 1 taken 59 times.
|
212 | for (int col = 0; col < blk.width; ++col) { |
| 346 | int acc = 0; | ||
| 347 | std::size_t kernel_idx = 0; | ||
| 348 |
2/2✓ Branch 0 taken 459 times.
✓ Branch 1 taken 153 times.
|
612 | for (int dy = 0; dy < 3; ++dy) { |
| 349 |
2/2✓ Branch 0 taken 1377 times.
✓ Branch 1 taken 459 times.
|
1836 | for (int dx = 0; dx < 3; ++dx) { |
| 350 | 1377 | acc += kKernel.at(kernel_idx) * static_cast<int>(ext[ExtIdx(row + dy, col + dx, ext_w)]); | |
| 351 | 1377 | ++kernel_idx; | |
| 352 | } | ||
| 353 | } | ||
| 354 | 153 | local_out[Idx(row, col, blk.width)] = static_cast<std::uint8_t>((acc + kSum / 2) / kSum); | |
| 355 | } | ||
| 356 | } | ||
| 357 | } | ||
| 358 | |||
| 359 | 18 | bool MelnikIGaussBlockPartMPI::RunImpl() { | |
| 360 | 18 | int rank = 0; | |
| 361 | 18 | int comm_size = 1; | |
| 362 | 18 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 363 | 18 | MPI_Comm_size(MPI_COMM_WORLD, &comm_size); | |
| 364 | |||
| 365 | 18 | int width = 0; | |
| 366 | 18 | int height = 0; | |
| 367 | const std::vector<std::uint8_t> *root_ptr = nullptr; | ||
| 368 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
|
18 | if (rank == 0) { |
| 369 | const auto &[data, w, h] = GetInput(); | ||
| 370 | root_ptr = &data; | ||
| 371 | 9 | width = w; | |
| 372 | 9 | height = h; | |
| 373 | } | ||
| 374 | |||
| 375 | 18 | BroadcastImageSize(rank, width, height); | |
| 376 | 18 | const auto [grid_rows, grid_cols] = ComputeProcessGrid(comm_size, width, height); | |
| 377 | 18 | const std::vector<BlockInfo> blocks = BuildAllBlocks(comm_size, grid_rows, grid_cols, width, height); | |
| 378 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
|
18 | const BlockInfo my_blk = blocks[static_cast<std::size_t>(rank)]; |
| 379 | |||
| 380 | 18 | const std::vector<std::uint8_t> empty_data{}; | |
| 381 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
|
18 | const std::vector<std::uint8_t> &root_data = (root_ptr == nullptr) ? empty_data : *root_ptr; |
| 382 |
1/2✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
|
18 | const std::vector<std::uint8_t> local_data = ScatterBlock(rank, comm_size, width, height, blocks, my_blk, root_data); |
| 383 |
1/2✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
|
18 | const std::vector<std::uint8_t> local_out = ComputeLocal(my_blk, grid_rows, grid_cols, rank, blocks, local_data); |
| 384 |
1/2✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
|
18 | std::vector<std::uint8_t> global_out = GatherGlobal(rank, comm_size, width, height, blocks, my_blk, local_out); |
| 385 |
1/2✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
|
18 | FinalizeOutput(rank, width, height, global_out); |
| 386 | 18 | return true; | |
| 387 | } | ||
| 388 | |||
| 389 | 18 | void MelnikIGaussBlockPartMPI::BroadcastImageSize(int rank, int &width, int &height) { | |
| 390 | 18 | MPI_Bcast(&width, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 391 | 18 | MPI_Bcast(&height, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 392 | (void)rank; | ||
| 393 | 18 | } | |
| 394 | |||
| 395 | 18 | std::vector<MelnikIGaussBlockPartMPI::BlockInfo> MelnikIGaussBlockPartMPI::BuildAllBlocks(int comm_size, int grid_rows, | |
| 396 | int grid_cols, int width, | ||
| 397 | int height) { | ||
| 398 | 18 | std::vector<BlockInfo> blocks(static_cast<std::size_t>(comm_size)); | |
| 399 |
2/2✓ Branch 0 taken 36 times.
✓ Branch 1 taken 18 times.
|
54 | for (int rank_idx = 0; rank_idx < comm_size; ++rank_idx) { |
| 400 | 36 | blocks[static_cast<std::size_t>(rank_idx)] = ComputeBlockInfo(rank_idx, grid_rows, grid_cols, width, height); | |
| 401 | } | ||
| 402 | 18 | return blocks; | |
| 403 | } | ||
| 404 | |||
| 405 | 9 | void MelnikIGaussBlockPartMPI::SendBlocksToOthers(int comm_size, int width, int height, | |
| 406 | const std::vector<BlockInfo> &blocks, | ||
| 407 | const std::vector<std::uint8_t> &root_data) { | ||
| 408 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
|
18 | for (int dest = 1; dest < comm_size; ++dest) { |
| 409 |
1/2✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
|
9 | const auto &blk = blocks[static_cast<std::size_t>(dest)]; |
| 410 | ✗ | if (blk.Empty()) { | |
| 411 | ✗ | MPI_Send(nullptr, 0, MPI_BYTE, dest, 0, MPI_COMM_WORLD); | |
| 412 | ✗ | continue; | |
| 413 | } | ||
| 414 | |||
| 415 | 9 | MPI_Datatype sub{}; | |
| 416 | 9 | const std::array<int, 2> sizes = {height, width}; | |
| 417 | 9 | const std::array<int, 2> subs = {blk.height, blk.width}; | |
| 418 | 9 | const std::array<int, 2> starts = {blk.start_y, blk.start_x}; | |
| 419 | 9 | MPI_Type_create_subarray(2, sizes.data(), subs.data(), starts.data(), MPI_ORDER_C, MPI_BYTE, &sub); | |
| 420 | 9 | MPI_Type_commit(&sub); | |
| 421 | 9 | MPI_Send(root_data.data(), 1, sub, dest, 0, MPI_COMM_WORLD); | |
| 422 | 9 | MPI_Type_free(&sub); | |
| 423 | } | ||
| 424 | 9 | } | |
| 425 | |||
| 426 | 18 | std::vector<std::uint8_t> MelnikIGaussBlockPartMPI::ScatterBlock(int rank, int comm_size, int width, int height, | |
| 427 | const std::vector<BlockInfo> &blocks, | ||
| 428 | const BlockInfo &my_blk, | ||
| 429 | const std::vector<std::uint8_t> &root_data) { | ||
| 430 |
1/2✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
|
18 | std::vector<std::uint8_t> local_data; |
| 431 | if (!my_blk.Empty()) { | ||
| 432 |
1/2✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
|
18 | local_data.resize(static_cast<std::size_t>(my_blk.width) * static_cast<std::size_t>(my_blk.height)); |
| 433 | } | ||
| 434 | |||
| 435 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
|
18 | if (rank == 0) { |
| 436 | // Copy root local block | ||
| 437 | if (!my_blk.Empty()) { | ||
| 438 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 9 times.
|
39 | for (int row = 0; row < my_blk.height; ++row) { |
| 439 | 30 | const int global_row = my_blk.start_y + row; | |
| 440 | 30 | const std::size_t src_off = Idx(global_row, my_blk.start_x, width); | |
| 441 | 30 | const std::size_t dst_off = Idx(row, 0, my_blk.width); | |
| 442 | std::ranges::copy( | ||
| 443 | root_data.begin() + static_cast<std::ptrdiff_t>(src_off), | ||
| 444 | root_data.begin() + static_cast<std::ptrdiff_t>(src_off + static_cast<std::size_t>(my_blk.width)), | ||
| 445 | local_data.begin() + static_cast<std::ptrdiff_t>(dst_off)); | ||
| 446 | } | ||
| 447 | } | ||
| 448 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | SendBlocksToOthers(comm_size, width, height, blocks, root_data); |
| 449 | } else { | ||
| 450 | if (!my_blk.Empty()) { | ||
| 451 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | const int recv_count = my_blk.width * my_blk.height; |
| 452 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | MPI_Recv(local_data.data(), recv_count, MPI_BYTE, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); |
| 453 | } else { | ||
| 454 | ✗ | MPI_Recv(nullptr, 0, MPI_BYTE, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); | |
| 455 | } | ||
| 456 | } | ||
| 457 | |||
| 458 | 18 | return local_data; | |
| 459 | } | ||
| 460 | |||
| 461 | 18 | std::vector<std::uint8_t> MelnikIGaussBlockPartMPI::ComputeLocal(const BlockInfo &my_blk, int grid_rows, int grid_cols, | |
| 462 | int rank, const std::vector<BlockInfo> &blocks, | ||
| 463 | const std::vector<std::uint8_t> &local_data) { | ||
| 464 |
1/2✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
|
18 | std::vector<std::uint8_t> local_out; |
| 465 | if (my_blk.Empty()) { | ||
| 466 | return local_out; | ||
| 467 | } | ||
| 468 | |||
| 469 | 18 | std::vector<std::uint8_t> ext; | |
| 470 | 18 | const int ext_w = my_blk.width + 2; | |
| 471 |
1/2✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
|
18 | FillExtendedWithClamp(local_data, my_blk, ext_w, ext); |
| 472 |
1/2✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
|
18 | ExchangeHalos(my_blk, grid_rows, grid_cols, rank, blocks, ext); |
| 473 |
1/2✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
|
18 | ApplyGaussianFromExtended(my_blk, ext, local_out); |
| 474 | return local_out; | ||
| 475 | } | ||
| 476 | |||
| 477 | 18 | std::vector<std::uint8_t> MelnikIGaussBlockPartMPI::GatherGlobal(int rank, int comm_size, int width, int height, | |
| 478 | const std::vector<BlockInfo> &blocks, | ||
| 479 | const BlockInfo &my_blk, | ||
| 480 | const std::vector<std::uint8_t> &local_out) { | ||
| 481 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
|
18 | if (rank != 0) { |
| 482 | 9 | const int send_count = my_blk.Empty() ? 0 : (my_blk.width * my_blk.height); | |
| 483 | const std::uint8_t *send_ptr = send_count > 0 ? local_out.data() : nullptr; | ||
| 484 | 9 | MPI_Send(send_ptr, send_count, MPI_BYTE, 0, 1, MPI_COMM_WORLD); | |
| 485 | 9 | return {}; | |
| 486 | } | ||
| 487 | |||
| 488 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | std::vector<std::uint8_t> global_out(static_cast<std::size_t>(width) * static_cast<std::size_t>(height), 0); |
| 489 | |||
| 490 | // Copy root block | ||
| 491 | if (!my_blk.Empty()) { | ||
| 492 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 9 times.
|
39 | for (int row = 0; row < my_blk.height; ++row) { |
| 493 | 30 | const std::size_t dst_off = Idx(my_blk.start_y + row, my_blk.start_x, width); | |
| 494 | 30 | const std::size_t src_off = Idx(row, 0, my_blk.width); | |
| 495 | std::ranges::copy( | ||
| 496 | local_out.begin() + static_cast<std::ptrdiff_t>(src_off), | ||
| 497 | local_out.begin() + static_cast<std::ptrdiff_t>(src_off + static_cast<std::size_t>(my_blk.width)), | ||
| 498 | global_out.begin() + static_cast<std::ptrdiff_t>(dst_off)); | ||
| 499 | } | ||
| 500 | } | ||
| 501 | |||
| 502 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
|
18 | for (int src = 1; src < comm_size; ++src) { |
| 503 |
1/2✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
|
9 | const auto &blk = blocks[static_cast<std::size_t>(src)]; |
| 504 | ✗ | if (blk.Empty()) { | |
| 505 | ✗ | MPI_Recv(nullptr, 0, MPI_BYTE, src, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); | |
| 506 | ✗ | continue; | |
| 507 | } | ||
| 508 | |||
| 509 | 9 | MPI_Datatype sub{}; | |
| 510 | 9 | const std::array<int, 2> sizes = {height, width}; | |
| 511 | 9 | const std::array<int, 2> subs = {blk.height, blk.width}; | |
| 512 | 9 | const std::array<int, 2> starts = {blk.start_y, blk.start_x}; | |
| 513 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | MPI_Type_create_subarray(2, sizes.data(), subs.data(), starts.data(), MPI_ORDER_C, MPI_BYTE, &sub); |
| 514 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | MPI_Type_commit(&sub); |
| 515 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | MPI_Recv(global_out.data(), 1, sub, src, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); |
| 516 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | MPI_Type_free(&sub); |
| 517 | } | ||
| 518 | |||
| 519 | return global_out; | ||
| 520 | } | ||
| 521 | |||
| 522 | 18 | void MelnikIGaussBlockPartMPI::FinalizeOutput(int rank, int width, int height, std::vector<std::uint8_t> &global_out) { | |
| 523 | 18 | const auto state = GetStateOfTesting(); | |
| 524 | 18 | const std::size_t total = static_cast<std::size_t>(width) * static_cast<std::size_t>(height); | |
| 525 |
1/2✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
|
18 | if (state == ppc::task::StateOfTesting::kFunc) { |
| 526 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
|
18 | if (rank != 0) { |
| 527 | 9 | global_out.assign(total, 0); | |
| 528 | } | ||
| 529 |
1/2✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
|
18 | if (total > 0) { |
| 530 | 18 | MPI_Bcast(global_out.data(), static_cast<int>(total), MPI_BYTE, 0, MPI_COMM_WORLD); | |
| 531 | } | ||
| 532 | GetOutput() = std::move(global_out); | ||
| 533 | 18 | return; | |
| 534 | } | ||
| 535 | |||
| 536 | ✗ | if (rank == 0) { | |
| 537 | GetOutput() = std::move(global_out); | ||
| 538 | } else { | ||
| 539 | GetOutput().clear(); | ||
| 540 | } | ||
| 541 | } | ||
| 542 | |||
| 543 | 18 | bool MelnikIGaussBlockPartMPI::PostProcessingImpl() { | |
| 544 | 18 | return true; | |
| 545 | } | ||
| 546 | |||
| 547 | } // namespace melnik_i_gauss_block_part | ||
| 548 |