| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "pankov_gauss_filter/mpi/include/ops_mpi.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <cstdint> | ||
| 9 | #include <utility> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "pankov_gauss_filter/common/include/common.hpp" | ||
| 13 | |||
| 14 | namespace pankov_gauss_filter { | ||
| 15 | |||
| 16 | namespace { | ||
| 17 | |||
| 18 | inline int ClampInt(int v, int lo, int hi) { | ||
| 19 | return std::max(lo, std::min(v, hi)); | ||
| 20 | } | ||
| 21 | |||
| 22 | inline std::uint8_t ClampToByte(int v) { | ||
| 23 |
1/2✓ Branch 0 taken 58 times.
✗ Branch 1 not taken.
|
58 | v = std::max(0, std::min(v, 255)); |
| 24 | 58 | return static_cast<std::uint8_t>(v); | |
| 25 | } | ||
| 26 | |||
| 27 | constexpr std::array<std::array<int, 3>, 3> kGaussianKernel3x3 = {{{{1, 2, 1}}, {{2, 4, 2}}, {{1, 2, 1}}}}; | ||
| 28 | constexpr int kGaussianDiv = 16; | ||
| 29 | |||
| 30 | struct Decomposition { | ||
| 31 | std::size_t width = 0; | ||
| 32 | std::size_t base_cols = 0; | ||
| 33 | std::size_t rem_cols = 0; | ||
| 34 | |||
| 35 | [[nodiscard]] std::size_t StartColForProc(int proc_rank) const { | ||
| 36 | 9 | return (static_cast<std::size_t>(proc_rank) * base_cols) + | |
| 37 | 9 | static_cast<std::size_t>(std::min(proc_rank, static_cast<int>(rem_cols))); | |
| 38 | } | ||
| 39 | |||
| 40 | [[nodiscard]] std::size_t LocalWidthForProc(int proc_rank) const { | ||
| 41 |
7/8✓ Branch 0 taken 7 times.
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 5 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 7 times.
✓ Branch 8 taken 3 times.
|
39 | return base_cols + (std::cmp_less(proc_rank, static_cast<int>(rem_cols)) ? 1U : 0U); |
| 42 | } | ||
| 43 | }; | ||
| 44 | |||
| 45 | Decomposition MakeDecomposition(std::size_t width, int proc_count) { | ||
| 46 | Decomposition dec; | ||
| 47 | 10 | dec.width = width; | |
| 48 | 10 | dec.base_cols = width / static_cast<std::size_t>(proc_count); | |
| 49 | 10 | dec.rem_cols = width % static_cast<std::size_t>(proc_count); | |
| 50 | return dec; | ||
| 51 | } | ||
| 52 | |||
| 53 | 5 | std::pair<std::vector<int>, std::vector<int>> BuildSendCountsDispls(std::size_t height, std::size_t channels, | |
| 54 | const Decomposition &dec, int proc_count) { | ||
| 55 | 5 | std::vector<int> sendcounts(static_cast<std::size_t>(proc_count), 0); | |
| 56 |
1/4✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
5 | std::vector<int> displs(static_cast<std::size_t>(proc_count), 0); |
| 57 | |||
| 58 | int disp = 0; | ||
| 59 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 5 times.
|
15 | for (int proc_rank = 0; proc_rank < proc_count; ++proc_rank) { |
| 60 | const std::size_t local_width = dec.LocalWidthForProc(proc_rank); | ||
| 61 | 10 | const std::size_t cnt = height * local_width * channels; | |
| 62 | 10 | sendcounts[static_cast<std::size_t>(proc_rank)] = static_cast<int>(cnt); | |
| 63 | 10 | displs[static_cast<std::size_t>(proc_rank)] = disp; | |
| 64 | 10 | disp += static_cast<int>(cnt); | |
| 65 | } | ||
| 66 | |||
| 67 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
10 | return {sendcounts, displs}; |
| 68 | } | ||
| 69 | |||
| 70 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 3 times.
|
10 | void PackStripeForProc(const Image &image, const Decomposition &dec, std::size_t height, std::size_t channels, |
| 71 | int proc_rank, std::vector<std::uint8_t> *packed) { | ||
| 72 | const std::size_t local_width = dec.LocalWidthForProc(proc_rank); | ||
| 73 | const std::size_t start_col = dec.StartColForProc(proc_rank); | ||
| 74 | 10 | packed->assign(height * local_width * channels, 0); | |
| 75 | |||
| 76 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 1 times.
|
10 | if (local_width == 0) { |
| 77 | return; | ||
| 78 | } | ||
| 79 | |||
| 80 |
2/2✓ Branch 0 taken 23 times.
✓ Branch 1 taken 9 times.
|
32 | for (std::size_t row_idx = 0; row_idx < height; ++row_idx) { |
| 81 |
2/2✓ Branch 0 taken 42 times.
✓ Branch 1 taken 23 times.
|
65 | for (std::size_t local_col_idx = 0; local_col_idx < local_width; ++local_col_idx) { |
| 82 | 42 | const std::size_t src_col = start_col + local_col_idx; | |
| 83 | 42 | const std::size_t src_idx = (row_idx * dec.width + src_col) * channels; | |
| 84 | 42 | const std::size_t dst_idx = (row_idx * local_width + local_col_idx) * channels; | |
| 85 |
2/2✓ Branch 0 taken 58 times.
✓ Branch 1 taken 42 times.
|
100 | for (std::size_t ch_idx = 0; ch_idx < channels; ++ch_idx) { |
| 86 | 58 | (*packed)[dst_idx + ch_idx] = image.data[src_idx + ch_idx]; | |
| 87 | } | ||
| 88 | } | ||
| 89 | } | ||
| 90 | } | ||
| 91 | |||
| 92 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 3 times.
|
10 | void DistributeStripes(const Image &image, const Decomposition &dec, std::size_t height, std::size_t channels, |
| 93 | int proc_rank, int proc_count, std::vector<std::uint8_t> *local_stripe) { | ||
| 94 | const std::size_t local_width = dec.LocalWidthForProc(proc_rank); | ||
| 95 | 10 | const std::size_t local_elems = height * local_width * channels; | |
| 96 | 10 | local_stripe->assign(local_elems, 0); | |
| 97 | |||
| 98 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (proc_rank == 0) { |
| 99 | 5 | PackStripeForProc(image, dec, height, channels, 0, local_stripe); | |
| 100 | 5 | std::vector<std::uint8_t> tmp; | |
| 101 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | for (int other_rank = 1; other_rank < proc_count; ++other_rank) { |
| 102 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | PackStripeForProc(image, dec, height, channels, other_rank, &tmp); |
| 103 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | MPI_Send(tmp.data(), static_cast<int>(tmp.size()), MPI_UNSIGNED_CHAR, other_rank, 0, MPI_COMM_WORLD); |
| 104 | } | ||
| 105 | } else { | ||
| 106 | 5 | MPI_Recv(local_stripe->data(), static_cast<int>(local_elems), MPI_UNSIGNED_CHAR, 0, 0, MPI_COMM_WORLD, | |
| 107 | MPI_STATUS_IGNORE); | ||
| 108 | } | ||
| 109 | 10 | } | |
| 110 | |||
| 111 | 18 | void ExtractStripeColumn(const std::vector<std::uint8_t> &stripe, std::size_t stripe_width, std::size_t height, | |
| 112 | std::size_t channels, std::size_t col_idx, std::vector<std::uint8_t> *column) { | ||
| 113 | 18 | column->assign(height * channels, 0); | |
| 114 |
2/2✓ Branch 0 taken 46 times.
✓ Branch 1 taken 18 times.
|
64 | for (std::size_t row_idx = 0; row_idx < height; ++row_idx) { |
| 115 | 46 | const std::size_t base = (row_idx * stripe_width + col_idx) * channels; | |
| 116 |
2/2✓ Branch 0 taken 62 times.
✓ Branch 1 taken 46 times.
|
108 | for (std::size_t ch_idx = 0; ch_idx < channels; ++ch_idx) { |
| 117 | 62 | (*column)[(row_idx * channels) + ch_idx] = stripe[base + ch_idx]; | |
| 118 | } | ||
| 119 | } | ||
| 120 | 18 | } | |
| 121 | |||
| 122 | 10 | void ExchangeHaloColumns(const Decomposition &dec, std::size_t height, std::size_t channels, int proc_rank, | |
| 123 | int proc_count, const std::vector<std::uint8_t> &local_stripe, std::size_t local_width, | ||
| 124 | std::vector<std::uint8_t> *left_halo, std::vector<std::uint8_t> *right_halo) { | ||
| 125 | 10 | const std::size_t col_elems = height * channels; | |
| 126 | 10 | left_halo->assign(col_elems, 0); | |
| 127 | 10 | right_halo->assign(col_elems, 0); | |
| 128 | |||
| 129 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 9 times.
|
10 | if (local_width == 0) { |
| 130 | 1 | return; | |
| 131 | } | ||
| 132 | |||
| 133 | 9 | std::vector<std::uint8_t> first_col; | |
| 134 | 9 | std::vector<std::uint8_t> last_col; | |
| 135 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | ExtractStripeColumn(local_stripe, local_width, height, channels, 0, &first_col); |
| 136 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | ExtractStripeColumn(local_stripe, local_width, height, channels, local_width - 1, &last_col); |
| 137 | |||
| 138 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | *left_halo = first_col; // replicate by default |
| 139 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | *right_halo = last_col; // replicate by default |
| 140 | |||
| 141 |
3/4✓ Branch 0 taken 4 times.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 4 times.
|
13 | const bool has_left_neighbor = (proc_rank > 0) && (dec.LocalWidthForProc(proc_rank - 1) > 0) && (local_width > 0); |
| 142 | const bool has_right_neighbor = | ||
| 143 |
4/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 4 times.
|
14 | (proc_rank + 1 < proc_count) && (dec.LocalWidthForProc(proc_rank + 1) > 0) && (local_width > 0); |
| 144 | |||
| 145 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 5 times.
|
9 | if (has_left_neighbor) { |
| 146 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | MPI_Sendrecv(first_col.data(), static_cast<int>(col_elems), MPI_UNSIGNED_CHAR, proc_rank - 1, 100, |
| 147 | left_halo->data(), static_cast<int>(col_elems), MPI_UNSIGNED_CHAR, proc_rank - 1, 200, MPI_COMM_WORLD, | ||
| 148 | MPI_STATUS_IGNORE); | ||
| 149 | } | ||
| 150 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 5 times.
|
9 | if (has_right_neighbor) { |
| 151 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | MPI_Sendrecv(last_col.data(), static_cast<int>(col_elems), MPI_UNSIGNED_CHAR, proc_rank + 1, 200, |
| 152 | right_halo->data(), static_cast<int>(col_elems), MPI_UNSIGNED_CHAR, proc_rank + 1, 100, MPI_COMM_WORLD, | ||
| 153 | MPI_STATUS_IGNORE); | ||
| 154 | } | ||
| 155 | } | ||
| 156 | |||
| 157 | 58 | std::uint8_t ConvolveGaussian3x3ForStripe(const std::vector<std::uint8_t> &local_stripe, std::size_t local_width, | |
| 158 | const std::vector<std::uint8_t> &left_halo, | ||
| 159 | const std::vector<std::uint8_t> &right_halo, int height, int channels, | ||
| 160 | int row, std::size_t local_col, int channel) { | ||
| 161 | 58 | const auto u_channels = static_cast<std::size_t>(channels); | |
| 162 | |||
| 163 | 58 | const int row_prev = ClampInt(row - 1, 0, height - 1); | |
| 164 | const int row_curr = row; | ||
| 165 | 58 | const int row_next = ClampInt(row + 1, 0, height - 1); | |
| 166 | 58 | const std::array<int, 3> src_rows = {row_prev, row_curr, row_next}; | |
| 167 | |||
| 168 | int acc = 0; | ||
| 169 |
2/2✓ Branch 0 taken 174 times.
✓ Branch 1 taken 58 times.
|
232 | for (int dy_idx = 0; dy_idx < 3; ++dy_idx) { |
| 170 | 174 | const int src_row = src_rows.at(static_cast<std::size_t>(dy_idx)); | |
| 171 | 174 | const std::size_t halo_row_off = static_cast<std::size_t>(src_row) * u_channels; | |
| 172 |
2/2✓ Branch 0 taken 522 times.
✓ Branch 1 taken 174 times.
|
696 | for (int dx_idx = 0; dx_idx < 3; ++dx_idx) { |
| 173 | 522 | const int weight = kGaussianKernel3x3.at(static_cast<std::size_t>(dy_idx)).at(static_cast<std::size_t>(dx_idx)); | |
| 174 | 522 | const int offset = dx_idx - 1; | |
| 175 | |||
| 176 | std::uint8_t pix = 0; | ||
| 177 |
2/2✓ Branch 0 taken 93 times.
✓ Branch 1 taken 429 times.
|
522 | if (offset == -1 && local_col == 0) { |
| 178 | 93 | pix = left_halo[halo_row_off + static_cast<std::size_t>(channel)]; | |
| 179 |
4/4✓ Branch 0 taken 174 times.
✓ Branch 1 taken 255 times.
✓ Branch 2 taken 93 times.
✓ Branch 3 taken 81 times.
|
429 | } else if (offset == 1 && (local_col + 1 == local_width)) { |
| 180 | 93 | pix = right_halo[halo_row_off + static_cast<std::size_t>(channel)]; | |
| 181 | } else { | ||
| 182 | std::size_t src_local_col = local_col; | ||
| 183 |
2/2✓ Branch 0 taken 81 times.
✓ Branch 1 taken 255 times.
|
336 | if (offset == -1) { |
| 184 | 81 | src_local_col = local_col - 1; | |
| 185 |
2/2✓ Branch 0 taken 81 times.
✓ Branch 1 taken 174 times.
|
255 | } else if (offset == 1) { |
| 186 | 81 | src_local_col = local_col + 1; | |
| 187 | } | ||
| 188 | 336 | const std::size_t idx = ((static_cast<std::size_t>(src_row) * local_width + src_local_col) * u_channels) + | |
| 189 | 336 | static_cast<std::size_t>(channel); | |
| 190 | 336 | pix = local_stripe[idx]; | |
| 191 | } | ||
| 192 | 522 | acc += weight * static_cast<int>(pix); | |
| 193 | } | ||
| 194 | } | ||
| 195 | |||
| 196 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 58 times.
|
58 | const int rounded = (acc + (kGaussianDiv / 2)) / kGaussianDiv; |
| 197 | 58 | return ClampToByte(rounded); | |
| 198 | } | ||
| 199 | |||
| 200 | 10 | void ApplyGaussianToStripe(const std::vector<std::uint8_t> &local_stripe, std::size_t local_width, | |
| 201 | const std::vector<std::uint8_t> &left_halo, const std::vector<std::uint8_t> &right_halo, | ||
| 202 | int height, int channels, std::vector<std::uint8_t> *local_out) { | ||
| 203 | 10 | local_out->assign(static_cast<std::size_t>(height) * local_width * static_cast<std::size_t>(channels), 0); | |
| 204 | |||
| 205 |
2/2✓ Branch 0 taken 24 times.
✓ Branch 1 taken 10 times.
|
34 | for (int row = 0; row < height; ++row) { |
| 206 |
2/2✓ Branch 0 taken 42 times.
✓ Branch 1 taken 24 times.
|
66 | for (std::size_t local_col = 0; local_col < local_width; ++local_col) { |
| 207 |
2/2✓ Branch 0 taken 58 times.
✓ Branch 1 taken 42 times.
|
100 | for (int channel = 0; channel < channels; ++channel) { |
| 208 | 58 | const std::size_t out_idx = | |
| 209 | 58 | ((static_cast<std::size_t>(row) * local_width + local_col) * static_cast<std::size_t>(channels)) + | |
| 210 | 58 | static_cast<std::size_t>(channel); | |
| 211 | 58 | (*local_out)[out_idx] = ConvolveGaussian3x3ForStripe(local_stripe, local_width, left_halo, right_halo, height, | |
| 212 | channels, row, local_col, channel); | ||
| 213 | } | ||
| 214 | } | ||
| 215 | } | ||
| 216 | 10 | } | |
| 217 | |||
| 218 | 5 | void UnpackGatheredToImage(const Decomposition &dec, std::size_t height, std::size_t channels, int proc_count, | |
| 219 | const std::vector<std::uint8_t> &gathered, const std::vector<int> &displs, | ||
| 220 | std::vector<std::uint8_t> *global_out) { | ||
| 221 | 5 | global_out->assign(dec.width * height * channels, 0); | |
| 222 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 5 times.
|
15 | for (int proc_rank = 0; proc_rank < proc_count; ++proc_rank) { |
| 223 | const std::size_t local_width = dec.LocalWidthForProc(proc_rank); | ||
| 224 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 9 times.
|
10 | if (local_width == 0) { |
| 225 | 1 | continue; | |
| 226 | } | ||
| 227 | const std::size_t start_col = dec.StartColForProc(proc_rank); | ||
| 228 | 9 | const auto offset = static_cast<std::size_t>(displs[static_cast<std::size_t>(proc_rank)]); | |
| 229 |
2/2✓ Branch 0 taken 23 times.
✓ Branch 1 taken 9 times.
|
32 | for (std::size_t row_idx = 0; row_idx < height; ++row_idx) { |
| 230 |
2/2✓ Branch 0 taken 42 times.
✓ Branch 1 taken 23 times.
|
65 | for (std::size_t local_col_idx = 0; local_col_idx < local_width; ++local_col_idx) { |
| 231 | 42 | const std::size_t dst_col = start_col + local_col_idx; | |
| 232 | 42 | const std::size_t dst_idx = (row_idx * dec.width + dst_col) * channels; | |
| 233 | 42 | const std::size_t src_idx = offset + ((row_idx * local_width + local_col_idx) * channels); | |
| 234 |
2/2✓ Branch 0 taken 58 times.
✓ Branch 1 taken 42 times.
|
100 | for (std::size_t ch_idx = 0; ch_idx < channels; ++ch_idx) { |
| 235 | 58 | (*global_out)[dst_idx + ch_idx] = gathered[src_idx + ch_idx]; | |
| 236 | } | ||
| 237 | } | ||
| 238 | } | ||
| 239 | } | ||
| 240 | 5 | } | |
| 241 | |||
| 242 | } // namespace | ||
| 243 | |||
| 244 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | PankovGaussFilterMPI::PankovGaussFilterMPI(const InType &in) { |
| 245 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 246 | InType temp(in); | ||
| 247 | 10 | std::swap(GetInput(), temp); | |
| 248 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
|
20 | GetOutput() = OutType{}; |
| 249 | 10 | } | |
| 250 | |||
| 251 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
10 | bool PankovGaussFilterMPI::ValidationImpl() { |
| 252 | const auto &in = GetInput(); | ||
| 253 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
10 | if (!GetOutput().data.empty()) { |
| 254 | return false; | ||
| 255 | } | ||
| 256 |
3/6✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 10 times.
✗ Branch 5 not taken.
|
10 | if (in.width < 0 || in.height < 0 || in.channels < 0) { |
| 257 | return false; | ||
| 258 | } | ||
| 259 |
2/4✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 10 times.
|
10 | if (in.width == 0 || in.height == 0) { |
| 260 | ✗ | return in.data.empty(); | |
| 261 | } | ||
| 262 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
10 | if (in.channels == 0) { |
| 263 | return false; | ||
| 264 | } | ||
| 265 | 10 | const auto expected = | |
| 266 | 10 | static_cast<std::size_t>(in.width) * static_cast<std::size_t>(in.height) * static_cast<std::size_t>(in.channels); | |
| 267 | 10 | return in.data.size() == expected; | |
| 268 | } | ||
| 269 | |||
| 270 | 10 | bool PankovGaussFilterMPI::PreProcessingImpl() { | |
| 271 | const auto &in = GetInput(); | ||
| 272 | auto &out = GetOutput(); | ||
| 273 | 10 | out.width = in.width; | |
| 274 | 10 | out.height = in.height; | |
| 275 | 10 | out.channels = in.channels; | |
| 276 | 10 | const auto total = | |
| 277 | 10 | static_cast<std::size_t>(in.width) * static_cast<std::size_t>(in.height) * static_cast<std::size_t>(in.channels); | |
| 278 | 10 | out.data.assign(total, 0); | |
| 279 | 10 | return true; | |
| 280 | } | ||
| 281 | |||
| 282 | 10 | bool PankovGaussFilterMPI::RunImpl() { | |
| 283 | 10 | int proc_rank = 0; | |
| 284 | 10 | int proc_count = 1; | |
| 285 | 10 | MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank); | |
| 286 | 10 | MPI_Comm_size(MPI_COMM_WORLD, &proc_count); | |
| 287 | |||
| 288 | const auto &in = GetInput(); | ||
| 289 | 10 | int width = 0; | |
| 290 | 10 | int height = 0; | |
| 291 | 10 | int channels = 0; | |
| 292 | |||
| 293 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (proc_rank == 0) { |
| 294 | 5 | width = in.width; | |
| 295 | 5 | height = in.height; | |
| 296 | 5 | channels = in.channels; | |
| 297 | } | ||
| 298 | |||
| 299 | 10 | MPI_Bcast(&width, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 300 | 10 | MPI_Bcast(&height, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 301 | 10 | MPI_Bcast(&channels, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 302 | |||
| 303 |
2/4✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 10 times.
|
10 | if (width == 0 || height == 0) { |
| 304 | auto &out = GetOutput(); | ||
| 305 | ✗ | out.width = width; | |
| 306 | ✗ | out.height = height; | |
| 307 | ✗ | out.channels = channels; | |
| 308 | out.data.clear(); | ||
| 309 | ✗ | MPI_Barrier(MPI_COMM_WORLD); | |
| 310 | ✗ | return true; | |
| 311 | } | ||
| 312 | |||
| 313 | 10 | const auto u_w = static_cast<std::size_t>(width); | |
| 314 | 10 | const auto u_h = static_cast<std::size_t>(height); | |
| 315 | 10 | const auto u_ch = static_cast<std::size_t>(channels); | |
| 316 | |||
| 317 | 10 | const Decomposition dec = MakeDecomposition(u_w, proc_count); | |
| 318 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 3 times.
|
10 | const std::size_t local_width = dec.LocalWidthForProc(proc_rank); |
| 319 | 10 | const std::size_t local_elems = u_h * local_width * u_ch; | |
| 320 | |||
| 321 | 10 | std::vector<int> sendcounts(static_cast<std::size_t>(proc_count), 0); | |
| 322 |
1/4✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
10 | std::vector<int> displs(static_cast<std::size_t>(proc_count), 0); |
| 323 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (proc_rank == 0) { |
| 324 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | auto counts_displs = BuildSendCountsDispls(u_h, u_ch, dec, proc_count); |
| 325 | sendcounts = std::move(counts_displs.first); | ||
| 326 | displs = std::move(counts_displs.second); | ||
| 327 | 5 | } | |
| 328 | |||
| 329 | 10 | std::vector<std::uint8_t> local_stripe; | |
| 330 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | DistributeStripes(in, dec, u_h, u_ch, proc_rank, proc_count, &local_stripe); |
| 331 | |||
| 332 | 10 | std::vector<std::uint8_t> left_halo; | |
| 333 | 10 | std::vector<std::uint8_t> right_halo; | |
| 334 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | ExchangeHaloColumns(dec, u_h, u_ch, proc_rank, proc_count, local_stripe, local_width, &left_halo, &right_halo); |
| 335 | |||
| 336 | 10 | std::vector<std::uint8_t> local_out; | |
| 337 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | ApplyGaussianToStripe(local_stripe, local_width, left_halo, right_halo, height, channels, &local_out); |
| 338 | |||
| 339 | 10 | const std::size_t total = u_w * u_h * u_ch; | |
| 340 | 10 | std::vector<std::uint8_t> gathered; | |
| 341 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (proc_rank == 0) { |
| 342 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | gathered.resize(total); |
| 343 | } | ||
| 344 | |||
| 345 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | std::uint8_t *recv_buf = (proc_rank == 0) ? gathered.data() : nullptr; |
| 346 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | int *recv_counts = (proc_rank == 0) ? sendcounts.data() : nullptr; |
| 347 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | int *recv_displs = (proc_rank == 0) ? displs.data() : nullptr; |
| 348 | |||
| 349 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Gatherv(local_out.data(), static_cast<int>(local_elems), MPI_UNSIGNED_CHAR, recv_buf, recv_counts, recv_displs, |
| 350 | MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD); | ||
| 351 | |||
| 352 | 10 | std::vector<std::uint8_t> global_out; | |
| 353 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (proc_rank == 0) { |
| 354 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | UnpackGatheredToImage(dec, u_h, u_ch, proc_count, gathered, displs, &global_out); |
| 355 | } else { | ||
| 356 |
1/4✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
5 | global_out.assign(total, 0); |
| 357 | } | ||
| 358 | |||
| 359 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Bcast(global_out.data(), static_cast<int>(global_out.size()), MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD); |
| 360 | |||
| 361 | auto &out = GetOutput(); | ||
| 362 | 10 | out.width = width; | |
| 363 | 10 | out.height = height; | |
| 364 | 10 | out.channels = channels; | |
| 365 | 10 | out.data = std::move(global_out); | |
| 366 | |||
| 367 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Barrier(MPI_COMM_WORLD); |
| 368 | return true; | ||
| 369 | } | ||
| 370 | |||
| 371 | 10 | bool PankovGaussFilterMPI::PostProcessingImpl() { | |
| 372 | const auto &out = GetOutput(); | ||
| 373 |
2/4✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 10 times.
|
10 | if (out.width == 0 || out.height == 0) { |
| 374 | ✗ | return out.data.empty(); | |
| 375 | } | ||
| 376 | 10 | return !out.data.empty(); | |
| 377 | } | ||
| 378 | |||
| 379 | } // namespace pankov_gauss_filter | ||
| 380 |