| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "zenin_a_gauss_filter/mpi/include/ops_mpi.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <cstdint> | ||
| 9 | #include <functional> | ||
| 10 | #include <utility> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include "zenin_a_gauss_filter/common/include/common.hpp" | ||
| 14 | |||
| 15 | namespace zenin_a_gauss_filter { | ||
| 16 | |||
| 17 | namespace { | ||
| 18 | |||
// Number of extra pixels kept on every side of a local block so that the 3x3
// kernel can be evaluated on the block's border pixels.
constexpr int kHalo{1};
// MPI message tag used when rank 0 ships a halo-expanded input block.
constexpr int kTagExpanded{200};
// MPI message tag used when a rank ships its filtered block back to rank 0.
constexpr int kTagResult{500};
| 22 | |||
// Geometry of one process's tile inside the full image (all values in pixels).
struct BlockInfo {
  int my_h = 0;     // number of rows in the tile
  int my_w = 0;     // number of columns in the tile
  int start_y = 0;  // global row index of the tile's first row
  int start_x = 0;  // global column index of the tile's first column
};
| 27 | |||
// Flat offset of channel `chan` of pixel (gx, gy) in a row-major image buffer
// with interleaved channels that is `width` pixels wide.
std::size_t GlobalIdx(int gx, int gy, int chan, int width, int channels) {
  const std::size_t row_start = static_cast<std::size_t>(gy) * width;
  const std::size_t pixel_no = row_start + gx;
  return (pixel_no * channels) + static_cast<std::size_t>(chan);
}
| 31 | |||
// Restricts `v` to the range [lo, hi]: the upper bound is applied first, then
// the lower bound (so `lo` wins if the bounds are inverted).
int Clampi(int v, int lo, int hi) {
  const int capped = (v < hi) ? v : hi;
  return (lo < capped) ? capped : lo;
}
| 35 | |||
| 36 | std::uint8_t Clampu8(int v) { | ||
| 37 | 51776 | return static_cast<std::uint8_t>(Clampi(v, 0, 255)); | |
| 38 | } | ||
| 39 | |||
// Reads channel `c` of pixel (x, y) from a row-major, channel-interleaved
// buffer that is `local_w_with_halo` pixels wide and has `ch` channels.
// The offset is computed in std::size_t (like GlobalIdx) so that large buffers
// cannot overflow signed `int` arithmetic. No bounds checking is performed;
// the coordinates must be non-negative and inside the buffer.
std::uint8_t GetLocal(const std::vector<std::uint8_t> &buf, int local_w_with_halo, int ch, int x, int y, int c) {
  const auto row = static_cast<std::size_t>(y);
  const auto stride = static_cast<std::size_t>(local_w_with_halo);
  const std::size_t pixel_no = (row * stride) + static_cast<std::size_t>(x);
  const std::size_t idx = (pixel_no * static_cast<std::size_t>(ch)) + static_cast<std::size_t>(c);
  return buf[idx];
}
| 44 | |||
| 45 | BlockInfo CalcBlock(int pr, int pc, int h, int w, int grid_r, int grid_c) { | ||
| 46 | 176 | const int base_h = h / grid_r; | |
| 47 | 176 | const int base_w = w / grid_c; | |
| 48 | 176 | const int extra_h = h % grid_r; | |
| 49 | 176 | const int extra_w = w % grid_c; | |
| 50 | |||
| 51 | BlockInfo b; | ||
| 52 | 352 | b.my_h = base_h + (pr < extra_h ? 1 : 0); | |
| 53 |
2/4✓ Branch 0 taken 88 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 88 times.
✗ Branch 3 not taken.
|
176 | b.my_w = base_w + (pc < extra_w ? 1 : 0); |
| 54 | |||
| 55 | 88 | b.start_y = (pr * base_h) + std::min(pr, extra_h); | |
| 56 | 176 | b.start_x = (pc * base_w) + std::min(pc, extra_w); | |
| 57 | return b; | ||
| 58 | } | ||
| 59 | |||
| 60 | 88 | void FillExpandedBlock(const zenin_a_gauss_filter::Image &img, const zenin_a_gauss_filter::BlockInfo &bb, int width, | |
| 61 | int height, int channels, std::vector<std::uint8_t> *dst) { | ||
| 62 | 88 | const int hh = bb.my_h; | |
| 63 | 88 | const int ww = bb.my_w; | |
| 64 | 88 | const int dst_w = ww + (2 * kHalo); | |
| 65 | 88 | const int dst_h = hh + (2 * kHalo); | |
| 66 | |||
| 67 | 88 | dst->assign(static_cast<std::size_t>(dst_h) * dst_w * channels, 0); | |
| 68 | |||
| 69 |
2/2✓ Branch 0 taken 1129 times.
✓ Branch 1 taken 88 times.
|
1217 | for (int ly = -kHalo; ly < hh + kHalo; ++ly) { |
| 70 |
2/2✓ Branch 0 taken 39620 times.
✓ Branch 1 taken 1129 times.
|
40749 | for (int lx = -kHalo; lx < ww + kHalo; ++lx) { |
| 71 | 39620 | int gy = bb.start_y + ly; | |
| 72 | 39620 | int gx = bb.start_x + lx; | |
| 73 | |||
| 74 |
4/4✓ Branch 0 taken 37312 times.
✓ Branch 1 taken 2308 times.
✓ Branch 2 taken 37375 times.
✓ Branch 3 taken 2245 times.
|
76932 | gy = std::max(0, std::min(height - 1, gy)); |
| 75 |
4/4✓ Branch 0 taken 37362 times.
✓ Branch 1 taken 2258 times.
✓ Branch 2 taken 37345 times.
✓ Branch 3 taken 2275 times.
|
76982 | gx = std::max(0, std::min(width - 1, gx)); |
| 76 | |||
| 77 | 39620 | const int dy = ly + kHalo; | |
| 78 | 39620 | const int dx = lx + kHalo; | |
| 79 | |||
| 80 |
2/2✓ Branch 0 taken 61854 times.
✓ Branch 1 taken 39620 times.
|
101474 | for (int chan = 0; chan < channels; ++chan) { |
| 81 | 61854 | (*dst)[((dy * dst_w + dx) * channels) + chan] = img.pixels[GlobalIdx(gx, gy, chan, width, channels)]; | |
| 82 | } | ||
| 83 | } | ||
| 84 | } | ||
| 85 | 88 | } | |
| 86 | |||
| 87 | 88 | void BuildOrRecvExpandedBlock(int rank, int proc_num, int grid_cols, int width, int height, int channels, | |
| 88 | const zenin_a_gauss_filter::BlockInfo &my_block, | ||
| 89 | const std::function<zenin_a_gauss_filter::BlockInfo(int, int)> &calc_block, | ||
| 90 | const zenin_a_gauss_filter::Image *root_img, std::vector<std::uint8_t> *local_in) { | ||
| 91 |
2/2✓ Branch 0 taken 44 times.
✓ Branch 1 taken 44 times.
|
88 | if (rank == 0) { |
| 92 | 44 | FillExpandedBlock(*root_img, my_block, width, height, channels, local_in); | |
| 93 | |||
| 94 |
2/2✓ Branch 0 taken 44 times.
✓ Branch 1 taken 44 times.
|
88 | for (int rnk = 1; rnk < proc_num; ++rnk) { |
| 95 | 44 | const int rpr = rnk / grid_cols; | |
| 96 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 44 times.
|
44 | const int rpc = rnk % grid_cols; |
| 97 | 44 | const auto rb = calc_block(rpr, rpc); | |
| 98 | |||
| 99 | 44 | std::vector<std::uint8_t> pack; | |
| 100 |
1/2✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
|
44 | FillExpandedBlock(*root_img, rb, width, height, channels, &pack); |
| 101 |
1/2✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
|
44 | MPI_Send(pack.data(), static_cast<int>(pack.size()), MPI_UNSIGNED_CHAR, rnk, kTagExpanded, MPI_COMM_WORLD); |
| 102 | } | ||
| 103 | } else { | ||
| 104 | 44 | MPI_Recv(local_in->data(), static_cast<int>(local_in->size()), MPI_UNSIGNED_CHAR, 0, kTagExpanded, MPI_COMM_WORLD, | |
| 105 | MPI_STATUS_IGNORE); | ||
| 106 | } | ||
| 107 | 88 | } | |
| 108 | |||
| 109 | 88 | void ConvolveLocalBlock(const std::vector<std::uint8_t> &local_in, int lw, int my_w, int my_h, int channels, | |
| 110 | std::vector<std::uint8_t> *local_out) { | ||
| 111 | constexpr int kKernelSum = 16; | ||
| 112 | |||
| 113 |
2/2✓ Branch 0 taken 953 times.
✓ Branch 1 taken 88 times.
|
1041 | for (int yd = 0; yd < my_h; ++yd) { |
| 114 | 953 | const int ly = yd + kHalo; | |
| 115 |
2/2✓ Branch 0 taken 33290 times.
✓ Branch 1 taken 953 times.
|
34243 | for (int xd = 0; xd < my_w; ++xd) { |
| 116 | 33290 | const int lx = xd + kHalo; | |
| 117 |
2/2✓ Branch 0 taken 51776 times.
✓ Branch 1 taken 33290 times.
|
85066 | for (int chan = 0; chan < channels; ++chan) { |
| 118 | 51776 | const int v00 = static_cast<int>(zenin_a_gauss_filter::GetLocal(local_in, lw, channels, lx - 1, ly - 1, chan)); | |
| 119 | 51776 | const int v01 = static_cast<int>(zenin_a_gauss_filter::GetLocal(local_in, lw, channels, lx, ly - 1, chan)); | |
| 120 | 51776 | const int v02 = static_cast<int>(zenin_a_gauss_filter::GetLocal(local_in, lw, channels, lx + 1, ly - 1, chan)); | |
| 121 | |||
| 122 | 51776 | const int v10 = static_cast<int>(zenin_a_gauss_filter::GetLocal(local_in, lw, channels, lx - 1, ly, chan)); | |
| 123 | 51776 | const int v11 = static_cast<int>(zenin_a_gauss_filter::GetLocal(local_in, lw, channels, lx, ly, chan)); | |
| 124 | 51776 | const int v12 = static_cast<int>(zenin_a_gauss_filter::GetLocal(local_in, lw, channels, lx + 1, ly, chan)); | |
| 125 | |||
| 126 | 51776 | const int v20 = static_cast<int>(zenin_a_gauss_filter::GetLocal(local_in, lw, channels, lx - 1, ly + 1, chan)); | |
| 127 | 51776 | const int v21 = static_cast<int>(zenin_a_gauss_filter::GetLocal(local_in, lw, channels, lx, ly + 1, chan)); | |
| 128 | 51776 | const int v22 = static_cast<int>(zenin_a_gauss_filter::GetLocal(local_in, lw, channels, lx + 1, ly + 1, chan)); | |
| 129 | |||
| 130 | int sum = 0; | ||
| 131 | sum += v00 * 1; | ||
| 132 | 51776 | sum += v01 * 2; | |
| 133 | 51776 | sum += v02 * 1; | |
| 134 | 51776 | sum += v10 * 2; | |
| 135 | 51776 | sum += v11 * 4; | |
| 136 | 51776 | sum += v12 * 2; | |
| 137 | 51776 | sum += v20 * 1; | |
| 138 | 51776 | sum += v21 * 2; | |
| 139 | 51776 | sum += v22 * 1; | |
| 140 | |||
| 141 | 51776 | const int res = (sum + (kKernelSum / 2)) / kKernelSum; | |
| 142 | 51776 | (*local_out)[((yd * my_w + xd) * channels) + chan] = zenin_a_gauss_filter::Clampu8(res); | |
| 143 | } | ||
| 144 | } | ||
| 145 | } | ||
| 146 | 88 | } | |
| 147 | |||
| 148 | 88 | void CopyBlockToImage(const BlockInfo &block, const std::vector<std::uint8_t> &src, int src_w, int width, int channels, | |
| 149 | std::vector<std::uint8_t> *dst) { | ||
| 150 |
2/2✓ Branch 0 taken 953 times.
✓ Branch 1 taken 88 times.
|
1041 | for (int yd = 0; yd < block.my_h; ++yd) { |
| 151 |
2/2✓ Branch 0 taken 33290 times.
✓ Branch 1 taken 953 times.
|
34243 | for (int xd = 0; xd < block.my_w; ++xd) { |
| 152 | 33290 | const int gy = block.start_y + yd; | |
| 153 | 33290 | const int gx = block.start_x + xd; | |
| 154 |
2/2✓ Branch 0 taken 51776 times.
✓ Branch 1 taken 33290 times.
|
85066 | for (int chan = 0; chan < channels; ++chan) { |
| 155 | 51776 | (*dst)[((gy * width + gx) * channels) + chan] = src[((yd * src_w + xd) * channels) + chan]; | |
| 156 | } | ||
| 157 | } | ||
| 158 | } | ||
| 159 | 88 | } | |
| 160 | |||
| 161 | 88 | void GatherAndBroadcastResult(int rank, int proc_num, int grid_cols, int width, int channels, const BlockInfo &my_block, | |
| 162 | const std::function<BlockInfo(int, int)> &calc_block, | ||
| 163 | const std::vector<std::uint8_t> &local_out, std::vector<std::uint8_t> *final_image) { | ||
| 164 |
2/2✓ Branch 0 taken 44 times.
✓ Branch 1 taken 44 times.
|
88 | if (rank == 0) { |
| 165 | 44 | CopyBlockToImage(my_block, local_out, my_block.my_w, width, channels, final_image); | |
| 166 | |||
| 167 |
2/2✓ Branch 0 taken 44 times.
✓ Branch 1 taken 44 times.
|
88 | for (int src_rank = 1; src_rank < proc_num; ++src_rank) { |
| 168 | 44 | const int spr = src_rank / grid_cols; | |
| 169 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 44 times.
|
44 | const int spc = src_rank % grid_cols; |
| 170 | 44 | const BlockInfo sb = calc_block(spr, spc); | |
| 171 | |||
| 172 |
1/2✓ Branch 2 taken 44 times.
✗ Branch 3 not taken.
|
44 | std::vector<std::uint8_t> recv(static_cast<std::size_t>(sb.my_h) * sb.my_w * channels); |
| 173 |
1/2✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
|
44 | MPI_Recv(recv.data(), static_cast<int>(recv.size()), MPI_UNSIGNED_CHAR, src_rank, kTagResult, MPI_COMM_WORLD, |
| 174 | MPI_STATUS_IGNORE); | ||
| 175 | |||
| 176 | 44 | CopyBlockToImage(sb, recv, sb.my_w, width, channels, final_image); | |
| 177 | } | ||
| 178 | } else { | ||
| 179 | 44 | MPI_Send(local_out.data(), static_cast<int>(local_out.size()), MPI_UNSIGNED_CHAR, 0, kTagResult, MPI_COMM_WORLD); | |
| 180 | } | ||
| 181 | |||
| 182 | 88 | MPI_Bcast(final_image->data(), static_cast<int>(final_image->size()), MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD); | |
| 183 | 88 | } | |
| 184 | |||
| 185 | } // namespace | ||
| 186 | |||
| 187 |
1/2✓ Branch 1 taken 88 times.
✗ Branch 2 not taken.
|
88 | ZeninAGaussFilterMPI::ZeninAGaussFilterMPI(const InType &in) { |
| 188 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 189 | GetInput() = in; | ||
| 190 | 88 | GetOutput() = OutType{}; | |
| 191 | 88 | } | |
| 192 | |||
| 193 | 88 | bool ZeninAGaussFilterMPI::ValidationImpl() { | |
| 194 | 88 | int rank = 0; | |
| 195 | 88 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 196 | |||
| 197 | bool ok = true; | ||
| 198 |
2/2✓ Branch 0 taken 44 times.
✓ Branch 1 taken 44 times.
|
88 | if (rank == 0) { |
| 199 | const auto &in = GetInput(); | ||
| 200 | 44 | const std::size_t need = static_cast<std::size_t>(in.width) * in.height * in.channels; | |
| 201 | |||
| 202 |
4/8✓ Branch 0 taken 44 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 44 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 44 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 44 times.
✗ Branch 7 not taken.
|
44 | ok = (in.width > 0) && (in.height > 0) && (in.channels == 1 || in.channels == 3) && (in.pixels.size() == need); |
| 203 | } | ||
| 204 | |||
| 205 | 88 | int ok_int = ok ? 1 : 0; | |
| 206 | |||
| 207 | 88 | MPI_Bcast(&ok_int, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 208 | 88 | return ok_int == 1; | |
| 209 | } | ||
| 210 | |||
| 211 | 88 | bool ZeninAGaussFilterMPI::PreProcessingImpl() { | |
| 212 | 88 | int rank = 0; | |
| 213 | 88 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 214 | 88 | MPI_Comm_size(MPI_COMM_WORLD, &proc_num_); | |
| 215 | |||
| 216 |
2/2✓ Branch 0 taken 44 times.
✓ Branch 1 taken 44 times.
|
88 | if (rank == 0) { |
| 217 | const auto &in = GetInput(); | ||
| 218 | 44 | width_ = in.width; | |
| 219 | 44 | height_ = in.height; | |
| 220 | 44 | channels_ = in.channels; | |
| 221 | } | ||
| 222 | |||
| 223 | 88 | MPI_Bcast(&width_, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 224 | 88 | MPI_Bcast(&height_, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 225 | 88 | MPI_Bcast(&channels_, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 226 | |||
| 227 | 88 | std::array<int, 2> dims{0, 0}; | |
| 228 | 88 | MPI_Dims_create(proc_num_, 2, dims.data()); | |
| 229 | 88 | grid_rows_ = dims[0]; | |
| 230 | 88 | grid_cols_ = dims[1]; | |
| 231 | |||
| 232 | 88 | block_h_ = height_ / grid_rows_; | |
| 233 | 88 | block_w_ = width_ / grid_cols_; | |
| 234 | 88 | extra_h_ = height_ % grid_rows_; | |
| 235 | 88 | extra_w_ = width_ % grid_cols_; | |
| 236 | |||
| 237 |
2/2✓ Branch 0 taken 44 times.
✓ Branch 1 taken 44 times.
|
88 | if (rank == 0) { |
| 238 | auto &out = GetOutput(); | ||
| 239 | 44 | out.height = height_; | |
| 240 | 44 | out.width = width_; | |
| 241 | 44 | out.channels = channels_; | |
| 242 | 44 | out.pixels.assign(static_cast<std::size_t>(width_) * height_ * channels_, 0); | |
| 243 | } | ||
| 244 | |||
| 245 | 88 | return true; | |
| 246 | } | ||
| 247 | |||
| 248 | 88 | bool ZeninAGaussFilterMPI::RunImpl() { | |
| 249 | 88 | int rank = 0; | |
| 250 | 88 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 251 | |||
| 252 | 88 | const int pr = rank / grid_cols_; | |
| 253 | 88 | const int pc = rank % grid_cols_; | |
| 254 | |||
| 255 |
2/2✓ Branch 0 taken 55 times.
✓ Branch 1 taken 33 times.
|
88 | const BlockInfo my_block = CalcBlock(pr, pc, height_, width_, grid_rows_, grid_cols_); |
| 256 | const int my_h = my_block.my_h; | ||
| 257 | const int my_w = my_block.my_w; | ||
| 258 | |||
| 259 | 88 | const int lw = my_w + (2 * kHalo); | |
| 260 | 88 | const int lh = my_h + (2 * kHalo); | |
| 261 | |||
| 262 | 88 | std::vector<std::uint8_t> local_in(static_cast<std::size_t>(lh) * lw * channels_, 0); | |
| 263 |
1/4✓ Branch 1 taken 88 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
88 | std::vector<std::uint8_t> local_out(static_cast<std::size_t>(my_h) * my_w * channels_, 0); |
| 264 | |||
| 265 | 88 | auto calc_block = [&](int rpr, int rpc) -> BlockInfo { | |
| 266 |
1/2✓ Branch 0 taken 88 times.
✗ Branch 1 not taken.
|
88 | return CalcBlock(rpr, rpc, height_, width_, grid_rows_, grid_cols_); |
| 267 | }; | ||
| 268 | |||
| 269 |
2/2✓ Branch 0 taken 44 times.
✓ Branch 1 taken 44 times.
|
88 | if (rank == 0) { |
| 270 | const auto &img = GetInput(); | ||
| 271 |
1/2✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
|
88 | BuildOrRecvExpandedBlock(rank, proc_num_, grid_cols_, width_, height_, channels_, my_block, calc_block, &img, |
| 272 | &local_in); | ||
| 273 | } else { | ||
| 274 |
1/2✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
|
88 | BuildOrRecvExpandedBlock(rank, proc_num_, grid_cols_, width_, height_, channels_, my_block, calc_block, nullptr, |
| 275 | &local_in); | ||
| 276 | } | ||
| 277 | |||
| 278 | 88 | ConvolveLocalBlock(local_in, lw, my_w, my_h, channels_, &local_out); | |
| 279 | |||
| 280 |
2/6✓ Branch 1 taken 88 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 88 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
|
88 | std::vector<std::uint8_t> final_image(static_cast<std::size_t>(width_) * height_ * channels_, 0); |
| 281 |
1/4✓ Branch 1 taken 88 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
88 | GatherAndBroadcastResult(rank, proc_num_, grid_cols_, width_, channels_, my_block, calc_block, local_out, |
| 282 | &final_image); | ||
| 283 | |||
| 284 |
2/2✓ Branch 1 taken 85 times.
✓ Branch 2 taken 3 times.
|
176 | GetOutput() = OutType{height_, width_, channels_, std::move(final_image)}; |
| 285 | 88 | return true; | |
| 286 | } | ||
| 287 | |||
// Performs no work of its own and always reports success.
bool ZeninAGaussFilterMPI::PostProcessingImpl() {
  return true;
}
| 291 | |||
| 292 | } // namespace zenin_a_gauss_filter | ||
| 293 |