| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "makovskiy_i_gauss_filter_vert/mpi/include/ops_mpi.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <task/include/task.hpp> | ||
| 9 | #include <tuple> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "makovskiy_i_gauss_filter_vert/common/include/common.hpp" | ||
| 13 | |||
| 14 | namespace makovskiy_i_gauss_filter_vert { | ||
| 15 | |||
| 16 | namespace { | ||
| 17 | |||
| 18 | 225 | int GetPixelValue(int x, int y, int strip_w, int total_h, int rank, const std::vector<int> &all_strip_widths, | |
| 19 | const std::vector<int> &left_ghost, const std::vector<int> &right_ghost, | ||
| 20 | const std::vector<int> &local_strip) { | ||
| 21 |
2/2✓ Branch 0 taken 42 times.
✓ Branch 1 taken 183 times.
|
225 | if (x < 0) { |
| 22 |
4/6✓ Branch 0 taken 21 times.
✓ Branch 1 taken 21 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 21 times.
✓ Branch 4 taken 21 times.
✗ Branch 5 not taken.
|
42 | if (rank > 0 && all_strip_widths.at(static_cast<size_t>(rank) - 1) > 0) { |
| 23 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 21 times.
|
21 | return left_ghost.at(static_cast<size_t>(std::clamp(y, 0, total_h - 1))); |
| 24 | } | ||
| 25 | 21 | return GetPixel(local_strip, x, y, strip_w, total_h); | |
| 26 | } | ||
| 27 |
2/2✓ Branch 0 taken 42 times.
✓ Branch 1 taken 141 times.
|
183 | if (x >= strip_w) { |
| 28 |
2/2✓ Branch 0 taken 21 times.
✓ Branch 1 taken 21 times.
|
42 | if (rank < static_cast<int>(all_strip_widths.size()) - 1 && |
| 29 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 21 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 21 times.
|
21 | all_strip_widths.at(static_cast<size_t>(rank) + 1) > 0) { |
| 30 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 21 times.
|
21 | return right_ghost.at(static_cast<size_t>(std::clamp(y, 0, total_h - 1))); |
| 31 | } | ||
| 32 | 21 | return GetPixel(local_strip, x, y, strip_w, total_h); | |
| 33 | } | ||
| 34 | 141 | return GetPixel(local_strip, x, y, strip_w, total_h); | |
| 35 | } | ||
| 36 | |||
| 37 | 25 | int ApplyKernel(int row, int col, int strip_w, int total_h, int rank, const std::vector<int> &all_strip_widths, | |
| 38 | const std::vector<int> &left_ghost, const std::vector<int> &right_ghost, | ||
| 39 | const std::vector<int> &local_strip) { | ||
| 40 | 25 | const std::array<int, 9> kernel = {1, 2, 1, 2, 4, 2, 1, 2, 1}; | |
| 41 | int sum = 0; | ||
| 42 | |||
| 43 |
2/2✓ Branch 0 taken 75 times.
✓ Branch 1 taken 25 times.
|
100 | for (int k_row = -1; k_row <= 1; ++k_row) { |
| 44 |
2/2✓ Branch 0 taken 225 times.
✓ Branch 1 taken 75 times.
|
300 | for (int k_col = -1; k_col <= 1; ++k_col) { |
| 45 | 225 | int current_x = col + k_col; | |
| 46 | 225 | int current_y = row + k_row; | |
| 47 | |||
| 48 | 225 | int pixel_val = GetPixelValue(current_x, current_y, strip_w, total_h, rank, all_strip_widths, left_ghost, | |
| 49 | right_ghost, local_strip); | ||
| 50 | |||
| 51 | 225 | sum += pixel_val * kernel.at((static_cast<size_t>(k_row + 1) * 3) + static_cast<size_t>(k_col + 1)); | |
| 52 | } | ||
| 53 | } | ||
| 54 | 25 | return sum; | |
| 55 | } | ||
| 56 | |||
| 57 | } // namespace | ||
| 58 | |||
| 59 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | GaussFilterMPI::GaussFilterMPI(const InType &in) { |
| 60 | InType temp(in); | ||
| 61 | this->GetInput().swap(temp); | ||
| 62 | SetTypeOfTask(ppc::task::TypeOfTask::kMPI); | ||
| 63 | 4 | } | |
| 64 | |||
| 65 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | bool GaussFilterMPI::ValidationImpl() { |
| 66 | const auto &[input, width, height] = GetInput(); | ||
| 67 |
4/8✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 4 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 4 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 4 times.
|
4 | return !input.empty() && width > 0 && height > 0 && input.size() == static_cast<size_t>(width) * height; |
| 68 | } | ||
| 69 | |||
| 70 | 4 | bool GaussFilterMPI::PreProcessingImpl() { | |
| 71 | 4 | int rank = 0; | |
| 72 | 4 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 73 | |||
| 74 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (rank == 0) { |
| 75 | const auto &[_, width, height] = GetInput(); | ||
| 76 | 2 | total_width_ = width; | |
| 77 | 2 | total_height_ = height; | |
| 78 | } | ||
| 79 | |||
| 80 | 4 | MPI_Bcast(&total_width_, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 81 | 4 | MPI_Bcast(&total_height_, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 82 | |||
| 83 | 4 | int world_size = 0; | |
| 84 | 4 | MPI_Comm_size(MPI_COMM_WORLD, &world_size); | |
| 85 | |||
| 86 | 4 | const int min_strip_width = total_width_ / world_size; | |
| 87 | 4 | const int remainder = total_width_ % world_size; | |
| 88 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
|
4 | strip_width_ = min_strip_width + (rank < remainder ? 1 : 0); |
| 89 | |||
| 90 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (strip_width_ > 0) { |
| 91 | 4 | local_strip_.resize(static_cast<size_t>(strip_width_) * total_height_); | |
| 92 | } | ||
| 93 | |||
| 94 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (rank == 0) { |
| 95 | 2 | GetOutput().resize(static_cast<size_t>(total_width_) * total_height_); | |
| 96 | } | ||
| 97 | |||
| 98 | 4 | return true; | |
| 99 | } | ||
| 100 | |||
| 101 | 2 | void GaussFilterMPI::ScatterDataRoot(int world_size) { | |
| 102 | const auto &[input, _w, _h] = GetInput(); | ||
| 103 | int offset = 0; | ||
| 104 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 2 times.
|
6 | for (int i = 0; i < world_size; ++i) { |
| 105 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
|
4 | int current_strip_width = (total_width_ / world_size) + (i < (total_width_ % world_size) ? 1 : 0); |
| 106 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | if (current_strip_width == 0) { |
| 107 | ✗ | continue; | |
| 108 | } | ||
| 109 | |||
| 110 | 4 | std::vector<int> strip_to_send(static_cast<size_t>(current_strip_width) * total_height_); | |
| 111 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 4 times.
|
18 | for (int row = 0; row < total_height_; ++row) { |
| 112 |
2/2✓ Branch 0 taken 25 times.
✓ Branch 1 taken 14 times.
|
39 | for (int col = 0; col < current_strip_width; ++col) { |
| 113 | 25 | strip_to_send.at((static_cast<size_t>(row) * current_strip_width) + col) = | |
| 114 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 25 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 25 times.
|
25 | input.at((static_cast<size_t>(row) * total_width_) + offset + col); |
| 115 | } | ||
| 116 | } | ||
| 117 | |||
| 118 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (i == 0) { |
| 119 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | local_strip_ = strip_to_send; |
| 120 | } else { | ||
| 121 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | MPI_Send(strip_to_send.data(), static_cast<int>(strip_to_send.size()), MPI_INT, i, 0, MPI_COMM_WORLD); |
| 122 | } | ||
| 123 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | offset += current_strip_width; |
| 124 | } | ||
| 125 | 2 | } | |
| 126 | |||
| 127 | 2 | void GaussFilterMPI::ScatterDataLeaf() { | |
| 128 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
2 | if (strip_width_ > 0) { |
| 129 | 2 | MPI_Recv(local_strip_.data(), static_cast<int>(local_strip_.size()), MPI_INT, 0, 0, MPI_COMM_WORLD, | |
| 130 | MPI_STATUS_IGNORE); | ||
| 131 | } | ||
| 132 | 2 | } | |
| 133 | |||
| 134 | ✗ | void GaussFilterMPI::ScatterData(int rank, int world_size) { | |
| 135 | ✗ | if (rank == 0) { | |
| 136 | 2 | ScatterDataRoot(world_size); | |
| 137 | } else { | ||
| 138 | 2 | ScatterDataLeaf(); | |
| 139 | } | ||
| 140 | ✗ | } | |
| 141 | |||
| 142 | 4 | std::vector<int> GaussFilterMPI::ComputeLocal(int rank, int world_size) { | |
| 143 | 4 | std::vector<int> local_output(static_cast<size_t>(strip_width_) * total_height_); | |
| 144 |
2/6✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 4 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
|
4 | std::vector<int> all_strip_widths(world_size); |
| 145 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | MPI_Allgather(&strip_width_, 1, MPI_INT, all_strip_widths.data(), 1, MPI_INT, MPI_COMM_WORLD); |
| 146 | |||
| 147 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (strip_width_ > 0) { |
| 148 |
1/4✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
4 | std::vector<int> left_ghost(total_height_); |
| 149 |
1/4✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
4 | std::vector<int> right_ghost(total_height_); |
| 150 | 4 | std::array<MPI_Request, 4> requests{}; | |
| 151 | int req_count = 0; | ||
| 152 | |||
| 153 |
1/4✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
4 | std::vector<int> left_border(total_height_); |
| 154 |
1/4✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
4 | std::vector<int> right_border(total_height_); |
| 155 | |||
| 156 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 4 times.
|
18 | for (int row = 0; row < total_height_; ++row) { |
| 157 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 14 times.
|
14 | left_border.at(row) = local_strip_.at(static_cast<size_t>(row) * strip_width_); |
| 158 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 14 times.
|
14 | right_border.at(row) = local_strip_.at((static_cast<size_t>(row) * strip_width_) + strip_width_ - 1); |
| 159 | } | ||
| 160 | |||
| 161 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
|
4 | if (rank > 0 && all_strip_widths.at(static_cast<size_t>(rank) - 1) > 0) { |
| 162 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | MPI_Isend(left_border.data(), total_height_, MPI_INT, rank - 1, 0, MPI_COMM_WORLD, &requests.at(req_count++)); |
| 163 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | MPI_Irecv(left_ghost.data(), total_height_, MPI_INT, rank - 1, 1, MPI_COMM_WORLD, &requests.at(req_count++)); |
| 164 | } | ||
| 165 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
|
4 | if (rank < world_size - 1 && all_strip_widths.at(static_cast<size_t>(rank) + 1) > 0) { |
| 166 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | MPI_Isend(right_border.data(), total_height_, MPI_INT, rank + 1, 1, MPI_COMM_WORLD, &requests.at(req_count++)); |
| 167 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | MPI_Irecv(right_ghost.data(), total_height_, MPI_INT, rank + 1, 0, MPI_COMM_WORLD, &requests.at(req_count++)); |
| 168 | } | ||
| 169 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | MPI_Waitall(req_count, requests.data(), MPI_STATUSES_IGNORE); |
| 170 | |||
| 171 | const int kernel_sum = 16; | ||
| 172 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 4 times.
|
18 | for (int row = 0; row < total_height_; ++row) { |
| 173 |
2/2✓ Branch 0 taken 25 times.
✓ Branch 1 taken 14 times.
|
39 | for (int col = 0; col < strip_width_; ++col) { |
| 174 | 25 | int sum = ApplyKernel(row, col, strip_width_, total_height_, rank, all_strip_widths, left_ghost, right_ghost, | |
| 175 |
1/2✓ Branch 1 taken 25 times.
✗ Branch 2 not taken.
|
25 | local_strip_); |
| 176 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 25 times.
|
25 | local_output.at((static_cast<size_t>(row) * strip_width_) + col) = sum / kernel_sum; |
| 177 | } | ||
| 178 | } | ||
| 179 | } | ||
| 180 | 4 | return local_output; | |
| 181 | } | ||
| 182 | |||
| 183 | 2 | void GaussFilterMPI::GatherDataRoot(int world_size, std::vector<int> &final_output, | |
| 184 | const std::vector<int> &local_output) const { | ||
| 185 | int offset = 0; | ||
| 186 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 2 times.
|
6 | for (int i = 0; i < world_size; ++i) { |
| 187 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
|
4 | int current_strip_width = (total_width_ / world_size) + (i < (total_width_ % world_size) ? 1 : 0); |
| 188 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | if (current_strip_width == 0) { |
| 189 | ✗ | continue; | |
| 190 | } | ||
| 191 | |||
| 192 | 4 | std::vector<int> received_strip(static_cast<size_t>(current_strip_width) * total_height_); | |
| 193 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (i == 0) { |
| 194 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | received_strip = local_output; |
| 195 | } else { | ||
| 196 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | MPI_Recv(received_strip.data(), static_cast<int>(received_strip.size()), MPI_INT, i, 1, MPI_COMM_WORLD, |
| 197 | MPI_STATUS_IGNORE); | ||
| 198 | } | ||
| 199 | |||
| 200 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 4 times.
|
18 | for (int row = 0; row < total_height_; ++row) { |
| 201 |
2/2✓ Branch 0 taken 25 times.
✓ Branch 1 taken 14 times.
|
39 | for (int col = 0; col < current_strip_width; ++col) { |
| 202 | 25 | final_output.at((static_cast<size_t>(row) * total_width_) + offset + col) = | |
| 203 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 25 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 25 times.
|
25 | received_strip.at((static_cast<size_t>(row) * current_strip_width) + col); |
| 204 | } | ||
| 205 | } | ||
| 206 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | offset += current_strip_width; |
| 207 | } | ||
| 208 | 2 | } | |
| 209 | |||
| 210 | 2 | void GaussFilterMPI::GatherDataLeaf(const std::vector<int> &local_output) const { | |
| 211 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
2 | if (strip_width_ > 0) { |
| 212 | 2 | MPI_Send(local_output.data(), static_cast<int>(local_output.size()), MPI_INT, 0, 1, MPI_COMM_WORLD); | |
| 213 | } | ||
| 214 | 2 | } | |
| 215 | |||
| 216 | 4 | void GaussFilterMPI::GatherData(int rank, int world_size, const std::vector<int> &local_output) { | |
| 217 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (rank == 0) { |
| 218 | auto &final_output = GetOutput(); | ||
| 219 | 2 | GatherDataRoot(world_size, final_output, local_output); | |
| 220 | } else { | ||
| 221 | 2 | GatherDataLeaf(local_output); | |
| 222 | } | ||
| 223 | 4 | } | |
| 224 | |||
| 225 | 4 | bool GaussFilterMPI::RunImpl() { | |
| 226 | 4 | int rank = 0; | |
| 227 | 4 | int world_size = 0; | |
| 228 | 4 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 229 | 4 | MPI_Comm_size(MPI_COMM_WORLD, &world_size); | |
| 230 | |||
| 231 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | ScatterData(rank, world_size); |
| 232 | 4 | std::vector<int> local_output = ComputeLocal(rank, world_size); | |
| 233 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | MPI_Barrier(MPI_COMM_WORLD); |
| 234 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | GatherData(rank, world_size, local_output); |
| 235 | |||
| 236 | 4 | return true; | |
| 237 | } | ||
| 238 | |||
| 239 | 4 | bool GaussFilterMPI::PostProcessingImpl() { | |
| 240 | 4 | return true; | |
| 241 | } | ||
| 242 | |||
| 243 | } // namespace makovskiy_i_gauss_filter_vert | ||
| 244 |