| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "kolotukhin_a_gaussian_blur/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | #include <omp.h> | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <array> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <cstdint> | ||
| 10 | #include <utility> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include "kolotukhin_a_gaussian_blur/common/include/common.hpp" | ||
| 14 | |||
| 15 | namespace kolotukhin_a_gaussian_blur { | ||
| 16 | namespace { | ||
/// Reads one byte from a row-major image using clamp-to-edge addressing.
///
/// @param pixel_data  Row-major pixel buffer indexed as `y * img_width + x`.
/// @param img_width   Number of pixels per row.
/// @param img_height  Number of rows.
/// @param pos_x       Requested column; may lie outside `[0, img_width)`.
/// @param pos_y       Requested row; may lie outside `[0, img_height)`.
/// @return The pixel at the requested position after each coordinate has been
///         clamped into the image.
std::uint8_t GetPixel(const std::vector<std::uint8_t> &pixel_data, int img_width, int img_height, int pos_x,
                      int pos_y) {
  // The upper bound is applied first and the lower bound second, so a
  // non-positive extent resolves to coordinate 0.
  const auto clamp_to_extent = [](int coord, int extent) {
    const int capped = (coord < extent - 1) ? coord : extent - 1;
    return static_cast<std::size_t>((capped > 0) ? capped : 0);
  };

  const std::size_t column = clamp_to_extent(pos_x, img_width);
  const std::size_t line = clamp_to_extent(pos_y, img_height);
  const std::size_t stride = static_cast<std::size_t>(img_width);
  return pixel_data[(line * stride) + column];
}
| 23 | |||
| 24 | void ApplyGaussianBlur(const std::vector<std::uint8_t> &src_data, std::vector<std::uint8_t> &dst_data, int width, | ||
| 25 | int height, int start_row, int end_row) { | ||
| 26 | const static std::array<std::array<int, 3>, 3> kKernel = {{{{1, 2, 1}}, {{2, 4, 2}}, {{1, 2, 1}}}}; | ||
| 27 | const static int kSum = 16; | ||
| 28 | 8 | #pragma omp parallel for collapse(2) schedule(static) default(none) \ | |
| 29 | shared(src_data, dst_data, width, height, start_row, end_row, kKernel, kSum) | ||
| 30 | for (int row = start_row; row < end_row; row++) { | ||
| 31 | for (int col = 0; col < width; col++) { | ||
| 32 | int acc = 0; | ||
| 33 | for (int dy = -1; dy <= 1; dy++) { | ||
| 34 | for (int dx = -1; dx <= 1; dx++) { | ||
| 35 | std::uint8_t pixel = GetPixel(src_data, width, height, col + dx, row + dy); | ||
| 36 | acc += kKernel.at(1 + dy).at(1 + dx) * static_cast<int>(pixel); | ||
| 37 | } | ||
| 38 | } | ||
| 39 | dst_data[(static_cast<std::size_t>(row) * static_cast<std::size_t>(width)) + static_cast<std::size_t>(col)] = | ||
| 40 | static_cast<std::uint8_t>(acc / kSum); | ||
| 41 | } | ||
| 42 | } | ||
| 43 | } | ||
| 44 | } // namespace | ||
| 45 | |||
| 46 | 4 | void KolotukhinAGaussinBlurALL::SendWorkData(int rows_per_process, int remainder) { | |
| 47 | const auto &pixel_data = get<0>(GetInput()); | ||
| 48 | int current_row = 0; | ||
| 49 | |||
| 50 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 4 times.
|
12 | for (int dest = 0; dest < proc_count_; dest++) { |
| 51 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | int dest_rows = (dest < remainder) ? rows_per_process + 1 : rows_per_process; |
| 52 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
|
8 | if (dest_rows == 0) { |
| 53 | ✗ | continue; | |
| 54 | } | ||
| 55 | int start_row = current_row; | ||
| 56 | 8 | int end_row = current_row + dest_rows; | |
| 57 | |||
| 58 | 8 | int extended_start = std::max(0, start_row - 1); | |
| 59 | 8 | int extended_end = std::min(global_height_, end_row + 1); | |
| 60 | 8 | int extended_rows = extended_end - extended_start; | |
| 61 | |||
| 62 | 8 | std::vector<std::uint8_t> extended_data(static_cast<std::size_t>(extended_rows) * | |
| 63 | 8 | static_cast<std::size_t>(global_width_)); | |
| 64 | |||
| 65 | 16 | std::copy(pixel_data.begin() + static_cast<std::ptrdiff_t>(extended_start) * global_width_, | |
| 66 | 8 | pixel_data.begin() + static_cast<std::ptrdiff_t>(extended_end) * global_width_, extended_data.begin()); | |
| 67 | |||
| 68 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (dest == 0) { |
| 69 | 4 | local_data_ = std::move(extended_data); | |
| 70 | } else { | ||
| 71 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | MPI_Send(extended_data.data(), static_cast<int>(extended_data.size()), MPI_UNSIGNED_CHAR, dest, 0, |
| 72 | MPI_COMM_WORLD); | ||
| 73 | } | ||
| 74 | current_row += dest_rows; | ||
| 75 | } | ||
| 76 | 4 | } | |
| 77 | |||
| 78 | 4 | void KolotukhinAGaussinBlurALL::ReceiveWorkData() { | |
| 79 | 4 | MPI_Recv(local_data_.data(), static_cast<int>(local_data_.size()), MPI_UNSIGNED_CHAR, 0, 0, MPI_COMM_WORLD, | |
| 80 | MPI_STATUS_IGNORE); | ||
| 81 | 4 | } | |
| 82 | |||
| 83 | 8 | void KolotukhinAGaussinBlurALL::DistributeWork() { | |
| 84 | 8 | int rows_per_process = global_height_ / proc_count_; | |
| 85 | 8 | int remainder = global_height_ % proc_count_; | |
| 86 | |||
| 87 | 8 | int last_handler = proc_count_ - 1; | |
| 88 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
|
8 | if (rows_per_process == 0) { |
| 89 | ✗ | last_handler = remainder - 1; | |
| 90 | } | ||
| 91 | |||
| 92 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | local_height_ = (rank_ < remainder) ? rows_per_process + 1 : rows_per_process; |
| 93 | |||
| 94 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
|
8 | if (local_height_ == 0) { |
| 95 | local_data_.clear(); | ||
| 96 | ✗ | return; | |
| 97 | } | ||
| 98 | |||
| 99 |
3/4✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.
|
8 | local_height_ = (rank_ == 0 || rank_ == last_handler) ? local_height_ + 1 : local_height_ + 2; |
| 100 | 8 | std::size_t local_size = static_cast<std::size_t>(local_height_) * static_cast<std::size_t>(global_width_); | |
| 101 | 8 | local_data_.resize(local_size, 0); | |
| 102 | |||
| 103 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (rank_ == 0) { |
| 104 | 4 | SendWorkData(rows_per_process, remainder); | |
| 105 | } else { | ||
| 106 | 4 | ReceiveWorkData(); | |
| 107 | } | ||
| 108 | } | ||
| 109 | |||
| 110 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | KolotukhinAGaussinBlurALL::KolotukhinAGaussinBlurALL(const InType &in) { |
| 111 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 112 | GetInput() = in; | ||
| 113 | GetOutput().clear(); | ||
| 114 | |||
| 115 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | MPI_Comm_rank(MPI_COMM_WORLD, &rank_); |
| 116 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | MPI_Comm_size(MPI_COMM_WORLD, &proc_count_); |
| 117 | 8 | } | |
| 118 | |||
| 119 | 8 | bool KolotukhinAGaussinBlurALL::ValidationImpl() { | |
| 120 | const auto &pixel_data = get<0>(GetInput()); | ||
| 121 | 8 | const auto img_width = get<1>(GetInput()); | |
| 122 | 8 | const auto img_height = get<2>(GetInput()); | |
| 123 | |||
| 124 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
|
8 | bool valid = static_cast<std::size_t>(img_height) * static_cast<std::size_t>(img_width) == pixel_data.size(); |
| 125 | |||
| 126 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
|
8 | int local_valid = valid ? 1 : 0; |
| 127 | 8 | int global_valid = 0; | |
| 128 | 8 | MPI_Allreduce(&local_valid, &global_valid, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); | |
| 129 | 8 | return global_valid == 1; | |
| 130 | } | ||
| 131 | |||
| 132 | 8 | bool KolotukhinAGaussinBlurALL::PreProcessingImpl() { | |
| 133 | 8 | const auto img_width = get<1>(GetInput()); | |
| 134 | 8 | const auto img_height = get<2>(GetInput()); | |
| 135 | |||
| 136 | 8 | int width = img_width; | |
| 137 | 8 | int height = img_height; | |
| 138 | |||
| 139 | 8 | MPI_Bcast(&width, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 140 | 8 | MPI_Bcast(&height, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 141 | |||
| 142 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (rank_ != 0) { |
| 143 | 4 | global_width_ = width; | |
| 144 | 4 | global_height_ = height; | |
| 145 | } else { | ||
| 146 | 4 | global_width_ = img_width; | |
| 147 | 4 | global_height_ = img_height; | |
| 148 | } | ||
| 149 | |||
| 150 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (rank_ == 0) { |
| 151 | 4 | GetOutput().assign(static_cast<std::size_t>(global_height_) * static_cast<std::size_t>(global_width_), 0); | |
| 152 | } | ||
| 153 | |||
| 154 | 8 | DistributeWork(); | |
| 155 | 8 | return true; | |
| 156 | } | ||
| 157 | |||
| 158 | 8 | void KolotukhinAGaussinBlurALL::GatherResults() { | |
| 159 | 8 | int rows_per_process = global_height_ / proc_count_; | |
| 160 | 8 | int remainder = global_height_ % proc_count_; | |
| 161 | |||
| 162 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (rank_ != 0) { |
| 163 | 4 | GatherResultsWorker(rows_per_process, remainder); | |
| 164 | } else { | ||
| 165 | 4 | GatherResultsRoot(rows_per_process, remainder); | |
| 166 | } | ||
| 167 | 8 | } | |
| 168 | |||
| 169 | 4 | void KolotukhinAGaussinBlurALL::GatherResultsWorker(int rows_per_process, int remainder) { | |
| 170 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | int original_rows = (rank_ < remainder) ? rows_per_process + 1 : rows_per_process; |
| 171 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | if (original_rows <= 0) { |
| 172 | ✗ | return; | |
| 173 | } | ||
| 174 | |||
| 175 | 4 | int halo_offset = (rank_ == 0) ? 0 : 1; | |
| 176 | 4 | std::vector<std::uint8_t> result_only_own(static_cast<std::size_t>(original_rows) * | |
| 177 | 4 | static_cast<std::size_t>(global_width_)); | |
| 178 | |||
| 179 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | std::copy(local_data_.begin() + static_cast<std::ptrdiff_t>(halo_offset) * global_width_, |
| 180 | 4 | local_data_.begin() + static_cast<std::ptrdiff_t>(halo_offset + original_rows) * global_width_, | |
| 181 | result_only_own.begin()); | ||
| 182 | |||
| 183 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | MPI_Send(result_only_own.data(), static_cast<int>(result_only_own.size()), MPI_UNSIGNED_CHAR, 0, 1, MPI_COMM_WORLD); |
| 184 | } | ||
| 185 | |||
| 186 | 4 | void KolotukhinAGaussinBlurALL::GatherResultsRoot(int rows_per_process, int remainder) { | |
| 187 | auto &output = GetOutput(); | ||
| 188 | 4 | std::vector<int> recv_counts(proc_count_); | |
| 189 |
1/4✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
4 | std::vector<int> displs(proc_count_); |
| 190 | |||
| 191 | int current_row = 0; | ||
| 192 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 4 times.
|
12 | for (int i = 0; i < proc_count_; i++) { |
| 193 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | int rows = (i < remainder) ? rows_per_process + 1 : rows_per_process; |
| 194 | 8 | recv_counts[i] = rows * global_width_; | |
| 195 | 8 | displs[i] = current_row * global_width_; | |
| 196 | 8 | current_row += rows; | |
| 197 | } | ||
| 198 | |||
| 199 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | int root_original_rows = (0 < remainder) ? rows_per_process + 1 : rows_per_process; |
| 200 | 4 | std::copy(local_data_.begin(), local_data_.begin() + static_cast<std::ptrdiff_t>(root_original_rows) * global_width_, | |
| 201 | output.begin() + displs[0]); | ||
| 202 | |||
| 203 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | for (int src = 1; src < proc_count_; src++) { |
| 204 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | int src_rows = (src < remainder) ? rows_per_process + 1 : rows_per_process; |
| 205 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | if (src_rows == 0) { |
| 206 | ✗ | continue; | |
| 207 | } | ||
| 208 | |||
| 209 |
2/6✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 4 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
|
4 | std::vector<std::uint8_t> src_data(static_cast<std::size_t>(src_rows) * static_cast<std::size_t>(global_width_)); |
| 210 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | MPI_Recv(src_data.data(), static_cast<int>(src_data.size()), MPI_UNSIGNED_CHAR, src, 1, MPI_COMM_WORLD, |
| 211 | MPI_STATUS_IGNORE); | ||
| 212 | |||
| 213 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | std::ranges::copy(src_data, output.begin() + displs[src]); |
| 214 | } | ||
| 215 | 4 | } | |
| 216 | |||
| 217 | 8 | bool KolotukhinAGaussinBlurALL::RunImpl() { | |
| 218 |
1/2✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
|
8 | if (local_height_ == 0) { |
| 219 | return true; | ||
| 220 | } | ||
| 221 | |||
| 222 | 8 | int rows_per_process = global_height_ / proc_count_; | |
| 223 | 8 | int remainder = global_height_ % proc_count_; | |
| 224 | |||
| 225 | 8 | int last_handler = proc_count_ - 1; | |
| 226 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
|
8 | if (rows_per_process == 0) { |
| 227 | ✗ | last_handler = remainder - 1; | |
| 228 | } | ||
| 229 | |||
| 230 | 8 | int extended_height = static_cast<int>(local_data_.size() / global_width_); | |
| 231 | |||
| 232 | 8 | int begin = (rank_ == 0) ? 0 : 1; | |
| 233 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | int end = (rank_ == last_handler) ? extended_height : extended_height - 1; |
| 234 | |||
| 235 | 8 | std::vector<std::uint8_t> result(local_data_.size()); | |
| 236 | 8 | ApplyGaussianBlur(local_data_, result, global_width_, extended_height, begin, end); | |
| 237 | |||
| 238 | local_data_ = std::move(result); | ||
| 239 | return true; | ||
| 240 | } | ||
| 241 | |||
| 242 | 8 | bool KolotukhinAGaussinBlurALL::PostProcessingImpl() { | |
| 243 | 8 | GatherResults(); | |
| 244 | 8 | return true; | |
| 245 | } | ||
| 246 | |||
| 247 | } // namespace kolotukhin_a_gaussian_blur | ||
| 248 |