| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "otcheskov_s_gauss_filter_vert_split/mpi/include/ops_mpi.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cmath> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <cstdint> | ||
| 10 | #include <cstring> | ||
| 11 | #include <utility> | ||
| 12 | #include <vector> | ||
| 13 | |||
| 14 | #include "otcheskov_s_gauss_filter_vert_split/common/include/common.hpp" | ||
| 15 | |||
| 16 | namespace otcheskov_s_gauss_filter_vert_split { | ||
| 17 | |||
| 18 |
1/2✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.
|
30 | OtcheskovSGaussFilterVertSplitMPI::OtcheskovSGaussFilterVertSplitMPI(const InType &in) { |
| 19 | 30 | int proc_rank{}; | |
| 20 | 30 | int proc_num{}; | |
| 21 |
1/2✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.
|
30 | MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank); |
| 22 |
1/2✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.
|
30 | MPI_Comm_size(MPI_COMM_WORLD, &proc_num); |
| 23 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 24 | |||
| 25 | 30 | proc_rank_ = static_cast<size_t>(proc_rank); | |
| 26 | 30 | proc_num_ = static_cast<size_t>(proc_num); | |
| 27 |
2/2✓ Branch 0 taken 15 times.
✓ Branch 1 taken 15 times.
|
30 | if (proc_rank_ == 0) { |
| 28 | GetInput() = in; | ||
| 29 | } | ||
| 30 | 30 | } | |
| 31 | |||
| 32 | 30 | bool OtcheskovSGaussFilterVertSplitMPI::ValidationImpl() { | |
| 33 |
2/2✓ Branch 0 taken 15 times.
✓ Branch 1 taken 15 times.
|
30 | if (proc_rank_ == 0) { |
| 34 | const auto &[metadata, data] = GetInput(); | ||
| 35 |
8/8✓ Branch 0 taken 14 times.
✓ Branch 1 taken 1 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 13 times.
✓ Branch 4 taken 1 times.
✓ Branch 5 taken 12 times.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 11 times.
|
30 | is_valid_ = !data.empty() && (metadata.height > 0 && metadata.width > 0 && metadata.channels > 0) && |
| 36 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 10 times.
|
11 | (data.size() == metadata.height * metadata.width * metadata.channels); |
| 37 | } | ||
| 38 | 30 | MPI_Bcast(&is_valid_, 1, MPI_C_BOOL, 0, MPI_COMM_WORLD); | |
| 39 | 30 | return is_valid_; | |
| 40 | } | ||
| 41 | |||
| 42 | 30 | bool OtcheskovSGaussFilterVertSplitMPI::PreProcessingImpl() { | |
| 43 | 30 | return true; | |
| 44 | } | ||
| 45 | |||
| 46 | 30 | bool OtcheskovSGaussFilterVertSplitMPI::RunImpl() { | |
| 47 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 10 times.
|
30 | if (!is_valid_) { |
| 48 | return false; | ||
| 49 | } | ||
| 50 | |||
| 51 | auto &[metadata, data] = GetInput(); | ||
| 52 | 20 | MPI_Bcast(&metadata, sizeof(ImageMetadata), MPI_BYTE, 0, MPI_COMM_WORLD); | |
| 53 | |||
| 54 | 20 | DistributeData(); | |
| 55 | 20 | ExchangeBoundaryColumns(); | |
| 56 | |||
| 57 | local_data_.clear(); | ||
| 58 |
2/2✓ Branch 0 taken 19 times.
✓ Branch 1 taken 1 times.
|
20 | local_data_.shrink_to_fit(); |
| 59 | |||
| 60 | 20 | ApplyGaussianFilter(); | |
| 61 | 20 | CollectResults(); | |
| 62 | 20 | return true; | |
| 63 | } | ||
| 64 | |||
| 65 | 30 | bool OtcheskovSGaussFilterVertSplitMPI::PostProcessingImpl() { | |
| 66 | 30 | return true; | |
| 67 | } | ||
| 68 | |||
| 69 | 20 | void OtcheskovSGaussFilterVertSplitMPI::DistributeData() { | |
| 70 | const auto &[in_meta, in_data] = GetInput(); | ||
| 71 | const auto &[height, width, channels] = in_meta; | ||
| 72 | |||
| 73 |
2/2✓ Branch 0 taken 19 times.
✓ Branch 1 taken 1 times.
|
20 | active_procs_ = std::min(proc_num_, width); |
| 74 | |||
| 75 |
2/2✓ Branch 0 taken 19 times.
✓ Branch 1 taken 1 times.
|
20 | if (proc_rank_ < active_procs_) { |
| 76 | 19 | const size_t base_cols = width / active_procs_; | |
| 77 | 19 | const size_t remain = width % active_procs_; | |
| 78 |
2/2✓ Branch 0 taken 15 times.
✓ Branch 1 taken 4 times.
|
19 | local_width_ = base_cols + (proc_rank_ < remain ? 1 : 0); |
| 79 | 19 | start_col_ = (base_cols * proc_rank_) + std::min(proc_rank_, remain); | |
| 80 | } else { | ||
| 81 | 1 | local_width_ = 0; | |
| 82 | 1 | start_col_ = 0; | |
| 83 | } | ||
| 84 | |||
| 85 | 20 | local_data_count_ = height * local_width_ * channels; | |
| 86 | 20 | local_data_.resize(local_data_count_); | |
| 87 | |||
| 88 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | if (proc_rank_ == 0) { |
| 89 | 10 | const size_t base_cols = width / active_procs_; | |
| 90 | 10 | const size_t remain = width % active_procs_; | |
| 91 | 10 | const auto &[counts, displs] = GetCountsAndDisplacements(height, width, channels); | |
| 92 | |||
| 93 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | std::vector<uint8_t> send_buffer(counts[active_procs_ - 1] + displs[active_procs_ - 1]); |
| 94 |
2/2✓ Branch 0 taken 19 times.
✓ Branch 1 taken 10 times.
|
29 | for (size_t proc = 0; proc < active_procs_; ++proc) { |
| 95 |
2/2✓ Branch 0 taken 15 times.
✓ Branch 1 taken 4 times.
|
19 | const size_t cols = base_cols + (proc < remain ? 1 : 0); |
| 96 | 19 | const size_t start_col = (base_cols * proc) + std::min(proc, remain); | |
| 97 | 19 | uint8_t *buf_ptr = send_buffer.data() + displs[proc]; | |
| 98 | |||
| 99 |
2/2✓ Branch 0 taken 523 times.
✓ Branch 1 taken 19 times.
|
542 | for (size_t row = 0; row < height; ++row) { |
| 100 | 523 | const size_t row_size = width * channels; | |
| 101 | 523 | const size_t row_offset = row * row_size; | |
| 102 | 523 | const size_t col_offset = start_col * channels; | |
| 103 | 523 | const uint8_t *src_row = in_data.data() + row_offset + col_offset; | |
| 104 | 523 | std::memcpy(buf_ptr, src_row, cols * channels); | |
| 105 | 523 | buf_ptr += cols * channels; | |
| 106 | } | ||
| 107 | } | ||
| 108 | |||
| 109 | 10 | MPI_Scatterv(send_buffer.data(), counts.data(), displs.data(), MPI_UINT8_T, local_data_.data(), | |
| 110 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | static_cast<int>(local_data_count_), MPI_UINT8_T, 0, MPI_COMM_WORLD); |
| 111 | 10 | } else { | |
| 112 | 10 | MPI_Scatterv(nullptr, nullptr, nullptr, MPI_DATATYPE_NULL, local_data_.data(), static_cast<int>(local_data_count_), | |
| 113 | MPI_UINT8_T, 0, MPI_COMM_WORLD); | ||
| 114 | } | ||
| 115 | 20 | } | |
| 116 | |||
| 117 | 10 | std::pair<std::vector<int>, std::vector<int>> OtcheskovSGaussFilterVertSplitMPI::GetCountsAndDisplacements( | |
| 118 | const size_t &height, const size_t &width, const size_t &channels) const { | ||
| 119 | 10 | std::vector<int> counts(proc_num_, 0); | |
| 120 |
1/4✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
10 | std::vector<int> displs(proc_num_, 0); |
| 121 | |||
| 122 | 10 | const size_t base_cols = width / active_procs_; | |
| 123 | 10 | const size_t remain = width % active_procs_; | |
| 124 | |||
| 125 | int total_data = 0; | ||
| 126 |
2/2✓ Branch 0 taken 19 times.
✓ Branch 1 taken 10 times.
|
29 | for (size_t proc = 0; proc < active_procs_; ++proc) { |
| 127 |
2/2✓ Branch 0 taken 15 times.
✓ Branch 1 taken 4 times.
|
19 | const size_t cols = base_cols + (proc < remain ? 1 : 0); |
| 128 | 19 | counts[proc] = static_cast<int>(height * cols * channels); | |
| 129 | 19 | displs[proc] = total_data; | |
| 130 | 19 | total_data += counts[proc]; | |
| 131 | } | ||
| 132 | |||
| 133 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
20 | return {counts, displs}; |
| 134 | } | ||
| 135 | |||
| 136 | 20 | void OtcheskovSGaussFilterVertSplitMPI::ExchangeBoundaryColumns() { | |
| 137 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 19 times.
|
20 | if (local_width_ == 0) { |
| 138 | extended_data_.clear(); | ||
| 139 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | extended_data_.shrink_to_fit(); |
| 140 | 1 | return; | |
| 141 | } | ||
| 142 | |||
| 143 | const auto &[in_meta, in_data] = GetInput(); | ||
| 144 | const size_t &height = in_meta.height; | ||
| 145 | const size_t &channels = in_meta.channels; | ||
| 146 | 19 | const size_t col_size = height * channels; | |
| 147 | |||
| 148 | int left_proc = MPI_PROC_NULL; | ||
| 149 | int right_proc = MPI_PROC_NULL; | ||
| 150 | |||
| 151 |
3/4✓ Branch 0 taken 9 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 9 times.
✗ Branch 3 not taken.
|
19 | if (proc_rank_ > 0 && proc_rank_ < active_procs_) { |
| 152 | 9 | left_proc = static_cast<int>(proc_rank_) - 1; | |
| 153 | } | ||
| 154 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 10 times.
|
19 | if (proc_rank_ < active_procs_ - 1) { |
| 155 | 9 | right_proc = static_cast<int>(proc_rank_) + 1; | |
| 156 | } | ||
| 157 | |||
| 158 | 19 | std::vector<uint8_t> left_col(col_size); | |
| 159 |
1/4✓ Branch 1 taken 19 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
19 | std::vector<uint8_t> right_col(col_size); |
| 160 |
1/4✓ Branch 1 taken 19 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
19 | std::vector<uint8_t> recv_left(col_size); |
| 161 |
1/4✓ Branch 1 taken 19 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
19 | std::vector<uint8_t> recv_right(col_size); |
| 162 | |||
| 163 |
2/2✓ Branch 0 taken 523 times.
✓ Branch 1 taken 19 times.
|
542 | for (size_t i = 0; i < height; ++i) { |
| 164 | 523 | const size_t row_off = i * local_width_ * channels; | |
| 165 | 523 | const size_t dst_offset = i * channels; | |
| 166 | |||
| 167 | std::memcpy(&left_col[dst_offset], &local_data_[row_off], channels); | ||
| 168 | 523 | std::memcpy(&right_col[dst_offset], &local_data_[row_off + ((local_width_ - 1) * channels)], channels); | |
| 169 | } | ||
| 170 | |||
| 171 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 10 times.
|
19 | if (left_proc != MPI_PROC_NULL) { |
| 172 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | MPI_Sendrecv(left_col.data(), static_cast<int>(col_size), MPI_UINT8_T, left_proc, 0, recv_right.data(), |
| 173 | static_cast<int>(col_size), MPI_UINT8_T, left_proc, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); | ||
| 174 | } | ||
| 175 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 10 times.
|
19 | if (right_proc != MPI_PROC_NULL) { |
| 176 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | MPI_Sendrecv(right_col.data(), static_cast<int>(col_size), MPI_UINT8_T, right_proc, 1, recv_left.data(), |
| 177 | static_cast<int>(col_size), MPI_UINT8_T, right_proc, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); | ||
| 178 | } | ||
| 179 | |||
| 180 | 19 | const size_t ext_width = local_width_ + 2; | |
| 181 |
1/2✓ Branch 1 taken 19 times.
✗ Branch 2 not taken.
|
19 | extended_data_.resize(in_meta.height * ext_width * channels); |
| 182 | |||
| 183 |
2/2✓ Branch 0 taken 523 times.
✓ Branch 1 taken 19 times.
|
542 | for (size_t i = 0; i < in_meta.height; ++i) { |
| 184 |
2/2✓ Branch 0 taken 269 times.
✓ Branch 1 taken 254 times.
|
523 | uint8_t *ext_row = &extended_data_[i * ext_width * channels]; |
| 185 | 523 | const uint8_t *loc_row = &local_data_[i * local_width_ * channels]; | |
| 186 | |||
| 187 |
2/2✓ Branch 0 taken 269 times.
✓ Branch 1 taken 254 times.
|
523 | if (proc_rank_ == 0) { |
| 188 | std::memcpy(ext_row, loc_row, channels); | ||
| 189 | } else { | ||
| 190 | 254 | std::memcpy(ext_row, &recv_right[i * channels], channels); | |
| 191 | } | ||
| 192 | |||
| 193 |
2/2✓ Branch 0 taken 269 times.
✓ Branch 1 taken 254 times.
|
523 | std::memcpy(ext_row + channels, loc_row, local_width_ * channels); |
| 194 | |||
| 195 |
2/2✓ Branch 0 taken 269 times.
✓ Branch 1 taken 254 times.
|
523 | if (proc_rank_ == active_procs_ - 1) { |
| 196 | 269 | const uint8_t *last_col = &loc_row[(local_width_ - 1) * channels]; | |
| 197 | 269 | std::memcpy(ext_row + ((ext_width - 1) * channels), last_col, channels); | |
| 198 | } else { | ||
| 199 | 254 | std::memcpy(ext_row + ((ext_width - 1) * channels), &recv_left[i * channels], channels); | |
| 200 | } | ||
| 201 | } | ||
| 202 | } | ||
| 203 | |||
| 204 | 20 | void OtcheskovSGaussFilterVertSplitMPI::ApplyGaussianFilter() { | |
| 205 |
2/2✓ Branch 0 taken 19 times.
✓ Branch 1 taken 1 times.
|
20 | if (local_width_ == 0) { |
| 206 | return; | ||
| 207 | } | ||
| 208 | const auto &[in_meta, in_data] = GetInput(); | ||
| 209 | 19 | local_output_.resize(local_data_count_); | |
| 210 | |||
| 211 |
2/2✓ Branch 0 taken 523 times.
✓ Branch 1 taken 19 times.
|
542 | for (size_t row = 0; row < in_meta.height; ++row) { |
| 212 |
2/2✓ Branch 0 taken 20655 times.
✓ Branch 1 taken 523 times.
|
21178 | for (size_t local_col = 0; local_col < local_width_; ++local_col) { |
| 213 |
2/2✓ Branch 0 taken 61523 times.
✓ Branch 1 taken 20655 times.
|
82178 | for (size_t ch = 0; ch < in_meta.channels; ++ch) { |
| 214 | 61523 | const size_t out_idx = ((row * local_width_ + local_col) * in_meta.channels) + ch; | |
| 215 | 61523 | local_output_[out_idx] = ProcessPixel(row, local_col, ch, in_meta.height, in_meta.channels); | |
| 216 | } | ||
| 217 | } | ||
| 218 | } | ||
| 219 | } | ||
| 220 | |||
| 221 | 61523 | uint8_t OtcheskovSGaussFilterVertSplitMPI::ProcessPixel(const size_t &row, const size_t &local_col, const size_t &ch, | |
| 222 | const size_t &height, const size_t &channels) { | ||
| 223 | auto mirror_coord = [&](const size_t &current, int off, const size_t &size) -> size_t { | ||
| 224 | 184569 | int64_t pos = static_cast<int64_t>(current) + off; | |
| 225 | 184569 | if (pos < 0) { | |
| 226 | 741 | return static_cast<size_t>(-pos - 1); | |
| 227 | } | ||
| 228 |
2/2✓ Branch 0 taken 741 times.
✓ Branch 1 taken 183087 times.
|
183828 | if (std::cmp_greater_equal(static_cast<size_t>(pos), size)) { |
| 229 | 741 | return (2 * size) - static_cast<size_t>(pos) - 1; | |
| 230 | } | ||
| 231 | return static_cast<size_t>(pos); | ||
| 232 | }; | ||
| 233 | |||
| 234 | double sum = 0.0; | ||
| 235 | 61523 | const size_t extended_width = local_width_ + 2; | |
| 236 | 61523 | const size_t ext_col = local_col + 1; | |
| 237 | |||
| 238 |
2/2✓ Branch 0 taken 184569 times.
✓ Branch 1 taken 61523 times.
|
246092 | for (int ky = 0; ky < 3; ++ky) { |
| 239 |
2/2✓ Branch 0 taken 741 times.
✓ Branch 1 taken 183828 times.
|
184569 | const size_t data_row = mirror_coord(row, ky - 1, height); |
| 240 | |||
| 241 |
2/2✓ Branch 0 taken 553707 times.
✓ Branch 1 taken 184569 times.
|
738276 | for (int kx = 0; kx < 3; ++kx) { |
| 242 | 553707 | const size_t data_col = ext_col + kx - 1; | |
| 243 | 553707 | const size_t idx = ((data_row * extended_width + data_col) * channels) + ch; | |
| 244 | 553707 | sum += extended_data_[idx] * kGaussianKernel.at(ky).at(kx); | |
| 245 | } | ||
| 246 | } | ||
| 247 | 61523 | return static_cast<uint8_t>(std::clamp(std::round(sum), 0.0, 255.0)); | |
| 248 | } | ||
| 249 | |||
| 250 | 20 | void OtcheskovSGaussFilterVertSplitMPI::CollectResults() { | |
| 251 | const auto &[in_meta, in_data] = GetInput(); | ||
| 252 | const auto &[height, width, channels] = in_meta; | ||
| 253 | 20 | const size_t base_cols = width / active_procs_; | |
| 254 | 20 | const size_t remain = width % active_procs_; | |
| 255 | 20 | const size_t row_size = width * channels; | |
| 256 | |||
| 257 | 20 | std::vector<int> counts(proc_num_, 0); | |
| 258 |
1/4✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
20 | std::vector<int> displs(proc_num_, 0); |
| 259 | |||
| 260 | int total_data = 0; | ||
| 261 |
2/2✓ Branch 0 taken 38 times.
✓ Branch 1 taken 20 times.
|
58 | for (size_t proc = 0; proc < active_procs_; ++proc) { |
| 262 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 8 times.
|
38 | const size_t cols = base_cols + (proc < remain ? 1 : 0); |
| 263 | 38 | counts[proc] = static_cast<int>(height * cols * channels); | |
| 264 | 38 | displs[proc] = total_data; | |
| 265 | 38 | total_data += counts[proc]; | |
| 266 | } | ||
| 267 | |||
| 268 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | if (proc_rank_ == 0) { |
| 269 | auto &[out_meta, out_data] = GetOutput(); | ||
| 270 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | out_meta = in_meta; |
| 271 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | out_data.resize(in_data.size()); |
| 272 | |||
| 273 |
2/6✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 10 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
|
10 | std::vector<uint8_t> recv_buffer(total_data); |
| 274 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Gatherv(local_output_.data(), static_cast<int>(local_data_count_), MPI_UINT8_T, recv_buffer.data(), |
| 275 | counts.data(), displs.data(), MPI_UINT8_T, 0, MPI_COMM_WORLD); | ||
| 276 | |||
| 277 |
2/2✓ Branch 0 taken 269 times.
✓ Branch 1 taken 10 times.
|
279 | for (size_t i = 0; i < height; ++i) { |
| 278 | int buffer_offset = 0; | ||
| 279 |
2/2✓ Branch 0 taken 523 times.
✓ Branch 1 taken 269 times.
|
792 | for (size_t proc = 0; proc < active_procs_; ++proc) { |
| 280 |
2/2✓ Branch 0 taken 493 times.
✓ Branch 1 taken 30 times.
|
523 | const size_t cols = base_cols + (proc < remain ? 1 : 0); |
| 281 | 523 | const size_t start_col = (base_cols * proc) + std::min(proc, remain); | |
| 282 | 523 | const uint8_t *src = recv_buffer.data() + static_cast<size_t>(buffer_offset) + (i * cols * channels); | |
| 283 | 523 | uint8_t *dst = out_data.data() + (i * row_size) + (start_col * channels); | |
| 284 | 523 | std::memcpy(dst, src, cols * channels); | |
| 285 | |||
| 286 | 523 | buffer_offset += counts[proc]; | |
| 287 | } | ||
| 288 | } | ||
| 289 | } else { | ||
| 290 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Gatherv(local_output_.data(), static_cast<int>(local_data_count_), MPI_UINT8_T, nullptr, nullptr, nullptr, |
| 291 | MPI_UINT8_T, 0, MPI_COMM_WORLD); | ||
| 292 | } | ||
| 293 | 20 | } | |
| 294 | |||
| 295 | } // namespace otcheskov_s_gauss_filter_vert_split | ||
| 296 |