| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "pikhotskiy_r_vertical_gauss_filter/omp/include/ops_omp.hpp" | ||
| 2 | |||
| 3 | #include <omp.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cstddef> | ||
| 7 | #include <cstdint> | ||
| 8 | |||
| 9 | #include "pikhotskiy_r_vertical_gauss_filter/common/include/common.hpp" | ||
| 10 | #include "util/include/util.hpp" | ||
| 11 | |||
| 12 | namespace pikhotskiy_r_vertical_gauss_filter { | ||
| 13 | |||
| 14 | namespace { | ||
// Divisor used to normalize the accumulated filter sum. The vertical and horizontal
// passes each apply weights 1-2-1, so the combined weights total (1 + 2 + 1)^2 = 16.
constexpr int kKernelNorm = 16;
| 16 | |||
// Restricts `value` to the valid index range [0, upper_bound - 1].
// A non-positive `upper_bound` describes an empty range; 0 is returned in that case.
constexpr int ClampIndex(int value, int upper_bound) noexcept {
  if (upper_bound <= 0) {
    return 0;
  }
  const int last_valid = upper_bound - 1;
  return std::max(0, std::min(value, last_valid));
}
| 29 | |||
// Converts a (column, row) coordinate into an offset in a row-major buffer
// whose rows are `width` elements long: y_pos * width + x_pos.
constexpr std::size_t ToLinearIndex(int x_pos, int y_pos, int width) noexcept {
  const auto row_offset = static_cast<std::size_t>(y_pos) * static_cast<std::size_t>(width);
  const auto column = static_cast<std::size_t>(x_pos);
  return row_offset + column;
}
| 33 | |||
| 34 | std::uint8_t NormalizeAndRoundUp(int sum) { | ||
| 35 | ✗ | return static_cast<std::uint8_t>((sum + (kKernelNorm - 1)) / kKernelNorm); | |
| 36 | } | ||
| 37 | } // namespace | ||
| 38 | |||
| 39 | ✗ | PikhotskiyRVerticalGaussFilterOMP::PikhotskiyRVerticalGaussFilterOMP(const InType &in) { | |
| 40 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 41 | GetInput() = in; | ||
| 42 | ✗ | GetOutput() = OutType{}; | |
| 43 | ✗ | } | |
| 44 | |||
| 45 | ✗ | bool PikhotskiyRVerticalGaussFilterOMP::ValidationImpl() { | |
| 46 | const auto &in = GetInput(); | ||
| 47 | |||
| 48 | ✗ | if (in.width <= 0 || in.height <= 0) { | |
| 49 | return false; | ||
| 50 | } | ||
| 51 | ✗ | const auto expected_size = static_cast<std::size_t>(in.width) * static_cast<std::size_t>(in.height); | |
| 52 | ✗ | return in.data.size() == expected_size; | |
| 53 | } | ||
| 54 | |||
| 55 | ✗ | bool PikhotskiyRVerticalGaussFilterOMP::PreProcessingImpl() { | |
| 56 | const auto &in = GetInput(); | ||
| 57 | ✗ | width_ = in.width; | |
| 58 | ✗ | height_ = in.height; | |
| 59 | |||
| 60 | ✗ | const int num_threads = std::max(1, ppc::util::GetNumThreads()); | |
| 61 | ✗ | stripe_width_ = std::max(1, width_ / num_threads); | |
| 62 | |||
| 63 | ✗ | source_ = in.data; | |
| 64 | ✗ | vertical_buffer_.assign(source_.size(), 0); | |
| 65 | ✗ | result_buffer_.assign(source_.size(), 0); | |
| 66 | ✗ | return true; | |
| 67 | } | ||
| 68 | |||
| 69 | ✗ | bool PikhotskiyRVerticalGaussFilterOMP::RunImpl() { | |
| 70 | ✗ | const auto expected_size = static_cast<std::size_t>(width_) * static_cast<std::size_t>(height_); | |
| 71 | ✗ | if (width_ <= 0 || height_ <= 0 || source_.size() != expected_size || vertical_buffer_.size() != expected_size || | |
| 72 | result_buffer_.size() != expected_size) { | ||
| 73 | return false; | ||
| 74 | } | ||
| 75 | |||
| 76 | ✗ | const int num_threads = std::max(1, ppc::util::GetNumThreads()); | |
| 77 | ✗ | const int stripe_count = (width_ + stripe_width_ - 1) / stripe_width_; | |
| 78 | ✗ | omp_set_num_threads(num_threads); | |
| 79 | |||
| 80 | ✗ | #pragma omp parallel for default(none) schedule(static) shared(stripe_count) | |
| 81 | for (int stripe_index = 0; stripe_index < stripe_count; ++stripe_index) { | ||
| 82 | const int x_begin = stripe_index * stripe_width_; | ||
| 83 | const int x_end = std::min(width_, x_begin + stripe_width_); | ||
| 84 | RunVerticalPassForStripe(x_begin, x_end); | ||
| 85 | } | ||
| 86 | |||
| 87 | ✗ | #pragma omp parallel for default(none) schedule(static) shared(stripe_count) | |
| 88 | for (int stripe_index = 0; stripe_index < stripe_count; ++stripe_index) { | ||
| 89 | const int x_begin = stripe_index * stripe_width_; | ||
| 90 | const int x_end = std::min(width_, x_begin + stripe_width_); | ||
| 91 | RunHorizontalPassForStripe(x_begin, x_end); | ||
| 92 | } | ||
| 93 | |||
| 94 | ✗ | return true; | |
| 95 | } | ||
| 96 | |||
| 97 | ✗ | bool PikhotskiyRVerticalGaussFilterOMP::PostProcessingImpl() { | |
| 98 | ✗ | GetOutput().width = width_; | |
| 99 | ✗ | GetOutput().height = height_; | |
| 100 | ✗ | GetOutput().data = result_buffer_; | |
| 101 | ✗ | return true; | |
| 102 | } | ||
| 103 | |||
| 104 | ✗ | void PikhotskiyRVerticalGaussFilterOMP::RunVerticalPassForStripe(int x_begin, int x_end) { | |
| 105 | ✗ | for (int row = 0; row < height_; ++row) { | |
| 106 | ✗ | const int row_top = ClampIndex(row - 1, height_); | |
| 107 | ✗ | const int row_bottom = ClampIndex(row + 1, height_); | |
| 108 | |||
| 109 | ✗ | for (int col = x_begin; col < x_end; ++col) { | |
| 110 | ✗ | const std::size_t center = ToLinearIndex(col, row, width_); | |
| 111 | const std::size_t top = ToLinearIndex(col, row_top, width_); | ||
| 112 | const std::size_t bottom = ToLinearIndex(col, row_bottom, width_); | ||
| 113 | ✗ | vertical_buffer_[center] = | |
| 114 | ✗ | static_cast<int>(source_[top]) + (2 * static_cast<int>(source_[center])) + static_cast<int>(source_[bottom]); | |
| 115 | } | ||
| 116 | } | ||
| 117 | ✗ | } | |
| 118 | |||
| 119 | ✗ | void PikhotskiyRVerticalGaussFilterOMP::RunHorizontalPassForStripe(int x_begin, int x_end) { | |
| 120 | ✗ | for (int row = 0; row < height_; ++row) { | |
| 121 | ✗ | for (int col = x_begin; col < x_end; ++col) { | |
| 122 | ✗ | const int col_left = ClampIndex(col - 1, width_); | |
| 123 | ✗ | const int col_right = ClampIndex(col + 1, width_); | |
| 124 | const std::size_t center = ToLinearIndex(col, row, width_); | ||
| 125 | const std::size_t left = ToLinearIndex(col_left, row, width_); | ||
| 126 | const std::size_t right = ToLinearIndex(col_right, row, width_); | ||
| 127 | ✗ | const int weighted_sum = vertical_buffer_[left] + (2 * vertical_buffer_[center]) + vertical_buffer_[right]; | |
| 128 | ✗ | result_buffer_[center] = NormalizeAndRoundUp(weighted_sum); | |
| 129 | } | ||
| 130 | } | ||
| 131 | ✗ | } | |
| 132 | |||
| 133 | } // namespace pikhotskiy_r_vertical_gauss_filter | ||
| 134 |