| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "pikhotskiy_r_vertical_gauss_filter/stl/include/ops_stl.hpp" | ||
| 2 | |||
| 3 | #include <algorithm> | ||
| 4 | #include <cstddef> | ||
| 5 | #include <cstdint> | ||
| 6 | #include <thread> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "pikhotskiy_r_vertical_gauss_filter/common/include/common.hpp" | ||
| 10 | #include "util/include/util.hpp" | ||
| 11 | |||
| 12 | namespace pikhotskiy_r_vertical_gauss_filter { | ||
| 13 | |||
| 14 | namespace { | ||
// Normalisation divisor applied after both 1-2-1 passes: each pass has a weight sum of
// 1 + 2 + 1 = 4, so the combined kernel weights add up to 4 * 4 = 16.
constexpr int kKernelNorm = (1 + 2 + 1) * (1 + 2 + 1);
| 16 | |||
// Clamps `value` into the valid index range [0, upper_bound - 1].
// Returns 0 when `upper_bound` is not positive (there is no valid index to clamp to).
constexpr int ClampIndex(int value, int upper_bound) noexcept {
  // std::clamp requires lo <= hi, so the degenerate range has to be handled first.
  if (upper_bound <= 0) {
    return 0;
  }
  return std::clamp(value, 0, upper_bound - 1);
}
| 29 | |||
// Maps a (column, row) position to its offset in a row-major buffer that has `width`
// elements per row. The arithmetic is done in std::size_t.
constexpr std::size_t ToLinearIndex(int x_pos, int y_pos, int width) noexcept {
  const auto row = static_cast<std::size_t>(y_pos);
  const auto row_length = static_cast<std::size_t>(width);
  const auto column = static_cast<std::size_t>(x_pos);
  return (row * row_length) + column;
}
| 33 | |||
| 34 | std::uint8_t NormalizeAndRoundUp(int sum) { | ||
| 35 | ✗ | return static_cast<std::uint8_t>((sum + (kKernelNorm - 1)) / kKernelNorm); | |
| 36 | } | ||
| 37 | |||
// Distributes consecutive stripe ranges over `actual_threads` std::thread workers and blocks
// until every worker has finished.
//
// Worker i receives the half-open range [begin, end) holding `stripes_per_worker` stripes, plus
// one more when i < extra_stripes; ranges are handed out back to back starting at stripe 0.
//
// actual_threads     - number of workers to launch; a value <= 0 launches nothing.
// stripes_per_worker - base number of stripes for each worker.
// extra_stripes      - how many of the first workers take one additional stripe.
// callback           - callable invoked as callback(stripe_begin, stripe_end) on a worker
//                      thread; it is copied into each worker.
template <class TCallback>
void RunPassInParallel(int actual_threads, int stripes_per_worker, int extra_stripes, TCallback callback) {
  // A non-positive count would turn into a huge std::size_t in reserve() and throw.
  if (actual_threads <= 0) {
    return;
  }

  std::vector<std::thread> workers;

  // Joins every launched worker on all exit paths. Without this, an exception thrown while
  // starting a later worker would destroy joinable threads and call std::terminate.
  // Declared after `workers` so that it is destroyed (and joins) before the vector is.
  struct JoinGuard {
    std::vector<std::thread> &threads;
    ~JoinGuard() {
      for (auto &thread : threads) {
        if (thread.joinable()) {
          thread.join();
        }
      }
    }
  };
  const JoinGuard join_guard{workers};

  workers.reserve(static_cast<std::size_t>(actual_threads));

  int stripe_begin = 0;
  for (int worker_id = 0; worker_id < actual_threads; ++worker_id) {
    const int stripes_this_worker = stripes_per_worker + (worker_id < extra_stripes ? 1 : 0);
    const int stripe_end = stripe_begin + stripes_this_worker;
    workers.emplace_back([callback, stripe_begin, stripe_end]() { callback(stripe_begin, stripe_end); });
    stripe_begin = stripe_end;
  }
  // join_guard waits for all workers when it goes out of scope here.
}
| 55 | } // namespace | ||
| 56 | |||
| 57 | ✗ | PikhotskiyRVerticalGaussFilterSTL::PikhotskiyRVerticalGaussFilterSTL(const InType &in) { | |
| 58 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 59 | GetInput() = in; | ||
| 60 | ✗ | GetOutput() = OutType{}; | |
| 61 | ✗ | } | |
| 62 | |||
| 63 | ✗ | bool PikhotskiyRVerticalGaussFilterSTL::ValidationImpl() { | |
| 64 | const auto &in = GetInput(); | ||
| 65 | |||
| 66 | ✗ | if (in.width <= 0 || in.height <= 0) { | |
| 67 | return false; | ||
| 68 | } | ||
| 69 | ✗ | const auto expected_size = static_cast<std::size_t>(in.width) * static_cast<std::size_t>(in.height); | |
| 70 | ✗ | return in.data.size() == expected_size; | |
| 71 | } | ||
| 72 | |||
| 73 | ✗ | bool PikhotskiyRVerticalGaussFilterSTL::PreProcessingImpl() { | |
| 74 | const auto &in = GetInput(); | ||
| 75 | ✗ | width_ = in.width; | |
| 76 | ✗ | height_ = in.height; | |
| 77 | |||
| 78 | ✗ | const int num_threads = std::max(1, ppc::util::GetNumThreads()); | |
| 79 | ✗ | stripe_width_ = std::max(1, width_ / num_threads); | |
| 80 | |||
| 81 | ✗ | source_ = in.data; | |
| 82 | ✗ | vertical_buffer_.assign(source_.size(), 0); | |
| 83 | ✗ | result_buffer_.assign(source_.size(), 0); | |
| 84 | ✗ | return true; | |
| 85 | } | ||
| 86 | |||
| 87 | ✗ | bool PikhotskiyRVerticalGaussFilterSTL::RunImpl() { | |
| 88 | ✗ | const auto expected_size = static_cast<std::size_t>(width_) * static_cast<std::size_t>(height_); | |
| 89 | ✗ | if (width_ <= 0 || height_ <= 0 || source_.size() != expected_size || vertical_buffer_.size() != expected_size || | |
| 90 | result_buffer_.size() != expected_size) { | ||
| 91 | return false; | ||
| 92 | } | ||
| 93 | |||
| 94 | ✗ | const int requested_threads = std::max(1, ppc::util::GetNumThreads()); | |
| 95 | ✗ | const int stripe_count = (width_ + stripe_width_ - 1) / stripe_width_; | |
| 96 | ✗ | const int actual_threads = std::max(1, std::min(requested_threads, stripe_count)); | |
| 97 | ✗ | const int stripes_per_worker = stripe_count / actual_threads; | |
| 98 | ✗ | const int extra_stripes = stripe_count % actual_threads; | |
| 99 | ✗ | RunPassInParallel(actual_threads, stripes_per_worker, extra_stripes, [this](int stripe_begin, int stripe_end) { | |
| 100 | ✗ | for (int stripe_index = stripe_begin; stripe_index < stripe_end; ++stripe_index) { | |
| 101 | ✗ | const int x_begin = stripe_index * stripe_width_; | |
| 102 | ✗ | const int x_end = std::min(width_, x_begin + stripe_width_); | |
| 103 | ✗ | RunVerticalPassForStripe(x_begin, x_end); | |
| 104 | } | ||
| 105 | ✗ | }); | |
| 106 | |||
| 107 | ✗ | RunPassInParallel(actual_threads, stripes_per_worker, extra_stripes, [this](int stripe_begin, int stripe_end) { | |
| 108 | ✗ | for (int stripe_index = stripe_begin; stripe_index < stripe_end; ++stripe_index) { | |
| 109 | ✗ | const int x_begin = stripe_index * stripe_width_; | |
| 110 | ✗ | const int x_end = std::min(width_, x_begin + stripe_width_); | |
| 111 | ✗ | RunHorizontalPassForStripe(x_begin, x_end); | |
| 112 | } | ||
| 113 | ✗ | }); | |
| 114 | |||
| 115 | return true; | ||
| 116 | } | ||
| 117 | |||
| 118 | ✗ | bool PikhotskiyRVerticalGaussFilterSTL::PostProcessingImpl() { | |
| 119 | ✗ | GetOutput().width = width_; | |
| 120 | ✗ | GetOutput().height = height_; | |
| 121 | ✗ | GetOutput().data = result_buffer_; | |
| 122 | ✗ | return true; | |
| 123 | } | ||
| 124 | |||
| 125 | ✗ | void PikhotskiyRVerticalGaussFilterSTL::RunVerticalPassForStripe(int x_begin, int x_end) { | |
| 126 | ✗ | for (int row = 0; row < height_; ++row) { | |
| 127 | ✗ | const int row_top = ClampIndex(row - 1, height_); | |
| 128 | ✗ | const int row_bottom = ClampIndex(row + 1, height_); | |
| 129 | |||
| 130 | ✗ | for (int col = x_begin; col < x_end; ++col) { | |
| 131 | ✗ | const std::size_t center = ToLinearIndex(col, row, width_); | |
| 132 | const std::size_t top = ToLinearIndex(col, row_top, width_); | ||
| 133 | const std::size_t bottom = ToLinearIndex(col, row_bottom, width_); | ||
| 134 | ✗ | vertical_buffer_[center] = | |
| 135 | ✗ | static_cast<int>(source_[top]) + (2 * static_cast<int>(source_[center])) + static_cast<int>(source_[bottom]); | |
| 136 | } | ||
| 137 | } | ||
| 138 | ✗ | } | |
| 139 | |||
| 140 | ✗ | void PikhotskiyRVerticalGaussFilterSTL::RunHorizontalPassForStripe(int x_begin, int x_end) { | |
| 141 | ✗ | for (int row = 0; row < height_; ++row) { | |
| 142 | ✗ | for (int col = x_begin; col < x_end; ++col) { | |
| 143 | ✗ | const int col_left = ClampIndex(col - 1, width_); | |
| 144 | ✗ | const int col_right = ClampIndex(col + 1, width_); | |
| 145 | const std::size_t center = ToLinearIndex(col, row, width_); | ||
| 146 | const std::size_t left = ToLinearIndex(col_left, row, width_); | ||
| 147 | const std::size_t right = ToLinearIndex(col_right, row, width_); | ||
| 148 | ✗ | const int weighted_sum = vertical_buffer_[left] + (2 * vertical_buffer_[center]) + vertical_buffer_[right]; | |
| 149 | ✗ | result_buffer_[center] = NormalizeAndRoundUp(weighted_sum); | |
| 150 | } | ||
| 151 | } | ||
| 152 | ✗ | } | |
| 153 | |||
| 154 | } // namespace pikhotskiy_r_vertical_gauss_filter | ||
| 155 |