| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "pikhotskiy_r_vertical_gauss_filter/tbb/include/ops_tbb.hpp" | ||
| 2 | |||
| 3 | #include <oneapi/tbb/blocked_range.h> | ||
| 4 | #include <oneapi/tbb/parallel_for.h> | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <cstdint> | ||
| 9 | |||
| 10 | #include "pikhotskiy_r_vertical_gauss_filter/common/include/common.hpp" | ||
| 11 | |||
| 12 | namespace pikhotskiy_r_vertical_gauss_filter { | ||
| 13 | |||
namespace {
// Sum of the weights of the separable 3x3 kernel: (1 + 2 + 1) * (1 + 2 + 1).
constexpr int kKernelNorm = 16;
// The image width is divided by this value to obtain the width of one column stripe.
constexpr int kStripeDivider = 8;

// Restricts `value` to the valid index range [0, upper_bound - 1].
// Returns 0 when the range is empty (upper_bound <= 0).
constexpr int ClampIndex(int value, int upper_bound) noexcept {
  if (upper_bound <= 0) {
    return 0;
  }
  // The guard above guarantees that `upper_bound - 1` is non-negative here.
  return std::max(0, std::min(value, upper_bound - 1));
}

// Converts a (column, row) position into an offset inside a row-major buffer
// whose rows are `width` elements long.
constexpr std::size_t ToLinearIndex(int x_pos, int y_pos, int width) noexcept {
  const auto row_offset = static_cast<std::size_t>(y_pos) * static_cast<std::size_t>(width);
  return row_offset + static_cast<std::size_t>(x_pos);
}

// Divides the accumulated weighted sum by kKernelNorm, rounding any non-zero
// remainder upwards, and narrows the quotient to a byte.
std::uint8_t NormalizeAndRoundUp(int sum) {
  const int rounded_quotient = (sum + (kKernelNorm - 1)) / kKernelNorm;
  return static_cast<std::uint8_t>(rounded_quotient);
}
}  // namespace
| 39 | |||
| 40 | ✗ | PikhotskiyRVerticalGaussFilterTBB::PikhotskiyRVerticalGaussFilterTBB(const InType &in) { | |
| 41 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 42 | GetInput() = in; | ||
| 43 | ✗ | GetOutput() = OutType{}; | |
| 44 | ✗ | } | |
| 45 | |||
| 46 | ✗ | bool PikhotskiyRVerticalGaussFilterTBB::ValidationImpl() { | |
| 47 | const auto &in = GetInput(); | ||
| 48 | |||
| 49 | ✗ | if (in.width <= 0 || in.height <= 0) { | |
| 50 | return false; | ||
| 51 | } | ||
| 52 | ✗ | const auto expected_size = static_cast<std::size_t>(in.width) * static_cast<std::size_t>(in.height); | |
| 53 | ✗ | return in.data.size() == expected_size; | |
| 54 | } | ||
| 55 | |||
| 56 | ✗ | bool PikhotskiyRVerticalGaussFilterTBB::PreProcessingImpl() { | |
| 57 | const auto &in = GetInput(); | ||
| 58 | ✗ | width_ = in.width; | |
| 59 | ✗ | height_ = in.height; | |
| 60 | ✗ | stripe_width_ = std::max(1, width_ / kStripeDivider); | |
| 61 | |||
| 62 | ✗ | source_ = in.data; | |
| 63 | ✗ | vertical_buffer_.assign(source_.size(), 0); | |
| 64 | ✗ | result_buffer_.assign(source_.size(), 0); | |
| 65 | ✗ | return true; | |
| 66 | } | ||
| 67 | |||
| 68 | ✗ | bool PikhotskiyRVerticalGaussFilterTBB::RunImpl() { | |
| 69 | ✗ | const auto expected_size = static_cast<std::size_t>(width_) * static_cast<std::size_t>(height_); | |
| 70 | ✗ | if (width_ <= 0 || height_ <= 0 || source_.size() != expected_size || vertical_buffer_.size() != expected_size || | |
| 71 | result_buffer_.size() != expected_size) { | ||
| 72 | return false; | ||
| 73 | } | ||
| 74 | |||
| 75 | ✗ | const int stripe_count = (width_ + stripe_width_ - 1) / stripe_width_; | |
| 76 | |||
| 77 | ✗ | oneapi::tbb::parallel_for(oneapi::tbb::blocked_range<int>(0, stripe_count), | |
| 78 | ✗ | [this](const oneapi::tbb::blocked_range<int> &range) { | |
| 79 | ✗ | for (int stripe_index = range.begin(); stripe_index != range.end(); ++stripe_index) { | |
| 80 | ✗ | const int x_begin = stripe_index * stripe_width_; | |
| 81 | ✗ | const int x_end = std::min(width_, x_begin + stripe_width_); | |
| 82 | ✗ | RunVerticalPassForStripe(x_begin, x_end); | |
| 83 | } | ||
| 84 | ✗ | }); | |
| 85 | |||
| 86 | ✗ | oneapi::tbb::parallel_for(oneapi::tbb::blocked_range<int>(0, stripe_count), | |
| 87 | ✗ | [this](const oneapi::tbb::blocked_range<int> &range) { | |
| 88 | ✗ | for (int stripe_index = range.begin(); stripe_index != range.end(); ++stripe_index) { | |
| 89 | ✗ | const int x_begin = stripe_index * stripe_width_; | |
| 90 | ✗ | const int x_end = std::min(width_, x_begin + stripe_width_); | |
| 91 | ✗ | RunHorizontalPassForStripe(x_begin, x_end); | |
| 92 | } | ||
| 93 | ✗ | }); | |
| 94 | |||
| 95 | ✗ | return true; | |
| 96 | } | ||
| 97 | |||
| 98 | ✗ | bool PikhotskiyRVerticalGaussFilterTBB::PostProcessingImpl() { | |
| 99 | ✗ | GetOutput().width = width_; | |
| 100 | ✗ | GetOutput().height = height_; | |
| 101 | ✗ | GetOutput().data = result_buffer_; | |
| 102 | ✗ | return true; | |
| 103 | } | ||
| 104 | |||
| 105 | ✗ | void PikhotskiyRVerticalGaussFilterTBB::RunVerticalPassForStripe(int x_begin, int x_end) { | |
| 106 | ✗ | for (int row = 0; row < height_; ++row) { | |
| 107 | ✗ | const int row_top = ClampIndex(row - 1, height_); | |
| 108 | ✗ | const int row_bottom = ClampIndex(row + 1, height_); | |
| 109 | |||
| 110 | ✗ | for (int col = x_begin; col < x_end; ++col) { | |
| 111 | ✗ | const std::size_t center = ToLinearIndex(col, row, width_); | |
| 112 | const std::size_t top = ToLinearIndex(col, row_top, width_); | ||
| 113 | const std::size_t bottom = ToLinearIndex(col, row_bottom, width_); | ||
| 114 | ✗ | vertical_buffer_[center] = | |
| 115 | ✗ | static_cast<int>(source_[top]) + (2 * static_cast<int>(source_[center])) + static_cast<int>(source_[bottom]); | |
| 116 | } | ||
| 117 | } | ||
| 118 | ✗ | } | |
| 119 | |||
| 120 | ✗ | void PikhotskiyRVerticalGaussFilterTBB::RunHorizontalPassForStripe(int x_begin, int x_end) { | |
| 121 | ✗ | for (int row = 0; row < height_; ++row) { | |
| 122 | ✗ | for (int col = x_begin; col < x_end; ++col) { | |
| 123 | ✗ | const int col_left = ClampIndex(col - 1, width_); | |
| 124 | ✗ | const int col_right = ClampIndex(col + 1, width_); | |
| 125 | const std::size_t center = ToLinearIndex(col, row, width_); | ||
| 126 | const std::size_t left = ToLinearIndex(col_left, row, width_); | ||
| 127 | const std::size_t right = ToLinearIndex(col_right, row, width_); | ||
| 128 | ✗ | const int weighted_sum = vertical_buffer_[left] + (2 * vertical_buffer_[center]) + vertical_buffer_[right]; | |
| 129 | ✗ | result_buffer_[center] = NormalizeAndRoundUp(weighted_sum); | |
| 130 | } | ||
| 131 | } | ||
| 132 | ✗ | } | |
| 133 | |||
| 134 | } // namespace pikhotskiy_r_vertical_gauss_filter | ||
| 135 |