| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "zaharov_g_linear_contrast_stretch/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cstddef> | ||
| 7 | #include <cstdint> | ||
| 8 | #include <limits> | ||
| 9 | #include <thread> | ||
| 10 | #include <utility> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include "oneapi/tbb/blocked_range.h" | ||
| 14 | #include "oneapi/tbb/parallel_for.h" | ||
| 15 | #include "util/include/util.hpp" | ||
| 16 | #include "zaharov_g_linear_contrast_stretch/common/include/common.hpp" | ||
| 17 | |||
| 18 | namespace zaharov_g_linear_contrast_stretch { | ||
| 19 | |||
| 20 | namespace { | ||
| 21 | |||
| 22 | struct MinMax { | ||
| 23 | int min; | ||
| 24 | int max; | ||
| 25 | }; | ||
| 26 | |||
| 27 | 14 | std::size_t GetThreadCount(std::size_t size) { | |
| 28 | 2/2 ✓ Branch 1 taken 10 times. ✓ Branch 2 taken 4 times. | 14 | const auto requested_threads = static_cast<std::size_t>(std::max(1, ppc::util::GetNumThreads())); |
| 29 | 2/2 ✓ Branch 0 taken 11 times. ✓ Branch 1 taken 3 times. | 14 | return std::max<std::size_t>(1, std::min(size, requested_threads)); |
| 30 | } | ||
| 31 | |||
| 32 | 8 | std::pair<std::size_t, std::size_t> GetRankRange(std::size_t size) { | |
| 33 | 8 | int rank = 0; | |
| 34 | 8 | int world_size = 1; | |
| 35 | 8 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 36 | 8 | MPI_Comm_size(MPI_COMM_WORLD, &world_size); | |
| 37 | |||
| 38 | 8 | const auto rank_id = static_cast<std::size_t>(rank); | |
| 39 | 8 | const auto ranks_count = static_cast<std::size_t>(world_size); | |
| 40 | 8 | return {size * rank_id / ranks_count, size * (rank_id + 1) / ranks_count}; | |
| 41 | } | ||
| 42 | |||
| 43 | 8 | MinMax FindLocalMinMaxStl(const InType &input, std::size_t begin, std::size_t end) { | |
| 44 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 8 times. | 8 | if (begin == end) { |
| 45 | ✗ | return {.min = std::numeric_limits<uint8_t>::max(), .max = std::numeric_limits<uint8_t>::min()}; | |
| 46 | } | ||
| 47 | |||
| 48 | 8 | const std::size_t range_size = end - begin; | |
| 49 | 8 | const std::size_t thread_count = GetThreadCount(range_size); | |
| 50 | std::vector<MinMax> local_minmax( | ||
| 51 | 8 | thread_count, {.min = std::numeric_limits<uint8_t>::max(), .max = std::numeric_limits<uint8_t>::min()}); | |
| 52 | 8 | std::vector<std::thread> threads; | |
| 53 | 1/2 ✓ Branch 1 taken 8 times. ✗ Branch 2 not taken. | 8 | threads.reserve(thread_count); |
| 54 | |||
| 55 | 2/2 ✓ Branch 0 taken 15 times. ✓ Branch 1 taken 8 times. | 23 | for (std::size_t thread_index = 0; thread_index < thread_count; ++thread_index) { |
| 56 | 15 | const std::size_t thread_begin = begin + (range_size * thread_index / thread_count); | |
| 57 | 15 | const std::size_t thread_end = begin + (range_size * (thread_index + 1) / thread_count); | |
| 58 | 1/2 ✓ Branch 1 taken 15 times. ✗ Branch 2 not taken. | 15 | threads.emplace_back([&input, &local_minmax, thread_begin, thread_end, thread_index]() { |
| 59 | 15 | MinMax current{.min = std::numeric_limits<uint8_t>::max(), .max = std::numeric_limits<uint8_t>::min()}; | |
| 60 | 2/2 ✓ Branch 0 taken 1269 times. ✓ Branch 1 taken 15 times. | 1284 | for (std::size_t i = thread_begin; i < thread_end; ++i) { |
| 61 | 1269 | const int value = static_cast<int>(input[i]); | |
| 62 | 1269 | current.min = std::min(current.min, value); | |
| 63 | 1269 | current.max = std::max(current.max, value); | |
| 64 | } | ||
| 65 | 15 | local_minmax[thread_index] = current; | |
| 66 | 15 | }); | |
| 67 | } | ||
| 68 | |||
| 69 | 2/2 ✓ Branch 0 taken 15 times. ✓ Branch 1 taken 8 times. | 23 | for (auto &thread : threads) { |
| 70 | 1/2 ✓ Branch 1 taken 15 times. ✗ Branch 2 not taken. | 15 | thread.join(); |
| 71 | } | ||
| 72 | |||
| 73 | 8 | MinMax result{.min = std::numeric_limits<uint8_t>::max(), .max = std::numeric_limits<uint8_t>::min()}; | |
| 74 | 2/2 ✓ Branch 0 taken 15 times. ✓ Branch 1 taken 8 times. | 23 | for (const auto &current : local_minmax) { |
| 75 | 15 | result.min = std::min(result.min, current.min); | |
| 76 | 15 | result.max = std::max(result.max, current.max); | |
| 77 | } | ||
| 78 | 8 | return result; | |
| 79 | 8 | } | |
| 80 | |||
| 81 | 8 | MinMax FindGlobalMinMax(const InType &input) { | |
| 82 | 8 | const auto [begin, end] = GetRankRange(input.size()); | |
| 83 | 8 | const MinMax local = FindLocalMinMaxStl(input, begin, end); | |
| 84 | |||
| 85 | 8 | MinMax global{.min = std::numeric_limits<uint8_t>::max(), .max = std::numeric_limits<uint8_t>::min()}; | |
| 86 | 8 | MPI_Allreduce(&local.min, &global.min, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); | |
| 87 | 8 | MPI_Allreduce(&local.max, &global.max, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); | |
| 88 | 8 | return global; | |
| 89 | } | ||
| 90 | |||
| 91 | 6 | void StretchOmpRange(const InType &input, OutType &output, std::size_t begin, std::size_t end, int min_el, int denom) { | |
| 92 | 6 | const auto signed_begin = static_cast<std::int64_t>(begin); | |
| 93 | 6 | const auto signed_end = static_cast<std::int64_t>(end); | |
| 94 | 6 | #pragma omp parallel for default(none) shared(input, output, min_el, denom, signed_begin, signed_end) \ | |
| 95 | 6 | num_threads(ppc::util::GetNumThreads()) | |
| 96 | for (std::int64_t i = signed_begin; i < signed_end; ++i) { | ||
| 97 | const int value = | ||
| 98 | (static_cast<int>(input[static_cast<std::size_t>(i)]) - min_el) * std::numeric_limits<uint8_t>::max() / denom; | ||
| 99 | output[static_cast<std::size_t>(i)] = static_cast<uint8_t>(std::clamp(value, 0, 255)); | ||
| 100 | } | ||
| 101 | 6 | } | |
| 102 | |||
| 103 | 6 | void StretchStlRange(const InType &input, OutType &output, std::size_t begin, std::size_t end, int min_el, int denom) { | |
| 104 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 6 times. | 6 | if (begin == end) { |
| 105 | ✗ | return; | |
| 106 | } | ||
| 107 | |||
| 108 | 6 | const std::size_t range_size = end - begin; | |
| 109 | 6 | const std::size_t thread_count = GetThreadCount(range_size); | |
| 110 | 6 | std::vector<std::thread> threads; | |
| 111 | 1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken. | 6 | threads.reserve(thread_count); |
| 112 | |||
| 113 | 2/2 ✓ Branch 0 taken 10 times. ✓ Branch 1 taken 6 times. | 16 | for (std::size_t thread_index = 0; thread_index < thread_count; ++thread_index) { |
| 114 | 10 | const std::size_t thread_begin = begin + (range_size * thread_index / thread_count); | |
| 115 | 10 | const std::size_t thread_end = begin + (range_size * (thread_index + 1) / thread_count); | |
| 116 | 1/2 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken. | 10 | threads.emplace_back([&input, &output, thread_begin, thread_end, min_el, denom]() { |
| 117 | 2/2 ✓ Branch 0 taken 838 times. ✓ Branch 1 taken 10 times. | 848 | for (std::size_t i = thread_begin; i < thread_end; ++i) { |
| 118 | 838 | const int value = (static_cast<int>(input[i]) - min_el) * std::numeric_limits<uint8_t>::max() / denom; | |
| 119 | 838 | output[i] = static_cast<uint8_t>(std::clamp(value, 0, 255)); | |
| 120 | } | ||
| 121 | 10 | }); | |
| 122 | } | ||
| 123 | |||
| 124 | 2/2 ✓ Branch 0 taken 10 times. ✓ Branch 1 taken 6 times. | 16 | for (auto &thread : threads) { |
| 125 | 1/2 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken. | 10 | thread.join(); |
| 126 | } | ||
| 127 | 6 | } | |
| 128 | |||
| 129 | void StretchTbbRange(const InType &input, OutType &output, std::size_t begin, std::size_t end, int min_el, int denom) { | ||
| 130 | 6 | oneapi::tbb::parallel_for(oneapi::tbb::blocked_range<std::size_t>(begin, end), | |
| 131 | 6 | [&input, &output, min_el, denom](const oneapi::tbb::blocked_range<std::size_t> &range) { | |
| 132 | 4/4 ✓ Branch 0 taken 74 times. ✓ Branch 1 taken 42 times. ✓ Branch 2 taken 768 times. ✓ Branch 3 taken 388 times. | 1272 | for (std::size_t i = range.begin(); i != range.end(); ++i) { |
| 133 | 842 | const int value = (static_cast<int>(input[i]) - min_el) * std::numeric_limits<uint8_t>::max() / denom; | |
| 134 | 842 | output[i] = static_cast<uint8_t>(std::clamp(value, 0, 255)); | |
| 135 | } | ||
| 136 | }); | ||
| 137 | 6 | } | |
| 138 | |||
| 139 | 2 | void CopyTbb(const InType &input, OutType &output) { | |
| 140 | 2 | oneapi::tbb::parallel_for(oneapi::tbb::blocked_range<std::size_t>(0, input.size()), | |
| 141 | 22 | [&input, &output](const oneapi::tbb::blocked_range<std::size_t> &range) { | |
| 142 | 20 | std::copy(input.begin() + static_cast<std::ptrdiff_t>(range.begin()), | |
| 143 | 20 | input.begin() + static_cast<std::ptrdiff_t>(range.end()), | |
| 144 | 20 | output.begin() + static_cast<std::ptrdiff_t>(range.begin())); | |
| 145 | 20 | }); | |
| 146 | 2 | } | |
| 147 | |||
| 148 | 8 | void StretchImage(const InType &input, OutType &output, const MinMax &minmax) { | |
| 149 | 2/2 ✓ Branch 0 taken 2 times. ✓ Branch 1 taken 6 times. | 8 | if (minmax.max <= minmax.min) { |
| 150 | 2 | CopyTbb(input, output); | |
| 151 | 2 | return; | |
| 152 | } | ||
| 153 | |||
| 154 | 6 | const int denom = minmax.max - minmax.min; | |
| 155 | 6 | const std::size_t first_border = input.size() / 3; | |
| 156 | 6 | const std::size_t second_border = (2 * input.size()) / 3; | |
| 157 | |||
| 158 | 6 | StretchOmpRange(input, output, 0, first_border, minmax.min, denom); | |
| 159 | 6 | StretchStlRange(input, output, first_border, second_border, minmax.min, denom); | |
| 160 | 6 | StretchTbbRange(input, output, second_border, input.size(), minmax.min, denom); | |
| 161 | } | ||
| 162 | |||
| 163 | } // namespace | ||
| 164 | |||
| 165 | 1/2 ✓ Branch 1 taken 8 times. ✗ Branch 2 not taken. | 8 | ZaharovGLinContrStrALL::ZaharovGLinContrStrALL(const InType &in) { |
| 166 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 167 | 1/2 ✓ Branch 1 taken 8 times. ✗ Branch 2 not taken. | 8 | GetInput() = in; |
| 168 | 8 | } | |
| 169 | |||
| 170 | 8 | bool ZaharovGLinContrStrALL::ValidationImpl() { | |
| 171 | 8 | return !GetInput().empty(); | |
| 172 | } | ||
| 173 | |||
| 174 | 8 | bool ZaharovGLinContrStrALL::PreProcessingImpl() { | |
| 175 | 8 | GetOutput().resize(GetInput().size()); | |
| 176 | 8 | return true; | |
| 177 | } | ||
| 178 | |||
| 179 | 8 | bool ZaharovGLinContrStrALL::RunImpl() { | |
| 180 | const InType &input = GetInput(); | ||
| 181 | OutType &output = GetOutput(); | ||
| 182 | |||
| 183 | 8 | const MinMax minmax = FindGlobalMinMax(input); | |
| 184 | 8 | StretchImage(input, output, minmax); | |
| 185 | |||
| 186 | 8 | MPI_Barrier(MPI_COMM_WORLD); | |
| 187 | 8 | return true; | |
| 188 | } | ||
| 189 | |||
| 190 | 8 | bool ZaharovGLinContrStrALL::PostProcessingImpl() { | |
| 191 | 8 | return !GetOutput().empty(); | |
| 192 | } | ||
| 193 | |||
| 194 | } // namespace zaharov_g_linear_contrast_stretch | ||
| 195 |