| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "gusev_d_double_sort_even_odd_batcher_omp/omp/include/ops_omp.hpp" | ||
| 2 | |||
| 3 | #include <omp.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <atomic> | ||
| 8 | #include <bit> | ||
| 9 | #include <cstddef> | ||
| 10 | #include <cstdint> | ||
| 11 | #include <exception> | ||
| 12 | #include <iterator> | ||
| 13 | #include <utility> | ||
| 14 | #include <vector> | ||
| 15 | |||
| 16 | #include "gusev_d_double_sort_even_odd_batcher_omp/common/include/common.hpp" | ||
| 17 | |||
| 18 | namespace gusev_d_double_sort_even_odd_batcher_omp_task_threads { | ||
| 19 | namespace { | ||
| 20 | |||
| 21 | constexpr int kRadixPasses = 8; | ||
| 22 | constexpr int kBitsPerByte = 8; | ||
| 23 | constexpr size_t kRadixBuckets = 256; | ||
| 24 | constexpr uint64_t kBucketMask = 0xFFULL; | ||
| 25 | |||
| 26 | static_assert((kRadixPasses % 2) == 0, "Radix sort expects the final data to remain in the input buffer"); | ||
| 27 | |||
| 28 | using Block = std::vector<ValueType>; | ||
| 29 | using BlockList = std::vector<Block>; | ||
| 30 | |||
| 31 | struct BlockRange { | ||
| 32 | size_t begin = 0; | ||
| 33 | size_t end = 0; | ||
| 34 | }; | ||
| 35 | |||
| 36 | uint64_t DoubleToSortableKey(ValueType value) { | ||
| 37 | const auto bits = std::bit_cast<uint64_t>(value); | ||
| 38 | const auto sign_mask = uint64_t{1} << 63; | ||
| 39 |
4/4✓ Branch 0 taken 94216 times.
✓ Branch 1 taken 81464 times.
✓ Branch 2 taken 94216 times.
✓ Branch 3 taken 81464 times.
|
351360 | return (bits & sign_mask) == 0 ? bits ^ sign_mask : ~bits; |
| 40 | } | ||
| 41 | |||
| 42 | size_t GetBucketIndex(ValueType value, int shift) { | ||
| 43 | 351360 | return static_cast<size_t>((DoubleToSortableKey(value) >> shift) & kBucketMask); | |
| 44 | } | ||
| 45 | |||
| 46 | void BuildPrefixSums(std::array<size_t, kRadixBuckets> &count) { | ||
| 47 | size_t prefix = 0; | ||
| 48 |
2/2✓ Branch 0 taken 311296 times.
✓ Branch 1 taken 1216 times.
|
312512 | for (auto &value : count) { |
| 49 | 311296 | const auto current = value; | |
| 50 | 311296 | value = prefix; | |
| 51 | 311296 | prefix += current; | |
| 52 | } | ||
| 53 | } | ||
| 54 | |||
| 55 |
2/2✓ Branch 0 taken 24 times.
✓ Branch 1 taken 152 times.
|
176 | void RadixSortDoubles(Block &data) { |
| 56 |
2/2✓ Branch 0 taken 24 times.
✓ Branch 1 taken 152 times.
|
176 | if (data.size() < 2) { |
| 57 | 24 | return; | |
| 58 | } | ||
| 59 | |||
| 60 | 152 | Block buffer(data.size()); | |
| 61 | auto *src = &data; | ||
| 62 | auto *dst = &buffer; | ||
| 63 | |||
| 64 |
2/2✓ Branch 0 taken 1216 times.
✓ Branch 1 taken 152 times.
|
1368 | for (int byte = 0; byte < kRadixPasses; ++byte) { |
| 65 | 1216 | std::array<size_t, kRadixBuckets> count{}; | |
| 66 | 1216 | const auto shift = byte * kBitsPerByte; | |
| 67 | |||
| 68 |
4/4✓ Branch 0 taken 94216 times.
✓ Branch 1 taken 81464 times.
✓ Branch 2 taken 175680 times.
✓ Branch 3 taken 1216 times.
|
176896 | for (ValueType value : *src) { |
| 69 | 175680 | count.at(GetBucketIndex(value, shift))++; | |
| 70 | } | ||
| 71 | BuildPrefixSums(count); | ||
| 72 | |||
| 73 |
4/4✓ Branch 0 taken 94216 times.
✓ Branch 1 taken 81464 times.
✓ Branch 2 taken 175680 times.
✓ Branch 3 taken 1216 times.
|
176896 | for (ValueType value : *src) { |
| 74 | const auto bucket = GetBucketIndex(value, shift); | ||
| 75 | 175680 | (*dst)[count.at(bucket)++] = value; | |
| 76 | } | ||
| 77 | |||
| 78 | std::swap(src, dst); | ||
| 79 | } | ||
| 80 | } | ||
| 81 | |||
| 82 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 216 times.
|
216 | void SplitByGlobalParity(const Block &source, size_t global_offset, Block &even, Block &odd) { |
| 83 | even.clear(); | ||
| 84 | odd.clear(); | ||
| 85 | 216 | even.reserve((source.size() + 1) / 2); | |
| 86 | 216 | odd.reserve(source.size() / 2); | |
| 87 | |||
| 88 |
2/2✓ Branch 0 taken 25774 times.
✓ Branch 1 taken 216 times.
|
25990 | for (size_t i = 0; i < source.size(); ++i) { |
| 89 |
2/2✓ Branch 0 taken 12913 times.
✓ Branch 1 taken 12861 times.
|
25774 | if (((global_offset + i) & 1U) == 0U) { |
| 90 | even.push_back(source[i]); | ||
| 91 | } else { | ||
| 92 | odd.push_back(source[i]); | ||
| 93 | } | ||
| 94 | } | ||
| 95 | 216 | } | |
| 96 | |||
| 97 | 108 | Block InterleaveParityGroups(size_t total_size, const Block &even, const Block &odd) { | |
| 98 | 108 | Block result(total_size); | |
| 99 | size_t even_index = 0; | ||
| 100 | size_t odd_index = 0; | ||
| 101 | |||
| 102 |
2/2✓ Branch 0 taken 25774 times.
✓ Branch 1 taken 108 times.
|
25882 | for (size_t i = 0; i < total_size; ++i) { |
| 103 |
2/2✓ Branch 0 taken 12913 times.
✓ Branch 1 taken 12861 times.
|
25774 | if ((i & 1U) == 0U) { |
| 104 | 12913 | result[i] = even[even_index++]; | |
| 105 | } else { | ||
| 106 | 12861 | result[i] = odd[odd_index++]; | |
| 107 | } | ||
| 108 | } | ||
| 109 | |||
| 110 | 108 | return result; | |
| 111 | } | ||
| 112 | |||
| 113 | 108 | void OddEvenFinalize(Block &result) { | |
| 114 |
2/2✓ Branch 0 taken 25774 times.
✓ Branch 1 taken 108 times.
|
25882 | for (size_t phase = 0; phase < result.size(); ++phase) { |
| 115 | 25774 | const auto start = phase & 1U; | |
| 116 |
2/2✓ Branch 0 taken 35169105 times.
✓ Branch 1 taken 25774 times.
|
35194879 | for (size_t i = start; i + 1 < result.size(); i += 2) { |
| 117 |
2/2✓ Branch 0 taken 6016 times.
✓ Branch 1 taken 35163089 times.
|
35169105 | if (result[i] > result[i + 1]) { |
| 118 | std::swap(result[i], result[i + 1]); | ||
| 119 | } | ||
| 120 | } | ||
| 121 | } | ||
| 122 | 108 | } | |
| 123 | |||
| 124 | 108 | void SplitBlocksByParity(const Block &left, const Block &right, Block &left_even, Block &left_odd, Block &right_even, | |
| 125 | Block &right_odd) { | ||
| 126 | 108 | SplitByGlobalParity(left, 0, left_even, left_odd); | |
| 127 | 108 | SplitByGlobalParity(right, left.size(), right_even, right_odd); | |
| 128 | 108 | } | |
| 129 | |||
| 130 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 108 times.
|
108 | void MergeParityGroups(const Block &left_even, const Block &right_even, const Block &left_odd, const Block &right_odd, |
| 131 | Block &merged_even, Block &merged_odd) { | ||
| 132 | merged_even.clear(); | ||
| 133 | merged_odd.clear(); | ||
| 134 | 108 | merged_even.reserve(left_even.size() + right_even.size()); | |
| 135 | 108 | merged_odd.reserve(left_odd.size() + right_odd.size()); | |
| 136 | |||
| 137 | 108 | std::ranges::merge(left_even, right_even, std::back_inserter(merged_even)); | |
| 138 | 108 | std::ranges::merge(left_odd, right_odd, std::back_inserter(merged_odd)); | |
| 139 | 108 | } | |
| 140 | |||
| 141 | 108 | Block MergeBatcherEvenOdd(const Block &left, const Block &right) { | |
| 142 | 108 | Block left_even; | |
| 143 | 108 | Block left_odd; | |
| 144 | 108 | Block right_even; | |
| 145 | 108 | Block right_odd; | |
| 146 | |||
| 147 |
1/2✓ Branch 1 taken 108 times.
✗ Branch 2 not taken.
|
108 | SplitBlocksByParity(left, right, left_even, left_odd, right_even, right_odd); |
| 148 | |||
| 149 | 108 | Block merged_even; | |
| 150 | 108 | Block merged_odd; | |
| 151 |
1/2✓ Branch 1 taken 108 times.
✗ Branch 2 not taken.
|
108 | MergeParityGroups(left_even, right_even, left_odd, right_odd, merged_even, merged_odd); |
| 152 | |||
| 153 |
1/2✓ Branch 1 taken 108 times.
✗ Branch 2 not taken.
|
108 | auto result = InterleaveParityGroups(left.size() + right.size(), merged_even, merged_odd); |
| 154 | 108 | OddEvenFinalize(result); | |
| 155 | 108 | return result; | |
| 156 | } | ||
| 157 | |||
| 158 | BlockRange GetBlockRange(size_t block_index, size_t block_count, size_t total_size) { | ||
| 159 | return { | ||
| 160 | 176 | .begin = (block_index * total_size) / block_count, | |
| 161 | 176 | .end = ((block_index + 1) * total_size) / block_count, | |
| 162 | }; | ||
| 163 | } | ||
| 164 | |||
| 165 | 68 | size_t GetBlockCount(size_t input_size) { | |
| 166 |
2/2✓ Branch 1 taken 60 times.
✓ Branch 2 taken 8 times.
|
68 | const auto omp_threads = static_cast<size_t>(std::max(1, omp_get_max_threads())); |
| 167 |
2/2✓ Branch 0 taken 50 times.
✓ Branch 1 taken 18 times.
|
68 | return std::max<size_t>(1, std::min(input_size, omp_threads)); |
| 168 | } | ||
| 169 | |||
| 170 | 176 | void FillAndSortBlock(const std::vector<ValueType> &input, Block &block, BlockRange range) { | |
| 171 | 176 | block.assign(input.begin() + static_cast<std::ptrdiff_t>(range.begin), | |
| 172 | 176 | input.begin() + static_cast<std::ptrdiff_t>(range.end)); | |
| 173 | 176 | RadixSortDoubles(block); | |
| 174 | 176 | } | |
| 175 | |||
| 176 | ✗ | void StoreFirstException(std::atomic_bool &has_exception, std::exception_ptr &exception) { | |
| 177 | ✗ | #pragma omp critical | |
| 178 | ✗ | if (!has_exception.exchange(true)) { | |
| 179 | ✗ | exception = std::current_exception(); | |
| 180 | } | ||
| 181 | ✗ | } | |
| 182 | |||
| 183 | template <class Func> | ||
| 184 |
1/2✓ Branch 0 taken 284 times.
✗ Branch 1 not taken.
|
568 | void ExecuteWithExceptionCapture(std::atomic_bool &has_exception, std::exception_ptr &exception, Func &&func) { |
| 185 |
1/2✓ Branch 0 taken 284 times.
✗ Branch 1 not taken.
|
568 | if (has_exception.load()) { |
| 186 | return; | ||
| 187 | } | ||
| 188 | |||
| 189 | try { | ||
| 190 |
1/2✓ Branch 1 taken 176 times.
✗ Branch 2 not taken.
|
352 | std::forward<Func>(func)(); |
| 191 | ✗ | } catch (...) { | |
| 192 | ✗ | StoreFirstException(has_exception, exception); | |
| 193 | } | ||
| 194 | } | ||
| 195 | |||
| 196 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 158 times.
|
158 | void RethrowIfCaptured(const std::exception_ptr &exception) { |
| 197 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 158 times.
|
158 | if (exception != nullptr) { |
| 198 | ✗ | std::rethrow_exception(exception); | |
| 199 | } | ||
| 200 | 158 | } | |
| 201 | |||
| 202 | 68 | BlockList MakeSortedBlocks(const std::vector<ValueType> &input) { | |
| 203 | 68 | const auto block_count = GetBlockCount(input.size()); | |
| 204 | const auto total_size = input.size(); | ||
| 205 | 68 | const auto signed_block_count = static_cast<std::ptrdiff_t>(block_count); | |
| 206 | |||
| 207 |
1/2✓ Branch 2 taken 68 times.
✗ Branch 3 not taken.
|
68 | BlockList blocks(block_count); |
| 208 | std::exception_ptr exception; | ||
| 209 | 68 | std::atomic_bool has_exception{false}; | |
| 210 | |||
| 211 | 68 | #pragma omp parallel for default(none) shared(input, blocks, exception, has_exception) \ | |
| 212 | firstprivate(block_count, signed_block_count, total_size) schedule(static) if (block_count > 1) | ||
| 213 | for (std::ptrdiff_t block = 0; block < signed_block_count; ++block) { | ||
| 214 | 176 | ExecuteWithExceptionCapture(has_exception, exception, [&]() { | |
| 215 | 176 | const auto index = static_cast<size_t>(block); | |
| 216 | 176 | FillAndSortBlock(input, blocks[index], GetBlockRange(index, block_count, total_size)); | |
| 217 | 176 | }); | |
| 218 | } | ||
| 219 | |||
| 220 |
1/2✓ Branch 1 taken 68 times.
✗ Branch 2 not taken.
|
68 | RethrowIfCaptured(exception); |
| 221 | |||
| 222 | 68 | return blocks; | |
| 223 | ✗ | } | |
| 224 | |||
| 225 | 108 | void MergeBlockPair(const BlockList &blocks, BlockList &next, size_t pair_index) { | |
| 226 | 108 | next[pair_index] = MergeBatcherEvenOdd(blocks[pair_index * 2], blocks[(pair_index * 2) + 1]); | |
| 227 | 108 | } | |
| 228 | |||
| 229 | 90 | BlockList MergeBlockPairs(const BlockList &blocks) { | |
| 230 | 90 | const auto signed_pair_count = static_cast<std::ptrdiff_t>(blocks.size() / 2); | |
| 231 |
1/2✓ Branch 2 taken 90 times.
✗ Branch 3 not taken.
|
90 | BlockList next((blocks.size() + 1) / 2); |
| 232 | std::exception_ptr exception; | ||
| 233 | 90 | std::atomic_bool has_exception{false}; | |
| 234 | |||
| 235 | 90 | #pragma omp parallel for default(none) shared(blocks, next, exception, has_exception) firstprivate(signed_pair_count) \ | |
| 236 | schedule(static) if (signed_pair_count > 1) | ||
| 237 | for (std::ptrdiff_t pair = 0; pair < signed_pair_count; ++pair) { | ||
| 238 | ExecuteWithExceptionCapture(has_exception, exception, | ||
| 239 |
1/2✓ Branch 1 taken 108 times.
✗ Branch 2 not taken.
|
108 | [&]() { MergeBlockPair(blocks, next, static_cast<size_t>(pair)); }); |
| 240 | } | ||
| 241 | |||
| 242 |
1/2✓ Branch 1 taken 90 times.
✗ Branch 2 not taken.
|
90 | RethrowIfCaptured(exception); |
| 243 | |||
| 244 |
2/2✓ Branch 0 taken 26 times.
✓ Branch 1 taken 64 times.
|
90 | if ((blocks.size() & 1U) != 0U) { |
| 245 |
1/2✓ Branch 1 taken 26 times.
✗ Branch 2 not taken.
|
26 | next.back() = blocks.back(); |
| 246 | } | ||
| 247 | |||
| 248 | 90 | return next; | |
| 249 | ✗ | } | |
| 250 | |||
| 251 | 68 | Block MergeBlocks(BlockList blocks) { | |
| 252 |
2/2✓ Branch 0 taken 90 times.
✓ Branch 1 taken 68 times.
|
158 | while (blocks.size() > 1) { |
| 253 | 90 | blocks = MergeBlockPairs(blocks); | |
| 254 | } | ||
| 255 | |||
| 256 | 68 | return std::move(blocks.front()); | |
| 257 | } | ||
| 258 | |||
| 259 | } // namespace | ||
| 260 | |||
| 261 |
1/2✓ Branch 1 taken 88 times.
✗ Branch 2 not taken.
|
88 | DoubleSortEvenOddBatcherOMP::DoubleSortEvenOddBatcherOMP(const InType &in) { |
| 262 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 263 |
1/2✓ Branch 1 taken 88 times.
✗ Branch 2 not taken.
|
88 | GetInput() = in; |
| 264 | GetOutput() = {}; | ||
| 265 | 88 | } | |
| 266 | |||
| 267 | 84 | bool DoubleSortEvenOddBatcherOMP::ValidationImpl() { | |
| 268 | 84 | return GetOutput().empty(); | |
| 269 | } | ||
| 270 | |||
| 271 | 76 | bool DoubleSortEvenOddBatcherOMP::PreProcessingImpl() { | |
| 272 | 76 | input_data_ = GetInput(); | |
| 273 | result_data_.clear(); | ||
| 274 | 76 | return true; | |
| 275 | } | ||
| 276 | |||
| 277 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 68 times.
|
76 | bool DoubleSortEvenOddBatcherOMP::RunImpl() { |
| 278 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 68 times.
|
76 | if (input_data_.empty()) { |
| 279 | result_data_.clear(); | ||
| 280 | 8 | return true; | |
| 281 | } | ||
| 282 | |||
| 283 | 68 | auto blocks = MakeSortedBlocks(input_data_); | |
| 284 |
1/2✓ Branch 1 taken 68 times.
✗ Branch 2 not taken.
|
136 | result_data_ = MergeBlocks(std::move(blocks)); |
| 285 | return true; | ||
| 286 | 68 | } | |
| 287 | |||
| 288 | 72 | bool DoubleSortEvenOddBatcherOMP::PostProcessingImpl() { | |
| 289 | 72 | GetOutput() = result_data_; | |
| 290 | 72 | return true; | |
| 291 | } | ||
| 292 | |||
| 293 | } // namespace gusev_d_double_sort_even_odd_batcher_omp_task_threads | ||
| 294 |