| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "baldin_a_radix_sort/tbb/include/ops_tbb.hpp" | ||
| 2 | |||
| 3 | #include <tbb/tbb.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <cstdint> | ||
| 9 | #include <iterator> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "baldin_a_radix_sort/common/include/common.hpp" | ||
| 13 | #include "oneapi/tbb/parallel_for.h" | ||
| 14 | #include "util/include/util.hpp" | ||
| 15 | |||
| 16 | namespace baldin_a_radix_sort { | ||
| 17 | |||
| 18 |
1/2✓ Branch 1 taken 52 times.
✗ Branch 2 not taken.
|
52 | BaldinARadixSortTBB::BaldinARadixSortTBB(const InType &in) { |
| 19 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 20 |
1/2✓ Branch 1 taken 52 times.
✗ Branch 2 not taken.
|
52 | GetInput() = in; |
| 21 | GetOutput() = {}; | ||
| 22 | 52 | } | |
| 23 | |||
| 24 | 52 | bool BaldinARadixSortTBB::ValidationImpl() { | |
| 25 | 52 | return true; | |
| 26 | } | ||
| 27 | |||
| 28 | 52 | bool BaldinARadixSortTBB::PreProcessingImpl() { | |
| 29 | 52 | GetOutput() = GetInput(); | |
| 30 | 52 | return true; | |
| 31 | } | ||
| 32 | |||
| 33 | namespace { | ||
| 34 | |||
| 35 | 352 | void CountingSortStep(std::vector<int>::iterator in_begin, std::vector<int>::iterator in_end, | |
| 36 | std::vector<int>::iterator out_begin, size_t byte_index) { | ||
| 37 | 352 | size_t shift = byte_index * 8; | |
| 38 | 352 | std::array<size_t, 256> count = {0}; | |
| 39 | |||
| 40 |
2/2✓ Branch 0 taken 3976 times.
✓ Branch 1 taken 352 times.
|
4328 | for (auto it = in_begin; it != in_end; it++) { |
| 41 | 3976 | auto raw_val = static_cast<unsigned int>(*it); | |
| 42 | 3976 | unsigned int byte_val = (raw_val >> shift) & 0xFF; | |
| 43 | |||
| 44 |
2/2✓ Branch 0 taken 994 times.
✓ Branch 1 taken 2982 times.
|
3976 | if (byte_index == sizeof(int) - 1) { |
| 45 | 994 | byte_val ^= 128; | |
| 46 | } | ||
| 47 | 3976 | count.at(byte_val)++; | |
| 48 | } | ||
| 49 | |||
| 50 | 352 | std::array<size_t, 256> prefix{}; | |
| 51 | prefix[0] = 0; | ||
| 52 |
2/2✓ Branch 0 taken 89760 times.
✓ Branch 1 taken 352 times.
|
90112 | for (int i = 1; i < 256; i++) { |
| 53 | 89760 | prefix.at(i) = prefix.at(i - 1) + count.at(i - 1); | |
| 54 | } | ||
| 55 | |||
| 56 |
2/2✓ Branch 0 taken 3976 times.
✓ Branch 1 taken 352 times.
|
4328 | for (auto it = in_begin; it != in_end; it++) { |
| 57 | 3976 | auto raw_val = static_cast<unsigned int>(*it); | |
| 58 | 3976 | unsigned int byte_val = (raw_val >> shift) & 0xFF; | |
| 59 | |||
| 60 |
2/2✓ Branch 0 taken 994 times.
✓ Branch 1 taken 2982 times.
|
3976 | if (byte_index == sizeof(int) - 1) { |
| 61 | 994 | byte_val ^= 128; | |
| 62 | } | ||
| 63 | |||
| 64 | 3976 | *(out_begin + static_cast<int64_t>(prefix.at(byte_val))) = *it; | |
| 65 | 3976 | prefix.at(byte_val)++; | |
| 66 | } | ||
| 67 | 352 | } | |
| 68 | |||
| 69 |
2/2✓ Branch 0 taken 42 times.
✓ Branch 1 taken 88 times.
|
130 | void RadixSortLocal(std::vector<int>::iterator begin, std::vector<int>::iterator end) { |
| 70 | 130 | size_t n = std::distance(begin, end); | |
| 71 |
2/2✓ Branch 0 taken 42 times.
✓ Branch 1 taken 88 times.
|
130 | if (n < 2) { |
| 72 | 42 | return; | |
| 73 | } | ||
| 74 | |||
| 75 | 88 | std::vector<int> temp(n); | |
| 76 | |||
| 77 |
2/2✓ Branch 0 taken 352 times.
✓ Branch 1 taken 88 times.
|
440 | for (size_t i = 0; i < sizeof(int); i++) { |
| 78 | size_t shift = i; | ||
| 79 | |||
| 80 |
2/2✓ Branch 0 taken 176 times.
✓ Branch 1 taken 176 times.
|
352 | if (i % 2 == 0) { |
| 81 | 176 | CountingSortStep(begin, end, temp.begin(), shift); | |
| 82 | } else { | ||
| 83 | 176 | CountingSortStep(temp.begin(), temp.end(), begin, shift); | |
| 84 | } | ||
| 85 | } | ||
| 86 | } | ||
| 87 | |||
| 88 | } // namespace | ||
| 89 | |||
| 90 | 52 | bool BaldinARadixSortTBB::RunImpl() { | |
| 91 | auto &out = GetOutput(); | ||
| 92 | 52 | int n = static_cast<int>(out.size()); | |
| 93 | |||
| 94 | 52 | int num_chunks = ppc::util::GetNumThreads(); | |
| 95 | |||
| 96 | 52 | std::vector<int> offsets(num_chunks + 1); | |
| 97 | 52 | int chunk_size = n / num_chunks; | |
| 98 | 52 | int rem = n % num_chunks; | |
| 99 | int curr = 0; | ||
| 100 |
2/2✓ Branch 0 taken 130 times.
✓ Branch 1 taken 52 times.
|
182 | for (int i = 0; i < num_chunks; i++) { |
| 101 |
2/2✓ Branch 0 taken 87 times.
✓ Branch 1 taken 43 times.
|
130 | offsets[i] = curr; |
| 102 |
2/2✓ Branch 0 taken 87 times.
✓ Branch 1 taken 43 times.
|
217 | curr += chunk_size + (i < rem ? 1 : 0); |
| 103 | } | ||
| 104 |
1/2✓ Branch 1 taken 52 times.
✗ Branch 2 not taken.
|
52 | offsets[num_chunks] = n; |
| 105 | |||
| 106 |
1/2✓ Branch 1 taken 52 times.
✗ Branch 2 not taken.
|
182 | tbb::parallel_for(tbb::blocked_range<int>(0, num_chunks), [&](const tbb::blocked_range<int> &r) { |
| 107 |
2/2✓ Branch 0 taken 130 times.
✓ Branch 1 taken 130 times.
|
260 | for (int tid = r.begin(); tid != r.end(); tid++) { |
| 108 | 130 | auto begin = out.begin() + offsets[tid]; | |
| 109 | 130 | auto end = out.begin() + offsets[tid + 1]; | |
| 110 | 130 | RadixSortLocal(begin, end); | |
| 111 | } | ||
| 112 | 130 | }); | |
| 113 | |||
| 114 |
2/2✓ Branch 0 taken 65 times.
✓ Branch 1 taken 52 times.
|
117 | for (int step = 1; step < num_chunks; step *= 2) { |
| 115 | 65 | int num_merges = (num_chunks + (2 * step) - 1) / (2 * step); | |
| 116 |
1/4✓ Branch 1 taken 65 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
156 | tbb::parallel_for(tbb::blocked_range<int>(0, num_merges), [&](const tbb::blocked_range<int> &r) { |
| 117 |
2/2✓ Branch 0 taken 91 times.
✓ Branch 1 taken 91 times.
|
182 | for (int m_idx = r.begin(); m_idx != r.end(); m_idx++) { |
| 118 | 91 | int i = m_idx * (2 * step); | |
| 119 | |||
| 120 |
2/2✓ Branch 0 taken 78 times.
✓ Branch 1 taken 13 times.
|
91 | if (i + step < num_chunks) { |
| 121 |
2/2✓ Branch 0 taken 13 times.
✓ Branch 1 taken 65 times.
|
78 | auto begin = out.begin() + offsets[i]; |
| 122 | 78 | auto middle = out.begin() + offsets[i + step]; | |
| 123 |
2/2✓ Branch 0 taken 13 times.
✓ Branch 1 taken 65 times.
|
78 | int end_idx = std::min(i + (2 * step), num_chunks); |
| 124 | 78 | auto end = out.begin() + offsets[end_idx]; | |
| 125 | |||
| 126 | 78 | std::inplace_merge(begin, middle, end); | |
| 127 | } | ||
| 128 | } | ||
| 129 | 91 | }); | |
| 130 | } | ||
| 131 | |||
| 132 | 52 | return true; | |
| 133 | } | ||
| 134 | |||
| 135 | 52 | bool BaldinARadixSortTBB::PostProcessingImpl() { | |
| 136 | 52 | return true; | |
| 137 | } | ||
| 138 | |||
| 139 | } // namespace baldin_a_radix_sort | ||
| 140 |