| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "baldin_a_radix_sort/stl/include/ops_stl.hpp" | ||
| 2 | |||
| 3 | #include <algorithm> | ||
| 4 | #include <array> | ||
| 5 | #include <cstddef> | ||
| 6 | #include <cstdint> | ||
| 7 | #include <future> | ||
| 8 | #include <iterator> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "baldin_a_radix_sort/common/include/common.hpp" | ||
| 12 | #include "util/include/util.hpp" | ||
| 13 | |||
| 14 | namespace baldin_a_radix_sort { | ||
| 15 | |||
| 16 |
1/2✓ Branch 1 taken 104 times.
✗ Branch 2 not taken.
|
104 | BaldinARadixSortSTL::BaldinARadixSortSTL(const InType &in) { |
| 17 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 18 |
1/2✓ Branch 1 taken 104 times.
✗ Branch 2 not taken.
|
104 | GetInput() = in; |
| 19 | GetOutput() = {}; | ||
| 20 | 104 | } | |
| 21 | |||
| 22 | 104 | bool BaldinARadixSortSTL::ValidationImpl() { | |
| 23 | 104 | return true; | |
| 24 | } | ||
| 25 | |||
| 26 | 104 | bool BaldinARadixSortSTL::PreProcessingImpl() { | |
| 27 | 104 | GetOutput() = GetInput(); | |
| 28 | 104 | return true; | |
| 29 | } | ||
| 30 | |||
| 31 | namespace { | ||
| 32 | |||
| 33 | 704 | void CountingSortStep(std::vector<int>::iterator in_begin, std::vector<int>::iterator in_end, | |
| 34 | std::vector<int>::iterator out_begin, size_t byte_index) { | ||
| 35 | 704 | size_t shift = byte_index * 8; | |
| 36 | 704 | std::array<size_t, 256> count = {0}; | |
| 37 | |||
| 38 |
2/2✓ Branch 0 taken 7952 times.
✓ Branch 1 taken 704 times.
|
8656 | for (auto it = in_begin; it != in_end; it++) { |
| 39 | 7952 | auto raw_val = static_cast<unsigned int>(*it); | |
| 40 | 7952 | unsigned int byte_val = (raw_val >> shift) & 0xFF; | |
| 41 | |||
| 42 |
2/2✓ Branch 0 taken 1988 times.
✓ Branch 1 taken 5964 times.
|
7952 | if (byte_index == sizeof(int) - 1) { |
| 43 | 1988 | byte_val ^= 128; | |
| 44 | } | ||
| 45 | 7952 | count.at(byte_val)++; | |
| 46 | } | ||
| 47 | |||
| 48 | 704 | std::array<size_t, 256> prefix{}; | |
| 49 | prefix[0] = 0; | ||
| 50 |
2/2✓ Branch 0 taken 179520 times.
✓ Branch 1 taken 704 times.
|
180224 | for (int i = 1; i < 256; i++) { |
| 51 | 179520 | prefix.at(i) = prefix.at(i - 1) + count.at(i - 1); | |
| 52 | } | ||
| 53 | |||
| 54 |
2/2✓ Branch 0 taken 7952 times.
✓ Branch 1 taken 704 times.
|
8656 | for (auto it = in_begin; it != in_end; it++) { |
| 55 | 7952 | auto raw_val = static_cast<unsigned int>(*it); | |
| 56 | 7952 | unsigned int byte_val = (raw_val >> shift) & 0xFF; | |
| 57 | |||
| 58 |
2/2✓ Branch 0 taken 1988 times.
✓ Branch 1 taken 5964 times.
|
7952 | if (byte_index == sizeof(int) - 1) { |
| 59 | 1988 | byte_val ^= 128; | |
| 60 | } | ||
| 61 | |||
| 62 | 7952 | *(out_begin + static_cast<int64_t>(prefix.at(byte_val))) = *it; | |
| 63 | 7952 | prefix.at(byte_val)++; | |
| 64 | } | ||
| 65 | 704 | } | |
| 66 | |||
| 67 |
2/2✓ Branch 0 taken 84 times.
✓ Branch 1 taken 176 times.
|
260 | void RadixSortLocal(std::vector<int>::iterator begin, std::vector<int>::iterator end) { |
| 68 | 260 | size_t n = std::distance(begin, end); | |
| 69 |
2/2✓ Branch 0 taken 84 times.
✓ Branch 1 taken 176 times.
|
260 | if (n < 2) { |
| 70 | 84 | return; | |
| 71 | } | ||
| 72 | |||
| 73 | 176 | std::vector<int> temp(n); | |
| 74 | |||
| 75 |
2/2✓ Branch 0 taken 704 times.
✓ Branch 1 taken 176 times.
|
880 | for (size_t i = 0; i < sizeof(int); i++) { |
| 76 | size_t shift = i; | ||
| 77 | |||
| 78 |
2/2✓ Branch 0 taken 352 times.
✓ Branch 1 taken 352 times.
|
704 | if (i % 2 == 0) { |
| 79 | 352 | CountingSortStep(begin, end, temp.begin(), shift); | |
| 80 | } else { | ||
| 81 | 352 | CountingSortStep(temp.begin(), temp.end(), begin, shift); | |
| 82 | } | ||
| 83 | } | ||
| 84 | } | ||
| 85 | |||
| 86 | } // namespace | ||
| 87 | |||
| 88 | 104 | bool BaldinARadixSortSTL::RunImpl() { | |
| 89 | auto &out = GetOutput(); | ||
| 90 | 104 | int n = static_cast<int>(out.size()); | |
| 91 | |||
| 92 | 104 | int num_chunks = ppc::util::GetNumThreads(); | |
| 93 | |||
| 94 | 104 | std::vector<int> offsets(num_chunks + 1); | |
| 95 | 104 | int chunk_size = n / num_chunks; | |
| 96 | 104 | int rem = n % num_chunks; | |
| 97 | int curr = 0; | ||
| 98 |
2/2✓ Branch 0 taken 260 times.
✓ Branch 1 taken 104 times.
|
364 | for (int i = 0; i < num_chunks; i++) { |
| 99 |
2/2✓ Branch 0 taken 174 times.
✓ Branch 1 taken 86 times.
|
260 | offsets[i] = curr; |
| 100 |
2/2✓ Branch 0 taken 174 times.
✓ Branch 1 taken 86 times.
|
434 | curr += chunk_size + (i < rem ? 1 : 0); |
| 101 | } | ||
| 102 |
1/2✓ Branch 1 taken 104 times.
✗ Branch 2 not taken.
|
104 | offsets[num_chunks] = n; |
| 103 | |||
| 104 | 104 | std::vector<std::future<void>> futures; | |
| 105 |
1/2✓ Branch 1 taken 104 times.
✗ Branch 2 not taken.
|
104 | futures.reserve(num_chunks); |
| 106 | |||
| 107 |
2/2✓ Branch 0 taken 260 times.
✓ Branch 1 taken 104 times.
|
364 | for (int tid = 0; tid < num_chunks; ++tid) { |
| 108 |
1/2✓ Branch 1 taken 260 times.
✗ Branch 2 not taken.
|
520 | futures.push_back(std::async(std::launch::async, [tid, &out, &offsets]() { |
| 109 | 260 | auto local_begin = out.begin() + offsets[tid]; | |
| 110 | 260 | auto local_end = out.begin() + offsets[tid + 1]; | |
| 111 | 260 | RadixSortLocal(local_begin, local_end); | |
| 112 | 260 | })); | |
| 113 | } | ||
| 114 | |||
| 115 |
2/2✓ Branch 0 taken 260 times.
✓ Branch 1 taken 104 times.
|
364 | for (auto &f : futures) { |
| 116 |
1/2✓ Branch 1 taken 260 times.
✗ Branch 2 not taken.
|
260 | f.get(); |
| 117 | } | ||
| 118 | |||
| 119 |
2/2✓ Branch 0 taken 130 times.
✓ Branch 1 taken 104 times.
|
234 | for (int step = 1; step < num_chunks; step *= 2) { |
| 120 | 130 | int num_merges = (num_chunks + (2 * step) - 1) / (2 * step); | |
| 121 | |||
| 122 | 130 | std::vector<std::future<void>> merge_futures; | |
| 123 |
1/2✓ Branch 1 taken 130 times.
✗ Branch 2 not taken.
|
130 | merge_futures.reserve(num_merges); |
| 124 | |||
| 125 |
2/2✓ Branch 0 taken 182 times.
✓ Branch 1 taken 130 times.
|
312 | for (int m_idx = 0; m_idx < num_merges; ++m_idx) { |
| 126 |
1/2✓ Branch 1 taken 182 times.
✗ Branch 2 not taken.
|
364 | merge_futures.push_back(std::async(std::launch::async, [m_idx, step, num_chunks, &out, &offsets]() { |
| 127 | 182 | int i = m_idx * (2 * step); | |
| 128 | |||
| 129 |
2/2✓ Branch 0 taken 156 times.
✓ Branch 1 taken 26 times.
|
182 | if (i + step < num_chunks) { |
| 130 | 156 | auto local_begin = out.begin() + offsets[i]; | |
| 131 | 156 | auto local_middle = out.begin() + offsets[i + step]; | |
| 132 | 156 | int end_idx = std::min(i + (2 * step), num_chunks); | |
| 133 | 156 | auto local_end = out.begin() + offsets[end_idx]; | |
| 134 | |||
| 135 | 156 | std::inplace_merge(local_begin, local_middle, local_end); | |
| 136 | } | ||
| 137 | 182 | })); | |
| 138 | } | ||
| 139 | |||
| 140 |
2/2✓ Branch 0 taken 182 times.
✓ Branch 1 taken 130 times.
|
312 | for (auto &f : merge_futures) { |
| 141 |
1/2✓ Branch 1 taken 182 times.
✗ Branch 2 not taken.
|
182 | f.get(); |
| 142 | } | ||
| 143 | 130 | } | |
| 144 | |||
| 145 | 104 | return true; | |
| 146 | 104 | } | |
| 147 | |||
| 148 | 104 | bool BaldinARadixSortSTL::PostProcessingImpl() { | |
| 149 | 104 | return true; | |
| 150 | } | ||
| 151 | |||
| 152 | } // namespace baldin_a_radix_sort | ||
| 153 |