| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "baldin_a_radix_sort/omp/include/ops_omp.hpp" | ||
| 2 | |||
| 3 | #include <omp.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <cstdint> | ||
| 9 | #include <iterator> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "baldin_a_radix_sort/common/include/common.hpp" | ||
| 13 | #include "util/include/util.hpp" | ||
| 14 | |||
| 15 | namespace baldin_a_radix_sort { | ||
| 16 | |||
| 17 |
1/2✓ Branch 1 taken 52 times.
✗ Branch 2 not taken.
|
52 | BaldinARadixSortOMP::BaldinARadixSortOMP(const InType &in) { |
| 18 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 19 |
1/2✓ Branch 1 taken 52 times.
✗ Branch 2 not taken.
|
52 | GetInput() = in; |
| 20 | GetOutput() = {}; | ||
| 21 | 52 | } | |
| 22 | |||
| 23 | 52 | bool BaldinARadixSortOMP::ValidationImpl() { | |
| 24 | 52 | return true; | |
| 25 | } | ||
| 26 | |||
| 27 | 52 | bool BaldinARadixSortOMP::PreProcessingImpl() { | |
| 28 | 52 | GetOutput() = GetInput(); | |
| 29 | 52 | return true; | |
| 30 | } | ||
| 31 | |||
| 32 | namespace { | ||
| 33 | |||
| 34 | 176 | void CountingSortStep(std::vector<int>::iterator in_begin, std::vector<int>::iterator in_end, | |
| 35 | std::vector<int>::iterator out_begin, size_t byte_index) { | ||
| 36 | 176 | size_t shift = byte_index * 8; | |
| 37 | 176 | std::array<size_t, 256> count = {0}; | |
| 38 | |||
| 39 |
2/2✓ Branch 0 taken 4064 times.
✓ Branch 1 taken 176 times.
|
4240 | for (auto it = in_begin; it != in_end; it++) { |
| 40 | 4064 | auto raw_val = static_cast<unsigned int>(*it); | |
| 41 | 4064 | unsigned int byte_val = (raw_val >> shift) & 0xFF; | |
| 42 | |||
| 43 |
2/2✓ Branch 0 taken 1016 times.
✓ Branch 1 taken 3048 times.
|
4064 | if (byte_index == sizeof(int) - 1) { |
| 44 | 1016 | byte_val ^= 128; | |
| 45 | } | ||
| 46 | 4064 | count.at(byte_val)++; | |
| 47 | } | ||
| 48 | |||
| 49 | 176 | std::array<size_t, 256> prefix{}; | |
| 50 | prefix[0] = 0; | ||
| 51 |
2/2✓ Branch 0 taken 44880 times.
✓ Branch 1 taken 176 times.
|
45056 | for (int i = 1; i < 256; i++) { |
| 52 | 44880 | prefix.at(i) = prefix.at(i - 1) + count.at(i - 1); | |
| 53 | } | ||
| 54 | |||
| 55 |
2/2✓ Branch 0 taken 4064 times.
✓ Branch 1 taken 176 times.
|
4240 | for (auto it = in_begin; it != in_end; it++) { |
| 56 | 4064 | auto raw_val = static_cast<unsigned int>(*it); | |
| 57 | 4064 | unsigned int byte_val = (raw_val >> shift) & 0xFF; | |
| 58 | |||
| 59 |
2/2✓ Branch 0 taken 1016 times.
✓ Branch 1 taken 3048 times.
|
4064 | if (byte_index == sizeof(int) - 1) { |
| 60 | 1016 | byte_val ^= 128; | |
| 61 | } | ||
| 62 | |||
| 63 | 4064 | *(out_begin + static_cast<int64_t>(prefix.at(byte_val))) = *it; | |
| 64 | 4064 | prefix.at(byte_val)++; | |
| 65 | } | ||
| 66 | 176 | } | |
| 67 | |||
| 68 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 44 times.
|
48 | void RadixSortLocal(std::vector<int>::iterator begin, std::vector<int>::iterator end) { |
| 69 | 48 | size_t n = std::distance(begin, end); | |
| 70 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 44 times.
|
48 | if (n < 2) { |
| 71 | 4 | return; | |
| 72 | } | ||
| 73 | |||
| 74 | 44 | std::vector<int> temp(n); | |
| 75 | |||
| 76 |
2/2✓ Branch 0 taken 176 times.
✓ Branch 1 taken 44 times.
|
220 | for (size_t i = 0; i < sizeof(int); i++) { |
| 77 | size_t shift = i; | ||
| 78 | |||
| 79 |
2/2✓ Branch 0 taken 88 times.
✓ Branch 1 taken 88 times.
|
176 | if (i % 2 == 0) { |
| 80 | 88 | CountingSortStep(begin, end, temp.begin(), shift); | |
| 81 | } else { | ||
| 82 | 88 | CountingSortStep(temp.begin(), temp.end(), begin, shift); | |
| 83 | } | ||
| 84 | } | ||
| 85 | } | ||
| 86 | |||
| 87 | } // namespace | ||
| 88 | |||
| 89 |
2/2✓ Branch 0 taken 48 times.
✓ Branch 1 taken 4 times.
|
52 | bool BaldinARadixSortOMP::RunImpl() { |
| 90 | auto &out = GetOutput(); | ||
| 91 | 52 | int n = static_cast<int>(out.size()); | |
| 92 |
2/2✓ Branch 0 taken 48 times.
✓ Branch 1 taken 4 times.
|
52 | if (n == 0) { |
| 93 | return true; | ||
| 94 | } | ||
| 95 | |||
| 96 | 48 | int num_threads = ppc::util::GetNumThreads(); | |
| 97 | |||
| 98 |
2/2✓ Branch 0 taken 46 times.
✓ Branch 1 taken 2 times.
|
48 | if (n < num_threads * 100) { |
| 99 | 46 | num_threads = 1; | |
| 100 | } | ||
| 101 | |||
| 102 |
1/2✓ Branch 0 taken 48 times.
✗ Branch 1 not taken.
|
48 | if (num_threads == 1) { |
| 103 | 48 | RadixSortLocal(out.begin(), out.end()); | |
| 104 | 48 | return true; | |
| 105 | } | ||
| 106 | |||
| 107 | ✗ | std::vector<int> offsets(num_threads + 1); | |
| 108 | ✗ | int chunk = n / num_threads; | |
| 109 | ✗ | int rem = n % num_threads; | |
| 110 | int curr = 0; | ||
| 111 | ✗ | for (int i = 0; i < num_threads; i++) { | |
| 112 | ✗ | offsets[i] = curr; | |
| 113 | ✗ | curr += chunk + (i < rem ? 1 : 0); | |
| 114 | } | ||
| 115 | ✗ | offsets[num_threads] = n; | |
| 116 | |||
| 117 | ✗ | #pragma omp parallel num_threads(num_threads) default(none) shared(num_threads, offsets, out) | |
| 118 | { | ||
| 119 | int tid = omp_get_thread_num(); | ||
| 120 | auto begin = out.begin() + offsets[tid]; | ||
| 121 | auto end = out.begin() + offsets[tid + 1]; | ||
| 122 | |||
| 123 | RadixSortLocal(begin, end); | ||
| 124 | } | ||
| 125 | |||
| 126 | ✗ | for (int step = 1; step < num_threads; step *= 2) { | |
| 127 | ✗ | #pragma omp parallel for num_threads(num_threads) default(none) shared(step, num_threads, offsets, out) | |
| 128 | for (int i = 0; i < num_threads; i += 2 * step) { | ||
| 129 | if (i + step < num_threads) { | ||
| 130 | auto begin = out.begin() + offsets[i]; | ||
| 131 | auto middle = out.begin() + offsets[i + step]; | ||
| 132 | |||
| 133 | int end_idx = std::min(i + (2 * step), num_threads); | ||
| 134 | auto end = out.begin() + offsets[end_idx]; | ||
| 135 | |||
| 136 | std::inplace_merge(begin, middle, end); | ||
| 137 | } | ||
| 138 | } | ||
| 139 | } | ||
| 140 | |||
| 141 | return true; | ||
| 142 | } | ||
| 143 | |||
| 144 | 52 | bool BaldinARadixSortOMP::PostProcessingImpl() { | |
| 145 | 52 | return true; | |
| 146 | } | ||
| 147 | |||
| 148 | } // namespace baldin_a_radix_sort | ||
| 149 |