| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "akimov_i_radixsort_int_merge/omp/include/ops_omp.hpp" | ||
| 2 | |||
| 3 | #include <omp.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <cstdint> | ||
| 9 | #include <iterator> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "akimov_i_radixsort_int_merge/common/include/common.hpp" | ||
| 13 | #include "util/include/util.hpp" | ||
| 14 | |||
| 15 | namespace akimov_i_radixsort_int_merge { | ||
| 16 | |||
| 17 | namespace { | ||
| 18 | 72 | void CountingSortStep(std::vector<int>::iterator in_begin, std::vector<int>::iterator in_end, | |
| 19 | std::vector<int>::iterator out_begin, size_t byte_index) { | ||
| 20 | 72 | size_t shift = byte_index * 8; | |
| 21 | 72 | std::array<size_t, 256> count = {0}; | |
| 22 | |||
| 23 |
2/2✓ Branch 0 taken 17760 times.
✓ Branch 1 taken 72 times.
|
17832 | for (auto it = in_begin; it != in_end; ++it) { |
| 24 | 17760 | auto raw_val = static_cast<unsigned int>(*it); | |
| 25 | 17760 | unsigned int byte_val = (raw_val >> shift) & 0xFF; | |
| 26 | |||
| 27 |
2/2✓ Branch 0 taken 4440 times.
✓ Branch 1 taken 13320 times.
|
17760 | if (byte_index == sizeof(int) - 1) { |
| 28 | 4440 | byte_val ^= 128; | |
| 29 | } | ||
| 30 | 17760 | count.at(byte_val)++; | |
| 31 | } | ||
| 32 | |||
| 33 | 72 | std::array<size_t, 256> prefix{}; | |
| 34 | prefix[0] = 0; | ||
| 35 |
2/2✓ Branch 0 taken 18360 times.
✓ Branch 1 taken 72 times.
|
18432 | for (int i = 1; i < 256; ++i) { |
| 36 | 18360 | prefix.at(i) = prefix.at(i - 1) + count.at(i - 1); | |
| 37 | } | ||
| 38 | |||
| 39 |
2/2✓ Branch 0 taken 17760 times.
✓ Branch 1 taken 72 times.
|
17832 | for (auto it = in_begin; it != in_end; ++it) { |
| 40 | 17760 | auto raw_val = static_cast<unsigned int>(*it); | |
| 41 | 17760 | unsigned int byte_val = (raw_val >> shift) & 0xFF; | |
| 42 | |||
| 43 |
2/2✓ Branch 0 taken 4440 times.
✓ Branch 1 taken 13320 times.
|
17760 | if (byte_index == sizeof(int) - 1) { |
| 44 | 4440 | byte_val ^= 128; | |
| 45 | } | ||
| 46 | |||
| 47 | 17760 | *(out_begin + static_cast<int64_t>(prefix.at(byte_val))) = *it; | |
| 48 | 17760 | prefix.at(byte_val)++; | |
| 49 | } | ||
| 50 | 72 | } | |
| 51 | |||
| 52 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 18 times.
|
18 | void RadixSortLocal(std::vector<int>::iterator begin, std::vector<int>::iterator end) { |
| 53 | 18 | size_t n = std::distance(begin, end); | |
| 54 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 18 times.
|
18 | if (n < 2) { |
| 55 | ✗ | return; | |
| 56 | } | ||
| 57 | |||
| 58 | 18 | std::vector<int> temp(n); | |
| 59 | |||
| 60 |
2/2✓ Branch 0 taken 72 times.
✓ Branch 1 taken 18 times.
|
90 | for (size_t i = 0; i < sizeof(int); ++i) { |
| 61 |
2/2✓ Branch 0 taken 36 times.
✓ Branch 1 taken 36 times.
|
72 | if (i % 2 == 0) { |
| 62 | 36 | CountingSortStep(begin, end, temp.begin(), i); | |
| 63 | } else { | ||
| 64 | 36 | CountingSortStep(temp.begin(), temp.end(), begin, i); | |
| 65 | } | ||
| 66 | } | ||
| 67 | } | ||
| 68 | } // namespace | ||
| 69 | |||
| 70 |
1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
|
12 | AkimovIRadixSortIntMergeOMP::AkimovIRadixSortIntMergeOMP(const InType &in) { |
| 71 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 72 |
1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
|
12 | GetInput() = in; |
| 73 | 12 | GetOutput() = OutType(); | |
| 74 | 12 | } | |
| 75 | |||
| 76 | 12 | bool AkimovIRadixSortIntMergeOMP::ValidationImpl() { | |
| 77 | 12 | return !GetInput().empty(); | |
| 78 | } | ||
| 79 | |||
| 80 | 12 | bool AkimovIRadixSortIntMergeOMP::PreProcessingImpl() { | |
| 81 | 12 | GetOutput() = GetInput(); | |
| 82 | 12 | return true; | |
| 83 | } | ||
| 84 | |||
| 85 |
1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
|
12 | bool AkimovIRadixSortIntMergeOMP::RunImpl() { |
| 86 | auto &arr = GetOutput(); | ||
| 87 | 12 | int n = static_cast<int>(arr.size()); | |
| 88 |
1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
|
12 | if (n == 0) { |
| 89 | return true; | ||
| 90 | } | ||
| 91 | |||
| 92 | 12 | int num_threads = ppc::util::GetNumThreads(); | |
| 93 | // Для маленьких массивов используем один поток | ||
| 94 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 5 times.
|
12 | if (n < num_threads * 100) { |
| 95 | 7 | num_threads = 1; | |
| 96 | } | ||
| 97 | |||
| 98 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 3 times.
|
12 | if (num_threads == 1) { |
| 99 | 9 | RadixSortLocal(arr.begin(), arr.end()); | |
| 100 | 9 | return true; | |
| 101 | } | ||
| 102 | |||
| 103 | // Разбиваем массив на блоки | ||
| 104 | 3 | std::vector<int> offsets(num_threads + 1); | |
| 105 | 3 | int chunk = n / num_threads; | |
| 106 | 3 | int rem = n % num_threads; | |
| 107 | int curr = 0; | ||
| 108 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 3 times.
|
12 | for (int i = 0; i < num_threads; ++i) { |
| 109 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 1 times.
|
9 | offsets[i] = curr; |
| 110 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 1 times.
|
17 | curr += chunk + (i < rem ? 1 : 0); |
| 111 | } | ||
| 112 | 3 | offsets[num_threads] = n; | |
| 113 | |||
| 114 | // Параллельная сортировка блоков | ||
| 115 | 3 | #pragma omp parallel num_threads(num_threads) default(none) shared(offsets, arr) | |
| 116 | { | ||
| 117 | int tid = omp_get_thread_num(); | ||
| 118 | auto begin = arr.begin() + offsets[tid]; | ||
| 119 | auto end = arr.begin() + offsets[tid + 1]; | ||
| 120 | RadixSortLocal(begin, end); | ||
| 121 | } | ||
| 122 | |||
| 123 | // Слияние блоков (параллельное на каждом шаге) | ||
| 124 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 3 times.
|
8 | for (int step = 1; step < num_threads; step *= 2) { |
| 125 | 5 | #pragma omp parallel for num_threads(num_threads) default(none) shared(step, num_threads, offsets, arr) | |
| 126 | for (int i = 0; i < num_threads; i += 2 * step) { | ||
| 127 | if (i + step < num_threads) { | ||
| 128 | auto begin = arr.begin() + offsets[i]; | ||
| 129 | auto middle = arr.begin() + offsets[i + step]; | ||
| 130 | int end_idx = std::min(i + (2 * step), num_threads); | ||
| 131 | auto end = arr.begin() + offsets[end_idx]; | ||
| 132 | std::inplace_merge(begin, middle, end); | ||
| 133 | } | ||
| 134 | } | ||
| 135 | } | ||
| 136 | |||
| 137 | return true; | ||
| 138 | } | ||
| 139 | |||
| 140 | 12 | bool AkimovIRadixSortIntMergeOMP::PostProcessingImpl() { | |
| 141 | 12 | return true; | |
| 142 | } | ||
| 143 | |||
| 144 | } // namespace akimov_i_radixsort_int_merge | ||
| 145 |