| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "levonychev_i_radix_batcher_sort/omp/include/ops_omp.hpp" | ||
| 2 | |||
| 3 | #include <omp.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cstddef> | ||
| 7 | #include <ranges> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "levonychev_i_radix_batcher_sort/common/include/common.hpp" | ||
| 11 | #include "util/include/util.hpp" | ||
| 12 | |||
| 13 | namespace levonychev_i_radix_batcher_sort { | ||
| 14 | |||
| 15 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | LevonychevIRadixBatcherSortOMP::LevonychevIRadixBatcherSortOMP(const InType &in) { |
| 16 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 17 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | GetInput() = in; |
| 18 | 16 | } | |
| 19 | |||
| 20 | 120 | void LevonychevIRadixBatcherSortOMP::CountingSort(InType &arr, size_t byte_index) { | |
| 21 | const size_t byte = 256; | ||
| 22 |
1/2✓ Branch 2 taken 120 times.
✗ Branch 3 not taken.
|
120 | std::vector<int> count(byte, 0); |
| 23 |
1/4✓ Branch 1 taken 120 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
120 | OutType result(arr.size()); |
| 24 | |||
| 25 | bool is_last_byte = (byte_index == (sizeof(int) - 1ULL)); | ||
| 26 |
2/2✓ Branch 0 taken 384 times.
✓ Branch 1 taken 120 times.
|
504 | for (auto number : arr) { |
| 27 | 384 | int value_of_byte = (number >> (byte_index * 8ULL)) & 0xFF; | |
| 28 | |||
| 29 |
2/2✓ Branch 0 taken 96 times.
✓ Branch 1 taken 288 times.
|
384 | if (is_last_byte) { |
| 30 | 96 | value_of_byte ^= 0x80; | |
| 31 | } | ||
| 32 | |||
| 33 | 384 | ++count[value_of_byte]; | |
| 34 | } | ||
| 35 | |||
| 36 |
2/2✓ Branch 0 taken 30600 times.
✓ Branch 1 taken 120 times.
|
30720 | for (size_t i = 1ULL; i < byte; ++i) { |
| 37 | 30600 | count[i] += count[i - 1]; | |
| 38 | } | ||
| 39 | |||
| 40 |
2/2✓ Branch 0 taken 384 times.
✓ Branch 1 taken 120 times.
|
504 | for (int &val : std::ranges::reverse_view(arr)) { |
| 41 | 384 | int value_of_byte = (val >> (byte_index * 8ULL)) & 0xFF; | |
| 42 | |||
| 43 |
2/2✓ Branch 0 taken 96 times.
✓ Branch 1 taken 288 times.
|
384 | if (is_last_byte) { |
| 44 | 96 | value_of_byte ^= 0x80; | |
| 45 | } | ||
| 46 | |||
| 47 | 384 | result[--count[value_of_byte]] = val; | |
| 48 | } | ||
| 49 |
1/2✓ Branch 1 taken 120 times.
✗ Branch 2 not taken.
|
120 | arr = result; |
| 50 | 120 | } | |
| 51 | |||
| 52 | 16 | bool LevonychevIRadixBatcherSortOMP::ValidationImpl() { | |
| 53 | 16 | return !GetInput().empty(); | |
| 54 | } | ||
| 55 | |||
| 56 | 16 | bool LevonychevIRadixBatcherSortOMP::PreProcessingImpl() { | |
| 57 | 16 | return true; | |
| 58 | } | ||
| 59 | |||
| 60 | 75 | void LevonychevIRadixBatcherSortOMP::BatcherCompareRange(std::vector<int> &arr, int j, int k, int p2) { | |
| 61 | 75 | int range = std::min(k, static_cast<int>(arr.size()) - j - k); | |
| 62 |
2/2✓ Branch 0 taken 123 times.
✓ Branch 1 taken 75 times.
|
198 | for (int i = 0; i < range; ++i) { |
| 63 | 123 | int idx1 = j + i; | |
| 64 | 123 | int idx2 = j + i + k; | |
| 65 |
4/4✓ Branch 0 taken 117 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 71 times.
✓ Branch 3 taken 46 times.
|
123 | if ((idx1 & p2) == (idx2 & p2) && (arr[idx1] > arr[idx2])) { |
| 66 | std::swap(arr[idx1], arr[idx2]); | ||
| 67 | } | ||
| 68 | } | ||
| 69 | 75 | } | |
| 70 | |||
| 71 | 12 | void LevonychevIRadixBatcherSortOMP::BatcherMergeIterative(std::vector<int> &arr, int start_p, int threads) { | |
| 72 | 12 | int n = static_cast<int>(arr.size()); | |
| 73 |
2/2✓ Branch 0 taken 15 times.
✓ Branch 1 taken 12 times.
|
27 | for (int pv = start_p; pv < n; pv <<= 1) { |
| 74 | 15 | int p2 = pv << 1; | |
| 75 |
2/2✓ Branch 0 taken 39 times.
✓ Branch 1 taken 15 times.
|
54 | for (int k = pv; k > 0; k >>= 1) { |
| 76 | 39 | #pragma omp parallel for schedule(static) default(none) shared(n, pv, p2, k, arr) num_threads(threads) | |
| 77 | for (int j = k % pv; j < n - k; j += 2 * k) { | ||
| 78 | BatcherCompareRange(arr, j, k, p2); | ||
| 79 | } | ||
| 80 | } | ||
| 81 | } | ||
| 82 | 12 | } | |
| 83 | |||
| 84 | 16 | bool LevonychevIRadixBatcherSortOMP::RunImpl() { | |
| 85 | 16 | GetOutput() = GetInput(); | |
| 86 | 16 | int n = static_cast<int>(GetOutput().size()); | |
| 87 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 4 times.
|
16 | if (n <= 1) { |
| 88 | return true; | ||
| 89 | } | ||
| 90 | |||
| 91 | 12 | int num_threads = ppc::util::GetNumThreads(); | |
| 92 | 12 | int block_size = n / num_threads; | |
| 93 | |||
| 94 | 12 | #pragma omp parallel default(none) shared(num_threads, block_size, n, std::ranges::copy) num_threads(num_threads) | |
| 95 | { | ||
| 96 | int tid = omp_get_thread_num(); | ||
| 97 | int left = tid * block_size; | ||
| 98 | int right = (tid == num_threads - 1) ? n : (tid + 1) * block_size; | ||
| 99 | |||
| 100 | if (left < right) { | ||
| 101 | std::vector<int> local_block(GetOutput().begin() + left, GetOutput().begin() + right); | ||
| 102 | for (size_t i = 0; i < sizeof(int); ++i) { | ||
| 103 | CountingSort(local_block, i); | ||
| 104 | } | ||
| 105 | std::ranges::copy(local_block, GetOutput().begin() + left); | ||
| 106 | } | ||
| 107 | } | ||
| 108 | |||
| 109 | int start_p = 1; | ||
| 110 |
2/2✓ Branch 0 taken 21 times.
✓ Branch 1 taken 12 times.
|
33 | while (start_p < block_size) { |
| 111 | 21 | start_p <<= 1; | |
| 112 | } | ||
| 113 | 12 | BatcherMergeIterative(GetOutput(), start_p, num_threads); | |
| 114 | |||
| 115 | 12 | return true; | |
| 116 | } | ||
| 117 | |||
| 118 | 16 | bool LevonychevIRadixBatcherSortOMP::PostProcessingImpl() { | |
| 119 | 16 | return true; | |
| 120 | } | ||
| 121 | |||
| 122 | } // namespace levonychev_i_radix_batcher_sort | ||
| 123 |