| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "mityaeva_radix/omp/include/sorter_omp.hpp" | ||
| 2 | |||
| 3 | #include <omp.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cstddef> | ||
| 7 | #include <cstdint> | ||
| 8 | #include <cstring> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "util/include/util.hpp" | ||
| 12 | |||
| 13 | namespace mityaeva_radix { | ||
| 14 | |||
| 15 | ✗ | uint64_t SorterOmp::DoubleToSortable(uint64_t x) { | |
| 16 | ✗ | if ((x & 0x8000000000000000ULL) != 0U) { | |
| 17 | ✗ | return ~x; | |
| 18 | } | ||
| 19 | ✗ | return x | 0x8000000000000000ULL; | |
| 20 | } | ||
| 21 | |||
| 22 | ✗ | uint64_t SorterOmp::SortableToDouble(uint64_t x) { | |
| 23 | ✗ | if ((x & 0x8000000000000000ULL) != 0U) { | |
| 24 | ✗ | return x & 0x7FFFFFFFFFFFFFFFULL; | |
| 25 | } | ||
| 26 | ✗ | return ~x; | |
| 27 | } | ||
| 28 | |||
| 29 | 352 | void SorterOmp::CountingPass(std::vector<uint64_t> *current, std::vector<uint64_t> *next, int shift, int radix, | |
| 30 | int num_threads, size_t data_size) { | ||
| 31 |
1/2✓ Branch 2 taken 352 times.
✗ Branch 3 not taken.
|
352 | std::vector<std::vector<int>> thread_counters(num_threads, std::vector<int>(radix, 0)); |
| 32 | |||
| 33 | 352 | #pragma omp parallel default(none) shared(current, shift, radix, thread_counters, data_size, num_threads) | |
| 34 | { | ||
| 35 | int thread_id = omp_get_thread_num(); | ||
| 36 | size_t chunk_size = data_size / static_cast<size_t>(num_threads); | ||
| 37 | size_t start = static_cast<size_t>(thread_id) * chunk_size; | ||
| 38 | size_t end = (thread_id == num_threads - 1) ? data_size : start + chunk_size; | ||
| 39 | |||
| 40 | auto &local_counters = thread_counters[thread_id]; | ||
| 41 | |||
| 42 | for (size_t i = start; i < end; i++) { | ||
| 43 | int digit = static_cast<int>(((*current)[i] >> static_cast<size_t>(shift)) & static_cast<size_t>(radix - 1)); | ||
| 44 | local_counters[digit]++; | ||
| 45 | } | ||
| 46 | } | ||
| 47 | |||
| 48 |
1/2✓ Branch 1 taken 352 times.
✗ Branch 2 not taken.
|
352 | std::vector<int> prefix_sums(static_cast<size_t>(radix * num_threads), 0); |
| 49 | |||
| 50 | int total = 0; | ||
| 51 |
2/2✓ Branch 0 taken 90112 times.
✓ Branch 1 taken 352 times.
|
90464 | for (int digit = 0; digit < radix; digit++) { |
| 52 | int digit_sum = 0; | ||
| 53 |
2/2✓ Branch 0 taken 225280 times.
✓ Branch 1 taken 90112 times.
|
315392 | for (int thread_idx = 0; thread_idx < num_threads; thread_idx++) { |
| 54 | 225280 | prefix_sums[(thread_idx * radix) + digit] = total + digit_sum; | |
| 55 | 225280 | digit_sum += thread_counters[thread_idx][digit]; | |
| 56 | } | ||
| 57 | 90112 | total += digit_sum; | |
| 58 | } | ||
| 59 | |||
| 60 |
1/2✓ Branch 0 taken 352 times.
✗ Branch 1 not taken.
|
352 | #pragma omp parallel default(none) shared(current, next, shift, radix, prefix_sums, data_size, num_threads) |
| 61 | { | ||
| 62 | int thread_id = omp_get_thread_num(); | ||
| 63 | size_t chunk_size = data_size / static_cast<size_t>(num_threads); | ||
| 64 | size_t start = static_cast<size_t>(thread_id) * chunk_size; | ||
| 65 | size_t end = (thread_id == num_threads - 1) ? data_size : start + chunk_size; | ||
| 66 | |||
| 67 | std::vector<int> local_pos(radix, 0); | ||
| 68 | for (int digit = 0; digit < radix; digit++) { | ||
| 69 | local_pos[digit] = prefix_sums[(thread_id * radix) + digit]; | ||
| 70 | } | ||
| 71 | |||
| 72 | for (size_t i = start; i < end; i++) { | ||
| 73 | int digit = static_cast<int>(((*current)[i] >> static_cast<size_t>(shift)) & static_cast<size_t>(radix - 1)); | ||
| 74 | auto pos = static_cast<size_t>(local_pos[digit]++); | ||
| 75 | (*next)[pos] = (*current)[i]; | ||
| 76 | } | ||
| 77 | } | ||
| 78 | 352 | } | |
| 79 | |||
| 80 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 44 times.
|
48 | void SorterOmp::Sort(std::vector<double> &data) { |
| 81 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 44 times.
|
48 | if (data.size() <= 1) { |
| 82 | 4 | return; | |
| 83 | } | ||
| 84 | |||
| 85 | 44 | int num_threads = ppc::util::GetNumThreads(); | |
| 86 | 44 | omp_set_num_threads(num_threads); | |
| 87 | |||
| 88 |
1/2✓ Branch 2 taken 44 times.
✗ Branch 3 not taken.
|
44 | std::vector<double> temp(data.size()); |
| 89 |
1/4✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
44 | std::vector<uint64_t> as_uint(data.size()); |
| 90 | |||
| 91 |
1/2✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
|
44 | #pragma omp parallel for default(none) shared(data, as_uint, num_threads) |
| 92 | for (size_t i = 0; i < data.size(); i++) { | ||
| 93 | uint64_t bits = 0; | ||
| 94 | std::memcpy(&bits, &data[i], sizeof(double)); | ||
| 95 | as_uint[i] = DoubleToSortable(bits); | ||
| 96 | } | ||
| 97 | |||
| 98 | const int bits_per_pass = 8; | ||
| 99 | const int radix = 1 << bits_per_pass; | ||
| 100 | const int passes = static_cast<int>(sizeof(uint64_t) * 8 / bits_per_pass); | ||
| 101 | |||
| 102 |
1/4✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
44 | std::vector<uint64_t> uint_temp(data.size()); |
| 103 | 44 | std::vector<uint64_t> *current = &as_uint; | |
| 104 | std::vector<uint64_t> *next = &uint_temp; | ||
| 105 | |||
| 106 |
2/2✓ Branch 0 taken 352 times.
✓ Branch 1 taken 44 times.
|
396 | for (int pass = 0; pass < passes; pass++) { |
| 107 |
1/2✓ Branch 1 taken 352 times.
✗ Branch 2 not taken.
|
352 | int shift = pass * bits_per_pass; |
| 108 | |||
| 109 |
1/2✓ Branch 1 taken 352 times.
✗ Branch 2 not taken.
|
352 | CountingPass(current, next, shift, radix, num_threads, data.size()); |
| 110 | |||
| 111 | std::swap(current, next); | ||
| 112 | } | ||
| 113 | |||
| 114 |
1/2✓ Branch 0 taken 44 times.
✗ Branch 1 not taken.
|
44 | #pragma omp parallel for default(none) shared(data, current, as_uint, uint_temp) |
| 115 | for (size_t i = 0; i < data.size(); i++) { | ||
| 116 | uint64_t bits = 0; | ||
| 117 | if (current == &as_uint) { | ||
| 118 | bits = SortableToDouble(as_uint[i]); | ||
| 119 | } else { | ||
| 120 | bits = SortableToDouble(uint_temp[i]); | ||
| 121 | } | ||
| 122 | std::memcpy(&data[i], &bits, sizeof(double)); | ||
| 123 | } | ||
| 124 | } | ||
| 125 | |||
| 126 | } // namespace mityaeva_radix | ||
| 127 |