| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "mityaeva_radix/tbb/include/sorter_tbb.hpp" | ||
| 2 | |||
| 3 | #include <algorithm> | ||
| 4 | #include <cstddef> | ||
| 5 | #include <cstdint> | ||
| 6 | #include <cstring> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "oneapi/tbb/parallel_for.h" | ||
| 10 | #include "oneapi/tbb/partitioner.h" | ||
| 11 | #include "util/include/util.hpp" | ||
| 12 | |||
| 13 | namespace mityaeva_radix { | ||
| 14 | |||
| 15 | ✗ | uint64_t SorterTbb::DoubleToSortable(uint64_t x) { | |
| 16 |
4/6✓ Branch 0 taken 384 times.
✓ Branch 1 taken 444 times.
✓ Branch 2 taken 704 times.
✓ Branch 3 taken 696 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
|
2228 | if ((x & 0x8000000000000000ULL) != 0U) { |
| 17 | 1088 | return ~x; | |
| 18 | } | ||
| 19 | 1140 | return x | 0x8000000000000000ULL; | |
| 20 | } | ||
| 21 | |||
| 22 | ✗ | uint64_t SorterTbb::SortableToDouble(uint64_t x) { | |
| 23 | ✗ | if ((x & 0x8000000000000000ULL) != 0U) { | |
| 24 | 1140 | return x & 0x7FFFFFFFFFFFFFFFULL; | |
| 25 | } | ||
| 26 | 1088 | return ~x; | |
| 27 | } | ||
| 28 | |||
| 29 | 352 | void SorterTbb::CountingPass(std::vector<uint64_t> *current, std::vector<uint64_t> *next, int shift, int radix, | |
| 30 | int num_threads, size_t data_size) { | ||
| 31 |
1/2✓ Branch 2 taken 352 times.
✗ Branch 3 not taken.
|
352 | std::vector<std::vector<int>> thread_counters(num_threads, std::vector<int>(radix, 0)); |
| 32 | |||
| 33 |
1/2✓ Branch 1 taken 352 times.
✗ Branch 2 not taken.
|
352 | tbb::parallel_for(0, num_threads, [&](int thread_id) { |
| 34 | 880 | size_t chunk_size = data_size / static_cast<size_t>(num_threads); | |
| 35 | 880 | size_t start = static_cast<size_t>(thread_id) * chunk_size; | |
| 36 |
2/2✓ Branch 0 taken 528 times.
✓ Branch 1 taken 352 times.
|
880 | size_t end = (thread_id == num_threads - 1) ? data_size : start + chunk_size; |
| 37 | |||
| 38 | 880 | auto &local_counters = thread_counters[thread_id]; | |
| 39 | |||
| 40 |
2/2✓ Branch 0 taken 17824 times.
✓ Branch 1 taken 880 times.
|
18704 | for (size_t i = start; i < end; i++) { |
| 41 | 17824 | int digit = static_cast<int>(((*current)[i] >> static_cast<size_t>(shift)) & static_cast<size_t>(radix - 1)); | |
| 42 | 17824 | local_counters[digit]++; | |
| 43 | } | ||
| 44 | 1232 | }, tbb::static_partitioner()); | |
| 45 | |||
| 46 |
1/2✓ Branch 1 taken 352 times.
✗ Branch 2 not taken.
|
352 | std::vector<int> prefix_sums(static_cast<size_t>(radix * num_threads), 0); |
| 47 | |||
| 48 | int total = 0; | ||
| 49 |
2/2✓ Branch 0 taken 90112 times.
✓ Branch 1 taken 352 times.
|
90464 | for (int digit = 0; digit < radix; digit++) { |
| 50 | int digit_sum = 0; | ||
| 51 |
2/2✓ Branch 0 taken 225280 times.
✓ Branch 1 taken 90112 times.
|
315392 | for (int thread_idx = 0; thread_idx < num_threads; thread_idx++) { |
| 52 | 225280 | prefix_sums[(thread_idx * radix) + digit] = total + digit_sum; | |
| 53 | 225280 | digit_sum += thread_counters[thread_idx][digit]; | |
| 54 | } | ||
| 55 | 90112 | total += digit_sum; | |
| 56 | } | ||
| 57 | |||
| 58 |
1/2✓ Branch 1 taken 352 times.
✗ Branch 2 not taken.
|
352 | tbb::parallel_for(0, num_threads, [&](int thread_id) { |
| 59 | 880 | size_t chunk_size = data_size / static_cast<size_t>(num_threads); | |
| 60 | 880 | size_t start = static_cast<size_t>(thread_id) * chunk_size; | |
| 61 |
2/2✓ Branch 0 taken 528 times.
✓ Branch 1 taken 352 times.
|
880 | size_t end = (thread_id == num_threads - 1) ? data_size : start + chunk_size; |
| 62 | |||
| 63 | 880 | std::vector<int> local_pos(radix, 0); | |
| 64 |
2/2✓ Branch 0 taken 225280 times.
✓ Branch 1 taken 880 times.
|
226160 | for (int digit = 0; digit < radix; digit++) { |
| 65 | 225280 | local_pos[digit] = prefix_sums[(thread_id * radix) + digit]; | |
| 66 | } | ||
| 67 | |||
| 68 |
2/2✓ Branch 0 taken 17824 times.
✓ Branch 1 taken 880 times.
|
18704 | for (size_t i = start; i < end; i++) { |
| 69 | 17824 | int digit = static_cast<int>(((*current)[i] >> static_cast<size_t>(shift)) & static_cast<size_t>(radix - 1)); | |
| 70 | 17824 | auto pos = static_cast<size_t>(local_pos[digit]++); | |
| 71 | 17824 | (*next)[pos] = (*current)[i]; | |
| 72 | } | ||
| 73 |
1/4✓ Branch 0 taken 352 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
352 | }, tbb::static_partitioner()); |
| 74 | 352 | } | |
| 75 | |||
| 76 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 44 times.
|
48 | void SorterTbb::Sort(std::vector<double> &data) { |
| 77 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 44 times.
|
48 | if (data.size() <= 1) { |
| 78 | 4 | return; | |
| 79 | } | ||
| 80 | |||
| 81 | 44 | int num_threads = ppc::util::GetNumThreads(); | |
| 82 |
1/2✓ Branch 2 taken 44 times.
✗ Branch 3 not taken.
|
44 | std::vector<double> temp(data.size()); |
| 83 |
1/4✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
44 | std::vector<uint64_t> as_uint(data.size()); |
| 84 | |||
| 85 | 44 | tbb::parallel_for(tbb::blocked_range<std::size_t>(0, data.size()), | |
| 86 |
2/4✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 44 times.
✗ Branch 5 not taken.
|
44 | [&data, &as_uint](const tbb::blocked_range<std::size_t> &range) { |
| 87 |
4/4✓ Branch 0 taken 828 times.
✓ Branch 1 taken 828 times.
✓ Branch 2 taken 1400 times.
✓ Branch 3 taken 1400 times.
|
4456 | for (auto i = range.begin(); i < range.end(); i++) { |
| 88 | uint64_t bits = 0; | ||
| 89 |
4/4✓ Branch 0 taken 384 times.
✓ Branch 1 taken 444 times.
✓ Branch 2 taken 704 times.
✓ Branch 3 taken 696 times.
|
2228 | std::memcpy(&bits, &data[i], sizeof(double)); |
| 90 | 2228 | as_uint[i] = DoubleToSortable(bits); | |
| 91 | } | ||
| 92 | }); | ||
| 93 | const int bits_per_pass = 8; | ||
| 94 | const int radix = 1 << bits_per_pass; | ||
| 95 | const int passes = static_cast<int>(sizeof(uint64_t) * 8 / bits_per_pass); | ||
| 96 |
1/4✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
44 | std::vector<uint64_t> uint_temp(data.size()); |
| 97 | 44 | std::vector<uint64_t> *current = &as_uint; | |
| 98 | std::vector<uint64_t> *next = &uint_temp; | ||
| 99 | |||
| 100 |
2/2✓ Branch 0 taken 352 times.
✓ Branch 1 taken 44 times.
|
396 | for (int pass = 0; pass < passes; pass++) { |
| 101 |
1/2✓ Branch 1 taken 352 times.
✗ Branch 2 not taken.
|
352 | int shift = pass * bits_per_pass; |
| 102 |
1/2✓ Branch 1 taken 352 times.
✗ Branch 2 not taken.
|
352 | CountingPass(current, next, shift, radix, num_threads, data.size()); |
| 103 | std::swap(current, next); | ||
| 104 | } | ||
| 105 | |||
| 106 | 44 | tbb::parallel_for(tbb::blocked_range<size_t>(0, data.size()), | |
| 107 |
2/6✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 44 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
|
2272 | [&data, ¤t, &as_uint, &uint_temp](const tbb::blocked_range<std::size_t> &range) { |
| 108 |
2/2✓ Branch 0 taken 2228 times.
✓ Branch 1 taken 2228 times.
|
4456 | for (auto i = range.begin(); i < range.end(); i++) { |
| 109 | uint64_t bits = 0; | ||
| 110 |
1/2✓ Branch 0 taken 2228 times.
✗ Branch 1 not taken.
|
2228 | if (current == &as_uint) { |
| 111 |
2/2✓ Branch 0 taken 1140 times.
✓ Branch 1 taken 1088 times.
|
4456 | bits = SortableToDouble(as_uint[i]); |
| 112 | } else { | ||
| 113 | ✗ | bits = SortableToDouble(uint_temp[i]); | |
| 114 | } | ||
| 115 | 2228 | std::memcpy(&data[i], &bits, sizeof(double)); | |
| 116 | } | ||
| 117 | 2228 | }); | |
| 118 | } | ||
| 119 | |||
| 120 | } // namespace mityaeva_radix | ||
| 121 |