| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "melnik_i_radix_sort_int/tbb/include/ops_tbb.hpp" | ||
| 2 | |||
| 3 | #include <algorithm> | ||
| 4 | #include <array> | ||
| 5 | #include <cstddef> | ||
| 6 | #include <cstdint> | ||
| 7 | #include <utility> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "melnik_i_radix_sort_int/common/include/common.hpp" | ||
| 11 | #include "oneapi/tbb/blocked_range.h" | ||
| 12 | #include "oneapi/tbb/parallel_for.h" | ||
| 13 | #include "util/include/util.hpp" | ||
| 14 | |||
| 15 | namespace melnik_i_radix_sort_int { | ||
| 16 | |||
| 17 | namespace { | ||
| 18 | |||
| 19 | constexpr int kBitsPerPass = 8; | ||
| 20 | constexpr std::size_t kBuckets = 1U << kBitsPerPass; | ||
| 21 | |||
| 22 | 28 | std::vector<MelnikIRadixSortIntTBB::Range> BuildInitialRanges(std::size_t data_size, int num_ranges) { | |
| 23 | 28 | std::vector<MelnikIRadixSortIntTBB::Range> ranges; | |
| 24 |
1/2✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
|
28 | ranges.reserve(static_cast<std::size_t>(num_ranges)); |
| 25 | |||
| 26 |
2/2✓ Branch 0 taken 68 times.
✓ Branch 1 taken 28 times.
|
96 | for (int range_index = 0; range_index < num_ranges; ++range_index) { |
| 27 | 68 | const std::size_t begin = | |
| 28 | 68 | (data_size * static_cast<std::size_t>(range_index)) / static_cast<std::size_t>(num_ranges); | |
| 29 | 68 | const std::size_t end = | |
| 30 | 68 | (data_size * (static_cast<std::size_t>(range_index) + 1U)) / static_cast<std::size_t>(num_ranges); | |
| 31 |
1/4✓ Branch 1 taken 68 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
68 | ranges.push_back(MelnikIRadixSortIntTBB::Range{.begin = begin, .end = end}); |
| 32 | } | ||
| 33 | |||
| 34 | 28 | return ranges; | |
| 35 | } | ||
| 36 | |||
| 37 | } // namespace | ||
| 38 | |||
| 39 |
1/2✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
|
28 | MelnikIRadixSortIntTBB::MelnikIRadixSortIntTBB(const InType &in) { |
| 40 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 41 |
1/2✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
|
28 | GetInput() = in; |
| 42 | GetOutput() = {}; | ||
| 43 | 28 | } | |
| 44 | |||
| 45 | 28 | bool MelnikIRadixSortIntTBB::ValidationImpl() { | |
| 46 | 28 | return !GetInput().empty(); | |
| 47 | } | ||
| 48 | |||
| 49 | 28 | bool MelnikIRadixSortIntTBB::PreProcessingImpl() { | |
| 50 | 28 | GetOutput() = GetInput(); | |
| 51 | 28 | return !GetOutput().empty(); | |
| 52 | } | ||
| 53 | |||
| 54 |
1/2✓ Branch 0 taken 28 times.
✗ Branch 1 not taken.
|
28 | bool MelnikIRadixSortIntTBB::RunImpl() { |
| 55 |
1/2✓ Branch 0 taken 28 times.
✗ Branch 1 not taken.
|
28 | if (GetOutput().empty()) { |
| 56 | return false; | ||
| 57 | } | ||
| 58 | |||
| 59 | const std::size_t data_size = GetOutput().size(); | ||
| 60 | 28 | const int requested_threads = std::max(1, ppc::util::GetNumThreads()); | |
| 61 | 28 | const int num_ranges = std::min(requested_threads, static_cast<int>(data_size)); | |
| 62 | |||
| 63 |
1/2✓ Branch 2 taken 28 times.
✗ Branch 3 not taken.
|
28 | std::vector<int> buffer(data_size); |
| 64 | auto &output = GetOutput(); | ||
| 65 | |||
| 66 |
1/2✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
|
28 | const std::vector<Range> ranges = BuildInitialRanges(data_size, num_ranges); |
| 67 | |||
| 68 | 28 | tbb::parallel_for(tbb::blocked_range<std::size_t>(0, ranges.size()), | |
| 69 |
1/4✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
96 | [&](const tbb::blocked_range<std::size_t> &range_block) { |
| 70 |
2/2✓ Branch 0 taken 68 times.
✓ Branch 1 taken 68 times.
|
136 | for (std::size_t range_index = range_block.begin(); range_index < range_block.end(); ++range_index) { |
| 71 |
1/2✓ Branch 0 taken 68 times.
✗ Branch 1 not taken.
|
68 | const Range range = ranges[range_index]; |
| 72 |
1/2✓ Branch 0 taken 68 times.
✗ Branch 1 not taken.
|
68 | if (range.begin < range.end) { |
| 73 | 68 | RadixSortRange(output, buffer, range.begin, range.end); | |
| 74 | } | ||
| 75 | } | ||
| 76 | 68 | }); | |
| 77 | |||
| 78 |
1/2✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
|
28 | MergeSortedRanges(output, buffer, ranges); |
| 79 |
1/2✓ Branch 0 taken 28 times.
✗ Branch 1 not taken.
|
28 | return !output.empty(); |
| 80 | } | ||
| 81 | |||
| 82 |
1/2✓ Branch 0 taken 28 times.
✗ Branch 1 not taken.
|
28 | bool MelnikIRadixSortIntTBB::PostProcessingImpl() { |
| 83 | 28 | return std::ranges::is_sorted(GetOutput()); | |
| 84 | } | ||
| 85 | |||
| 86 | 41 | void MelnikIRadixSortIntTBB::CountingSortByByte(const std::vector<int> &source, std::vector<int> &destination, | |
| 87 | std::size_t begin, std::size_t end, std::int64_t exp, | ||
| 88 | std::int64_t offset) { | ||
| 89 | 41 | std::array<std::size_t, kBuckets> count{}; | |
| 90 | count.fill(0); | ||
| 91 | |||
| 92 |
2/2✓ Branch 0 taken 117 times.
✓ Branch 1 taken 41 times.
|
158 | for (std::size_t index = begin; index < end; ++index) { |
| 93 | 117 | const std::int64_t shifted_value = static_cast<std::int64_t>(source[index]) + offset; | |
| 94 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 117 times.
|
117 | const auto bucket = static_cast<std::size_t>((shifted_value / exp) % static_cast<std::int64_t>(kBuckets)); |
| 95 | 117 | ++count.at(bucket); | |
| 96 | } | ||
| 97 | |||
| 98 | 41 | std::array<std::size_t, kBuckets> positions{}; | |
| 99 | 41 | positions.at(0) = begin; | |
| 100 |
2/2✓ Branch 0 taken 10455 times.
✓ Branch 1 taken 41 times.
|
10496 | for (std::size_t bucket = 1; bucket < kBuckets; ++bucket) { |
| 101 | 10455 | positions.at(bucket) = positions.at(bucket - 1U) + count.at(bucket - 1U); | |
| 102 | } | ||
| 103 | |||
| 104 |
2/2✓ Branch 0 taken 117 times.
✓ Branch 1 taken 41 times.
|
158 | for (std::size_t index = begin; index < end; ++index) { |
| 105 | 117 | const std::int64_t shifted_value = static_cast<std::int64_t>(source[index]) + offset; | |
| 106 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 117 times.
|
117 | const auto bucket = static_cast<std::size_t>((shifted_value / exp) % static_cast<std::int64_t>(kBuckets)); |
| 107 | 117 | destination[positions.at(bucket)] = source[index]; | |
| 108 | 117 | ++positions.at(bucket); | |
| 109 | } | ||
| 110 | 41 | } | |
| 111 | |||
| 112 | 68 | void MelnikIRadixSortIntTBB::RadixSortRange(std::vector<int> &data, std::vector<int> &buffer, std::size_t begin, | |
| 113 | std::size_t end) { | ||
| 114 |
2/2✓ Branch 0 taken 27 times.
✓ Branch 1 taken 41 times.
|
68 | if (end - begin <= 1) { |
| 115 | 27 | return; | |
| 116 | } | ||
| 117 | |||
| 118 | std::vector<int> *source = &data; | ||
| 119 | std::vector<int> *destination = &buffer; | ||
| 120 | const auto range_begin = data.begin() + static_cast<ptrdiff_t>(begin); | ||
| 121 | const auto range_end = data.begin() + static_cast<ptrdiff_t>(end); | ||
| 122 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 34 times.
|
41 | const auto [min_it, max_it] = std::ranges::minmax_element(range_begin, range_end); |
| 123 | 41 | const auto min_value = static_cast<std::int64_t>(*min_it); | |
| 124 | 41 | const auto max_value = static_cast<std::int64_t>(*max_it); | |
| 125 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 34 times.
|
41 | const std::int64_t offset = (min_value < 0) ? -min_value : 0; |
| 126 | 41 | const std::int64_t max_shifted_value = max_value + offset; | |
| 127 | |||
| 128 |
2/2✓ Branch 0 taken 41 times.
✓ Branch 1 taken 41 times.
|
82 | for (std::int64_t exp = 1; max_shifted_value / exp > 0; exp <<= kBitsPerPass) { |
| 129 | 41 | CountingSortByByte(*source, *destination, begin, end, exp, offset); | |
| 130 | std::swap(source, destination); | ||
| 131 | } | ||
| 132 | |||
| 133 |
1/2✓ Branch 0 taken 41 times.
✗ Branch 1 not taken.
|
41 | if (source != &data) { |
| 134 | 41 | std::copy(source->begin() + static_cast<ptrdiff_t>(begin), source->begin() + static_cast<ptrdiff_t>(end), | |
| 135 | data.begin() + static_cast<ptrdiff_t>(begin)); | ||
| 136 | } | ||
| 137 | } | ||
| 138 | |||
| 139 | 40 | void MelnikIRadixSortIntTBB::MergeRanges(const std::vector<int> &source, std::vector<int> &destination, Range left, | |
| 140 | Range right, std::size_t write_begin) { | ||
| 141 | 40 | std::size_t left_index = left.begin; | |
| 142 | 40 | std::size_t right_index = right.begin; | |
| 143 | std::size_t write_index = write_begin; | ||
| 144 | |||
| 145 |
4/4✓ Branch 0 taken 110 times.
✓ Branch 1 taken 23 times.
✓ Branch 2 taken 93 times.
✓ Branch 3 taken 17 times.
|
133 | while (left_index < left.end && right_index < right.end) { |
| 146 |
2/2✓ Branch 0 taken 55 times.
✓ Branch 1 taken 38 times.
|
93 | if (source[left_index] <= source[right_index]) { |
| 147 | 55 | destination[write_index] = source[left_index]; | |
| 148 | 55 | ++left_index; | |
| 149 | } else { | ||
| 150 | 38 | destination[write_index] = source[right_index]; | |
| 151 | 38 | ++right_index; | |
| 152 | } | ||
| 153 | 93 | ++write_index; | |
| 154 | } | ||
| 155 | |||
| 156 |
2/2✓ Branch 0 taken 17 times.
✓ Branch 1 taken 23 times.
|
40 | if (left_index < left.end) { |
| 157 | 17 | std::copy(source.begin() + static_cast<ptrdiff_t>(left_index), source.begin() + static_cast<ptrdiff_t>(left.end), | |
| 158 | destination.begin() + static_cast<ptrdiff_t>(write_index)); | ||
| 159 | 17 | return; | |
| 160 | } | ||
| 161 | |||
| 162 | 23 | std::copy(source.begin() + static_cast<ptrdiff_t>(right_index), source.begin() + static_cast<ptrdiff_t>(right.end), | |
| 163 | destination.begin() + static_cast<ptrdiff_t>(write_index)); | ||
| 164 | } | ||
| 165 | |||
| 166 | 28 | void MelnikIRadixSortIntTBB::MergeSortedRanges(std::vector<int> &data, std::vector<int> &buffer, | |
| 167 | const std::vector<Range> &ranges) { | ||
| 168 | 28 | std::vector<int> *source = &data; | |
| 169 | 28 | std::vector<int> *destination = &buffer; | |
| 170 | 28 | std::vector<Range> current_ranges = ranges; | |
| 171 | |||
| 172 |
2/2✓ Branch 0 taken 35 times.
✓ Branch 1 taken 28 times.
|
63 | while (current_ranges.size() > 1U) { |
| 173 | 35 | const std::size_t merged_count = (current_ranges.size() + 1U) / 2U; | |
| 174 |
1/4✓ Branch 1 taken 35 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
35 | std::vector<Range> next_ranges(merged_count); |
| 175 | 35 | tbb::parallel_for(tbb::blocked_range<std::size_t>(0, merged_count), | |
| 176 |
1/4✓ Branch 1 taken 35 times.
✗ Branch 2 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
|
84 | [&](const tbb::blocked_range<std::size_t> &pair_block) { |
| 177 |
2/2✓ Branch 0 taken 49 times.
✓ Branch 1 taken 49 times.
|
98 | for (std::size_t pair_index = pair_block.begin(); pair_index < pair_block.end(); ++pair_index) { |
| 178 | 49 | const std::size_t left_pos = pair_index * 2U; | |
| 179 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 40 times.
|
49 | const Range left = current_ranges[left_pos]; |
| 180 | |||
| 181 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 40 times.
|
49 | if (left_pos + 1U >= current_ranges.size()) { |
| 182 | 9 | std::copy(source->begin() + static_cast<ptrdiff_t>(left.begin), | |
| 183 | 9 | source->begin() + static_cast<ptrdiff_t>(left.end), | |
| 184 | 9 | destination->begin() + static_cast<ptrdiff_t>(left.begin)); | |
| 185 | 9 | next_ranges[pair_index] = left; | |
| 186 | 9 | continue; | |
| 187 | } | ||
| 188 | |||
| 189 | 40 | const Range right = current_ranges[left_pos + 1U]; | |
| 190 | 40 | MergeRanges(*source, *destination, left, right, left.begin); | |
| 191 | 40 | next_ranges[pair_index] = Range{.begin = left.begin, .end = right.end}; | |
| 192 | } | ||
| 193 | 49 | }); | |
| 194 | |||
| 195 | current_ranges = std::move(next_ranges); | ||
| 196 | std::swap(source, destination); | ||
| 197 | } | ||
| 198 | |||
| 199 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 21 times.
|
28 | if (source != &data) { |
| 200 | data.swap(*source); | ||
| 201 | } | ||
| 202 | 28 | } | |
| 203 | |||
| 204 | } // namespace melnik_i_radix_sort_int | ||
| 205 |