| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "frolova_s_radix_sort_double/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <omp.h> | ||
| 4 | #include <tbb/parallel_for.h> | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <bit> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <cstdint> | ||
| 10 | #include <utility> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include "frolova_s_radix_sort_double/common/include/common.hpp" | ||
| 14 | |||
| 15 | namespace frolova_s_radix_sort_double { | ||
| 16 | |||
| 17 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | FrolovaSRadixSortDoubleALL::FrolovaSRadixSortDoubleALL(const InType &in) { |
| 18 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 19 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | GetInput() = in; |
| 20 | 20 | } | |
| 21 | |||
| 22 | 20 | bool FrolovaSRadixSortDoubleALL::ValidationImpl() { | |
| 23 | 20 | return !GetInput().empty(); | |
| 24 | } | ||
| 25 | |||
| 26 | 20 | bool FrolovaSRadixSortDoubleALL::PreProcessingImpl() { | |
| 27 | 20 | return true; | |
| 28 | } | ||
| 29 | |||
| 30 | 40 | void FrolovaSRadixSortDoubleALL::RadixSortChunk(std::vector<double> &chunk) { | |
| 31 | const int radix = 256; | ||
| 32 | const int num_bits = 8; | ||
| 33 | const int num_passes = sizeof(uint64_t); | ||
| 34 | |||
| 35 | 40 | std::vector<double> temp(chunk.size()); | |
| 36 | |||
| 37 |
2/2✓ Branch 0 taken 320 times.
✓ Branch 1 taken 40 times.
|
360 | for (int pass = 0; pass < num_passes; pass++) { |
| 38 |
1/4✓ Branch 1 taken 320 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
320 | std::vector<int> count(radix, 0); |
| 39 |
2/2✓ Branch 0 taken 66560 times.
✓ Branch 1 taken 320 times.
|
66880 | for (double value : chunk) { |
| 40 | auto bits = std::bit_cast<uint64_t>(value); | ||
| 41 | 66560 | int byte = static_cast<int>((bits >> (pass * num_bits)) & 0xFF); | |
| 42 | 66560 | count[byte]++; | |
| 43 | } | ||
| 44 | int total = 0; | ||
| 45 |
2/2✓ Branch 0 taken 81920 times.
✓ Branch 1 taken 320 times.
|
82240 | for (int i = 0; i < radix; i++) { |
| 46 | 81920 | int old = count[i]; | |
| 47 | 81920 | count[i] = total; | |
| 48 | 81920 | total += old; | |
| 49 | } | ||
| 50 |
2/2✓ Branch 0 taken 66560 times.
✓ Branch 1 taken 320 times.
|
66880 | for (double value : chunk) { |
| 51 | auto bits = std::bit_cast<uint64_t>(value); | ||
| 52 | 66560 | int byte = static_cast<int>((bits >> (pass * num_bits)) & 0xFF); | |
| 53 | 66560 | temp[count[byte]++] = value; | |
| 54 | } | ||
| 55 | chunk.swap(temp); | ||
| 56 | } | ||
| 57 | 40 | } | |
| 58 | |||
| 59 | 40 | void FrolovaSRadixSortDoubleALL::ProcessChunk(std::vector<double> &chunk) { | |
| 60 | 40 | RadixSortChunk(chunk); | |
| 61 | |||
| 62 | 40 | std::vector<double> negative; | |
| 63 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | std::vector<double> positive; |
| 64 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | negative.reserve(chunk.size()); |
| 65 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | positive.reserve(chunk.size()); |
| 66 | |||
| 67 |
4/4✓ Branch 0 taken 1982 times.
✓ Branch 1 taken 6338 times.
✓ Branch 2 taken 8320 times.
✓ Branch 3 taken 40 times.
|
8360 | for (double val : chunk) { |
| 68 |
2/2✓ Branch 0 taken 1982 times.
✓ Branch 1 taken 6338 times.
|
8320 | if ((std::bit_cast<uint64_t>(val) >> 63) != 0U) { |
| 69 | negative.push_back(val); | ||
| 70 | } else { | ||
| 71 | positive.push_back(val); | ||
| 72 | } | ||
| 73 | } | ||
| 74 | |||
| 75 | std::ranges::reverse(negative); | ||
| 76 | |||
| 77 | size_t pos = 0; | ||
| 78 |
2/2✓ Branch 0 taken 1982 times.
✓ Branch 1 taken 40 times.
|
2022 | for (double val : negative) { |
| 79 | 1982 | chunk[pos++] = val; | |
| 80 | } | ||
| 81 |
2/2✓ Branch 0 taken 6338 times.
✓ Branch 1 taken 40 times.
|
6378 | for (double val : positive) { |
| 82 | 6338 | chunk[pos++] = val; | |
| 83 | } | ||
| 84 | 40 | } | |
| 85 | |||
| 86 | 20 | std::vector<double> FrolovaSRadixSortDoubleALL::SimpleMerge(const std::vector<double> &a, | |
| 87 | const std::vector<double> &b) { | ||
| 88 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | std::vector<double> res; |
| 89 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | res.reserve(a.size() + b.size()); |
| 90 | |||
| 91 | size_t i = 0; | ||
| 92 | size_t j = 0; | ||
| 93 | |||
| 94 |
4/4✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6578 times.
✓ Branch 2 taken 14 times.
✓ Branch 3 taken 6564 times.
|
6584 | while (i < a.size() && j < b.size()) { |
| 95 |
2/2✓ Branch 0 taken 2428 times.
✓ Branch 1 taken 4136 times.
|
6564 | if (a[i] <= b[j]) { |
| 96 |
1/2✓ Branch 0 taken 2428 times.
✗ Branch 1 not taken.
|
2428 | res.push_back(a[i++]); |
| 97 | } else { | ||
| 98 |
1/2✓ Branch 0 taken 4136 times.
✗ Branch 1 not taken.
|
4136 | res.push_back(b[j++]); |
| 99 | } | ||
| 100 | } | ||
| 101 |
2/2✓ Branch 0 taken 1732 times.
✓ Branch 1 taken 20 times.
|
1752 | while (i < a.size()) { |
| 102 |
1/2✓ Branch 0 taken 1732 times.
✗ Branch 1 not taken.
|
1732 | res.push_back(a[i++]); |
| 103 | } | ||
| 104 |
2/2✓ Branch 0 taken 24 times.
✓ Branch 1 taken 20 times.
|
44 | while (j < b.size()) { |
| 105 |
1/2✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
|
24 | res.push_back(b[j++]); |
| 106 | } | ||
| 107 | 20 | return res; | |
| 108 | } | ||
| 109 | |||
| 110 |
1/2✓ Branch 0 taken 20 times.
✗ Branch 1 not taken.
|
20 | std::vector<double> FrolovaSRadixSortDoubleALL::ParallelMerge(std::vector<std::vector<double>> &chunks) { |
| 111 |
1/2✓ Branch 0 taken 20 times.
✗ Branch 1 not taken.
|
20 | if (chunks.empty()) { |
| 112 | ✗ | return {}; | |
| 113 | } | ||
| 114 | |||
| 115 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 20 times.
|
40 | while (chunks.size() > 1) { |
| 116 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | std::vector<std::vector<double>> next_chunks; |
| 117 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | next_chunks.resize((chunks.size() + 1) / 2); |
| 118 | |||
| 119 | 20 | size_t half_size = chunks.size() / 2; | |
| 120 | |||
| 121 | 20 | #pragma omp parallel for default(none) shared(chunks, next_chunks, half_size) | |
| 122 | for (size_t i = 0; i < half_size; ++i) { | ||
| 123 | next_chunks[i] = SimpleMerge(chunks[(2 * i)], chunks[(2 * i) + 1]); | ||
| 124 | } | ||
| 125 | |||
| 126 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
|
20 | if (chunks.size() % 2 != 0) { |
| 127 | next_chunks.back() = std::move(chunks.back()); | ||
| 128 | } | ||
| 129 | |||
| 130 | 20 | chunks = std::move(next_chunks); | |
| 131 | 20 | } | |
| 132 | |||
| 133 | return std::move(chunks[0]); | ||
| 134 | } | ||
| 135 | |||
| 136 |
1/2✓ Branch 0 taken 20 times.
✗ Branch 1 not taken.
|
20 | bool FrolovaSRadixSortDoubleALL::RunImpl() { |
| 137 | const std::vector<double> &input = GetInput(); | ||
| 138 |
1/2✓ Branch 0 taken 20 times.
✗ Branch 1 not taken.
|
20 | if (input.empty()) { |
| 139 | return true; | ||
| 140 | } | ||
| 141 | |||
| 142 | 20 | int num_chunks = omp_get_max_threads(); | |
| 143 | 20 | if (num_chunks <= 0) { | |
| 144 | num_chunks = 1; | ||
| 145 | } | ||
| 146 | |||
| 147 | 20 | int total_size = static_cast<int>(input.size()); | |
| 148 | 20 | int chunk_size = total_size / num_chunks; | |
| 149 | 20 | int remainder = total_size % num_chunks; | |
| 150 | |||
| 151 | 20 | std::vector<std::vector<double>> chunks(num_chunks); | |
| 152 | int offset = 0; | ||
| 153 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 20 times.
|
60 | for (int i = 0; i < num_chunks; ++i) { |
| 154 |
1/2✓ Branch 0 taken 40 times.
✗ Branch 1 not taken.
|
40 | int cur_size = chunk_size + (i < remainder ? 1 : 0); |
| 155 |
1/2✓ Branch 0 taken 40 times.
✗ Branch 1 not taken.
|
40 | if (cur_size > 0) { |
| 156 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | chunks[i].assign(input.begin() + offset, input.begin() + offset + cur_size); |
| 157 | 40 | offset += cur_size; | |
| 158 | } | ||
| 159 | } | ||
| 160 | |||
| 161 | 20 | std::erase_if(chunks, [](const std::vector<double> &c) { return c.empty(); }); | |
| 162 | |||
| 163 | 20 | num_chunks = static_cast<int>(chunks.size()); | |
| 164 | |||
| 165 |
1/2✓ Branch 2 taken 20 times.
✗ Branch 3 not taken.
|
60 | tbb::parallel_for(0, num_chunks, [&](int i) { ProcessChunk(chunks[i]); }); |
| 166 | |||
| 167 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | std::vector<double> sorted = ParallelMerge(chunks); |
| 168 | GetOutput() = std::move(sorted); | ||
| 169 | |||
| 170 | return true; | ||
| 171 | 20 | } | |
| 172 | |||
| 173 | 20 | bool FrolovaSRadixSortDoubleALL::PostProcessingImpl() { | |
| 174 | 20 | return true; | |
| 175 | } | ||
| 176 | |||
| 177 | } // namespace frolova_s_radix_sort_double | ||
| 178 |