| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "zenin_a_radix_sort_double_batcher_merge/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | #include <tbb/parallel_invoke.h> | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <cstdint> | ||
| 9 | #include <cstring> | ||
| 10 | #include <limits> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include "zenin_a_radix_sort_double_batcher_merge/common/include/common.hpp" | ||
| 14 | |||
| 15 | namespace zenin_a_radix_sort_double_batcher_merge { | ||
| 16 | |||
| 17 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | ZeninARadixSortDoubleBatcherMergeALL::ZeninARadixSortDoubleBatcherMergeALL(const InType &in) { |
| 18 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 19 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | GetInput() = in; |
| 20 | GetOutput() = {}; | ||
| 21 | 24 | } | |
| 22 | |||
| 23 | 24 | bool ZeninARadixSortDoubleBatcherMergeALL::ValidationImpl() { | |
| 24 | 24 | return true; | |
| 25 | } | ||
| 26 | 24 | bool ZeninARadixSortDoubleBatcherMergeALL::PreProcessingImpl() { | |
| 27 | 24 | local_data_ = GetInput(); | |
| 28 | 24 | return true; | |
| 29 | } | ||
| 30 | |||
| 31 | 24 | bool ZeninARadixSortDoubleBatcherMergeALL::PostProcessingImpl() { | |
| 32 | 24 | GetOutput() = local_data_; | |
| 33 | 24 | return true; | |
| 34 | } | ||
| 35 | ✗ | uint64_t ZeninARadixSortDoubleBatcherMergeALL::PackDouble(double v) noexcept { | |
| 36 | uint64_t bits = 0ULL; | ||
| 37 | std::memcpy(&bits, &v, sizeof(bits)); | ||
| 38 |
2/4✓ Branch 0 taken 10 times.
✓ Branch 1 taken 54 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
64 | if ((bits & (1ULL << 63)) != 0ULL) { |
| 39 | 10 | bits = ~bits; | |
| 40 | } else { | ||
| 41 | 54 | bits ^= (1ULL << 63); | |
| 42 | } | ||
| 43 | ✗ | return bits; | |
| 44 | } | ||
| 45 | |||
| 46 | ✗ | double ZeninARadixSortDoubleBatcherMergeALL::UnpackDouble(uint64_t k) noexcept { | |
| 47 | ✗ | if ((k & (1ULL << 63)) != 0ULL) { | |
| 48 | 54 | k ^= (1ULL << 63); | |
| 49 | } else { | ||
| 50 | 10 | k = ~k; | |
| 51 | } | ||
| 52 | double v = 0.0; | ||
| 53 | std::memcpy(&v, &k, sizeof(v)); | ||
| 54 | ✗ | return v; | |
| 55 | } | ||
| 56 | |||
| 57 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 32 times.
|
38 | void ZeninARadixSortDoubleBatcherMergeALL::LSDRadixSort(std::vector<double> &array) { |
| 58 | const std::size_t n = array.size(); | ||
| 59 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 32 times.
|
38 | if (n <= 1U) { |
| 60 | 6 | return; | |
| 61 | } | ||
| 62 | |||
| 63 | constexpr int kBits = 8; | ||
| 64 | constexpr int kBuckets = 1 << kBits; | ||
| 65 | constexpr int kPasses = static_cast<int>((sizeof(uint64_t) * 8) / kBits); | ||
| 66 | |||
| 67 | 32 | std::vector<uint64_t> keys(n); | |
| 68 |
2/2✓ Branch 0 taken 64 times.
✓ Branch 1 taken 32 times.
|
96 | for (std::size_t i = 0; i < n; ++i) { |
| 69 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 54 times.
|
128 | keys[i] = PackDouble(array[i]); |
| 70 | } | ||
| 71 | |||
| 72 |
1/4✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
32 | std::vector<uint64_t> tmp_keys(n); |
| 73 |
1/4✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
32 | std::vector<double> tmp_vals(n); |
| 74 | |||
| 75 |
2/2✓ Branch 0 taken 256 times.
✓ Branch 1 taken 32 times.
|
288 | for (int pass = 0; pass < kPasses; ++pass) { |
| 76 | 256 | int shift = pass * kBits; | |
| 77 |
1/4✓ Branch 1 taken 256 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
256 | std::vector<std::size_t> cnt(kBuckets + 1, 0U); |
| 78 | |||
| 79 |
2/2✓ Branch 0 taken 512 times.
✓ Branch 1 taken 256 times.
|
768 | for (std::size_t i = 0; i < n; ++i) { |
| 80 | 512 | auto d = static_cast<std::size_t>((keys[i] >> shift) & (kBuckets - 1)); | |
| 81 | 512 | ++cnt[d + 1]; | |
| 82 | } | ||
| 83 |
2/2✓ Branch 0 taken 65536 times.
✓ Branch 1 taken 256 times.
|
65792 | for (int i = 0; i < kBuckets; ++i) { |
| 84 | 65536 | cnt[i + 1] += cnt[i]; | |
| 85 | } | ||
| 86 | |||
| 87 |
2/2✓ Branch 0 taken 512 times.
✓ Branch 1 taken 256 times.
|
768 | for (std::size_t i = 0; i < n; ++i) { |
| 88 | 512 | auto d = static_cast<std::size_t>((keys[i] >> shift) & (kBuckets - 1)); | |
| 89 | 512 | std::size_t pos = cnt[d]++; | |
| 90 | 512 | tmp_keys[pos] = keys[i]; | |
| 91 | 512 | tmp_vals[pos] = array[i]; | |
| 92 | } | ||
| 93 | |||
| 94 | keys.swap(tmp_keys); | ||
| 95 | array.swap(tmp_vals); | ||
| 96 | } | ||
| 97 | |||
| 98 |
2/2✓ Branch 0 taken 64 times.
✓ Branch 1 taken 32 times.
|
96 | for (std::size_t i = 0; i < n; ++i) { |
| 99 |
2/2✓ Branch 0 taken 54 times.
✓ Branch 1 taken 10 times.
|
128 | array[i] = UnpackDouble(keys[i]); |
| 100 | } | ||
| 101 | } | ||
| 102 | |||
| 103 | ✗ | void ZeninARadixSortDoubleBatcherMergeALL::BlocksComparing(std::vector<double> &arr, size_t i, size_t step) { | |
| 104 |
4/6✓ Branch 0 taken 67 times.
✓ Branch 1 taken 27 times.
✓ Branch 2 taken 56 times.
✓ Branch 3 taken 48 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
|
198 | for (size_t k = 0; k < step; ++k) { |
| 105 |
4/6✓ Branch 0 taken 10 times.
✓ Branch 1 taken 57 times.
✓ Branch 2 taken 22 times.
✓ Branch 3 taken 34 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
|
123 | if (arr[i + k] > arr[i + k + step]) { |
| 106 | std::swap(arr[i + k], arr[i + k + step]); | ||
| 107 | } | ||
| 108 | } | ||
| 109 | ✗ | } | |
| 110 | |||
| 111 | 27 | void ZeninARadixSortDoubleBatcherMergeALL::BatcherOddEvenMerge(std::vector<double> &arr, size_t n) { | |
| 112 |
1/2✓ Branch 0 taken 27 times.
✗ Branch 1 not taken.
|
27 | if (n <= 1) { |
| 113 | return; | ||
| 114 | } | ||
| 115 | |||
| 116 | 27 | size_t step = n / 2; | |
| 117 | BlocksComparing(arr, 0, step); | ||
| 118 | |||
| 119 | 27 | step /= 2; | |
| 120 |
2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 27 times.
|
59 | for (; step > 0; step /= 2) { |
| 121 |
2/2✓ Branch 0 taken 48 times.
✓ Branch 1 taken 32 times.
|
80 | for (size_t i = step; i < n - step; i += step * 2) { |
| 122 | BlocksComparing(arr, i, step); | ||
| 123 | } | ||
| 124 | } | ||
| 125 | } | ||
| 126 | |||
| 127 | 22 | void ZeninARadixSortDoubleBatcherMergeALL::SortChunk(std::vector<double> &chunk, size_t chunk_size) { | |
| 128 | 22 | size_t half = chunk_size / 2; | |
| 129 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 6 times.
|
22 | if (half > 0) { |
| 130 |
1/2✓ Branch 2 taken 16 times.
✗ Branch 3 not taken.
|
16 | std::vector<double> left(chunk.begin(), chunk.begin() + static_cast<std::ptrdiff_t>(half)); |
| 131 |
1/4✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
16 | std::vector<double> right(chunk.begin() + static_cast<std::ptrdiff_t>(half), chunk.end()); |
| 132 |
2/6✓ Branch 3 taken 16 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 16 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
|
64 | tbb::parallel_invoke([&]() { LSDRadixSort(left); }, [&]() { LSDRadixSort(right); }); |
| 133 | std::ranges::copy(left, chunk.begin()); | ||
| 134 | std::ranges::copy(right, chunk.begin() + static_cast<std::ptrdiff_t>(half)); | ||
| 135 | 16 | BatcherOddEvenMerge(chunk, chunk_size); | |
| 136 | } else { | ||
| 137 | 6 | LSDRadixSort(chunk); | |
| 138 | } | ||
| 139 | 22 | } | |
| 140 | |||
| 141 | 11 | void ZeninARadixSortDoubleBatcherMergeALL::FinalMerge(size_t chunk_size, size_t pow2, size_t original_size) { | |
| 142 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 11 times.
|
22 | for (size_t size = chunk_size; size < pow2; size *= 2) { |
| 143 | 11 | size_t merges_count = pow2 / (size * 2); | |
| 144 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 11 times.
|
22 | for (size_t i = 0; i < merges_count; ++i) { |
| 145 | 11 | size_t lo = i * 2 * size; | |
| 146 | std::vector<double> block(local_data_.begin() + static_cast<std::ptrdiff_t>(lo), | ||
| 147 | 11 | local_data_.begin() + static_cast<std::ptrdiff_t>(lo + (2 * size))); | |
| 148 | 11 | BatcherOddEvenMerge(block, (2 * size)); | |
| 149 | std::ranges::copy(block, local_data_.begin() + static_cast<std::ptrdiff_t>(lo)); | ||
| 150 | } | ||
| 151 | } | ||
| 152 | 11 | local_data_.resize(original_size); | |
| 153 | 11 | } | |
| 154 | |||
| 155 | 24 | bool ZeninARadixSortDoubleBatcherMergeALL::RunImpl() { | |
| 156 | 24 | int rank = 0; | |
| 157 | 24 | int num_procs = 1; | |
| 158 | 24 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 159 | 24 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); | |
| 160 | |||
| 161 | 24 | size_t original_size = 0; | |
| 162 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
|
24 | if (rank == 0) { |
| 163 | 12 | original_size = local_data_.size(); | |
| 164 | } | ||
| 165 | 24 | MPI_Bcast(&original_size, 1, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD); | |
| 166 | |||
| 167 |
2/2✓ Branch 0 taken 22 times.
✓ Branch 1 taken 2 times.
|
24 | if (original_size == 0) { |
| 168 | return true; | ||
| 169 | } | ||
| 170 | |||
| 171 |
2/4✓ Branch 0 taken 22 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 22 times.
✗ Branch 3 not taken.
|
22 | bool procs_is_pow2 = (num_procs > 1) && ((num_procs & (num_procs - 1)) == 0); |
| 172 | |||
| 173 | if (!procs_is_pow2) { | ||
| 174 | ✗ | if (rank == 0) { | |
| 175 | ✗ | LSDRadixSort(local_data_); | |
| 176 | } | ||
| 177 | ✗ | if (rank != 0) { | |
| 178 | ✗ | local_data_.resize(original_size); | |
| 179 | } | ||
| 180 | ✗ | MPI_Bcast(local_data_.data(), static_cast<int>(original_size), MPI_DOUBLE, 0, MPI_COMM_WORLD); | |
| 181 | ✗ | return true; | |
| 182 | } | ||
| 183 | |||
| 184 | 22 | size_t pow2 = 1; | |
| 185 |
2/2✓ Branch 0 taken 52 times.
✓ Branch 1 taken 22 times.
|
74 | while (pow2 < original_size) { |
| 186 | 52 | pow2 <<= 1; | |
| 187 | } | ||
| 188 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 22 times.
|
24 | while (pow2 % static_cast<size_t>(num_procs) != 0) { |
| 189 | 2 | pow2 <<= 1; | |
| 190 | } | ||
| 191 | 22 | MPI_Bcast(&pow2, 1, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD); | |
| 192 | |||
| 193 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 11 times.
|
22 | if (rank == 0) { |
| 194 | 11 | local_data_.resize(pow2, std::numeric_limits<double>::max()); | |
| 195 | } | ||
| 196 | |||
| 197 | 22 | size_t chunk_size = pow2 / static_cast<size_t>(num_procs); | |
| 198 | 22 | std::vector<double> chunk(chunk_size); | |
| 199 | |||
| 200 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | MPI_Scatter(local_data_.data(), static_cast<int>(chunk_size), MPI_DOUBLE, chunk.data(), static_cast<int>(chunk_size), |
| 201 | MPI_DOUBLE, 0, MPI_COMM_WORLD); | ||
| 202 | |||
| 203 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | SortChunk(chunk, chunk_size); |
| 204 | |||
| 205 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | MPI_Gather(chunk.data(), static_cast<int>(chunk_size), MPI_DOUBLE, local_data_.data(), static_cast<int>(chunk_size), |
| 206 | MPI_DOUBLE, 0, MPI_COMM_WORLD); | ||
| 207 | |||
| 208 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 11 times.
|
22 | if (rank == 0) { |
| 209 |
1/2✓ Branch 1 taken 11 times.
✗ Branch 2 not taken.
|
11 | FinalMerge(chunk_size, pow2, original_size); |
| 210 | } | ||
| 211 | |||
| 212 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 11 times.
|
22 | if (rank != 0) { |
| 213 |
1/2✓ Branch 1 taken 11 times.
✗ Branch 2 not taken.
|
11 | local_data_.resize(original_size); |
| 214 | } | ||
| 215 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | MPI_Bcast(local_data_.data(), static_cast<int>(original_size), MPI_DOUBLE, 0, MPI_COMM_WORLD); |
| 216 | |||
| 217 | return true; | ||
| 218 | } | ||
| 219 | |||
| 220 | } // namespace zenin_a_radix_sort_double_batcher_merge | ||
| 221 |