| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "leonova_a_radix_merge_sort/mpi/include/ops_mpi.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cmath> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <cstdint> | ||
| 10 | #include <cstring> | ||
| 11 | #include <utility> | ||
| 12 | #include <vector> | ||
| 13 | |||
| 14 | #include "leonova_a_radix_merge_sort/common/include/common.hpp" | ||
| 15 | |||
| 16 | namespace leonova_a_radix_merge_sort { | ||
| 17 | |||
| 18 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | LeonovaARadixMergeSortMPI::LeonovaARadixMergeSortMPI(const InType &in) { |
| 19 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 20 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | GetInput() = in; |
| 21 | 40 | GetOutput() = OutType(); | |
| 22 | 40 | } | |
| 23 | |||
| 24 | 80 | bool LeonovaARadixMergeSortMPI::ValidationImpl() { | |
| 25 | 80 | MPI_Comm_rank(MPI_COMM_WORLD, &rank_); | |
| 26 | 80 | MPI_Comm_size(MPI_COMM_WORLD, &world_size_); | |
| 27 | 80 | int is_valid = 1; | |
| 28 | |||
| 29 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 40 times.
|
80 | if (rank_ == 0) { |
| 30 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 40 times.
|
40 | if (GetInput().empty()) { |
| 31 | ✗ | is_valid = 0; | |
| 32 | } | ||
| 33 | } | ||
| 34 | |||
| 35 | 80 | MPI_Bcast(&is_valid, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 36 | |||
| 37 | 80 | return is_valid == 1; | |
| 38 | } | ||
| 39 | |||
| 40 | 40 | bool LeonovaARadixMergeSortMPI::PreProcessingImpl() { | |
| 41 | 40 | return true; | |
| 42 | } | ||
| 43 | |||
| 44 | 40 | bool LeonovaARadixMergeSortMPI::RunImpl() { | |
| 45 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | if (!ValidationImpl()) { |
| 46 | return false; | ||
| 47 | } | ||
| 48 | |||
| 49 | const auto &full_input = GetInput(); | ||
| 50 | size_t total_size = full_input.size(); | ||
| 51 | |||
| 52 | 40 | std::vector<int> send_counts; | |
| 53 | 40 | std::vector<int> displacements; | |
| 54 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | send_counts.reserve(world_size_); |
| 55 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | displacements.reserve(world_size_); |
| 56 | |||
| 57 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | send_counts.resize(world_size_, 0); |
| 58 |
1/4✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
40 | displacements.resize(world_size_, 0); |
| 59 | |||
| 60 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 20 times.
|
40 | if (rank_ == 0) { |
| 61 | 20 | auto base_size = static_cast<int>(total_size / world_size_); | |
| 62 | 20 | auto remainder = static_cast<int>(total_size % world_size_); | |
| 63 | |||
| 64 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 20 times.
|
60 | for (int index = 0; index < world_size_; ++index) { |
| 65 |
4/4✓ Branch 0 taken 30 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 20 times.
✓ Branch 3 taken 20 times.
|
70 | send_counts[index] = base_size + (index < remainder ? 1 : 0); |
| 66 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 20 times.
|
40 | if (index > 0) { |
| 67 | 20 | displacements[index] = displacements[index - 1] + send_counts[index - 1]; | |
| 68 | } | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | MPI_Bcast(send_counts.data(), world_size_, MPI_INT, 0, MPI_COMM_WORLD); |
| 73 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | MPI_Bcast(displacements.data(), world_size_, MPI_INT, 0, MPI_COMM_WORLD); |
| 74 | |||
| 75 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | int local_count = send_counts[rank_]; |
| 76 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | local_data_.resize(local_count); |
| 77 | |||
| 78 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 40 times.
|
40 | if (world_size_ == 1) { |
| 79 | ✗ | local_data_ = full_input; | |
| 80 | } else { | ||
| 81 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | MPI_Scatterv(full_input.data(), send_counts.data(), displacements.data(), MPI_DOUBLE, local_data_.data(), |
| 82 | local_count, MPI_DOUBLE, 0, MPI_COMM_WORLD); | ||
| 83 | } | ||
| 84 | |||
| 85 |
2/2✓ Branch 0 taken 39 times.
✓ Branch 1 taken 1 times.
|
40 | if (!local_data_.empty()) { |
| 86 |
1/2✓ Branch 1 taken 39 times.
✗ Branch 2 not taken.
|
39 | RadixMergeSort(local_data_, 0, local_data_.size()); |
| 87 | } | ||
| 88 | |||
| 89 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | HierarchicalMerge(); |
| 90 | |||
| 91 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | BroadcastResult(); |
| 92 | return true; | ||
| 93 | } | ||
| 94 | |||
| 95 | 40 | bool LeonovaARadixMergeSortMPI::PostProcessingImpl() { | |
| 96 | 40 | return true; | |
| 97 | } | ||
| 98 | |||
| 99 | inline uint64_t LeonovaARadixMergeSortMPI::TransformDoubleToKey(double value) { | ||
| 100 | uint64_t int_value = 0; | ||
| 101 | std::memcpy(&int_value, &value, sizeof(double)); | ||
| 102 | |||
| 103 | constexpr uint64_t kSignBitMask = 0x8000000000000000ULL; | ||
| 104 |
17/22✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 10 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 304 times.
✓ Branch 8 taken 11 times.
✓ Branch 9 taken 676 times.
✓ Branch 10 taken 5 times.
✓ Branch 11 taken 335 times.
✓ Branch 12 taken 7 times.
✓ Branch 13 taken 318 times.
✓ Branch 14 taken 7 times.
✓ Branch 15 taken 318 times.
✓ Branch 16 taken 2 times.
✓ Branch 17 taken 323 times.
✓ Branch 18 taken 1 times.
✓ Branch 19 taken 324 times.
✓ Branch 20 taken 5 times.
✓ Branch 21 taken 74 times.
|
1755 | return ((int_value & kSignBitMask) != 0U) ? ~int_value : (int_value | kSignBitMask); |
| 105 | } | ||
| 106 | |||
| 107 | 58 | void LeonovaARadixMergeSortMPI::ComputeKeysForVector(const std::vector<double> &vec, std::vector<uint64_t> &keys) { | |
| 108 | size_t size = vec.size(); | ||
| 109 | 58 | keys.resize(size); | |
| 110 | |||
| 111 | size_t index = 0; | ||
| 112 |
2/2✓ Branch 0 taken 325 times.
✓ Branch 1 taken 58 times.
|
383 | for (; index + 3 < size; index += 4) { |
| 113 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 318 times.
|
325 | keys[index] = TransformDoubleToKey(vec[index]); |
| 114 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 318 times.
|
325 | keys[index + 1] = TransformDoubleToKey(vec[index + 1]); |
| 115 |
4/4✓ Branch 0 taken 2 times.
✓ Branch 1 taken 323 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 324 times.
|
650 | keys[index + 2] = TransformDoubleToKey(vec[index + 2]); |
| 116 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 324 times.
|
650 | keys[index + 3] = TransformDoubleToKey(vec[index + 3]); |
| 117 | } | ||
| 118 |
2/2✓ Branch 0 taken 79 times.
✓ Branch 1 taken 58 times.
|
137 | for (; index < size; ++index) { |
| 119 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 74 times.
|
158 | keys[index] = TransformDoubleToKey(vec[index]); |
| 120 | } | ||
| 121 | 58 | } | |
| 122 | |||
| 123 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 19 times.
|
19 | void LeonovaARadixMergeSortMPI::MergeWithKeys(std::vector<double> &result, const std::vector<double> &vec1, |
| 124 | const std::vector<uint64_t> &keys1, const std::vector<double> &vec2, | ||
| 125 | const std::vector<uint64_t> &keys2) { | ||
| 126 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 19 times.
|
19 | size_t total_size = vec1.size() + vec2.size(); |
| 127 | result.clear(); | ||
| 128 | 19 | result.reserve(total_size); | |
| 129 | |||
| 130 | auto index = static_cast<typename std::vector<double>::difference_type>(0); | ||
| 131 | auto jndex = static_cast<typename std::vector<double>::difference_type>(0); | ||
| 132 | |||
| 133 |
4/4✓ Branch 0 taken 364 times.
✓ Branch 1 taken 7 times.
✓ Branch 2 taken 352 times.
✓ Branch 3 taken 12 times.
|
371 | while (static_cast<size_t>(index) < vec1.size() && static_cast<size_t>(jndex) < vec2.size()) { |
| 134 | 352 | const uint64_t key1 = keys1[static_cast<size_t>(index)]; | |
| 135 | 352 | const uint64_t key2 = keys2[static_cast<size_t>(jndex)]; | |
| 136 | |||
| 137 |
2/2✓ Branch 0 taken 24 times.
✓ Branch 1 taken 328 times.
|
352 | if (key1 < key2) { |
| 138 |
1/2✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
|
24 | result.push_back(vec1[static_cast<size_t>(index++)]); |
| 139 | } else { | ||
| 140 |
1/2✓ Branch 0 taken 328 times.
✗ Branch 1 not taken.
|
328 | result.push_back(vec2[static_cast<size_t>(jndex++)]); |
| 141 | } | ||
| 142 | } | ||
| 143 | |||
| 144 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 7 times.
|
19 | if (static_cast<size_t>(index) < vec1.size()) { |
| 145 | 12 | result.insert(result.end(), vec1.begin() + index, vec1.end()); | |
| 146 | } | ||
| 147 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 12 times.
|
19 | if (static_cast<size_t>(jndex) < vec2.size()) { |
| 148 | 7 | result.insert(result.end(), vec2.begin() + jndex, vec2.end()); | |
| 149 | } | ||
| 150 | 19 | } | |
| 151 | |||
| 152 | ✗ | bool LeonovaARadixMergeSortMPI::ShouldMergeWithPartner(int step, int &partner_rank, bool &is_sender) const { | |
| 153 | 40 | int mask = step * 2; | |
| 154 | |||
| 155 |
2/4✓ Branch 0 taken 20 times.
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
40 | if ((rank_ & (mask - 1)) == 0) { // rank_ % mask == 0 |
| 156 | 20 | partner_rank = rank_ + step; | |
| 157 |
1/4✓ Branch 0 taken 20 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
20 | if (partner_rank >= world_size_) { |
| 158 | ✗ | partner_rank = -1; | |
| 159 | } | ||
| 160 | ✗ | return partner_rank != -1; | |
| 161 | } | ||
| 162 | |||
| 163 |
1/4✓ Branch 0 taken 20 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
20 | if ((rank_ & (mask - 1)) == step) { // rank_ % mask == step |
| 164 | 20 | partner_rank = rank_ - step; | |
| 165 | ✗ | is_sender = true; | |
| 166 | ✗ | return true; | |
| 167 | } | ||
| 168 | |||
| 169 | return false; | ||
| 170 | } | ||
| 171 | |||
| 172 | 20 | void LeonovaARadixMergeSortMPI::SendLocalData(int partner_rank) { | |
| 173 | 20 | int data_size = static_cast<int>(local_data_.size()); | |
| 174 | 20 | MPI_Send(&data_size, 1, MPI_INT, partner_rank, 0, MPI_COMM_WORLD); | |
| 175 | |||
| 176 |
2/2✓ Branch 0 taken 19 times.
✓ Branch 1 taken 1 times.
|
20 | if (data_size > 0) { |
| 177 | 19 | MPI_Send(local_data_.data(), data_size, MPI_DOUBLE, partner_rank, 0, MPI_COMM_WORLD); | |
| 178 | } | ||
| 179 | |||
| 180 | local_data_.clear(); | ||
| 181 | local_keys_.clear(); | ||
| 182 |
2/2✓ Branch 0 taken 19 times.
✓ Branch 1 taken 1 times.
|
20 | local_data_.shrink_to_fit(); |
| 183 |
2/2✓ Branch 0 taken 19 times.
✓ Branch 1 taken 1 times.
|
20 | local_keys_.shrink_to_fit(); |
| 184 | 20 | } | |
| 185 | |||
| 186 | 20 | void LeonovaARadixMergeSortMPI::ReceiveAndMergeData(int partner_rank, std::vector<double> &partner_data_buffer, | |
| 187 | std::vector<uint64_t> &partner_keys_buffer) { | ||
| 188 | 20 | int partner_size = 0; | |
| 189 | 20 | MPI_Recv(&partner_size, 1, MPI_INT, partner_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); | |
| 190 | |||
| 191 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 19 times.
|
20 | if (partner_size <= 0) { |
| 192 | 1 | return; | |
| 193 | } | ||
| 194 | |||
| 195 | 19 | partner_data_buffer.resize(partner_size); | |
| 196 | 19 | MPI_Recv(partner_data_buffer.data(), partner_size, MPI_DOUBLE, partner_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); | |
| 197 | |||
| 198 | 19 | partner_keys_buffer.resize(partner_size); | |
| 199 |
2/2✓ Branch 0 taken 340 times.
✓ Branch 1 taken 19 times.
|
359 | for (int index = 0; index < partner_size; ++index) { |
| 200 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 335 times.
|
680 | partner_keys_buffer[index] = TransformDoubleToKey(partner_data_buffer[index]); |
| 201 | } | ||
| 202 | |||
| 203 |
1/2✓ Branch 0 taken 19 times.
✗ Branch 1 not taken.
|
19 | if (!local_data_.empty()) { |
| 204 |
1/2✓ Branch 1 taken 19 times.
✗ Branch 2 not taken.
|
19 | std::vector<double> merged; |
| 205 |
1/2✓ Branch 1 taken 19 times.
✗ Branch 2 not taken.
|
19 | merged.reserve(local_data_.size() + partner_size); |
| 206 | |||
| 207 |
1/2✓ Branch 1 taken 19 times.
✗ Branch 2 not taken.
|
19 | MergeWithKeys(merged, local_data_, local_keys_, partner_data_buffer, partner_keys_buffer); |
| 208 | |||
| 209 | local_data_.swap(merged); | ||
| 210 |
1/2✓ Branch 1 taken 19 times.
✗ Branch 2 not taken.
|
19 | ComputeKeysForVector(local_data_, local_keys_); |
| 211 | } else { | ||
| 212 | local_data_.swap(partner_data_buffer); | ||
| 213 | local_keys_.swap(partner_keys_buffer); | ||
| 214 | } | ||
| 215 | } | ||
| 216 | |||
| 217 |
2/2✓ Branch 0 taken 39 times.
✓ Branch 1 taken 1 times.
|
40 | void LeonovaARadixMergeSortMPI::HierarchicalMerge() { |
| 218 | int step = 1; | ||
| 219 | |||
| 220 |
2/2✓ Branch 0 taken 39 times.
✓ Branch 1 taken 1 times.
|
40 | if (!local_data_.empty()) { |
| 221 | 39 | ComputeKeysForVector(local_data_, local_keys_); | |
| 222 | } | ||
| 223 | |||
| 224 | 40 | std::vector<double> partner_data_buffer; | |
| 225 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | std::vector<uint64_t> partner_keys_buffer; |
| 226 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | partner_data_buffer.reserve(local_data_.capacity()); |
| 227 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | partner_keys_buffer.reserve(local_keys_.capacity()); |
| 228 | |||
| 229 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 40 times.
|
80 | while (step < world_size_) { |
| 230 | int partner_rank = -1; | ||
| 231 | bool is_sender = false; | ||
| 232 | |||
| 233 |
1/2✓ Branch 0 taken 20 times.
✗ Branch 1 not taken.
|
20 | if (ShouldMergeWithPartner(step, partner_rank, is_sender)) { |
| 234 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 20 times.
|
40 | if (is_sender) { |
| 235 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | SendLocalData(partner_rank); |
| 236 | } else { | ||
| 237 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | ReceiveAndMergeData(partner_rank, partner_data_buffer, partner_keys_buffer); |
| 238 | } | ||
| 239 | } | ||
| 240 | |||
| 241 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | MPI_Barrier(MPI_COMM_WORLD); |
| 242 | step *= 2; | ||
| 243 | } | ||
| 244 | 40 | } | |
| 245 | |||
| 246 | 40 | void LeonovaARadixMergeSortMPI::BroadcastResult() { | |
| 247 | 40 | int result_size = 0; | |
| 248 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 20 times.
|
40 | if (rank_ == 0) { |
| 249 | 20 | result_size = static_cast<int>(local_data_.size()); | |
| 250 | } | ||
| 251 | |||
| 252 | 40 | MPI_Bcast(&result_size, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 253 | |||
| 254 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 40 times.
|
40 | if (result_size <= 0) { |
| 255 | GetOutput().clear(); | ||
| 256 | ✗ | return; | |
| 257 | } | ||
| 258 | |||
| 259 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 20 times.
|
40 | if (rank_ != 0) { |
| 260 | 20 | GetOutput().resize(result_size); | |
| 261 | } else { | ||
| 262 | 20 | GetOutput() = std::move(local_data_); | |
| 263 | } | ||
| 264 | |||
| 265 |
1/2✓ Branch 0 taken 40 times.
✗ Branch 1 not taken.
|
40 | if (result_size > 0) { |
| 266 | 40 | MPI_Bcast(GetOutput().data(), result_size, MPI_DOUBLE, 0, MPI_COMM_WORLD); | |
| 267 | } | ||
| 268 | } | ||
| 269 | |||
| 270 | ✗ | void LeonovaARadixMergeSortMPI::CountFrequencies(const std::vector<uint64_t> &keys, int shift, | |
| 271 | std::array<size_t, 256> &count) { | ||
| 272 |
2/4✓ Branch 0 taken 5496 times.
✓ Branch 1 taken 368 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
5864 | for (uint64_t key : keys) { |
| 273 | ✗ | auto byte_val = static_cast<uint8_t>((key >> shift) & 0xFF); | |
| 274 | auto byte_index = static_cast<size_t>(byte_val); | ||
| 275 | 5496 | auto *count_ptr = count.data() + byte_index; | |
| 276 | 5496 | ++(*count_ptr); | |
| 277 | } | ||
| 278 | ✗ | } | |
| 279 | |||
| 280 | ✗ | void LeonovaARadixMergeSortMPI::ComputePrefixSums(std::array<size_t, 256> &count) { | |
| 281 | size_t total = 0; | ||
| 282 |
2/4✓ Branch 0 taken 94208 times.
✓ Branch 1 taken 368 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
94576 | for (size_t &elem : count) { |
| 283 | 94208 | size_t old_count = elem; | |
| 284 | 94208 | elem = total; | |
| 285 | 94208 | total += old_count; | |
| 286 | } | ||
| 287 | ✗ | } | |
| 288 | |||
| 289 | 368 | void LeonovaARadixMergeSortMPI::DistributeElements(const std::vector<double> &arr, const std::vector<uint64_t> &keys, | |
| 290 | size_t left, int shift, const std::array<size_t, 256> &count, | ||
| 291 | std::vector<double> &temp_arr, std::vector<uint64_t> &temp_keys) { | ||
| 292 | 368 | std::array<size_t, 256> local_count = count; | |
| 293 | |||
| 294 |
2/2✓ Branch 0 taken 5496 times.
✓ Branch 1 taken 368 times.
|
5864 | for (size_t index = 0; index < keys.size(); ++index) { |
| 295 | 5496 | auto byte_val = static_cast<uint8_t>((keys[index] >> shift) & 0xFF); | |
| 296 | auto byte_index = static_cast<size_t>(byte_val); | ||
| 297 | 5496 | auto *local_count_ptr = local_count.data() + byte_index; | |
| 298 | 5496 | size_t pos = (*local_count_ptr)++; | |
| 299 | 5496 | temp_arr[pos] = arr[left + index]; | |
| 300 | 5496 | temp_keys[pos] = keys[index]; | |
| 301 | } | ||
| 302 | 368 | } | |
| 303 | |||
| 304 | 46 | void LeonovaARadixMergeSortMPI::RadixSort(std::vector<double> &arr, size_t left, size_t right) { | |
| 305 | 46 | size_t size = right - left; | |
| 306 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 46 times.
|
46 | if (size <= 1) { |
| 307 | ✗ | return; | |
| 308 | } | ||
| 309 | |||
| 310 | 46 | std::vector<uint64_t> keys(size); | |
| 311 |
2/2✓ Branch 0 taken 687 times.
✓ Branch 1 taken 46 times.
|
733 | for (size_t index = 0; index < size; ++index) { |
| 312 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 676 times.
|
1374 | keys[index] = TransformDoubleToKey(arr[left + index]); |
| 313 | } | ||
| 314 | |||
| 315 |
1/4✓ Branch 1 taken 46 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
46 | std::vector<double> temp_arr(size); |
| 316 |
1/4✓ Branch 1 taken 46 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
46 | std::vector<uint64_t> temp_keys(size); |
| 317 | |||
| 318 |
2/2✓ Branch 0 taken 368 times.
✓ Branch 1 taken 46 times.
|
414 | for (int byte_pos = 0; byte_pos < kNumBytes; ++byte_pos) { |
| 319 | 368 | const int shift = byte_pos * kByteSize; | |
| 320 | 368 | std::array<size_t, 256> count = {}; | |
| 321 | |||
| 322 | CountFrequencies(keys, shift, count); | ||
| 323 | ComputePrefixSums(count); | ||
| 324 | 368 | DistributeElements(arr, keys, left, shift, count, temp_arr, temp_keys); | |
| 325 | |||
| 326 | auto left_it = arr.begin() + static_cast<typename std::vector<double>::difference_type>(left); | ||
| 327 | std::ranges::copy(temp_arr, left_it); | ||
| 328 | keys.swap(temp_keys); | ||
| 329 | } | ||
| 330 | } | ||
| 331 | |||
| 332 | 20 | void LeonovaARadixMergeSortMPI::CopyRemainingElements(const std::vector<double> &arr, size_t src_offset, | |
| 333 | size_t src_size, std::vector<double> &merged, | ||
| 334 | size_t dest_offset) { | ||
| 335 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | if (src_size > 0) { |
| 336 | auto src_begin = arr.begin() + static_cast<typename std::vector<double>::difference_type>(src_offset); | ||
| 337 | 10 | auto src_end = arr.begin() + static_cast<typename std::vector<double>::difference_type>(src_offset + src_size); | |
| 338 | auto dest_begin = merged.begin() + static_cast<typename std::vector<double>::difference_type>(dest_offset); | ||
| 339 | 10 | std::copy(src_begin, src_end, dest_begin); | |
| 340 | } | ||
| 341 | 20 | } | |
| 342 | |||
| 343 | 10 | void LeonovaARadixMergeSortMPI::MergeTwoParts(const std::vector<double> &arr, size_t left, size_t mid, size_t right, | |
| 344 | std::vector<double> &merged) { | ||
| 345 | 10 | const size_t left_size = mid - left; | |
| 346 | 10 | const size_t right_size = right - mid; | |
| 347 | |||
| 348 | size_t index = 0; | ||
| 349 | size_t jndex = 0; | ||
| 350 | size_t kndex = 0; | ||
| 351 | |||
| 352 | double left_val = NAN; | ||
| 353 | double right_val = NAN; | ||
| 354 | uint64_t left_key = 0; | ||
| 355 | uint64_t right_key = 0; | ||
| 356 | |||
| 357 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
10 | if (left_size > 0) { |
| 358 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
10 | left_val = arr[left]; |
| 359 | left_key = TransformDoubleToKey(left_val); | ||
| 360 | } | ||
| 361 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
10 | if (right_size > 0) { |
| 362 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
10 | right_val = arr[mid]; |
| 363 | right_key = TransformDoubleToKey(right_val); | ||
| 364 | } | ||
| 365 | |||
| 366 |
2/2✓ Branch 0 taken 314 times.
✓ Branch 1 taken 10 times.
|
324 | while (index < left_size && jndex < right_size) { |
| 367 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 314 times.
|
314 | if (left_key < right_key) { |
| 368 | ✗ | merged[kndex++] = left_val; | |
| 369 | ✗ | ++index; | |
| 370 | ✗ | if (index < left_size) { | |
| 371 | ✗ | left_val = arr[left + index]; | |
| 372 | left_key = TransformDoubleToKey(left_val); | ||
| 373 | } | ||
| 374 | } else { | ||
| 375 |
2/2✓ Branch 0 taken 304 times.
✓ Branch 1 taken 10 times.
|
314 | merged[kndex++] = right_val; |
| 376 | 314 | ++jndex; | |
| 377 |
2/2✓ Branch 0 taken 304 times.
✓ Branch 1 taken 10 times.
|
314 | if (jndex < right_size) { |
| 378 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 304 times.
|
304 | right_val = arr[mid + jndex]; |
| 379 | right_key = TransformDoubleToKey(right_val); | ||
| 380 | } | ||
| 381 | } | ||
| 382 | } | ||
| 383 | |||
| 384 | 10 | CopyRemainingElements(arr, left + index, left_size - index, merged, kndex); | |
| 385 | 10 | kndex += left_size - index; | |
| 386 | 10 | CopyRemainingElements(arr, mid + jndex, right_size - jndex, merged, kndex); | |
| 387 | 10 | } | |
| 388 | |||
| 389 | 39 | void LeonovaARadixMergeSortMPI::RadixMergeSort(std::vector<double> &arr, size_t left, size_t right) { | |
| 390 | struct SortTask { | ||
| 391 | size_t left; | ||
| 392 | size_t right; | ||
| 393 | bool sorted; | ||
| 394 | }; | ||
| 395 | |||
| 396 | 39 | std::vector<SortTask> stack; | |
| 397 |
1/2✓ Branch 1 taken 39 times.
✗ Branch 2 not taken.
|
39 | stack.reserve(128); |
| 398 | |||
| 399 | // Начинаем с исходной задачи | ||
| 400 |
1/2✓ Branch 1 taken 39 times.
✗ Branch 2 not taken.
|
39 | stack.push_back({left, right, false}); |
| 401 | |||
| 402 |
2/2✓ Branch 0 taken 69 times.
✓ Branch 1 taken 39 times.
|
108 | while (!stack.empty()) { |
| 403 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 66 times.
|
69 | SortTask current = stack.back(); |
| 404 | stack.pop_back(); | ||
| 405 | |||
| 406 | 69 | size_t size = current.right - current.left; | |
| 407 | |||
| 408 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 66 times.
|
69 | if (size <= 1) { |
| 409 | 3 | continue; | |
| 410 | } | ||
| 411 | |||
| 412 |
2/2✓ Branch 0 taken 46 times.
✓ Branch 1 taken 20 times.
|
66 | if (size <= kRadixThreshold) { |
| 413 |
1/2✓ Branch 1 taken 46 times.
✗ Branch 2 not taken.
|
46 | RadixSort(arr, current.left, current.right); |
| 414 | 46 | continue; | |
| 415 | } | ||
| 416 | |||
| 417 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | if (!current.sorted) { |
| 418 | 10 | size_t mid = current.left + (size >> 1); | |
| 419 | |||
| 420 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | stack.push_back({current.left, current.right, true}); |
| 421 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | stack.push_back({mid, current.right, false}); |
| 422 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | stack.push_back({current.left, mid, false}); |
| 423 | } else { | ||
| 424 | // Обе части отсортированы, нужно их слить | ||
| 425 | 10 | size_t mid = current.left + (size >> 1); | |
| 426 |
1/4✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
10 | std::vector<double> merged(size); |
| 427 | 10 | MergeTwoParts(arr, current.left, mid, current.right, merged); | |
| 428 | auto left_it = arr.begin() + static_cast<typename std::vector<double>::difference_type>(current.left); | ||
| 429 | std::ranges::copy(merged, left_it); | ||
| 430 | } | ||
| 431 | } | ||
| 432 | 39 | } | |
| 433 | |||
| 434 | } // namespace leonova_a_radix_merge_sort | ||
| 435 |