| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "perepelkin_i_qsort_batcher_oddeven_merge/mpi/include/ops_mpi.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <cstddef> | ||
| 6 | #include <cstdlib> | ||
| 7 | #include <limits> | ||
| 8 | #include <stack> | ||
| 9 | #include <tuple> | ||
| 10 | #include <utility> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include "perepelkin_i_qsort_batcher_oddeven_merge/common/include/common.hpp" | ||
| 14 | |||
| 15 | namespace perepelkin_i_qsort_batcher_oddeven_merge { | ||
| 16 | |||
| 17 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | PerepelkinIQsortBatcherOddEvenMergeMPI::PerepelkinIQsortBatcherOddEvenMergeMPI(const InType &in) { |
| 18 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank_); |
| 19 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | MPI_Comm_size(MPI_COMM_WORLD, &proc_num_); |
| 20 | |||
| 21 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 22 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
|
24 | if (proc_rank_ == 0) { |
| 23 |
1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
|
12 | GetInput() = in; |
| 24 | } | ||
| 25 | 24 | GetOutput() = std::vector<double>(); | |
| 26 | 24 | } | |
| 27 | |||
| 28 | 24 | bool PerepelkinIQsortBatcherOddEvenMergeMPI::ValidationImpl() { | |
| 29 | 24 | return GetOutput().empty(); | |
| 30 | } | ||
| 31 | |||
| 32 | 24 | bool PerepelkinIQsortBatcherOddEvenMergeMPI::PreProcessingImpl() { | |
| 33 | 24 | return true; | |
| 34 | } | ||
| 35 | |||
| 36 | 24 | bool PerepelkinIQsortBatcherOddEvenMergeMPI::RunImpl() { | |
| 37 | // [1] Broadcast original and padded sizes | ||
| 38 | 24 | size_t original_size = 0; | |
| 39 | 24 | size_t padded_size = 0; | |
| 40 | |||
| 41 | 24 | BcastSizes(original_size, padded_size); | |
| 42 | |||
| 43 |
2/2✓ Branch 0 taken 22 times.
✓ Branch 1 taken 2 times.
|
24 | if (original_size == 0) { |
| 44 | return true; | ||
| 45 | } | ||
| 46 | |||
| 47 | // [2] Distribute data | ||
| 48 | 22 | std::vector<double> padded_input; | |
| 49 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 11 times.
|
22 | if (proc_rank_ == 0) { |
| 50 |
1/2✓ Branch 1 taken 11 times.
✗ Branch 2 not taken.
|
11 | padded_input = GetInput(); |
| 51 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 5 times.
|
11 | if (padded_size > original_size) { |
| 52 |
1/4✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
6 | padded_input.resize(padded_size, std::numeric_limits<double>::infinity()); |
| 53 | } | ||
| 54 | } | ||
| 55 | |||
| 56 | 22 | std::vector<int> counts; | |
| 57 | 22 | std::vector<int> displs; | |
| 58 | 22 | std::vector<double> local_data; | |
| 59 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | DistributeData(padded_size, padded_input, counts, displs, local_data); |
| 60 | |||
| 61 | // [3] Local sort | ||
| 62 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | std::qsort(local_data.data(), local_data.size(), sizeof(double), [](const void *a, const void *b) { |
| 63 | double arg1 = *static_cast<const double *>(a); | ||
| 64 | double arg2 = *static_cast<const double *>(b); | ||
| 65 | if (arg1 < arg2) { | ||
| 66 | return -1; | ||
| 67 | } | ||
| 68 | if (arg1 > arg2) { | ||
| 69 | return 1; | ||
| 70 | } | ||
| 71 | return 0; | ||
| 72 | }); | ||
| 73 | |||
| 74 | // [4] Global merge via comparator network | ||
| 75 | 22 | std::vector<std::pair<int, int>> comparators; | |
| 76 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | BuildComparators(comparators); |
| 77 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | ProcessComparators(counts, local_data, comparators); |
| 78 | |||
| 79 | // [5] Gather result on root | ||
| 80 | 22 | std::vector<double> gathered; | |
| 81 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 11 times.
|
22 | if (proc_rank_ == 0) { |
| 82 |
1/2✓ Branch 1 taken 11 times.
✗ Branch 2 not taken.
|
11 | gathered.resize(padded_size); |
| 83 | } | ||
| 84 | |||
| 85 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | MPI_Gatherv(local_data.data(), static_cast<int>(local_data.size()), MPI_DOUBLE, gathered.data(), counts.data(), |
| 86 | displs.data(), MPI_DOUBLE, 0, MPI_COMM_WORLD); | ||
| 87 | |||
| 88 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 11 times.
|
22 | if (proc_rank_ == 0) { |
| 89 |
1/2✓ Branch 1 taken 11 times.
✗ Branch 2 not taken.
|
11 | gathered.resize(original_size); |
| 90 | GetOutput() = std::move(gathered); | ||
| 91 | } | ||
| 92 | |||
| 93 | // [6] Bcast output to all processes | ||
| 94 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | GetOutput().resize(original_size); |
| 95 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | MPI_Bcast(GetOutput().data(), static_cast<int>(original_size), MPI_DOUBLE, 0, MPI_COMM_WORLD); |
| 96 | |||
| 97 | return true; | ||
| 98 | } | ||
| 99 | |||
| 100 | 24 | void PerepelkinIQsortBatcherOddEvenMergeMPI::BcastSizes(size_t &original_size, size_t &padded_size) { | |
| 101 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
|
24 | if (proc_rank_ == 0) { |
| 102 | 12 | original_size = GetInput().size(); | |
| 103 | 12 | const size_t remainder = original_size % proc_num_; | |
| 104 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
|
12 | padded_size = original_size + (remainder == 0 ? 0 : (proc_num_ - remainder)); |
| 105 | } | ||
| 106 | |||
| 107 | 24 | MPI_Bcast(&original_size, 1, MPI_UNSIGNED_LONG_LONG, 0, MPI_COMM_WORLD); | |
| 108 | 24 | MPI_Bcast(&padded_size, 1, MPI_UNSIGNED_LONG_LONG, 0, MPI_COMM_WORLD); | |
| 109 | 24 | } | |
| 110 | |||
| 111 | 22 | void PerepelkinIQsortBatcherOddEvenMergeMPI::DistributeData(const size_t &padded_size, | |
| 112 | const std::vector<double> &padded_input, | ||
| 113 | std::vector<int> &counts, std::vector<int> &displs, | ||
| 114 | std::vector<double> &local_data) const { | ||
| 115 | 22 | const int base_size = static_cast<int>(padded_size / proc_num_); | |
| 116 | |||
| 117 | 22 | counts.resize(proc_num_); | |
| 118 | 22 | displs.resize(proc_num_); | |
| 119 | |||
| 120 |
2/2✓ Branch 0 taken 44 times.
✓ Branch 1 taken 22 times.
|
66 | for (int i = 0, offset = 0; i < proc_num_; i++) { |
| 121 | 44 | counts[i] = base_size; | |
| 122 | 44 | displs[i] = offset; | |
| 123 | 44 | offset += base_size; | |
| 124 | } | ||
| 125 | |||
| 126 | 22 | const int local_size = counts[proc_rank_]; | |
| 127 | 22 | local_data.resize(local_size); | |
| 128 | |||
| 129 | 22 | MPI_Scatterv(padded_input.data(), counts.data(), displs.data(), MPI_DOUBLE, local_data.data(), local_size, MPI_DOUBLE, | |
| 130 | 0, MPI_COMM_WORLD); | ||
| 131 | 22 | } | |
| 132 | |||
| 133 | 22 | void PerepelkinIQsortBatcherOddEvenMergeMPI::BuildComparators(std::vector<std::pair<int, int>> &comparators) const { | |
| 134 | 22 | std::vector<int> procs(proc_num_); | |
| 135 |
2/2✓ Branch 0 taken 44 times.
✓ Branch 1 taken 22 times.
|
66 | for (int i = 0; i < proc_num_; i++) { |
| 136 | 44 | procs[i] = i; | |
| 137 | } | ||
| 138 | |||
| 139 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | BuildStageB(procs, comparators); |
| 140 | 22 | } | |
| 141 | |||
| 142 | 22 | void PerepelkinIQsortBatcherOddEvenMergeMPI::BuildStageB(const std::vector<int> &procs, | |
| 143 | std::vector<std::pair<int, int>> &comparators) { | ||
| 144 | // Task: (subarray, is_merge_phase) | ||
| 145 | std::stack<std::pair<std::vector<int>, bool>> tasks; | ||
| 146 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | tasks.emplace(procs, false); |
| 147 | |||
| 148 |
2/2✓ Branch 0 taken 88 times.
✓ Branch 1 taken 22 times.
|
110 | while (!tasks.empty()) { |
| 149 | auto [current, is_merge] = tasks.top(); | ||
| 150 | tasks.pop(); | ||
| 151 | |||
| 152 |
2/2✓ Branch 0 taken 44 times.
✓ Branch 1 taken 44 times.
|
88 | if (current.size() <= 1) { |
| 153 | 44 | continue; | |
| 154 | } | ||
| 155 | |||
| 156 | // Split phase: divide and schedule children | ||
| 157 |
1/2✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
|
44 | auto mid = static_cast<DiffT>(current.size() / 2); |
| 158 |
1/4✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
44 | std::vector<int> left(current.begin(), current.begin() + mid); |
| 159 |
1/4✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
44 | std::vector<int> right(current.begin() + mid, current.end()); |
| 160 | |||
| 161 |
3/4✓ Branch 0 taken 22 times.
✓ Branch 1 taken 22 times.
✓ Branch 2 taken 22 times.
✗ Branch 3 not taken.
|
66 | if (is_merge) { |
| 162 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | BuildStageS(left, right, comparators); |
| 163 | continue; | ||
| 164 | } | ||
| 165 | |||
| 166 | // Schedule merge after children complete | ||
| 167 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | tasks.emplace(current, true); |
| 168 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | tasks.emplace(right, false); // Right child first (LIFO order) |
| 169 |
2/6✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 22 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
|
22 | tasks.emplace(left, false); |
| 170 | } | ||
| 171 | 22 | } | |
| 172 | |||
| 173 | 22 | void PerepelkinIQsortBatcherOddEvenMergeMPI::BuildStageS(const std::vector<int> &up, const std::vector<int> &down, | |
| 174 | std::vector<std::pair<int, int>> &comparators) { | ||
| 175 | // Task: (part_up, part_down, is_merge_phase) | ||
| 176 | std::stack<std::tuple<std::vector<int>, std::vector<int>, bool>> tasks; | ||
| 177 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | tasks.emplace(up, down, false); |
| 178 | |||
| 179 |
2/2✓ Branch 0 taken 22 times.
✓ Branch 1 taken 22 times.
|
44 | while (!tasks.empty()) { |
| 180 | auto [part_up, part_down, is_merge] = tasks.top(); | ||
| 181 | tasks.pop(); | ||
| 182 | 22 | const size_t total_size = part_up.size() + part_down.size(); | |
| 183 | |||
| 184 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 22 times.
|
22 | if (total_size <= 1) { |
| 185 | ✗ | continue; | |
| 186 | } | ||
| 187 |
1/2✓ Branch 0 taken 22 times.
✗ Branch 1 not taken.
|
22 | if (total_size == 2) { |
| 188 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | comparators.emplace_back(part_up[0], part_down[0]); |
| 189 | 22 | continue; | |
| 190 | } | ||
| 191 | |||
| 192 | ✗ | if (!is_merge) { | |
| 193 | // Split phase: separate odd/even indices | ||
| 194 | ✗ | auto [a_odd, a_even] = Split(part_up); | |
| 195 | ✗ | auto [b_odd, b_even] = Split(part_down); | |
| 196 | |||
| 197 | // Schedule merge after recursive processing | ||
| 198 | ✗ | tasks.emplace(part_up, part_down, true); | |
| 199 | ✗ | tasks.emplace(a_even, b_even, false); | |
| 200 | ✗ | tasks.emplace(a_odd, b_odd, false); | |
| 201 | continue; | ||
| 202 | ✗ | } | |
| 203 | |||
| 204 | // Merge phase: connect adjacent elements | ||
| 205 | ✗ | std::vector<int> merged; | |
| 206 | ✗ | merged.reserve(total_size); | |
| 207 | ✗ | merged.insert(merged.end(), part_up.begin(), part_up.end()); | |
| 208 | ✗ | merged.insert(merged.end(), part_down.begin(), part_down.end()); | |
| 209 | |||
| 210 | ✗ | for (size_t i = 1; i < merged.size() - 1; i += 2) { | |
| 211 | ✗ | comparators.emplace_back(merged[i], merged[i + 1]); | |
| 212 | } | ||
| 213 | } | ||
| 214 | 22 | } | |
| 215 | |||
| 216 | ✗ | std::pair<std::vector<int>, std::vector<int>> PerepelkinIQsortBatcherOddEvenMergeMPI::Split( | |
| 217 | const std::vector<int> &data) { | ||
| 218 | ✗ | std::vector<int> odd; | |
| 219 | ✗ | std::vector<int> even; | |
| 220 | ✗ | for (size_t i = 0; i < data.size(); i++) { | |
| 221 | ✗ | if (i % 2 == 0) { | |
| 222 | even.push_back(data[i]); | ||
| 223 | } else { | ||
| 224 | odd.push_back(data[i]); | ||
| 225 | } | ||
| 226 | } | ||
| 227 | ✗ | return std::make_pair(std::move(odd), std::move(even)); | |
| 228 | } | ||
| 229 | |||
| 230 | 22 | void PerepelkinIQsortBatcherOddEvenMergeMPI::ProcessComparators( | |
| 231 | const std::vector<int> &counts, std::vector<double> &local_data, | ||
| 232 | const std::vector<std::pair<int, int>> &comparators) const { | ||
| 233 | 22 | std::vector<double> peer_buffer; | |
| 234 | 22 | std::vector<double> temp; | |
| 235 | |||
| 236 |
2/2✓ Branch 0 taken 22 times.
✓ Branch 1 taken 22 times.
|
44 | for (const auto &comp : comparators) { |
| 237 | 22 | const int first = comp.first; | |
| 238 | 22 | const int second = comp.second; | |
| 239 | |||
| 240 |
3/4✓ Branch 0 taken 11 times.
✓ Branch 1 taken 11 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 11 times.
|
22 | if (proc_rank_ != first && proc_rank_ != second) { |
| 241 | ✗ | continue; | |
| 242 | } | ||
| 243 | |||
| 244 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 11 times.
|
22 | const int peer = (proc_rank_ == first) ? second : first; |
| 245 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | const int local_size = counts[proc_rank_]; |
| 246 | 22 | const int peer_size = counts[peer]; | |
| 247 | |||
| 248 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | peer_buffer.resize(peer_size); |
| 249 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | temp.resize(local_size); |
| 250 | |||
| 251 | MPI_Status status; | ||
| 252 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | MPI_Sendrecv(local_data.data(), local_size, MPI_DOUBLE, peer, 0, peer_buffer.data(), peer_size, MPI_DOUBLE, peer, 0, |
| 253 | MPI_COMM_WORLD, &status); | ||
| 254 | |||
| 255 | 22 | MergeBlocks(local_data, peer_buffer, temp, proc_rank_ == first); | |
| 256 | local_data.swap(temp); | ||
| 257 | } | ||
| 258 | 22 | } | |
| 259 | |||
| 260 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 11 times.
|
22 | void PerepelkinIQsortBatcherOddEvenMergeMPI::MergeBlocks(const std::vector<double> &local_data, |
| 261 | const std::vector<double> &peer_buffer, | ||
| 262 | std::vector<double> &temp, bool keep_lower) { | ||
| 263 | 22 | const int local_size = static_cast<int>(local_data.size()); | |
| 264 | 22 | const int peer_size = static_cast<int>(peer_buffer.size()); | |
| 265 | |||
| 266 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 11 times.
|
22 | if (keep_lower) { |
| 267 |
2/2✓ Branch 0 taken 35 times.
✓ Branch 1 taken 11 times.
|
46 | for (int tmp_index = 0, res_index = 0, cur_index = 0; tmp_index < local_size; tmp_index++) { |
| 268 |
2/2✓ Branch 0 taken 18 times.
✓ Branch 1 taken 17 times.
|
35 | const double result = local_data[res_index]; |
| 269 | 35 | const double current = peer_buffer[cur_index]; | |
| 270 |
2/2✓ Branch 0 taken 18 times.
✓ Branch 1 taken 17 times.
|
35 | if (result < current) { |
| 271 | 18 | temp[tmp_index] = result; | |
| 272 | 18 | res_index++; | |
| 273 | } else { | ||
| 274 | 17 | temp[tmp_index] = current; | |
| 275 | 17 | cur_index++; | |
| 276 | } | ||
| 277 | } | ||
| 278 | } else { | ||
| 279 |
2/2✓ Branch 0 taken 35 times.
✓ Branch 1 taken 11 times.
|
46 | for (int tmp_index = local_size - 1, res_index = local_size - 1, cur_index = peer_size - 1; tmp_index >= 0; |
| 280 | tmp_index--) { | ||
| 281 |
2/2✓ Branch 0 taken 18 times.
✓ Branch 1 taken 17 times.
|
35 | const double result = local_data[res_index]; |
| 282 | 35 | const double current = peer_buffer[cur_index]; | |
| 283 |
2/2✓ Branch 0 taken 18 times.
✓ Branch 1 taken 17 times.
|
35 | if (result > current) { |
| 284 | 18 | temp[tmp_index] = result; | |
| 285 | 18 | res_index--; | |
| 286 | } else { | ||
| 287 | 17 | temp[tmp_index] = current; | |
| 288 | 17 | cur_index--; | |
| 289 | } | ||
| 290 | } | ||
| 291 | } | ||
| 292 | 22 | } | |
| 293 | |||
| 294 | 24 | bool PerepelkinIQsortBatcherOddEvenMergeMPI::PostProcessingImpl() { | |
| 295 | 24 | return true; | |
| 296 | } | ||
| 297 | |||
| 298 | } // namespace perepelkin_i_qsort_batcher_oddeven_merge | ||
| 299 |