| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "shekhirev_v_hoare_batcher_sort/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | #include <omp.h> | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <limits> | ||
| 9 | #include <utility> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "shekhirev_v_hoare_batcher_sort/common/include/common.hpp" | ||
| 13 | |||
| 14 | namespace shekhirev_v_hoare_batcher_sort { | ||
| 15 | |||
| 16 | namespace { | ||
| 17 | |||
| 18 | 1946 | void SplitPartition(std::vector<int> &arr, int &l, int &r, int &i, int &j) { | |
| 19 | 1946 | int pivot = arr[l + ((r - l) / 2)]; | |
| 20 | 1946 | i = l; | |
| 21 | 1946 | j = r; | |
| 22 |
2/2✓ Branch 0 taken 4530 times.
✓ Branch 1 taken 1946 times.
|
8422 | while (i <= j) { |
| 23 |
2/2✓ Branch 0 taken 18414 times.
✓ Branch 1 taken 4530 times.
|
22944 | while (arr[i] < pivot) { |
| 24 | 18414 | i++; | |
| 25 | } | ||
| 26 |
2/2✓ Branch 0 taken 5216 times.
✓ Branch 1 taken 4530 times.
|
9746 | while (arr[j] > pivot) { |
| 27 | 5216 | j--; | |
| 28 | } | ||
| 29 |
2/2✓ Branch 0 taken 688 times.
✓ Branch 1 taken 3842 times.
|
4530 | if (i <= j) { |
| 30 | 3842 | std::swap(arr[i], arr[j]); | |
| 31 | 3842 | i++; | |
| 32 | 3842 | j--; | |
| 33 | } | ||
| 34 | } | ||
| 35 | 1946 | } | |
| 36 | |||
| 37 | 1946 | void ProcessPartition(std::vector<int> &arr, int &l, int &r, std::vector<std::pair<int, int>> &stack) { | |
| 38 | 1946 | int i = 0; | |
| 39 | 1946 | int j = 0; | |
| 40 | 1946 | SplitPartition(arr, l, r, i, j); | |
| 41 | |||
| 42 |
2/2✓ Branch 0 taken 908 times.
✓ Branch 1 taken 1038 times.
|
1946 | if (j - l < r - i) { |
| 43 |
2/2✓ Branch 0 taken 550 times.
✓ Branch 1 taken 358 times.
|
908 | if (i < r) { |
| 44 | 550 | stack.emplace_back(i, r); | |
| 45 | } | ||
| 46 | 908 | r = j; | |
| 47 | } else { | ||
| 48 |
2/2✓ Branch 0 taken 666 times.
✓ Branch 1 taken 372 times.
|
1038 | if (l < j) { |
| 49 | 666 | stack.emplace_back(l, j); | |
| 50 | } | ||
| 51 | 1038 | l = i; | |
| 52 | } | ||
| 53 | 1946 | } | |
| 54 | |||
| 55 | 36 | void OptimizedHoareSort(std::vector<int> &arr, int left, int right) { | |
| 56 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 36 times.
|
36 | if (left >= right) { |
| 57 | ✗ | return; | |
| 58 | } | ||
| 59 | 36 | std::vector<std::pair<int, int>> stack; | |
| 60 |
1/2✓ Branch 1 taken 36 times.
✗ Branch 2 not taken.
|
36 | stack.reserve(64); |
| 61 |
1/2✓ Branch 1 taken 36 times.
✗ Branch 2 not taken.
|
36 | stack.emplace_back(left, right); |
| 62 | |||
| 63 |
2/2✓ Branch 0 taken 1252 times.
✓ Branch 1 taken 36 times.
|
1288 | while (!stack.empty()) { |
| 64 | auto [l, r] = stack.back(); | ||
| 65 | stack.pop_back(); | ||
| 66 |
2/2✓ Branch 0 taken 1946 times.
✓ Branch 1 taken 1252 times.
|
3198 | while (l < r) { |
| 67 |
1/2✓ Branch 1 taken 1946 times.
✗ Branch 2 not taken.
|
1946 | ProcessPartition(arr, l, r, stack); |
| 68 | } | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 |
1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
|
12 | void OMPLocalSort(std::vector<int> &arr) { |
| 73 | 12 | int size = static_cast<int>(arr.size()); | |
| 74 |
1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
|
12 | if (size <= 1) { |
| 75 | return; | ||
| 76 | } | ||
| 77 | |||
| 78 | 12 | int max_threads = omp_get_max_threads(); | |
| 79 | 12 | if (max_threads <= 0) { | |
| 80 | max_threads = 1; | ||
| 81 | } | ||
| 82 | |||
| 83 | int num_threads = 1; | ||
| 84 |
3/4✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
✓ Branch 2 taken 12 times.
✗ Branch 3 not taken.
|
24 | while (num_threads * 2 <= max_threads && num_threads * 2 <= size) { |
| 85 | num_threads *= 2; | ||
| 86 | } | ||
| 87 | |||
| 88 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.
|
12 | if (num_threads <= 1) { |
| 89 | ✗ | OptimizedHoareSort(arr, 0, size - 1); | |
| 90 | ✗ | return; | |
| 91 | } | ||
| 92 | |||
| 93 | 12 | #pragma omp parallel for default(none) shared(arr, num_threads, size) | |
| 94 | for (int i = 0; i < num_threads; ++i) { | ||
| 95 | int chunk_size = size / num_threads; | ||
| 96 | int start = i * chunk_size; | ||
| 97 | int end = (i == num_threads - 1) ? size - 1 : (start + chunk_size - 1); | ||
| 98 | OptimizedHoareSort(arr, start, end); | ||
| 99 | } | ||
| 100 | |||
| 101 | 12 | OptimizedHoareSort(arr, 0, size - 1); | |
| 102 | } | ||
| 103 | |||
| 104 | 12 | void MpiCompareAndSwap(std::vector<int> &local_arr, int neighbor, bool keep_low_half) { | |
| 105 | 12 | int size = static_cast<int>(local_arr.size()); | |
| 106 |
1/2✓ Branch 2 taken 12 times.
✗ Branch 3 not taken.
|
12 | std::vector<int> neighbor_arr(static_cast<std::size_t>(size)); |
| 107 | |||
| 108 |
1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
|
12 | MPI_Sendrecv(local_arr.data(), size, MPI_INT, neighbor, 0, neighbor_arr.data(), size, MPI_INT, neighbor, 0, |
| 109 | MPI_COMM_WORLD, MPI_STATUS_IGNORE); | ||
| 110 | |||
| 111 |
1/4✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
|
12 | std::vector<int> merged_arr(static_cast<std::size_t>(size) * 2); |
| 112 | 12 | std::ranges::merge(local_arr, neighbor_arr, merged_arr.begin()); | |
| 113 | |||
| 114 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
|
12 | if (keep_low_half) { |
| 115 | 6 | std::copy(merged_arr.begin(), merged_arr.begin() + size, local_arr.begin()); | |
| 116 | } else { | ||
| 117 | 6 | std::copy(merged_arr.begin() + size, merged_arr.end(), local_arr.begin()); | |
| 118 | } | ||
| 119 | 12 | } | |
| 120 | |||
| 121 | 12 | void BatcherExchange(std::vector<int> &local_data, int rank, int world_size, int p_step, int k_step) { | |
| 122 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
|
24 | for (int j_idx = k_step % p_step; j_idx + k_step < world_size; j_idx += (k_step * 2)) { |
| 123 | 12 | int upper_bound = std::min(k_step, world_size - j_idx - k_step); | |
| 124 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
|
24 | for (int i_idx = 0; i_idx < upper_bound; ++i_idx) { |
| 125 | 12 | int r1 = j_idx + i_idx; | |
| 126 | 12 | int r2 = j_idx + i_idx + k_step; | |
| 127 |
1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
|
12 | if ((r1 / (p_step * 2)) == (r2 / (p_step * 2))) { |
| 128 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
|
12 | if (rank == r1) { |
| 129 | 6 | MpiCompareAndSwap(local_data, r2, true); | |
| 130 |
1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
|
6 | } else if (rank == r2) { |
| 131 | 6 | MpiCompareAndSwap(local_data, r1, false); | |
| 132 | } | ||
| 133 | } | ||
| 134 | } | ||
| 135 | } | ||
| 136 | 12 | } | |
| 137 | |||
| 138 | 12 | void BatcherMergeNetwork(std::vector<int> &local_data, int rank, int world_size) { | |
| 139 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
|
24 | for (int p_step = 1; p_step < world_size; p_step *= 2) { |
| 140 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
|
24 | for (int k_step = p_step; k_step > 0; k_step /= 2) { |
| 141 | 12 | BatcherExchange(local_data, rank, world_size, p_step, k_step); | |
| 142 | } | ||
| 143 | } | ||
| 144 | 12 | } | |
| 145 | |||
| 146 | } // namespace | ||
| 147 | |||
| 148 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | ShekhirevHoareBatcherSortALL::ShekhirevHoareBatcherSortALL(const InType &in) { |
| 149 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 150 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | GetInput() = in; |
| 151 | 16 | } | |
| 152 | |||
| 153 | 16 | bool ShekhirevHoareBatcherSortALL::ValidationImpl() { | |
| 154 | 16 | return true; | |
| 155 | } | ||
| 156 | |||
| 157 | 16 | bool ShekhirevHoareBatcherSortALL::PreProcessingImpl() { | |
| 158 | 16 | input_ = GetInput(); | |
| 159 | 16 | return true; | |
| 160 | } | ||
| 161 | |||
| 162 | 16 | bool ShekhirevHoareBatcherSortALL::RunImpl() { | |
| 163 | 16 | int rank = 0; | |
| 164 | 16 | int world_size = 1; | |
| 165 | 16 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 166 | 16 | MPI_Comm_size(MPI_COMM_WORLD, &world_size); | |
| 167 | |||
| 168 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | int total_n = (rank == 0) ? static_cast<int>(input_.size()) : 0; |
| 169 | 16 | MPI_Bcast(&total_n, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 170 | |||
| 171 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 12 times.
|
16 | if (total_n <= 1) { |
| 172 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (rank == 0) { |
| 173 | 2 | output_ = input_; | |
| 174 | } | ||
| 175 | 4 | return true; | |
| 176 | } | ||
| 177 | |||
| 178 | 12 | int chunk_size = (total_n + world_size - 1) / world_size; | |
| 179 | 12 | int padded_total_size = chunk_size * world_size; | |
| 180 | |||
| 181 | 12 | std::vector<int> local_data(static_cast<std::size_t>(chunk_size)); | |
| 182 | 12 | std::vector<int> send_buffer; | |
| 183 | |||
| 184 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
|
12 | if (rank == 0) { |
| 185 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | send_buffer = input_; |
| 186 |
1/4✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
6 | send_buffer.resize(static_cast<std::size_t>(padded_total_size), std::numeric_limits<int>::max()); |
| 187 | } | ||
| 188 | |||
| 189 |
1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
|
12 | MPI_Scatter(send_buffer.data(), chunk_size, MPI_INT, local_data.data(), chunk_size, MPI_INT, 0, MPI_COMM_WORLD); |
| 190 | |||
| 191 |
1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
|
12 | OMPLocalSort(local_data); |
| 192 | |||
| 193 |
1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
|
12 | BatcherMergeNetwork(local_data, rank, world_size); |
| 194 | |||
| 195 | 12 | std::vector<int> gather_buffer; | |
| 196 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
|
12 | if (rank == 0) { |
| 197 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | gather_buffer.resize(static_cast<std::size_t>(padded_total_size)); |
| 198 | } | ||
| 199 |
1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
|
12 | MPI_Gather(local_data.data(), chunk_size, MPI_INT, gather_buffer.data(), chunk_size, MPI_INT, 0, MPI_COMM_WORLD); |
| 200 | |||
| 201 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
|
12 | if (rank == 0) { |
| 202 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | gather_buffer.resize(total_n); |
| 203 | 6 | output_ = std::move(gather_buffer); | |
| 204 | } | ||
| 205 | |||
| 206 | return true; | ||
| 207 | } | ||
| 208 | |||
| 209 | 16 | bool ShekhirevHoareBatcherSortALL::PostProcessingImpl() { | |
| 210 | 16 | int rank = 0; | |
| 211 | 16 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 212 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | if (rank == 0) { |
| 213 | 8 | GetOutput() = output_; | |
| 214 | } | ||
| 215 | 16 | return true; | |
| 216 | } | ||
| 217 | |||
| 218 | } // namespace shekhirev_v_hoare_batcher_sort | ||
| 219 |