| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "smetanin_d_hoare_even_odd_batchelor/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | #include <oneapi/tbb/parallel_for.h> | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <atomic> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <queue> | ||
| 10 | #include <stack> | ||
| 11 | #include <thread> | ||
| 12 | #include <utility> | ||
| 13 | #include <vector> | ||
| 14 | |||
| 15 | #include "smetanin_d_hoare_even_odd_batchelor/common/include/common.hpp" | ||
| 16 | #include "util/include/util.hpp" | ||
| 17 | |||
| 18 | namespace smetanin_d_hoare_even_odd_batchelor { | ||
| 19 | |||
| 20 | namespace { | ||
| 21 | |||
| 22 | constexpr int kTaskCutoff = 1000; | ||
| 23 | |||
| 24 | 11102 | int HoarePartition(std::vector<int> &arr, int lo, int hi) { | |
| 25 | 11102 | int pivot = arr[lo + ((hi - lo) / 2)]; | |
| 26 | 11102 | int i = lo - 1; | |
| 27 | 11102 | int j = hi + 1; | |
| 28 | while (true) { | ||
| 29 | 39944 | ++i; | |
| 30 |
2/2✓ Branch 0 taken 54937 times.
✓ Branch 1 taken 39944 times.
|
94881 | while (arr[i] < pivot) { |
| 31 | 54937 | ++i; | |
| 32 | } | ||
| 33 | 39944 | --j; | |
| 34 |
2/2✓ Branch 0 taken 56354 times.
✓ Branch 1 taken 39944 times.
|
96298 | while (arr[j] > pivot) { |
| 35 | 56354 | --j; | |
| 36 | } | ||
| 37 |
2/2✓ Branch 0 taken 11102 times.
✓ Branch 1 taken 28842 times.
|
39944 | if (i >= j) { |
| 38 | 11102 | return j; | |
| 39 | } | ||
| 40 | std::swap(arr[i], arr[j]); | ||
| 41 | } | ||
| 42 | } | ||
| 43 | |||
| 44 | 11102 | void OddEvenMerge(std::vector<int> &arr, int lo, int hi) { | |
| 45 | 11102 | int n = hi - lo + 1; | |
| 46 |
2/2✓ Branch 0 taken 27103 times.
✓ Branch 1 taken 11102 times.
|
38205 | for (int step = 1; step < n; step *= 2) { |
| 47 |
2/2✓ Branch 0 taken 162864 times.
✓ Branch 1 taken 27103 times.
|
189967 | for (int i = lo; i + step <= hi; i += step * 2) { |
| 48 |
2/2✓ Branch 0 taken 54965 times.
✓ Branch 1 taken 107899 times.
|
162864 | if (arr[i] > arr[i + step]) { |
| 49 | std::swap(arr[i], arr[i + step]); | ||
| 50 | } | ||
| 51 | } | ||
| 52 | } | ||
| 53 | 11102 | } | |
| 54 | |||
| 55 | 30 | void HoarSortBatcherSeq(std::vector<int> &arr, int lo, int hi) { | |
| 56 | std::stack<std::pair<int, int>> stk; | ||
| 57 | stk.emplace(lo, hi); | ||
| 58 |
2/2✓ Branch 0 taken 22190 times.
✓ Branch 1 taken 30 times.
|
22220 | while (!stk.empty()) { |
| 59 | auto [l, r] = stk.top(); | ||
| 60 | stk.pop(); | ||
| 61 |
2/2✓ Branch 0 taken 11110 times.
✓ Branch 1 taken 11080 times.
|
22190 | if (l >= r) { |
| 62 | 11110 | continue; | |
| 63 | } | ||
| 64 | 11080 | int p = HoarePartition(arr, l, r); | |
| 65 |
2/2✓ Branch 0 taken 3861 times.
✓ Branch 1 taken 7219 times.
|
11080 | if ((p - l) > (r - p - 1)) { |
| 66 | stk.emplace(l, p); | ||
| 67 |
1/2✓ Branch 1 taken 3861 times.
✗ Branch 2 not taken.
|
3861 | stk.emplace(p + 1, r); |
| 68 | } else { | ||
| 69 |
2/4✓ Branch 1 taken 7219 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 7219 times.
✗ Branch 5 not taken.
|
7219 | stk.emplace(p + 1, r); |
| 70 | stk.emplace(l, p); | ||
| 71 | } | ||
| 72 | 11080 | OddEvenMerge(arr, l, r); | |
| 73 | } | ||
| 74 | 30 | } | |
| 75 | |||
| 76 | 52 | void HoarSortBatcherOMPImpl(std::vector<int> &arr, int lo, int hi) { | |
| 77 |
1/2✓ Branch 0 taken 52 times.
✗ Branch 1 not taken.
|
52 | if (lo >= hi) { |
| 78 | return; | ||
| 79 | } | ||
| 80 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 22 times.
|
52 | if (hi - lo < kTaskCutoff) { |
| 81 | 30 | HoarSortBatcherSeq(arr, lo, hi); | |
| 82 | 30 | return; | |
| 83 | } | ||
| 84 | 22 | int p = HoarePartition(arr, lo, hi); | |
| 85 | 22 | OddEvenMerge(arr, lo, hi); | |
| 86 | 22 | #pragma omp task default(none) shared(arr) firstprivate(lo, p) | |
| 87 | HoarSortBatcherOMPImpl(arr, lo, p); | ||
| 88 | 22 | #pragma omp task default(none) shared(arr) firstprivate(hi, p) | |
| 89 | HoarSortBatcherOMPImpl(arr, p + 1, hi); | ||
| 90 | 22 | #pragma omp taskwait | |
| 91 | } | ||
| 92 | |||
| 93 | 10 | void BuildScatterLayout(int n, int comm_size, std::vector<int> &counts, std::vector<int> &displs) { | |
| 94 | 10 | counts.resize(static_cast<size_t>(comm_size)); | |
| 95 | 10 | displs.resize(static_cast<size_t>(comm_size)); | |
| 96 | 10 | const int base = n / comm_size; | |
| 97 | 10 | const int rem = n % comm_size; | |
| 98 | int offset = 0; | ||
| 99 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 10 times.
|
30 | for (int i = 0; i < comm_size; ++i) { |
| 100 |
1/2✓ Branch 0 taken 20 times.
✗ Branch 1 not taken.
|
40 | counts[static_cast<size_t>(i)] = base + (i < rem ? 1 : 0); |
| 101 | 20 | displs[static_cast<size_t>(i)] = offset; | |
| 102 | 20 | offset += counts[static_cast<size_t>(i)]; | |
| 103 | } | ||
| 104 | 10 | } | |
| 105 | |||
| 106 | 5 | std::vector<int> KWayMergeChunks(const std::vector<int> &gathered, const std::vector<int> &counts, | |
| 107 | const std::vector<int> &displs, int comm_size) { | ||
| 108 | struct Item { | ||
| 109 | int val; | ||
| 110 | int chunk; | ||
| 111 | int next_j; | ||
| 112 | }; | ||
| 113 |
2/6✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 5026 times.
✓ Branch 5 taken 6080 times.
|
11106 | auto cmp = [](const Item &a, const Item &b) { return a.val > b.val; }; |
| 114 | 5 | std::priority_queue<Item, std::vector<Item>, decltype(cmp)> pq(cmp); | |
| 115 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 5 times.
|
15 | for (int i = 0; i < comm_size; ++i) { |
| 116 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
10 | const int cnt = counts[static_cast<size_t>(i)]; |
| 117 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
10 | if (cnt <= 0) { |
| 118 | ✗ | continue; | |
| 119 | } | ||
| 120 | 10 | const int dsp = displs[static_cast<size_t>(i)]; | |
| 121 |
1/4✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
10 | pq.push(Item{.val = gathered[static_cast<size_t>(dsp)], .chunk = i, .next_j = 1}); |
| 122 | } | ||
| 123 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | std::vector<int> out; |
| 124 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | out.reserve(gathered.size()); |
| 125 |
2/2✓ Branch 0 taken 11112 times.
✓ Branch 1 taken 5 times.
|
11117 | while (!pq.empty()) { |
| 126 | 11112 | Item x = pq.top(); | |
| 127 | pq.pop(); | ||
| 128 | out.push_back(x.val); | ||
| 129 |
2/2✓ Branch 0 taken 11102 times.
✓ Branch 1 taken 10 times.
|
11112 | const int cnt = counts[static_cast<size_t>(x.chunk)]; |
| 130 |
2/2✓ Branch 0 taken 11102 times.
✓ Branch 1 taken 10 times.
|
11112 | if (x.next_j < cnt) { |
| 131 | 11102 | const int dsp = displs[static_cast<size_t>(x.chunk)]; | |
| 132 | 11102 | const std::size_t idx = static_cast<std::size_t>(dsp) + static_cast<std::size_t>(x.next_j); | |
| 133 |
1/4✓ Branch 1 taken 11102 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
11102 | pq.push(Item{.val = gathered[idx], .chunk = x.chunk, .next_j = x.next_j + 1}); |
| 134 | } | ||
| 135 | } | ||
| 136 | 5 | return out; | |
| 137 | } | ||
| 138 | |||
| 139 | } // namespace | ||
| 140 | |||
| 141 |
1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
|
14 | SmetaninDHoarSortALL::SmetaninDHoarSortALL(const InType &in) { |
| 142 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 143 |
1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
|
14 | GetInput() = in; |
| 144 | 14 | } | |
| 145 | |||
| 146 | 14 | bool SmetaninDHoarSortALL::ValidationImpl() { | |
| 147 | 14 | return true; | |
| 148 | } | ||
| 149 | |||
| 150 | 14 | bool SmetaninDHoarSortALL::PreProcessingImpl() { | |
| 151 | 14 | GetOutput() = GetInput(); | |
| 152 | 14 | return true; | |
| 153 | } | ||
| 154 | |||
| 155 | 14 | bool SmetaninDHoarSortALL::RunImpl() { | |
| 156 | 14 | int rank = 0; | |
| 157 | 14 | int comm_size = 1; | |
| 158 | 14 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 159 | 14 | MPI_Comm_size(MPI_COMM_WORLD, &comm_size); | |
| 160 | |||
| 161 | auto &data = GetOutput(); | ||
| 162 | 14 | const int n = static_cast<int>(data.size()); | |
| 163 | |||
| 164 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 10 times.
|
14 | if (n <= 1) { |
| 165 | 4 | MPI_Barrier(MPI_COMM_WORLD); | |
| 166 | 4 | return true; | |
| 167 | } | ||
| 168 | |||
| 169 | 10 | std::vector<int> counts; | |
| 170 | 10 | std::vector<int> displs; | |
| 171 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | BuildScatterLayout(n, comm_size, counts, displs); |
| 172 | |||
| 173 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | const int local_n = counts[static_cast<size_t>(rank)]; |
| 174 |
1/4✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
10 | std::vector<int> local(static_cast<size_t>(local_n)); |
| 175 | |||
| 176 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | const int *send_root = (rank == 0) ? data.data() : nullptr; |
| 177 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Scatterv(send_root, counts.data(), displs.data(), MPI_INT, local.data(), local_n, MPI_INT, 0, MPI_COMM_WORLD); |
| 178 | |||
| 179 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2 times.
|
10 | if (local_n > 1) { |
| 180 | 8 | #pragma omp parallel default(none) shared(local, local_n) | |
| 181 | #pragma omp single | ||
| 182 | HoarSortBatcherOMPImpl(local, 0, local_n - 1); | ||
| 183 | } | ||
| 184 | |||
| 185 | 10 | std::vector<int> gathered; | |
| 186 | int *recv_root = nullptr; | ||
| 187 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (rank == 0) { |
| 188 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | gathered.resize(static_cast<size_t>(n)); |
| 189 | recv_root = gathered.data(); | ||
| 190 | } | ||
| 191 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Gatherv(local.data(), local_n, MPI_INT, recv_root, counts.data(), displs.data(), MPI_INT, 0, MPI_COMM_WORLD); |
| 192 | |||
| 193 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (rank == 0) { |
| 194 |
1/4✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
|
10 | data = KWayMergeChunks(gathered, counts, displs, comm_size); |
| 195 | std::ranges::sort(data); | ||
| 196 | } | ||
| 197 | |||
| 198 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Bcast(data.data(), n, MPI_INT, 0, MPI_COMM_WORLD); |
| 199 | |||
| 200 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | const int num_threads = ppc::util::GetNumThreads(); |
| 201 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (rank == 0) { |
| 202 | 5 | std::atomic<int> counter(0); | |
| 203 | 5 | std::vector<std::thread> threads; | |
| 204 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | threads.reserve(static_cast<size_t>(num_threads)); |
| 205 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 5 times.
|
15 | for (int i = 0; i < num_threads; i++) { |
| 206 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | threads.emplace_back([&counter]() { counter++; }); |
| 207 | } | ||
| 208 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 5 times.
|
15 | for (auto &t : threads) { |
| 209 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | t.join(); |
| 210 | } | ||
| 211 | |||
| 212 | 5 | std::atomic<int> counter2(0); | |
| 213 |
2/4✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 5 times.
✗ Branch 4 not taken.
|
15 | tbb::parallel_for(0, num_threads, [&](int /*i*/) { counter2++; }); |
| 214 | |||
| 215 |
2/4✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 5 times.
|
5 | if (counter.load() != num_threads || counter2.load() != num_threads) { |
| 216 | ✗ | MPI_Barrier(MPI_COMM_WORLD); | |
| 217 | return false; | ||
| 218 | } | ||
| 219 | 5 | } | |
| 220 | |||
| 221 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Barrier(MPI_COMM_WORLD); |
| 222 | return true; | ||
| 223 | } | ||
| 224 | |||
| 225 | 14 | bool SmetaninDHoarSortALL::PostProcessingImpl() { | |
| 226 | 14 | return true; | |
| 227 | } | ||
| 228 | |||
| 229 | } // namespace smetanin_d_hoare_even_odd_batchelor | ||
| 230 |