| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "khruev_a_radix_sorting_int_bather_merge/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | #include <omp.h> | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <cstdint> | ||
| 9 | #include <limits> | ||
| 10 | #include <utility> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include "khruev_a_radix_sorting_int_bather_merge/common/include/common.hpp" | ||
| 14 | |||
| 15 | namespace khruev_a_radix_sorting_int_bather_merge { | ||
| 16 | |||
| 17 | ✗ | void KhruevARadixSortingIntBatherMergeALL::CompareExchange(std::vector<int> &a, size_t i, size_t j) { | |
| 18 | ✗ | if (a[i] > a[j]) { | |
| 19 | std::swap(a[i], a[j]); | ||
| 20 | } | ||
| 21 | ✗ | } | |
| 22 | |||
| 23 | 26 | void KhruevARadixSortingIntBatherMergeALL::RadixSort(std::vector<int> &arr) { | |
| 24 | const int bits = 8; | ||
| 25 | const int buckets = 1 << bits; | ||
| 26 | const int mask = buckets - 1; | ||
| 27 | const int passes = 32 / bits; | ||
| 28 | |||
| 29 | 26 | std::vector<int> temp(arr.size()); | |
| 30 | std::vector<int> *src = &arr; | ||
| 31 | std::vector<int> *dst = &temp; | ||
| 32 | |||
| 33 |
2/2✓ Branch 0 taken 104 times.
✓ Branch 1 taken 26 times.
|
130 | for (int pass = 0; pass < passes; pass++) { |
| 34 |
1/4✓ Branch 1 taken 104 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
104 | std::vector<int> count(buckets, 0); |
| 35 | 104 | int shift = pass * bits; | |
| 36 | |||
| 37 |
2/2✓ Branch 0 taken 248 times.
✓ Branch 1 taken 104 times.
|
352 | for (int x : *src) { |
| 38 | 248 | uint32_t ux = static_cast<uint32_t>(x) ^ 0x80000000U; | |
| 39 | 248 | uint32_t digit = (ux >> shift) & mask; | |
| 40 | 248 | count[digit]++; | |
| 41 | } | ||
| 42 | |||
| 43 |
2/2✓ Branch 0 taken 26520 times.
✓ Branch 1 taken 104 times.
|
26624 | for (int i = 1; i < buckets; i++) { |
| 44 | 26520 | count[i] += count[i - 1]; | |
| 45 | } | ||
| 46 | |||
| 47 |
2/2✓ Branch 0 taken 248 times.
✓ Branch 1 taken 104 times.
|
352 | for (size_t i = src->size(); i-- > 0;) { |
| 48 | 248 | uint32_t ux = static_cast<uint32_t>((*src)[i]) ^ 0x80000000U; | |
| 49 | 248 | uint32_t digit = (ux >> shift) & mask; | |
| 50 | 248 | (*dst)[--count[digit]] = (*src)[i]; | |
| 51 | } | ||
| 52 | |||
| 53 | std::swap(src, dst); | ||
| 54 | } | ||
| 55 | 26 | } | |
| 56 | |||
| 57 | 19 | void KhruevARadixSortingIntBatherMergeALL::OddEvenMerge(std::vector<int> &a, size_t n) { | |
| 58 |
1/2✓ Branch 0 taken 19 times.
✗ Branch 1 not taken.
|
19 | if (n < 2) { |
| 59 | return; | ||
| 60 | } | ||
| 61 | |||
| 62 | 19 | size_t po = n / 2; | |
| 63 | |||
| 64 | 19 | #pragma omp parallel for default(none) shared(a, po) | |
| 65 | for (size_t i = 0; i < po; ++i) { | ||
| 66 | CompareExchange(a, i, i + po); | ||
| 67 | } | ||
| 68 | |||
| 69 | 19 | po >>= 1; | |
| 70 | |||
| 71 |
2/2✓ Branch 0 taken 27 times.
✓ Branch 1 taken 19 times.
|
46 | for (; po > 0; po >>= 1) { |
| 72 | 27 | size_t num_blocks = (n - (2 * po)) / (2 * po); | |
| 73 | |||
| 74 | 27 | #pragma omp parallel for default(none) shared(a, po, num_blocks) | |
| 75 | for (size_t block_idx = 0; block_idx < num_blocks; ++block_idx) { | ||
| 76 | size_t i = po + (block_idx * 2 * po); | ||
| 77 | for (size_t j = 0; j < po; ++j) { | ||
| 78 | CompareExchange(a, i + j, i + j + po); | ||
| 79 | } | ||
| 80 | } | ||
| 81 | } | ||
| 82 | } | ||
| 83 | |||
| 84 | 14 | void KhruevARadixSortingIntBatherMergeALL::ScatterDataHelper(int rank, int active_procs, int chunk_size, size_t pow2, | |
| 85 | std::vector<int> &local_data) { | ||
| 86 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
|
14 | if (rank == 0) { |
| 87 | 7 | std::vector<int> padded_data = GetInput(); | |
| 88 |
1/4✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
|
7 | padded_data.resize(pow2, std::numeric_limits<int>::max()); |
| 89 | |||
| 90 | auto chunk_diff = static_cast<std::ptrdiff_t>(chunk_size); | ||
| 91 | 7 | std::copy(padded_data.begin(), padded_data.begin() + chunk_diff, local_data.begin()); | |
| 92 | |||
| 93 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
|
14 | for (int i = 1; i < active_procs; ++i) { |
| 94 | 7 | auto offset = static_cast<std::ptrdiff_t>(i) * chunk_size; | |
| 95 |
1/2✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
|
7 | MPI_Send(padded_data.data() + offset, chunk_size, MPI_INT, i, 0, MPI_COMM_WORLD); |
| 96 | } | ||
| 97 | } else { | ||
| 98 | 7 | MPI_Recv(local_data.data(), chunk_size, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); | |
| 99 | } | ||
| 100 | 14 | } | |
| 101 | |||
| 102 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 2 times.
|
14 | void KhruevARadixSortingIntBatherMergeALL::LocalSortHelper(std::vector<int> &local_data) { |
| 103 | 14 | size_t half_local = local_data.size() / 2; | |
| 104 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 2 times.
|
14 | if (half_local > 0 && local_data.size() >= 2) { |
| 105 | auto half_diff = static_cast<std::ptrdiff_t>(half_local); | ||
| 106 |
1/2✓ Branch 2 taken 12 times.
✗ Branch 3 not taken.
|
12 | std::vector<int> left(local_data.begin(), local_data.begin() + half_diff); |
| 107 |
1/4✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
12 | std::vector<int> right(local_data.begin() + half_diff, local_data.end()); |
| 108 | |||
| 109 |
1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
|
12 | #pragma omp parallel sections default(none) shared(left, right) |
| 110 | { | ||
| 111 | #pragma omp section | ||
| 112 | { | ||
| 113 | RadixSort(left); | ||
| 114 | } | ||
| 115 | #pragma omp section | ||
| 116 | { | ||
| 117 | RadixSort(right); | ||
| 118 | } | ||
| 119 | } | ||
| 120 | |||
| 121 | std::ranges::copy(left, local_data.begin()); | ||
| 122 | std::ranges::copy(right, local_data.begin() + half_diff); | ||
| 123 | 12 | OddEvenMerge(local_data, local_data.size()); | |
| 124 | } else { | ||
| 125 | 2 | RadixSort(local_data); | |
| 126 | } | ||
| 127 | 14 | } | |
| 128 | |||
| 129 | 14 | void KhruevARadixSortingIntBatherMergeALL::TreeMergeHelper(int rank, int active_procs, int chunk_size, | |
| 130 | std::vector<int> &local_data) { | ||
| 131 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 7 times.
|
21 | for (int step = 1; step < active_procs; step *= 2) { |
| 132 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
|
14 | if (rank % (2 * step) == 0) { |
| 133 | 7 | int sender = rank + step; | |
| 134 | 7 | int recv_count = chunk_size * step; | |
| 135 |
1/2✓ Branch 2 taken 7 times.
✗ Branch 3 not taken.
|
7 | std::vector<int> recv_data(recv_count); |
| 136 | |||
| 137 |
1/2✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
|
7 | MPI_Recv(recv_data.data(), recv_count, MPI_INT, sender, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); |
| 138 | 7 | local_data.insert(local_data.end(), recv_data.begin(), recv_data.end()); | |
| 139 | 7 | OddEvenMerge(local_data, local_data.size()); | |
| 140 | } else { | ||
| 141 | 7 | int receiver = rank - step; | |
| 142 | 7 | int send_count = static_cast<int>(local_data.size()); | |
| 143 | 7 | MPI_Send(local_data.data(), send_count, MPI_INT, receiver, 0, MPI_COMM_WORLD); | |
| 144 | 7 | break; | |
| 145 | } | ||
| 146 | } | ||
| 147 | 14 | } | |
| 148 | |||
| 149 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | KhruevARadixSortingIntBatherMergeALL::KhruevARadixSortingIntBatherMergeALL(const InType &in) { |
| 150 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 151 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | GetInput() = in; |
| 152 | 16 | } | |
| 153 | |||
| 154 | 16 | bool KhruevARadixSortingIntBatherMergeALL::ValidationImpl() { | |
| 155 | 16 | int rank = 0; | |
| 156 | 16 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 157 | |||
| 158 | 16 | int is_valid = 0; | |
| 159 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | if (rank == 0) { |
| 160 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
|
8 | is_valid = !GetInput().empty() ? 1 : 0; |
| 161 | } | ||
| 162 | 16 | MPI_Bcast(&is_valid, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 163 | 16 | return is_valid == 1; | |
| 164 | } | ||
| 165 | |||
| 166 | 16 | bool KhruevARadixSortingIntBatherMergeALL::PreProcessingImpl() { | |
| 167 | 16 | int rank = 0; | |
| 168 | 16 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 169 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | if (rank == 0) { |
| 170 | 8 | GetOutput().resize(GetInput().size()); | |
| 171 | } | ||
| 172 | 16 | return true; | |
| 173 | } | ||
| 174 | |||
| 175 | 16 | bool KhruevARadixSortingIntBatherMergeALL::RunImpl() { | |
| 176 | 16 | int rank = 0; | |
| 177 | 16 | int num_procs = 0; | |
| 178 | 16 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 179 | 16 | MPI_Comm_size(MPI_COMM_WORLD, &num_procs); | |
| 180 | |||
| 181 | 16 | int original_size_int = 0; | |
| 182 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | if (rank == 0) { |
| 183 | 8 | original_size_int = static_cast<int>(GetInput().size()); | |
| 184 | } | ||
| 185 | |||
| 186 | 16 | MPI_Bcast(&original_size_int, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 187 | 16 | auto original_size = static_cast<size_t>(original_size_int); | |
| 188 | |||
| 189 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 14 times.
|
16 | if (original_size <= 1) { |
| 190 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | if (rank == 0) { |
| 191 | 1 | GetOutput() = GetInput(); | |
| 192 | } else { | ||
| 193 | 1 | GetOutput().resize(original_size); | |
| 194 | } | ||
| 195 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
2 | if (original_size == 1) { |
| 196 | 2 | MPI_Bcast(GetOutput().data(), 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 197 | } | ||
| 198 | 2 | return true; | |
| 199 | } | ||
| 200 | |||
| 201 | int active_procs = 1; | ||
| 202 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 14 times.
|
28 | while (active_procs * 2 <= num_procs) { |
| 203 | active_procs *= 2; | ||
| 204 | } | ||
| 205 | |||
| 206 | 14 | std::vector<int> local_data; | |
| 207 | |||
| 208 |
1/2✓ Branch 0 taken 14 times.
✗ Branch 1 not taken.
|
14 | if (rank < active_procs) { |
| 209 | size_t pow2 = 1; | ||
| 210 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 14 times.
|
54 | while (pow2 < original_size) { |
| 211 | 40 | pow2 <<= 1; | |
| 212 | } | ||
| 213 | while (std::cmp_less(pow2, active_procs)) { | ||
| 214 | ✗ | pow2 <<= 1; | |
| 215 | } | ||
| 216 | |||
| 217 | 14 | int chunk_size = static_cast<int>(pow2 / active_procs); | |
| 218 |
1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
|
14 | local_data.resize(chunk_size); |
| 219 | |||
| 220 |
1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
|
14 | ScatterDataHelper(rank, active_procs, chunk_size, pow2, local_data); |
| 221 |
1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
|
14 | LocalSortHelper(local_data); |
| 222 |
1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
|
14 | TreeMergeHelper(rank, active_procs, chunk_size, local_data); |
| 223 | } | ||
| 224 | |||
| 225 |
1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
|
14 | local_data.resize(original_size); |
| 226 |
1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
|
14 | MPI_Bcast(local_data.data(), static_cast<int>(original_size), MPI_INT, 0, MPI_COMM_WORLD); |
| 227 |
1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
|
14 | GetOutput() = local_data; |
| 228 | |||
| 229 | return true; | ||
| 230 | } | ||
| 231 | |||
| 232 | 16 | bool KhruevARadixSortingIntBatherMergeALL::PostProcessingImpl() { | |
| 233 | 16 | int rank = 0; | |
| 234 | 16 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 235 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | if (rank == 0) { |
| 236 | 8 | return GetOutput().size() == GetInput().size(); | |
| 237 | } | ||
| 238 | return true; | ||
| 239 | } | ||
| 240 | |||
| 241 | } // namespace khruev_a_radix_sorting_int_bather_merge | ||
| 242 |