| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "levonychev_i_radix_batcher_sort/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cmath> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <future> | ||
| 10 | #include <iterator> | ||
| 11 | #include <thread> | ||
| 12 | #include <vector> | ||
| 13 | |||
| 14 | #include "levonychev_i_radix_batcher_sort/common/include/common.hpp" | ||
| 15 | |||
| 16 | namespace levonychev_i_radix_batcher_sort { | ||
| 17 | |||
| 18 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | LevonychevIRadixBatcherSortALL::LevonychevIRadixBatcherSortALL(const InType &in) { |
| 19 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 20 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | GetInput() = in; |
| 21 | 8 | } | |
| 22 | |||
| 23 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 25 times.
|
25 | void LevonychevIRadixBatcherSortALL::LocalRadixSort(std::vector<int> &arr) { |
| 24 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 25 times.
|
25 | if (arr.empty()) { |
| 25 | ✗ | return; | |
| 26 | } | ||
| 27 | 25 | int n = static_cast<int>(arr.size()); | |
| 28 | 25 | std::vector<int> buffer(n); | |
| 29 | |||
| 30 |
2/2✓ Branch 0 taken 100 times.
✓ Branch 1 taken 25 times.
|
125 | for (int byte_idx = 0; byte_idx < 4; ++byte_idx) { |
| 31 | 100 | std::array<int, 256> count{}; | |
| 32 | bool is_last_byte = (byte_idx == 3); | ||
| 33 | |||
| 34 |
2/2✓ Branch 0 taken 100 times.
✓ Branch 1 taken 100 times.
|
200 | for (int x : arr) { |
| 35 | 100 | unsigned char b = (static_cast<unsigned int>(x) >> (byte_idx * 8)) & 0xFF; | |
| 36 |
2/2✓ Branch 0 taken 25 times.
✓ Branch 1 taken 75 times.
|
100 | if (is_last_byte) { |
| 37 | 25 | b ^= 0x80; | |
| 38 | } | ||
| 39 | 100 | count.at(b)++; | |
| 40 | } | ||
| 41 | |||
| 42 | 100 | std::array<size_t, 256> offsets{}; | |
| 43 | offsets.at(0) = 0; | ||
| 44 |
2/2✓ Branch 0 taken 25500 times.
✓ Branch 1 taken 100 times.
|
25600 | for (int i = 1; i < 256; ++i) { |
| 45 | 25500 | offsets.at(i) = offsets.at(i - 1) + count.at(i - 1); | |
| 46 | } | ||
| 47 | |||
| 48 |
2/2✓ Branch 0 taken 100 times.
✓ Branch 1 taken 100 times.
|
200 | for (int x : arr) { |
| 49 | 100 | unsigned char b = (static_cast<unsigned int>(x) >> (byte_idx * 8)) & 0xFF; | |
| 50 |
2/2✓ Branch 0 taken 25 times.
✓ Branch 1 taken 75 times.
|
100 | if (is_last_byte) { |
| 51 | 25 | b ^= 0x80; | |
| 52 | } | ||
| 53 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 100 times.
|
100 | buffer.at(offsets.at(b)++) = x; |
| 54 | } | ||
| 55 |
1/2✓ Branch 1 taken 100 times.
✗ Branch 2 not taken.
|
100 | arr = buffer; |
| 56 | } | ||
| 57 | } | ||
| 58 | |||
| 59 | 8 | void LevonychevIRadixBatcherSortALL::NetworkMergeAndSplit(std::vector<int> &local_data, int partner, bool keep_low) { | |
| 60 | 8 | int local_size = static_cast<int>(local_data.size()); | |
| 61 | 8 | int partner_size = 0; | |
| 62 | |||
| 63 | 8 | MPI_Sendrecv(&local_size, 1, MPI_INT, partner, 0, &partner_size, 1, MPI_INT, partner, 0, MPI_COMM_WORLD, | |
| 64 | MPI_STATUS_IGNORE); | ||
| 65 | |||
| 66 | 8 | std::vector<int> partner_data(partner_size); | |
| 67 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | MPI_Sendrecv(local_data.data(), local_size, MPI_INT, partner, 1, partner_data.data(), partner_size, MPI_INT, partner, |
| 68 | 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); | ||
| 69 | |||
| 70 | 8 | std::vector<int> merged; | |
| 71 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | merged.reserve(local_size + partner_size); |
| 72 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | std::ranges::merge(local_data, partner_data, std::back_inserter(merged)); |
| 73 | |||
| 74 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (keep_low) { |
| 75 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | local_data.assign(merged.begin(), merged.begin() + local_size); |
| 76 | } else { | ||
| 77 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | local_data.assign(merged.end() - local_size, merged.end()); |
| 78 | } | ||
| 79 | 8 | } | |
| 80 | |||
| 81 | 8 | void LevonychevIRadixBatcherSortALL::CalculateDistribution(int total_n, int size, std::vector<int> &counts, | |
| 82 | std::vector<int> &displs) { | ||
| 83 | 8 | int base_size = total_n / size; | |
| 84 | 8 | int extra = total_n % size; | |
| 85 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 8 times.
|
24 | for (int i = 0; i < size; ++i) { |
| 86 |
4/4✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 8 times.
|
30 | counts[i] = base_size + (i < extra ? 1 : 0); |
| 87 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | displs[i] = (i == 0) ? 0 : displs[i - 1] + counts[i - 1]; |
| 88 | } | ||
| 89 | 8 | } | |
| 90 | |||
| 91 | 8 | void LevonychevIRadixBatcherSortALL::LocalSortPhase(std::vector<int> &local_data) { | |
| 92 | 8 | int num_threads = static_cast<int>(std::thread::hardware_concurrency()); | |
| 93 | 8 | int loc_n = static_cast<int>(local_data.size()); | |
| 94 | |||
| 95 | 8 | std::vector<std::vector<int>> blocks(num_threads); | |
| 96 | int current_pos = 0; | ||
| 97 |
2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 8 times.
|
40 | for (int i = 0; i < num_threads; ++i) { |
| 98 |
2/2✓ Branch 0 taken 31 times.
✓ Branch 1 taken 1 times.
|
32 | int b_size = (loc_n / num_threads) + (i < (loc_n % num_threads) ? 1 : 0); |
| 99 |
2/2✓ Branch 0 taken 25 times.
✓ Branch 1 taken 7 times.
|
32 | if (b_size > 0) { |
| 100 |
1/2✓ Branch 1 taken 25 times.
✗ Branch 2 not taken.
|
25 | blocks[i].assign(local_data.begin() + current_pos, local_data.begin() + current_pos + b_size); |
| 101 | 25 | current_pos += b_size; | |
| 102 | } | ||
| 103 | } | ||
| 104 | |||
| 105 | 8 | std::vector<std::future<void>> sort_futures; | |
| 106 |
2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 8 times.
|
40 | for (auto &b : blocks) { |
| 107 |
2/2✓ Branch 0 taken 25 times.
✓ Branch 1 taken 7 times.
|
32 | if (!b.empty()) { |
| 108 |
2/4✓ Branch 1 taken 25 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 25 times.
✗ Branch 5 not taken.
|
75 | sort_futures.push_back(std::async(std::launch::async, [&b]() { LocalRadixSort(b); })); |
| 109 | } | ||
| 110 | } | ||
| 111 |
2/2✓ Branch 0 taken 25 times.
✓ Branch 1 taken 8 times.
|
33 | for (auto &f : sort_futures) { |
| 112 |
1/2✓ Branch 1 taken 25 times.
✗ Branch 2 not taken.
|
25 | f.wait(); |
| 113 | } | ||
| 114 | |||
| 115 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | LocalBatcherMerge(blocks); |
| 116 | |||
| 117 | local_data.clear(); | ||
| 118 |
2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 8 times.
|
40 | for (const auto &b : blocks) { |
| 119 | 32 | local_data.insert(local_data.end(), b.begin(), b.end()); | |
| 120 | } | ||
| 121 | 8 | } | |
| 122 | |||
| 123 |
2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 8 times.
|
40 | void LevonychevIRadixBatcherSortALL::CompareAndMergeBlocks(std::vector<int> &b1, std::vector<int> &b2) { |
| 124 |
4/4✓ Branch 0 taken 32 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 30 times.
|
40 | if (b1.empty() || b2.empty()) { |
| 125 | 10 | return; | |
| 126 | } | ||
| 127 | |||
| 128 |
1/2✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.
|
30 | std::vector<int> merged; |
| 129 |
1/2✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.
|
30 | merged.reserve(b1.size() + b2.size()); |
| 130 |
1/2✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.
|
30 | std::ranges::merge(b1, b2, std::back_inserter(merged)); |
| 131 | |||
| 132 | auto mid = b1.size(); | ||
| 133 | auto mid_diff = static_cast<std::ptrdiff_t>(mid); | ||
| 134 | |||
| 135 |
1/2✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.
|
30 | b1.assign(merged.begin(), merged.begin() + mid_diff); |
| 136 |
1/2✓ Branch 0 taken 30 times.
✗ Branch 1 not taken.
|
30 | b2.assign(merged.begin() + mid_diff, merged.end()); |
| 137 | } | ||
| 138 | |||
| 139 | 24 | void LevonychevIRadixBatcherSortALL::BatcherStep(std::vector<std::vector<int>> &blocks, int pr, int k) { | |
| 140 | 24 | int n_blocks = static_cast<int>(blocks.size()); | |
| 141 | 24 | std::vector<std::future<void>> m_futures; | |
| 142 | |||
| 143 |
2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 24 times.
|
56 | for (int j = k % pr; j <= n_blocks - 1 - k; j += 2 * k) { |
| 144 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 32 times.
|
72 | for (int i = 0; i < std::min(k, n_blocks - j - k); ++i) { |
| 145 | 40 | int i1 = j + i; | |
| 146 | 40 | int i2 = j + i + k; | |
| 147 | |||
| 148 |
1/2✓ Branch 0 taken 40 times.
✗ Branch 1 not taken.
|
40 | if ((i1 / (pr * 2)) == (i2 / (pr * 2))) { |
| 149 | m_futures.push_back( | ||
| 150 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
120 | std::async(std::launch::async, [&blocks, i1, i2]() { CompareAndMergeBlocks(blocks[i1], blocks[i2]); })); |
| 151 | } | ||
| 152 | } | ||
| 153 | } | ||
| 154 | |||
| 155 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 24 times.
|
64 | for (auto &f : m_futures) { |
| 156 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | f.wait(); |
| 157 | } | ||
| 158 | 24 | } | |
| 159 | |||
| 160 | 8 | void LevonychevIRadixBatcherSortALL::LocalBatcherMerge(std::vector<std::vector<int>> &blocks) { | |
| 161 | 8 | int n_blocks = static_cast<int>(blocks.size()); | |
| 162 | |||
| 163 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 8 times.
|
24 | for (int pr = 1; pr < n_blocks; pr <<= 1) { |
| 164 |
2/2✓ Branch 0 taken 24 times.
✓ Branch 1 taken 16 times.
|
40 | for (int k = pr; k > 0; k >>= 1) { |
| 165 | 24 | BatcherStep(blocks, pr, k); | |
| 166 | } | ||
| 167 | } | ||
| 168 | 8 | } | |
| 169 | |||
| 170 | 8 | void LevonychevIRadixBatcherSortALL::GlobalCompareExchange(std::vector<int> &local_data, int rank, int i1, int i2) { | |
| 171 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (rank == i1) { |
| 172 | 4 | NetworkMergeAndSplit(local_data, i2, true); | |
| 173 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | } else if (rank == i2) { |
| 174 | 4 | NetworkMergeAndSplit(local_data, i1, false); | |
| 175 | } | ||
| 176 | 8 | } | |
| 177 | 8 | void LevonychevIRadixBatcherSortALL::GlobalBatcherStep(std::vector<int> &local_data, int rank, int size, int p, int k) { | |
| 178 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | for (int j = k % p; j <= size - 1 - k; j += 2 * k) { |
| 179 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | for (int i = 0; i < std::min(k, size - j - k); ++i) { |
| 180 | 8 | int idx1 = j + i; | |
| 181 | 8 | int idx2 = j + i + k; | |
| 182 | |||
| 183 |
1/2✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
|
8 | if ((idx1 / (p * 2)) == (idx2 / (p * 2))) { |
| 184 | 8 | GlobalCompareExchange(local_data, rank, idx1, idx2); | |
| 185 | } | ||
| 186 | } | ||
| 187 | } | ||
| 188 | 8 | } | |
| 189 | |||
| 190 | 8 | void LevonychevIRadixBatcherSortALL::GlobalSortPhase(std::vector<int> &local_data, int rank, int size) { | |
| 191 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | for (int pr = 1; pr < size; pr <<= 1) { |
| 192 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | for (int k = pr; k > 0; k >>= 1) { |
| 193 | 8 | GlobalBatcherStep(local_data, rank, size, pr, k); | |
| 194 | 8 | MPI_Barrier(MPI_COMM_WORLD); | |
| 195 | } | ||
| 196 | } | ||
| 197 | 8 | } | |
| 198 | |||
| 199 | 8 | bool LevonychevIRadixBatcherSortALL::RunImpl() { | |
| 200 | 8 | int rank = 0; | |
| 201 | 8 | int size = 0; | |
| 202 | 8 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 203 | 8 | MPI_Comm_size(MPI_COMM_WORLD, &size); | |
| 204 | |||
| 205 | const std::vector<int> &input = GetInput(); | ||
| 206 | 8 | int total_n = static_cast<int>(input.size()); | |
| 207 | |||
| 208 | 8 | std::vector<int> send_counts(size); | |
| 209 |
1/4✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
8 | std::vector<int> displs(size); |
| 210 | 8 | CalculateDistribution(total_n, size, send_counts, displs); | |
| 211 | |||
| 212 |
1/4✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
8 | std::vector<int> local_data(send_counts[rank]); |
| 213 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | MPI_Scatterv(input.data(), send_counts.data(), displs.data(), MPI_INT, local_data.data(), send_counts[rank], MPI_INT, |
| 214 | 0, MPI_COMM_WORLD); | ||
| 215 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | LocalSortPhase(local_data); |
| 216 | |||
| 217 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | GlobalSortPhase(local_data, rank, size); |
| 218 | |||
| 219 | 8 | std::vector<int> result; | |
| 220 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (rank == 0) { |
| 221 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | result.resize(total_n); |
| 222 | } | ||
| 223 | |||
| 224 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | MPI_Gatherv(local_data.data(), static_cast<int>(local_data.size()), MPI_INT, result.data(), send_counts.data(), |
| 225 | displs.data(), MPI_INT, 0, MPI_COMM_WORLD); | ||
| 226 | |||
| 227 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (rank == 0) { |
| 228 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | GetOutput() = result; |
| 229 | } | ||
| 230 | |||
| 231 | 8 | return true; | |
| 232 | } | ||
| 233 | |||
| 234 | 8 | bool LevonychevIRadixBatcherSortALL::ValidationImpl() { | |
| 235 | 8 | int rank = 0; | |
| 236 | 8 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 237 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (rank == 0) { |
| 238 | 4 | return !GetInput().empty(); | |
| 239 | } | ||
| 240 | return true; | ||
| 241 | } | ||
| 242 | 8 | bool LevonychevIRadixBatcherSortALL::PreProcessingImpl() { | |
| 243 | 8 | return true; | |
| 244 | } | ||
| 245 | 8 | bool LevonychevIRadixBatcherSortALL::PostProcessingImpl() { | |
| 246 | 8 | return true; | |
| 247 | } | ||
| 248 | |||
| 249 | } // namespace levonychev_i_radix_batcher_sort | ||
| 250 |