| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "votincev_d_radixmerge_sort/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | #include <omp.h> | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <array> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <cstdint> | ||
| 10 | #include <utility> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include "votincev_d_radixmerge_sort/common/include/common.hpp" | ||
| 14 | |||
| 15 | namespace votincev_d_radixmerge_sort { | ||
| 16 | |||
| 17 | 20 | VotincevDRadixMergeSortALL::VotincevDRadixMergeSortALL(InType in) : input_(std::move(in)) { | |
| 18 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 19 | 20 | } | |
| 20 | |||
| 21 | 20 | bool VotincevDRadixMergeSortALL::ValidationImpl() { | |
| 22 | 20 | return !input_.empty(); | |
| 23 | } | ||
| 24 | |||
| 25 | 20 | bool VotincevDRadixMergeSortALL::PreProcessingImpl() { | |
| 26 | 20 | return true; | |
| 27 | } | ||
| 28 | |||
| 29 | 40 | void VotincevDRadixMergeSortALL::LocalRadixSort(uint32_t *begin, uint32_t *end) { | |
| 30 | 40 | auto n = static_cast<int32_t>(end - begin); | |
| 31 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 40 times.
|
40 | if (n <= 1) { |
| 32 | ✗ | return; | |
| 33 | } | ||
| 34 | |||
| 35 | 40 | uint32_t max_val = *std::ranges::max_element(begin, end); | |
| 36 | 40 | std::vector<uint32_t> buffer(static_cast<size_t>(n)); | |
| 37 | uint32_t *src = begin; | ||
| 38 | uint32_t *dst = buffer.data(); | ||
| 39 | |||
| 40 |
2/2✓ Branch 0 taken 72 times.
✓ Branch 1 taken 40 times.
|
112 | for (int64_t exp = 1; (static_cast<int64_t>(max_val) / exp) > 0; exp *= 10) { |
| 41 | 72 | std::array<int32_t, 10> count{}; | |
| 42 | count.fill(0); | ||
| 43 | |||
| 44 |
2/2✓ Branch 0 taken 12700 times.
✓ Branch 1 taken 72 times.
|
12772 | for (int32_t i = 0; i < n; ++i) { |
| 45 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 12700 times.
|
12700 | auto digit = static_cast<size_t>((src[static_cast<size_t>(i)] / exp) % 10); |
| 46 | 12700 | count.at(digit)++; | |
| 47 | } | ||
| 48 |
2/2✓ Branch 0 taken 648 times.
✓ Branch 1 taken 72 times.
|
720 | for (size_t i = 1; i < 10; ++i) { |
| 49 | 648 | count.at(i) += count.at(i - 1); | |
| 50 | } | ||
| 51 |
2/2✓ Branch 0 taken 12700 times.
✓ Branch 1 taken 72 times.
|
12772 | for (int32_t i = n - 1; i >= 0; --i) { |
| 52 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 12700 times.
|
12700 | auto digit = static_cast<size_t>((src[static_cast<size_t>(i)] / exp) % 10); |
| 53 | 12700 | dst[static_cast<size_t>(--count.at(digit))] = src[static_cast<size_t>(i)]; | |
| 54 | } | ||
| 55 | std::swap(src, dst); | ||
| 56 | } | ||
| 57 | |||
| 58 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 24 times.
|
40 | if (src != begin) { |
| 59 |
1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
|
16 | std::ranges::copy(src, src + n, begin); |
| 60 | } | ||
| 61 | } | ||
| 62 | |||
| 63 | 20 | void VotincevDRadixMergeSortALL::Merge(const uint32_t *src, uint32_t *dst, int32_t left, int32_t mid, int32_t right) { | |
| 64 | int32_t i = left; | ||
| 65 | int32_t j = mid; | ||
| 66 | int32_t k = left; | ||
| 67 | |||
| 68 |
2/2✓ Branch 0 taken 5029 times.
✓ Branch 1 taken 20 times.
|
5049 | while (i < mid && j < right) { |
| 69 | 5029 | dst[static_cast<size_t>(k++)] = (src[static_cast<size_t>(i)] <= src[static_cast<size_t>(j)]) | |
| 70 |
2/2✓ Branch 0 taken 2750 times.
✓ Branch 1 taken 2279 times.
|
5029 | ? src[static_cast<size_t>(i++)] |
| 71 | 2279 | : src[static_cast<size_t>(j++)]; | |
| 72 | } | ||
| 73 |
2/2✓ Branch 0 taken 50 times.
✓ Branch 1 taken 20 times.
|
70 | while (i < mid) { |
| 74 | 50 | dst[static_cast<size_t>(k++)] = src[static_cast<size_t>(i++)]; | |
| 75 | } | ||
| 76 |
2/2✓ Branch 0 taken 521 times.
✓ Branch 1 taken 20 times.
|
541 | while (j < right) { |
| 77 | 521 | dst[static_cast<size_t>(k++)] = src[static_cast<size_t>(j++)]; | |
| 78 | } | ||
| 79 | 20 | } | |
| 80 | |||
| 81 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
|
20 | void VotincevDRadixMergeSortALL::OmpLocalSortAndMerge(std::vector<uint32_t> &local_data) { |
| 82 | 20 | auto local_n = static_cast<int32_t>(local_data.size()); | |
| 83 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
|
20 | if (local_n <= 0) { |
| 84 | ✗ | return; | |
| 85 | } | ||
| 86 | |||
| 87 | 20 | std::vector<uint32_t> temp_buffer(static_cast<size_t>(local_n)); | |
| 88 | |||
| 89 |
1/2✓ Branch 0 taken 20 times.
✗ Branch 1 not taken.
|
20 | #pragma omp parallel default(none) shared(local_n, local_data, temp_buffer) |
| 90 | { | ||
| 91 | auto tid = static_cast<int32_t>(omp_get_thread_num()); | ||
| 92 | auto n_threads = static_cast<int32_t>(omp_get_num_threads()); | ||
| 93 | |||
| 94 | int32_t t_items = local_n / n_threads; | ||
| 95 | int32_t t_rem = local_n % n_threads; | ||
| 96 | |||
| 97 | auto get_offset = [&](int32_t id) { return (id * t_items) + (id < t_rem ? id : t_rem); }; | ||
| 98 | |||
| 99 | int32_t l = get_offset(tid); | ||
| 100 | int32_t r = get_offset(tid + 1); | ||
| 101 | |||
| 102 | if (l < r) { | ||
| 103 | LocalRadixSort(local_data.data() + l, local_data.data() + r); | ||
| 104 | } | ||
| 105 | |||
| 106 | for (int32_t step = 1; step < n_threads; step *= 2) { | ||
| 107 | #pragma omp barrier | ||
| 108 | if (tid % (2 * step) == 0 && tid + step < n_threads) { | ||
| 109 | int32_t m = get_offset(tid + step); | ||
| 110 | int32_t next_id = (tid + (2 * step) < n_threads) ? (tid + (2 * step)) : n_threads; | ||
| 111 | int32_t next_r = get_offset(next_id); | ||
| 112 | |||
| 113 | Merge(local_data.data(), temp_buffer.data(), l, m, next_r); | ||
| 114 | std::copy(temp_buffer.begin() + l, temp_buffer.begin() + next_r, local_data.begin() + l); | ||
| 115 | } | ||
| 116 | } | ||
| 117 | } | ||
| 118 | } | ||
| 119 | |||
| 120 | 20 | int32_t VotincevDRadixMergeSortALL::ScatterData(int32_t rank, int32_t n, int32_t local_n, | |
| 121 | const std::vector<int32_t> &send_counts, | ||
| 122 | const std::vector<int32_t> &displacements, | ||
| 123 | std::vector<uint32_t> &local_data) { | ||
| 124 | 20 | int32_t min_val = 0; | |
| 125 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | if (rank == 0) { |
| 126 | 10 | min_val = *std::ranges::min_element(input_); | |
| 127 | 10 | std::vector<uint32_t> unsigned_input(static_cast<size_t>(n)); | |
| 128 |
2/2✓ Branch 0 taken 5600 times.
✓ Branch 1 taken 10 times.
|
5610 | for (int32_t i = 0; i < n; ++i) { |
| 129 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 5600 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 5600 times.
|
5600 | unsigned_input.at(static_cast<size_t>(i)) = static_cast<uint32_t>(input_.at(static_cast<size_t>(i)) - min_val); |
| 130 | } | ||
| 131 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Bcast(&min_val, 1, MPI_INT32_T, 0, MPI_COMM_WORLD); |
| 132 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Scatterv(unsigned_input.data(), send_counts.data(), displacements.data(), MPI_UINT32_T, local_data.data(), |
| 133 | local_n, MPI_UINT32_T, 0, MPI_COMM_WORLD); | ||
| 134 | } else { | ||
| 135 | 10 | MPI_Bcast(&min_val, 1, MPI_INT32_T, 0, MPI_COMM_WORLD); | |
| 136 | 10 | MPI_Scatterv(nullptr, send_counts.data(), displacements.data(), MPI_UINT32_T, local_data.data(), local_n, | |
| 137 | MPI_UINT32_T, 0, MPI_COMM_WORLD); | ||
| 138 | } | ||
| 139 | 20 | return min_val; | |
| 140 | } | ||
| 141 | |||
| 142 | 20 | void VotincevDRadixMergeSortALL::FinalMergeAndFormat(int32_t rank, int32_t size, int32_t n, int32_t min_val, | |
| 143 | std::vector<uint32_t> &gathered_data, | ||
| 144 | const std::vector<int32_t> &displacements) { | ||
| 145 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | if (rank == 0) { |
| 146 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | for (int32_t i = 1; i < size; ++i) { |
| 147 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
10 | int32_t mid = displacements.at(static_cast<size_t>(i)); |
| 148 | 10 | int32_t next_idx = i + 1; | |
| 149 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
10 | int32_t right = (next_idx >= size) ? n : displacements.at(static_cast<size_t>(next_idx)); |
| 150 | 10 | std::ranges::inplace_merge(gathered_data.begin(), gathered_data.begin() + mid, gathered_data.begin() + right); | |
| 151 | } | ||
| 152 | |||
| 153 | 10 | output_.resize(static_cast<size_t>(n)); | |
| 154 |
2/2✓ Branch 0 taken 5600 times.
✓ Branch 1 taken 10 times.
|
5610 | for (int32_t i = 0; i < n; ++i) { |
| 155 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5600 times.
|
5600 | auto idx = static_cast<size_t>(i); |
| 156 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5600 times.
|
5600 | output_.at(idx) = static_cast<int32_t>(gathered_data.at(idx) + static_cast<uint32_t>(min_val)); |
| 157 | } | ||
| 158 | } | ||
| 159 | 20 | } | |
| 160 | |||
| 161 | 20 | bool VotincevDRadixMergeSortALL::RunImpl() { | |
| 162 | 20 | int32_t rank = 0; | |
| 163 | 20 | int32_t size = 0; | |
| 164 | 20 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 165 | 20 | MPI_Comm_size(MPI_COMM_WORLD, &size); | |
| 166 | |||
| 167 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | int32_t n = (rank == 0) ? static_cast<int32_t>(input_.size()) : 0; |
| 168 | 20 | MPI_Bcast(&n, 1, MPI_INT32_T, 0, MPI_COMM_WORLD); | |
| 169 | |||
| 170 | 20 | std::vector<int32_t> send_counts(static_cast<size_t>(size)); | |
| 171 |
1/4✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
20 | std::vector<int32_t> displacements(static_cast<size_t>(size)); |
| 172 | 20 | int32_t items = n / size; | |
| 173 | 20 | int32_t rem = n % size; | |
| 174 | |||
| 175 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 20 times.
|
60 | for (int32_t i = 0; i < size; ++i) { |
| 176 |
2/4✓ Branch 0 taken 40 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 40 times.
|
80 | send_counts.at(static_cast<size_t>(i)) = items + (i < rem ? 1 : 0); |
| 177 | 40 | displacements.at(static_cast<size_t>(i)) = | |
| 178 |
4/6✓ Branch 0 taken 20 times.
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 20 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 20 times.
|
40 | (i == 0) ? 0 : displacements.at(static_cast<size_t>(i - 1)) + send_counts.at(static_cast<size_t>(i - 1)); |
| 179 | } | ||
| 180 | |||
| 181 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
|
20 | auto local_n = send_counts.at(static_cast<size_t>(rank)); |
| 182 |
1/4✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
20 | std::vector<uint32_t> local_data(static_cast<size_t>(local_n)); |
| 183 | |||
| 184 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | int32_t min_val = ScatterData(rank, n, local_n, send_counts, displacements, local_data); |
| 185 | |||
| 186 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | OmpLocalSortAndMerge(local_data); |
| 187 | |||
| 188 | 20 | std::vector<uint32_t> gathered_data; | |
| 189 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | if (rank == 0) { |
| 190 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | gathered_data.resize(static_cast<size_t>(n)); |
| 191 | } | ||
| 192 | |||
| 193 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | MPI_Gatherv(local_data.data(), local_n, MPI_UINT32_T, gathered_data.data(), send_counts.data(), displacements.data(), |
| 194 | MPI_UINT32_T, 0, MPI_COMM_WORLD); | ||
| 195 | |||
| 196 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | FinalMergeAndFormat(rank, size, n, min_val, gathered_data, displacements); |
| 197 | |||
| 198 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | if (rank == 0) { |
| 199 | 10 | GetOutput() = std::move(output_); | |
| 200 | } | ||
| 201 | |||
| 202 | 20 | return true; | |
| 203 | } | ||
| 204 | |||
| 205 | 20 | bool VotincevDRadixMergeSortALL::PostProcessingImpl() { | |
| 206 | 20 | return true; | |
| 207 | } | ||
| 208 | |||
| 209 | } // namespace votincev_d_radixmerge_sort | ||
| 210 |