| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "olesnitskiy_v_hoare_sort_simple_merge/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cstddef> | ||
| 7 | #include <stack> | ||
| 8 | #include <thread> | ||
| 9 | #include <utility> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "olesnitskiy_v_hoare_sort_simple_merge/common/include/common.hpp" | ||
| 13 | |||
| 14 | namespace olesnitskiy_v_hoare_sort_simple_merge { | ||
| 15 | |||
| 16 | namespace { | ||
| 17 | |||
| 18 | constexpr size_t kBlockSize = 64; | ||
| 19 | |||
| 20 | size_t GetThreadCount(size_t task_count) { | ||
| 21 | 26 | if (task_count == 0) { | |
| 22 | return 0; | ||
| 23 | } | ||
| 24 | |||
| 25 | 26 | const unsigned int hardware_threads = std::thread::hardware_concurrency(); | |
| 26 |
1/4✗ Branch 0 not taken.
✗ Branch 1 not taken.
✓ Branch 2 taken 26 times.
✗ Branch 3 not taken.
|
26 | const size_t available_threads = hardware_threads == 0 ? 2 : static_cast<size_t>(hardware_threads); |
| 27 | return std::min(task_count, available_threads); | ||
| 28 | } | ||
| 29 | |||
| 30 | template <class Function> | ||
| 31 |
1/2✓ Branch 0 taken 26 times.
✗ Branch 1 not taken.
|
52 | void RunInThreads(size_t task_count, Function function) { |
| 32 | const size_t thread_count = GetThreadCount(task_count); | ||
| 33 |
1/2✓ Branch 0 taken 26 times.
✗ Branch 1 not taken.
|
52 | if (thread_count <= 1) { |
| 34 |
2/2✓ Branch 0 taken 26 times.
✓ Branch 1 taken 26 times.
|
104 | for (size_t task_index = 0; task_index < task_count; ++task_index) { |
| 35 | 52 | function(task_index); | |
| 36 | } | ||
| 37 | 52 | return; | |
| 38 | } | ||
| 39 | |||
| 40 | ✗ | std::vector<std::thread> threads; | |
| 41 | ✗ | threads.reserve(thread_count); | |
| 42 | |||
| 43 | ✗ | for (size_t thread_index = 0; thread_index < thread_count; ++thread_index) { | |
| 44 | ✗ | threads.emplace_back([thread_index, thread_count, task_count, &function]() { | |
| 45 | ✗ | for (size_t task_index = thread_index; task_index < task_count; task_index += thread_count) { | |
| 46 | ✗ | function(task_index); | |
| 47 | } | ||
| 48 | }); | ||
| 49 | } | ||
| 50 | |||
| 51 | ✗ | for (auto &thread : threads) { | |
| 52 | ✗ | thread.join(); | |
| 53 | } | ||
| 54 | ✗ | } | |
| 55 | |||
| 56 | 56 | std::vector<int> MakeIntVector(const std::vector<size_t> &values) { | |
| 57 | 56 | std::vector<int> result(values.size()); | |
| 58 |
2/2✓ Branch 0 taken 112 times.
✓ Branch 1 taken 56 times.
|
168 | for (size_t i = 0; i < values.size(); ++i) { |
| 59 | 112 | result[i] = static_cast<int>(values[i]); | |
| 60 | } | ||
| 61 | 56 | return result; | |
| 62 | } | ||
| 63 | |||
| 64 | 28 | void BuildDistribution(size_t total_size, int mpi_size, std::vector<size_t> &chunk_sizes, | |
| 65 | std::vector<size_t> &offsets) { | ||
| 66 | 28 | const size_t base_size = total_size / static_cast<size_t>(mpi_size); | |
| 67 | 28 | const size_t remainder = total_size % static_cast<size_t>(mpi_size); | |
| 68 | |||
| 69 |
2/2✓ Branch 0 taken 56 times.
✓ Branch 1 taken 28 times.
|
84 | for (int rank = 0; rank < mpi_size; ++rank) { |
| 70 | 56 | const auto index = static_cast<size_t>(rank); | |
| 71 |
4/4✓ Branch 0 taken 40 times.
✓ Branch 1 taken 16 times.
✓ Branch 2 taken 28 times.
✓ Branch 3 taken 28 times.
|
96 | chunk_sizes[index] = base_size + (index < remainder ? 1U : 0U); |
| 72 |
2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 28 times.
|
56 | offsets[index] = rank == 0 ? 0 : offsets[index - 1] + chunk_sizes[index - 1]; |
| 73 | } | ||
| 74 | 28 | } | |
| 75 | |||
| 76 | 28 | void BroadcastVector(std::vector<int> &data, int mpi_rank) { | |
| 77 | 28 | size_t size = data.size(); | |
| 78 | 28 | MPI_Bcast(&size, 1, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD); | |
| 79 | |||
| 80 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 14 times.
|
28 | if (mpi_rank != 0) { |
| 81 | 14 | data.resize(size); | |
| 82 | } | ||
| 83 |
1/2✓ Branch 0 taken 28 times.
✗ Branch 1 not taken.
|
28 | if (size > 0) { |
| 84 | 28 | MPI_Bcast(data.data(), static_cast<int>(size), MPI_INT, 0, MPI_COMM_WORLD); | |
| 85 | } | ||
| 86 | 28 | } | |
| 87 | |||
| 88 | } // namespace | ||
| 89 | |||
| 90 |
1/2✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.
|
30 | OlesnitskiyVHoareSortSimpleMergeALL::OlesnitskiyVHoareSortSimpleMergeALL(const InType &in) { |
| 91 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 92 |
1/2✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.
|
30 | GetInput() = in; |
| 93 | GetOutput().clear(); | ||
| 94 | 30 | } | |
| 95 | |||
| 96 | 192 | int OlesnitskiyVHoareSortSimpleMergeALL::HoarePartition(std::vector<int> &array, int left, int right) { | |
| 97 | 192 | const int pivot = array[left + ((right - left) / 2)]; | |
| 98 | 192 | int i = left - 1; | |
| 99 | 192 | int j = right + 1; | |
| 100 | |||
| 101 | while (true) { | ||
| 102 | 437 | ++i; | |
| 103 |
2/2✓ Branch 0 taken 86 times.
✓ Branch 1 taken 437 times.
|
523 | while (array[i] < pivot) { |
| 104 | 86 | ++i; | |
| 105 | } | ||
| 106 | |||
| 107 | 437 | --j; | |
| 108 |
2/2✓ Branch 0 taken 169 times.
✓ Branch 1 taken 437 times.
|
606 | while (array[j] > pivot) { |
| 109 | 169 | --j; | |
| 110 | } | ||
| 111 | |||
| 112 |
2/2✓ Branch 0 taken 192 times.
✓ Branch 1 taken 245 times.
|
437 | if (i >= j) { |
| 113 | 192 | return j; | |
| 114 | } | ||
| 115 | |||
| 116 | std::swap(array[i], array[j]); | ||
| 117 | } | ||
| 118 | } | ||
| 119 | |||
| 120 | 26 | void OlesnitskiyVHoareSortSimpleMergeALL::HoareQuickSort(std::vector<int> &array, int left, int right) { | |
| 121 | std::stack<std::pair<int, int>> stack; | ||
| 122 | stack.emplace(left, right); | ||
| 123 | |||
| 124 |
2/2✓ Branch 0 taken 410 times.
✓ Branch 1 taken 26 times.
|
436 | while (!stack.empty()) { |
| 125 | auto [current_left, current_right] = stack.top(); | ||
| 126 | stack.pop(); | ||
| 127 | |||
| 128 |
2/2✓ Branch 0 taken 218 times.
✓ Branch 1 taken 192 times.
|
410 | if (current_left >= current_right) { |
| 129 | 218 | continue; | |
| 130 | } | ||
| 131 | |||
| 132 | 192 | const int middle = HoarePartition(array, current_left, current_right); | |
| 133 | |||
| 134 |
2/2✓ Branch 0 taken 25 times.
✓ Branch 1 taken 167 times.
|
192 | if ((middle - current_left) > (current_right - (middle + 1))) { |
| 135 | stack.emplace(current_left, middle); | ||
| 136 |
1/2✓ Branch 1 taken 25 times.
✗ Branch 2 not taken.
|
25 | stack.emplace(middle + 1, current_right); |
| 137 | } else { | ||
| 138 |
2/4✓ Branch 1 taken 167 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 167 times.
✗ Branch 5 not taken.
|
167 | stack.emplace(middle + 1, current_right); |
| 139 | stack.emplace(current_left, middle); | ||
| 140 | } | ||
| 141 | } | ||
| 142 | 26 | } | |
| 143 | |||
| 144 | 14 | void OlesnitskiyVHoareSortSimpleMergeALL::SimpleMerge(const std::vector<int> &source, std::vector<int> &destination, | |
| 145 | size_t left, size_t middle, size_t right) { | ||
| 146 | size_t left_index = left; | ||
| 147 | size_t right_index = middle; | ||
| 148 | size_t destination_index = left; | ||
| 149 | |||
| 150 |
2/2✓ Branch 0 taken 131 times.
✓ Branch 1 taken 14 times.
|
145 | while (left_index < middle && right_index < right) { |
| 151 |
2/2✓ Branch 0 taken 60 times.
✓ Branch 1 taken 71 times.
|
131 | if (source[left_index] <= source[right_index]) { |
| 152 | 60 | destination[destination_index++] = source[left_index++]; | |
| 153 | } else { | ||
| 154 | 71 | destination[destination_index++] = source[right_index++]; | |
| 155 | } | ||
| 156 | } | ||
| 157 | |||
| 158 |
2/2✓ Branch 0 taken 54 times.
✓ Branch 1 taken 14 times.
|
68 | while (left_index < middle) { |
| 159 | 54 | destination[destination_index++] = source[left_index++]; | |
| 160 | } | ||
| 161 | |||
| 162 |
2/2✓ Branch 0 taken 35 times.
✓ Branch 1 taken 14 times.
|
49 | while (right_index < right) { |
| 163 | 35 | destination[destination_index++] = source[right_index++]; | |
| 164 | } | ||
| 165 | 14 | } | |
| 166 | |||
| 167 |
2/2✓ Branch 0 taken 26 times.
✓ Branch 1 taken 2 times.
|
28 | void OlesnitskiyVHoareSortSimpleMergeALL::SortLocalStlParallel(std::vector<int> &array) { |
| 168 |
2/2✓ Branch 0 taken 26 times.
✓ Branch 1 taken 2 times.
|
28 | if (array.size() <= 1) { |
| 169 | return; | ||
| 170 | } | ||
| 171 | |||
| 172 | const size_t size = array.size(); | ||
| 173 | 26 | const size_t block_count = (size + kBlockSize - 1) / kBlockSize; | |
| 174 | |||
| 175 | 26 | RunInThreads(block_count, [&array, size](size_t block_index) { | |
| 176 | 26 | const size_t block_start = block_index * kBlockSize; | |
| 177 |
1/2✓ Branch 0 taken 26 times.
✗ Branch 1 not taken.
|
26 | const size_t block_end = std::min(block_start + kBlockSize, size); |
| 178 |
1/2✓ Branch 0 taken 26 times.
✗ Branch 1 not taken.
|
26 | if ((block_end - block_start) > 1) { |
| 179 | 26 | HoareQuickSort(array, static_cast<int>(block_start), static_cast<int>(block_end - 1)); | |
| 180 | } | ||
| 181 | 26 | }); | |
| 182 | |||
| 183 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 26 times.
|
26 | for (size_t merge_width = kBlockSize; merge_width < size; merge_width *= 2) { |
| 184 | ✗ | std::vector<int> merged_data(size); | |
| 185 | ✗ | const size_t merge_count = (size + (2 * merge_width) - 1) / (2 * merge_width); | |
| 186 | |||
| 187 | ✗ | RunInThreads(merge_count, [&array, size, merge_width, &merged_data](size_t merge_index) { | |
| 188 | ✗ | const size_t left = merge_index * 2 * merge_width; | |
| 189 | ✗ | const size_t middle = std::min(left + merge_width, size); | |
| 190 | ✗ | const size_t right = std::min(left + (2 * merge_width), size); | |
| 191 | |||
| 192 | ✗ | if (middle < right) { | |
| 193 | ✗ | SimpleMerge(array, merged_data, left, middle, right); | |
| 194 | } else { | ||
| 195 | ✗ | std::copy(array.begin() + static_cast<std::ptrdiff_t>(left), array.begin() + static_cast<std::ptrdiff_t>(right), | |
| 196 | ✗ | merged_data.begin() + static_cast<std::ptrdiff_t>(left)); | |
| 197 | } | ||
| 198 | ✗ | }); | |
| 199 | |||
| 200 | array.swap(merged_data); | ||
| 201 | } | ||
| 202 | } | ||
| 203 | |||
| 204 | 14 | void OlesnitskiyVHoareSortSimpleMergeALL::MergeGatheredChunks(std::vector<int> &array, | |
| 205 | const std::vector<size_t> &chunk_sizes, | ||
| 206 | const std::vector<size_t> &offsets) { | ||
| 207 | 14 | std::vector<int> merged_data(array.size()); | |
| 208 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 14 times.
|
28 | for (size_t rank = 1; rank < chunk_sizes.size(); ++rank) { |
| 209 | const size_t left = 0; | ||
| 210 | 14 | const size_t middle = offsets[rank]; | |
| 211 | 14 | const size_t right = middle + chunk_sizes[rank]; | |
| 212 | 14 | SimpleMerge(array, merged_data, left, middle, right); | |
| 213 | 14 | std::copy(merged_data.begin(), merged_data.begin() + static_cast<std::ptrdiff_t>(right), array.begin()); | |
| 214 | } | ||
| 215 | 14 | } | |
| 216 | |||
| 217 | 30 | bool OlesnitskiyVHoareSortSimpleMergeALL::ValidationImpl() { | |
| 218 | 30 | return !GetInput().empty(); | |
| 219 | } | ||
| 220 | |||
| 221 | 30 | bool OlesnitskiyVHoareSortSimpleMergeALL::PreProcessingImpl() { | |
| 222 | 30 | data_ = GetInput(); | |
| 223 | GetOutput().clear(); | ||
| 224 | 30 | return true; | |
| 225 | } | ||
| 226 | |||
| 227 | 30 | bool OlesnitskiyVHoareSortSimpleMergeALL::RunImpl() { | |
| 228 | 30 | int mpi_rank = 0; | |
| 229 | 30 | int mpi_size = 1; | |
| 230 | 30 | MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); | |
| 231 | 30 | MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); | |
| 232 | |||
| 233 | const size_t total_size = data_.size(); | ||
| 234 |
2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 2 times.
|
30 | if (total_size <= 1) { |
| 235 | return true; | ||
| 236 | } | ||
| 237 | |||
| 238 | 28 | std::vector<size_t> chunk_sizes(static_cast<size_t>(mpi_size)); | |
| 239 |
1/4✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
28 | std::vector<size_t> offsets(static_cast<size_t>(mpi_size)); |
| 240 | 28 | BuildDistribution(total_size, mpi_size, chunk_sizes, offsets); | |
| 241 | |||
| 242 |
1/2✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
|
28 | std::vector<int> send_counts = MakeIntVector(chunk_sizes); |
| 243 |
1/2✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
|
28 | std::vector<int> send_offsets = MakeIntVector(offsets); |
| 244 |
1/4✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
28 | std::vector<int> local_data(chunk_sizes[static_cast<size_t>(mpi_rank)]); |
| 245 | |||
| 246 |
1/2✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
|
28 | MPI_Scatterv(data_.data(), send_counts.data(), send_offsets.data(), MPI_INT, local_data.data(), send_counts[mpi_rank], |
| 247 | MPI_INT, 0, MPI_COMM_WORLD); | ||
| 248 | |||
| 249 |
1/2✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
|
28 | SortLocalStlParallel(local_data); |
| 250 | |||
| 251 | 28 | std::vector<int> gathered_data; | |
| 252 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 14 times.
|
28 | if (mpi_rank == 0) { |
| 253 |
1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
|
14 | gathered_data.resize(total_size); |
| 254 | } | ||
| 255 | |||
| 256 |
1/2✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
|
28 | MPI_Gatherv(local_data.data(), static_cast<int>(local_data.size()), MPI_INT, gathered_data.data(), send_counts.data(), |
| 257 | send_offsets.data(), MPI_INT, 0, MPI_COMM_WORLD); | ||
| 258 | |||
| 259 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 14 times.
|
28 | if (mpi_rank == 0) { |
| 260 |
1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
|
14 | MergeGatheredChunks(gathered_data, chunk_sizes, offsets); |
| 261 | 14 | data_ = std::move(gathered_data); | |
| 262 | } | ||
| 263 | |||
| 264 |
1/2✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
|
28 | BroadcastVector(data_, mpi_rank); |
| 265 | return std::ranges::is_sorted(data_); | ||
| 266 | } | ||
| 267 | |||
| 268 |
1/2✓ Branch 0 taken 30 times.
✗ Branch 1 not taken.
|
30 | bool OlesnitskiyVHoareSortSimpleMergeALL::PostProcessingImpl() { |
| 269 | const bool is_sorted = std::ranges::is_sorted(data_); | ||
| 270 |
1/2✓ Branch 0 taken 30 times.
✗ Branch 1 not taken.
|
30 | if (is_sorted) { |
| 271 | 30 | GetOutput() = data_; | |
| 272 | } | ||
| 273 | 30 | return is_sorted; | |
| 274 | } | ||
| 275 | |||
| 276 | } // namespace olesnitskiy_v_hoare_sort_simple_merge | ||
| 277 |