| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "kondakov_v_shell_sort/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <atomic> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <limits> | ||
| 9 | #include <thread> | ||
| 10 | #include <utility> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include "kondakov_v_shell_sort/common/include/common.hpp" | ||
| 14 | #include "util/include/util.hpp" | ||
| 15 | |||
| 16 | namespace kondakov_v_shell_sort { | ||
| 17 | |||
| 18 | namespace { | ||
| 19 | |||
| 20 | size_t CalcPartsCount(size_t data_size, int requested_threads); | ||
| 21 | |||
| 22 | template <class F> | ||
| 23 | 20 | void ParallelForIndex(size_t count, int requested_threads, F body) { | |
| 24 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
|
20 | if (count == 0) { |
| 25 | ✗ | return; | |
| 26 | } | ||
| 27 | |||
| 28 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | const size_t workers_count = std::min(count, static_cast<size_t>(std::max(1, requested_threads))); |
| 29 | 20 | std::atomic_size_t next_idx = 0; | |
| 30 | 20 | std::vector<std::thread> workers; | |
| 31 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | workers.reserve(workers_count); |
| 32 | |||
| 33 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 20 times.
|
60 | for (size_t worker = 0; worker < workers_count; ++worker) { |
| 34 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
80 | workers.emplace_back([&]() { |
| 35 | 40 | while (true) { | |
| 36 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 40 times.
|
80 | const size_t idx = next_idx.fetch_add(1); |
| 37 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 40 times.
|
80 | if (idx >= count) { |
| 38 | break; | ||
| 39 | } | ||
| 40 | 40 | body(idx); | |
| 41 | } | ||
| 42 | }); | ||
| 43 | } | ||
| 44 | |||
| 45 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 20 times.
|
60 | for (auto &worker : workers) { |
| 46 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | worker.join(); |
| 47 | } | ||
| 48 | 20 | } | |
| 49 | |||
| 50 | struct ChunkBounds { | ||
| 51 | size_t begin; | ||
| 52 | size_t end; | ||
| 53 | }; | ||
| 54 | |||
| 55 | 42 | void ShellSort(std::vector<int> &data) { | |
| 56 | const size_t n = data.size(); | ||
| 57 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 42 times.
|
62 | for (size_t gap = n / 2; gap > 0; gap /= 2) { |
| 58 |
2/2✓ Branch 0 taken 23 times.
✓ Branch 1 taken 20 times.
|
43 | for (size_t i = gap; i < n; ++i) { |
| 59 | 23 | int value = data[i]; | |
| 60 | size_t j = i; | ||
| 61 |
4/4✓ Branch 0 taken 26 times.
✓ Branch 1 taken 15 times.
✓ Branch 2 taken 18 times.
✓ Branch 3 taken 8 times.
|
41 | while (j >= gap && data[j - gap] > value) { |
| 62 | 18 | data[j] = data[j - gap]; | |
| 63 | j -= gap; | ||
| 64 | } | ||
| 65 | 23 | data[j] = value; | |
| 66 | } | ||
| 67 | } | ||
| 68 | 42 | } | |
| 69 | |||
| 70 | 31 | void SimpleMerge(const std::vector<int> &left, const std::vector<int> &right, std::vector<int> &result) { | |
| 71 | size_t i = 0; | ||
| 72 | size_t j = 0; | ||
| 73 | size_t k = 0; | ||
| 74 | |||
| 75 |
4/4✓ Branch 0 taken 99 times.
✓ Branch 1 taken 13 times.
✓ Branch 2 taken 18 times.
✓ Branch 3 taken 81 times.
|
112 | while (i < left.size() && j < right.size()) { |
| 76 |
2/2✓ Branch 0 taken 29 times.
✓ Branch 1 taken 52 times.
|
81 | if (left[i] <= right[j]) { |
| 77 | 29 | result[k++] = left[i++]; | |
| 78 | } else { | ||
| 79 | 52 | result[k++] = right[j++]; | |
| 80 | } | ||
| 81 | } | ||
| 82 | |||
| 83 |
2/2✓ Branch 0 taken 26 times.
✓ Branch 1 taken 31 times.
|
57 | while (i < left.size()) { |
| 84 | 26 | result[k++] = left[i++]; | |
| 85 | } | ||
| 86 | |||
| 87 |
2/2✓ Branch 0 taken 21 times.
✓ Branch 1 taken 31 times.
|
52 | while (j < right.size()) { |
| 88 | 21 | result[k++] = right[j++]; | |
| 89 | } | ||
| 90 | 31 | } | |
| 91 | |||
| 92 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 11 times.
|
11 | void MergeManySortedChunks(const std::vector<int> &flat_data, const std::vector<int> &counts, |
| 93 | std::vector<int> &result) { | ||
| 94 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 11 times.
|
11 | if (counts.empty()) { |
| 95 | result.clear(); | ||
| 96 | ✗ | return; | |
| 97 | } | ||
| 98 | |||
| 99 | size_t offset = 0; | ||
| 100 | 11 | std::vector<int> merged; | |
| 101 |
3/4✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 22 times.
✓ Branch 4 taken 11 times.
|
33 | for (int count : counts) { |
| 102 | 22 | const size_t chunk_size = static_cast<size_t>(std::max(0, count)); | |
| 103 | std::vector<int> chunk(flat_data.begin() + static_cast<std::ptrdiff_t>(offset), | ||
| 104 |
3/6✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 11 times.
✓ Branch 4 taken 11 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
|
22 | flat_data.begin() + static_cast<std::ptrdiff_t>(offset + chunk_size)); |
| 105 | |||
| 106 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 11 times.
|
22 | if (merged.empty()) { |
| 107 | merged = std::move(chunk); | ||
| 108 | } else { | ||
| 109 |
1/4✓ Branch 1 taken 11 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
11 | std::vector<int> tmp(merged.size() + chunk.size()); |
| 110 | 11 | SimpleMerge(merged, chunk, tmp); | |
| 111 | merged = std::move(tmp); | ||
| 112 | } | ||
| 113 | |||
| 114 | offset += chunk_size; | ||
| 115 | } | ||
| 116 | |||
| 117 | result = std::move(merged); | ||
| 118 | } | ||
| 119 | |||
| 120 | 20 | std::vector<ChunkBounds> BuildBalancedChunks(size_t data_size, size_t chunks_count) { | |
| 121 | 20 | std::vector<ChunkBounds> bounds(chunks_count); | |
| 122 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 20 times.
|
60 | for (size_t i = 0; i < chunks_count; ++i) { |
| 123 | 40 | bounds[i] = ChunkBounds{.begin = (i * data_size) / chunks_count, .end = ((i + 1) * data_size) / chunks_count}; | |
| 124 | } | ||
| 125 | 20 | return bounds; | |
| 126 | } | ||
| 127 | |||
| 128 |
1/2✓ Branch 0 taken 22 times.
✗ Branch 1 not taken.
|
22 | void SortLocalWithStlThreads(std::vector<int> &local_data, int requested_threads) { |
| 129 | const size_t parts = CalcPartsCount(local_data.size(), requested_threads); | ||
| 130 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 20 times.
|
22 | if (parts <= 1) { |
| 131 | 2 | ShellSort(local_data); | |
| 132 | 2 | return; | |
| 133 | } | ||
| 134 | |||
| 135 | 20 | const std::vector<ChunkBounds> bounds = BuildBalancedChunks(local_data.size(), parts); | |
| 136 |
1/4✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
20 | std::vector<std::vector<int>> runs(parts); |
| 137 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | ParallelForIndex(parts, requested_threads, [&](size_t part) { |
| 138 | 40 | runs[part] = std::vector<int>(local_data.begin() + static_cast<std::ptrdiff_t>(bounds[part].begin), | |
| 139 | 40 | local_data.begin() + static_cast<std::ptrdiff_t>(bounds[part].end)); | |
| 140 | 40 | ShellSort(runs[part]); | |
| 141 | 40 | }); | |
| 142 | |||
| 143 | std::vector<int> merged = std::move(runs.front()); | ||
| 144 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 20 times.
|
40 | for (size_t i = 1; i < runs.size(); ++i) { |
| 145 |
1/4✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
|
20 | std::vector<int> tmp(merged.size() + runs[i].size()); |
| 146 | 20 | SimpleMerge(merged, runs[i], tmp); | |
| 147 | merged = std::move(tmp); | ||
| 148 | } | ||
| 149 | local_data = std::move(merged); | ||
| 150 | 20 | } | |
| 151 | |||
| 152 | bool IsMpiSizeRepresentable(size_t size) { | ||
| 153 | return size <= static_cast<size_t>(std::numeric_limits<int>::max()); | ||
| 154 | } | ||
| 155 | |||
| 156 | size_t CalcPartsCount(size_t data_size, int requested_threads) { | ||
| 157 |
1/2✓ Branch 0 taken 22 times.
✗ Branch 1 not taken.
|
22 | if (data_size == 0) { |
| 158 | return 0; | ||
| 159 | } | ||
| 160 | const int threads = std::max(1, requested_threads); | ||
| 161 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 20 times.
|
22 | return std::min(static_cast<size_t>(threads), data_size); |
| 162 | } | ||
| 163 | |||
| 164 | } // namespace | ||
| 165 | |||
| 166 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | KondakovVShellSortALL::KondakovVShellSortALL(const InType &in) { |
| 167 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 168 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | GetInput() = in; |
| 169 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | GetOutput() = in; |
| 170 | 24 | } | |
| 171 | |||
| 172 | 24 | bool KondakovVShellSortALL::ValidationImpl() { | |
| 173 | 24 | return !GetInput().empty(); | |
| 174 | } | ||
| 175 | |||
| 176 | 24 | bool KondakovVShellSortALL::PreProcessingImpl() { | |
| 177 | 24 | GetOutput() = GetInput(); | |
| 178 | 24 | return true; | |
| 179 | } | ||
| 180 | |||
| 181 |
2/2✓ Branch 0 taken 22 times.
✓ Branch 1 taken 2 times.
|
24 | bool KondakovVShellSortALL::RunImpl() { |
| 182 | std::vector<int> &data = GetOutput(); | ||
| 183 |
2/2✓ Branch 0 taken 22 times.
✓ Branch 1 taken 2 times.
|
24 | if (data.size() <= 1) { |
| 184 | return true; | ||
| 185 | } | ||
| 186 | |||
| 187 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 22 times.
|
22 | if (!IsMpiSizeRepresentable(data.size())) { |
| 188 | return false; | ||
| 189 | } | ||
| 190 | |||
| 191 | 22 | int rank = 0; | |
| 192 | 22 | int world_size = 1; | |
| 193 | 22 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 194 | 22 | MPI_Comm_size(MPI_COMM_WORLD, &world_size); | |
| 195 | |||
| 196 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 11 times.
|
22 | int global_size = (rank == 0) ? static_cast<int>(data.size()) : 0; |
| 197 | 22 | MPI_Bcast(&global_size, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 198 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 22 times.
|
22 | if (global_size <= 0) { |
| 199 | return false; | ||
| 200 | } | ||
| 201 | |||
| 202 | 22 | std::vector<int> global_data; | |
| 203 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 11 times.
|
22 | if (rank == 0) { |
| 204 |
1/2✓ Branch 1 taken 11 times.
✗ Branch 2 not taken.
|
11 | global_data = data; |
| 205 | } else { | ||
| 206 |
1/2✓ Branch 1 taken 11 times.
✗ Branch 2 not taken.
|
11 | global_data.resize(static_cast<size_t>(global_size)); |
| 207 | } | ||
| 208 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | MPI_Bcast(global_data.data(), global_size, MPI_INT, 0, MPI_COMM_WORLD); |
| 209 | |||
| 210 |
1/4✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
22 | std::vector<int> counts(static_cast<size_t>(world_size), 0); |
| 211 |
1/4✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
22 | std::vector<int> displs(static_cast<size_t>(world_size), 0); |
| 212 |
2/2✓ Branch 0 taken 44 times.
✓ Branch 1 taken 22 times.
|
66 | for (int proc = 0; proc < world_size; ++proc) { |
| 213 | 44 | const int begin = (proc * global_size) / world_size; | |
| 214 | 44 | const int end = ((proc + 1) * global_size) / world_size; | |
| 215 | 44 | counts[static_cast<size_t>(proc)] = end - begin; | |
| 216 | 44 | displs[static_cast<size_t>(proc)] = begin; | |
| 217 | } | ||
| 218 | |||
| 219 |
1/4✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
22 | std::vector<int> local_data(static_cast<size_t>(counts[static_cast<size_t>(rank)])); |
| 220 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | MPI_Scatterv(global_data.data(), counts.data(), displs.data(), MPI_INT, local_data.data(), |
| 221 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | counts[static_cast<size_t>(rank)], MPI_INT, 0, MPI_COMM_WORLD); |
| 222 | |||
| 223 |
2/4✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 22 times.
✗ Branch 5 not taken.
|
22 | SortLocalWithStlThreads(local_data, ppc::util::GetNumThreads()); |
| 224 | |||
| 225 | 22 | std::vector<int> gathered; | |
| 226 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 11 times.
|
22 | if (rank == 0) { |
| 227 |
1/2✓ Branch 1 taken 11 times.
✗ Branch 2 not taken.
|
11 | gathered.resize(static_cast<size_t>(global_size)); |
| 228 | } | ||
| 229 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | MPI_Gatherv(local_data.data(), counts[static_cast<size_t>(rank)], MPI_INT, gathered.data(), counts.data(), |
| 230 | displs.data(), MPI_INT, 0, MPI_COMM_WORLD); | ||
| 231 | |||
| 232 | 22 | std::vector<int> sorted_result; | |
| 233 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 11 times.
|
22 | if (rank == 0) { |
| 234 |
1/2✓ Branch 1 taken 11 times.
✗ Branch 2 not taken.
|
11 | MergeManySortedChunks(gathered, counts, sorted_result); |
| 235 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 11 times.
|
11 | if (sorted_result.size() != static_cast<size_t>(global_size)) { |
| 236 | return false; | ||
| 237 | } | ||
| 238 | } else { | ||
| 239 |
1/2✓ Branch 1 taken 11 times.
✗ Branch 2 not taken.
|
11 | sorted_result.resize(static_cast<size_t>(global_size)); |
| 240 | } | ||
| 241 | |||
| 242 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | MPI_Bcast(sorted_result.data(), global_size, MPI_INT, 0, MPI_COMM_WORLD); |
| 243 | data = std::move(sorted_result); | ||
| 244 | |||
| 245 | return std::ranges::is_sorted(data); | ||
| 246 | } | ||
| 247 | |||
| 248 | 24 | bool KondakovVShellSortALL::PostProcessingImpl() { | |
| 249 | 24 | return !GetOutput().empty(); | |
| 250 | } | ||
| 251 | |||
| 252 | } // namespace kondakov_v_shell_sort | ||
| 253 |