GCC Code Coverage Report


Directory: ./
File: tasks/sakharov_a_shell_sorting_with_merging_butcher/all/src/ops_all.cpp
Date: 2026-05-11 08:26:31
Exec Total Coverage
Lines: 146 160 91.2%
Functions: 22 24 91.7%
Branches: 101 166 60.8%

Line Branch Exec Source
1 #include "sakharov_a_shell_sorting_with_merging_butcher/all/include/ops_all.hpp"
2
3 #include <mpi.h>
4 #include <omp.h>
5 #include <tbb/blocked_range.h>
6 #include <tbb/parallel_for.h>
7
8 #include <algorithm>
9 #include <cstddef>
10 #include <functional>
11 #include <thread>
12 #include <utility>
13 #include <vector>
14
15 #include "sakharov_a_shell_sorting_with_merging_butcher/common/include/common.hpp"
16 #include "util/include/util.hpp"
17
18 namespace sakharov_a_shell_sorting_with_merging_butcher {
19
20 namespace {
21
22 constexpr std::size_t kMinParallelChunkSize = 1U << 14;
23
24 7 std::vector<std::size_t> BuildChunkBounds(std::size_t size, int requested_chunks) {
25
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 7 times.
7 if (size == 0) {
26 return {0};
27 }
28
29
1/2
✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.
7 const std::size_t max_chunks_by_size = std::max<std::size_t>(1, size / kMinParallelChunkSize);
30
2/4
✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 7 times.
14 const int chunks = std::max(1, std::min<int>(requested_chunks, static_cast<int>(max_chunks_by_size)));
31
32 7 std::vector<std::size_t> bounds;
33
1/2
✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
7 bounds.reserve(static_cast<std::size_t>(chunks) + 1);
34
35 7 const std::size_t base_chunk = size / static_cast<std::size_t>(chunks);
36 7 const std::size_t remainder = size % static_cast<std::size_t>(chunks);
37 const auto chunk_count = static_cast<std::size_t>(chunks);
38
39
1/2
✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
7 bounds.push_back(0);
40
2/2
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
14 for (std::size_t chunk = 0; chunk < chunk_count; ++chunk) {
41
2/4
✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 7 times.
✗ Branch 4 not taken.
14 const std::size_t chunk_size = base_chunk + (chunk < remainder ? 1 : 0);
42
1/4
✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
7 bounds.push_back(bounds.back() + chunk_size);
43 }
44
45 return bounds;
46 }
47
48 5 std::vector<std::size_t> BuildBoundsFromCounts(const std::vector<int> &counts) {
49
1/2
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
5 std::vector<std::size_t> bounds;
50
1/2
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
5 bounds.reserve(counts.size() + 1);
51
1/2
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
5 bounds.push_back(0);
52
3/4
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 10 times.
✓ Branch 4 taken 5 times.
15 for (int count : counts) {
53
1/4
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
10 bounds.push_back(bounds.back() + static_cast<std::size_t>(count));
54 }
55 5 return bounds;
56 }
57
58 5 std::vector<int> BuildMpiChunkCounts(int data_size, int process_count) {
59 5 std::vector<int> counts(static_cast<std::size_t>(process_count), 0);
60 5 const int base = data_size / process_count;
61 5 const int remainder = data_size % process_count;
62
63
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 5 times.
15 for (int rank = 0; rank < process_count; ++rank) {
64
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2 times.
18 counts[static_cast<std::size_t>(rank)] = base + (rank < remainder ? 1 : 0);
65 }
66
67 5 return counts;
68 }
69
70 5 std::vector<int> BuildDisplacements(const std::vector<int> &counts) {
71 5 std::vector<int> displacements(counts.size(), 0);
72
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 for (std::size_t i = 1; i < counts.size(); ++i) {
73 5 displacements[i] = displacements[i - 1] + counts[i - 1];
74 }
75 5 return displacements;
76 }
77
78 7 void SortChunksOpenMP(std::vector<int> &data, const std::vector<std::size_t> &bounds, int thread_count) {
79 7 const int chunk_count = static_cast<int>(bounds.size()) - 1;
80
81 7 #pragma omp parallel for default(none) shared(data, bounds, chunk_count) num_threads(thread_count) schedule(static)
82 for (int chunk = 0; chunk < chunk_count; ++chunk) {
83 const auto index = static_cast<std::size_t>(chunk);
84 auto begin = data.begin() + static_cast<std::ptrdiff_t>(bounds[index]);
85 auto end = data.begin() + static_cast<std::ptrdiff_t>(bounds[index + 1]);
86 std::sort(begin, end);
87 }
88 7 }
89
90
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
5 void MergeRange(const std::vector<int> &source, std::vector<int> &destination, const std::vector<std::size_t> &bounds,
91 std::size_t width, std::size_t merge_index) {
92 5 const std::size_t chunk_count = bounds.size() - 1;
93 5 const std::size_t left_chunk = merge_index * 2 * width;
94
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
5 const std::size_t mid = std::min(left_chunk + width, chunk_count);
95
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
5 const std::size_t right = std::min(left_chunk + (2 * width), chunk_count);
96
97 5 const std::size_t begin_index = bounds[left_chunk];
98 5 const std::size_t middle_index = bounds[mid];
99
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
5 const std::size_t end_index = bounds[right];
100
101 auto output_begin = destination.begin() + static_cast<std::ptrdiff_t>(begin_index);
102
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
5 if (mid == right) {
103 std::copy(source.begin() + static_cast<std::ptrdiff_t>(begin_index),
104 source.begin() + static_cast<std::ptrdiff_t>(end_index), output_begin);
105 } else {
106 5 std::merge(source.begin() + static_cast<std::ptrdiff_t>(begin_index),
107 source.begin() + static_cast<std::ptrdiff_t>(middle_index),
108 source.begin() + static_cast<std::ptrdiff_t>(middle_index),
109 source.begin() + static_cast<std::ptrdiff_t>(end_index), output_begin);
110 }
111 5 }
112
113 void MergePassTBB(const std::vector<int> &source, std::vector<int> &destination, const std::vector<std::size_t> &bounds,
114 std::size_t width) {
115 const std::size_t chunk_count = bounds.size() - 1;
116 const std::size_t merge_count = (chunk_count + (2 * width) - 1) / (2 * width);
117
118 tbb::parallel_for(tbb::blocked_range<std::size_t>(0, merge_count), [&](const tbb::blocked_range<std::size_t> &range) {
119 for (std::size_t merge_index = range.begin(); merge_index != range.end(); ++merge_index) {
120 MergeRange(source, destination, bounds, width, merge_index);
121 }
122 });
123 }
124
125 5 void MergePassThreadWorker(const std::vector<int> &source, std::vector<int> &destination,
126 const std::vector<std::size_t> &bounds, std::size_t width, std::size_t begin_merge,
127 std::size_t end_merge) {
128
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 for (std::size_t merge_index = begin_merge; merge_index < end_merge; ++merge_index) {
129 5 MergeRange(source, destination, bounds, width, merge_index);
130 }
131 5 }
132
133
1/2
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
5 void MergePassSTLThreads(const std::vector<int> &source, std::vector<int> &destination,
134 const std::vector<std::size_t> &bounds, std::size_t width, int requested_threads) {
135 5 const std::size_t chunk_count = bounds.size() - 1;
136
1/2
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
5 const std::size_t merge_count = (chunk_count + (2 * width) - 1) / (2 * width);
137 const auto worker_count =
138
1/2
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
5 std::min<std::size_t>(merge_count, static_cast<std::size_t>(std::max(1, requested_threads)));
139
140 5 std::vector<std::thread> threads;
141
1/2
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
5 threads.reserve(worker_count);
142
143 5 const std::size_t base_chunk = merge_count / worker_count;
144 5 const std::size_t remainder = merge_count % worker_count;
145 5 std::size_t current = 0;
146
147
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 for (std::size_t worker = 0; worker < worker_count; ++worker) {
148
1/2
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
5 const std::size_t count = base_chunk + (worker < remainder ? 1 : 0);
149 5 const std::size_t next = current + count;
150
1/2
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
5 threads.emplace_back(MergePassThreadWorker, std::cref(source), std::ref(destination), std::cref(bounds), width,
151 current, next);
152 5 current = next;
153 }
154
155
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 for (auto &thread : threads) {
156
1/2
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
5 thread.join();
157 }
158 5 }
159
160
1/2
✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.
7 std::vector<int> MergeChunksTBB(std::vector<int> source, const std::vector<std::size_t> &bounds) {
161 7 const std::size_t chunk_count = bounds.size() - 1;
162
1/2
✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.
7 if (chunk_count <= 1) {
163 return source;
164 }
165
166 std::vector<int> destination(source.size());
167 for (std::size_t width = 1; width < chunk_count; width *= 2) {
168 MergePassTBB(source, destination, bounds, width);
169 source.swap(destination);
170 }
171
172 return source;
173 }
174
175
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
5 std::vector<int> MergeChunksSTLThreads(std::vector<int> source, const std::vector<std::size_t> &bounds,
176 int thread_count) {
177 5 const std::size_t chunk_count = bounds.size() - 1;
178
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
5 if (chunk_count <= 1) {
179 return source;
180 }
181
182 5 std::vector<int> destination(source.size());
183
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 for (std::size_t width = 1; width < chunk_count; width *= 2) {
184
1/2
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
5 MergePassSTLThreads(source, destination, bounds, width, thread_count);
185 source.swap(destination);
186 }
187
188 return source;
189 }
190
191
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 7 times.
10 std::vector<int> SortLocalPart(std::vector<int> data) {
192
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 7 times.
10 if (data.empty()) {
193 return data;
194 }
195
196 7 const int thread_count = std::max(1, ppc::util::GetNumThreads());
197 7 const auto bounds = BuildChunkBounds(data.size(), thread_count);
198
199 7 SortChunksOpenMP(data, bounds, thread_count);
200
2/6
✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 7 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
14 return MergeChunksTBB(std::move(data), bounds);
201 }
202
203 struct MpiRootData {
204 int input_size{0};
205 std::vector<int> counts;
206 std::vector<int> displacements;
207 };
208
209 const int *RootBuffer(const std::vector<int> &buffer, int rank) {
210
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 if (rank != 0) {
211 return nullptr;
212 }
213 10 return buffer.data();
214 }
215
216 const int *RootInputBuffer(const std::vector<int> &input, int rank) {
217
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
5 if (rank != 0 || input.empty()) {
218 return nullptr;
219 }
220 return input.data();
221 }
222
223 int *BufferOrNull(std::vector<int> &buffer) {
224
8/8
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 4 times.
✓ Branch 4 taken 3 times.
✓ Branch 5 taken 7 times.
✓ Branch 6 taken 3 times.
✓ Branch 7 taken 7 times.
36 if (buffer.empty()) {
225 14 return nullptr;
226 }
227 return buffer.data();
228 }
229
230 10 MpiRootData BuildRootData(const std::vector<int> &input, int rank, int process_count) {
231 10 MpiRootData root_data;
232
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 if (rank == 0) {
233 5 root_data.input_size = static_cast<int>(input.size());
234
1/2
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
5 root_data.counts = BuildMpiChunkCounts(root_data.input_size, process_count);
235
1/2
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
10 root_data.displacements = BuildDisplacements(root_data.counts);
236 }
237 10 return root_data;
238 }
239
240 10 std::vector<int> ScatterInput(const std::vector<int> &input, const MpiRootData &root_data, int rank) {
241
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 int local_size = 0;
242 10 MPI_Scatter(RootBuffer(root_data.counts, rank), 1, MPI_INT, &local_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
243
244 10 std::vector<int> local_data(static_cast<std::size_t>(local_size));
245
3/4
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 7 times.
✓ Branch 3 taken 10 times.
✗ Branch 4 not taken.
20 MPI_Scatterv(RootInputBuffer(input, rank), RootBuffer(root_data.counts, rank),
246 RootBuffer(root_data.displacements, rank), MPI_INT, BufferOrNull(local_data), local_size, MPI_INT, 0,
247 MPI_COMM_WORLD);
248
249 10 return local_data;
250 }
251
252 10 std::vector<int> GatherSortedData(std::vector<int> &local_data, const MpiRootData &root_data, int rank) {
253 10 std::vector<int> gathered_data;
254
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 if (rank == 0) {
255
1/2
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
5 gathered_data.resize(static_cast<std::size_t>(root_data.input_size));
256 }
257
258
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 const auto local_size = static_cast<int>(local_data.size());
259
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 MPI_Gatherv(BufferOrNull(local_data), local_size, MPI_INT, BufferOrNull(gathered_data),
260 RootBuffer(root_data.counts, rank), RootBuffer(root_data.displacements, rank), MPI_INT, 0,
261 MPI_COMM_WORLD);
262
263 10 return gathered_data;
264 }
265
266 10 std::vector<int> MergeRootData(std::vector<int> gathered_data, const MpiRootData &root_data, int rank) {
267
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 if (rank != 0) {
268 5 return {};
269 }
270
271 5 const auto bounds = BuildBoundsFromCounts(root_data.counts);
272
3/8
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 5 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 5 times.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
10 return MergeChunksSTLThreads(std::move(gathered_data), bounds, std::max(1, ppc::util::GetNumThreads()));
273 }
274
275 10 std::vector<int> BroadcastResult(std::vector<int> result, int rank) {
276 10 int result_size = 0;
277
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 if (rank == 0) {
278 5 result_size = static_cast<int>(result.size());
279 }
280 10 MPI_Bcast(&result_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
281 10 result.resize(static_cast<std::size_t>(result_size));
282
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
12 MPI_Bcast(BufferOrNull(result), result_size, MPI_INT, 0, MPI_COMM_WORLD);
283
284 10 return result;
285 }
286
287 10 std::vector<int> RunMpiSort(const std::vector<int> &input, int rank, int process_count) {
288 10 const auto root_data = BuildRootData(input, rank, process_count);
289
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 auto local_data = ScatterInput(input, root_data, rank);
290
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 local_data = SortLocalPart(std::move(local_data));
291
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 auto gathered_data = GatherSortedData(local_data, root_data, rank);
292
2/4
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 10 times.
✗ Branch 5 not taken.
20 auto result = MergeRootData(std::move(gathered_data), root_data, rank);
293
3/6
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 7 times.
✓ Branch 4 taken 3 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
30 return BroadcastResult(std::move(result), rank);
294 10 }
295
296 } // namespace
297
298
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 SakharovAShellButcherALL::SakharovAShellButcherALL(const InType &in) {
299 SetTypeOfTask(GetStaticTypeOfTask());
300
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 GetInput() = in;
301 10 GetOutput() = OutType();
302 10 }
303
304 10 bool SakharovAShellButcherALL::ValidationImpl() {
305 10 return IsValidInput(GetInput());
306 }
307
308 10 bool SakharovAShellButcherALL::PreProcessingImpl() {
309 10 GetOutput().assign(GetInput().size(), 0);
310 10 return true;
311 }
312
313 10 bool SakharovAShellButcherALL::RunImpl() {
314 10 int rank = 0;
315 10 int process_count = 1;
316 10 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
317 10 MPI_Comm_size(MPI_COMM_WORLD, &process_count);
318
319 10 GetOutput() = RunMpiSort(GetInput(), rank, process_count);
320 10 return true;
321 }
322
323 10 bool SakharovAShellButcherALL::PostProcessingImpl() {
324 10 return true;
325 }
326
327 } // namespace sakharov_a_shell_sorting_with_merging_butcher
328