GCC Code Coverage Report


Directory: ./
File: tasks/gusev_d_double_sort_even_odd_batcher/all/src/ops_all.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 203 219 92.7%
Functions: 30 31 96.8%
Branches: 164 230 71.3%

Line Branch Exec Source
1 #include "gusev_d_double_sort_even_odd_batcher/all/include/ops_all.hpp"
2
3 #include <mpi.h>
4 #include <omp.h>
5
6 #include <algorithm>
7 #include <array>
8 #include <bit>
9 #include <cstddef>
10 #include <cstdint>
11 #include <exception>
12 #include <limits>
13 #include <mutex>
14 #include <stdexcept>
15 #include <thread>
16 #include <utility>
17 #include <vector>
18
19 #include "gusev_d_double_sort_even_odd_batcher/all/include/common.hpp"
20 #include "oneapi/tbb/blocked_range.h"
21 #include "oneapi/tbb/global_control.h"
22 #include "oneapi/tbb/parallel_for.h"
23 #include "util/include/util.hpp"
24
25 namespace gusev_d_double_sort_even_odd_batcher_all_task_threads {
26 namespace {
27
28 constexpr int kRadixPasses = 8;
29 constexpr int kBitsPerByte = 8;
30 constexpr size_t kRadixBuckets = 256;
31 constexpr uint64_t kBucketMask = 0xFFULL;
32 constexpr size_t kMinParallelElements = 128;
33 constexpr size_t kMinThreadedTasks = 2;
34 constexpr size_t kMinTbbComparePairs = 512;
35
36 static_assert((kRadixPasses % 2) == 0, "Radix sort expects the final data to remain in the input buffer");
37
38 using Block = std::vector<ValueType>;
39 using BlockList = std::vector<Block>;
40
41 struct BlockRange {
42 size_t begin = 0;
43 size_t end = 0;
44 };
45
46 8128 struct MergeItem {
47 ValueType value = 0.0;
48 bool is_padding = true;
49 };
50
51 struct MpiContext {
52 bool active = false;
53 int rank = 0;
54 int size = 1;
55 };
56
57 uint64_t DoubleToSortableKey(ValueType value) {
58 const auto bits = std::bit_cast<uint64_t>(value);
59 const auto sign_mask = uint64_t{1} << 63;
60
4/4
✓ Branch 0 taken 10736 times.
✓ Branch 1 taken 8272 times.
✓ Branch 2 taken 10736 times.
✓ Branch 3 taken 8272 times.
38016 return (bits & sign_mask) == 0 ? bits ^ sign_mask : ~bits;
61 }
62
63 size_t GetBucketIndex(ValueType value, int shift) {
64 38016 return static_cast<size_t>((DoubleToSortableKey(value) >> shift) & kBucketMask);
65 }
66
67 void BuildPrefixSums(std::array<size_t, kRadixBuckets> &count) {
68 size_t prefix = 0;
69
2/2
✓ Branch 0 taken 133120 times.
✓ Branch 1 taken 520 times.
133640 for (auto &value : count) {
70 133120 const auto current = value;
71 133120 value = prefix;
72 133120 prefix += current;
73 }
74 }
75
76
2/2
✓ Branch 0 taken 90 times.
✓ Branch 1 taken 65 times.
155 void RadixSortDoubles(OutType &data) {
77
2/2
✓ Branch 0 taken 90 times.
✓ Branch 1 taken 65 times.
155 if (data.size() < 2) {
78 90 return;
79 }
80
81 65 OutType buffer(data.size());
82 auto *source = &data;
83 auto *destination = &buffer;
84
85
2/2
✓ Branch 0 taken 520 times.
✓ Branch 1 taken 65 times.
585 for (int byte = 0; byte < kRadixPasses; ++byte) {
86 520 std::array<size_t, kRadixBuckets> count{};
87 520 const auto shift = byte * kBitsPerByte;
88
89
4/4
✓ Branch 0 taken 10736 times.
✓ Branch 1 taken 8272 times.
✓ Branch 2 taken 19008 times.
✓ Branch 3 taken 520 times.
19528 for (ValueType value : *source) {
90 19008 count.at(GetBucketIndex(value, shift))++;
91 }
92 BuildPrefixSums(count);
93
94
6/6
✓ Branch 0 taken 10736 times.
✓ Branch 1 taken 8272 times.
✓ Branch 2 taken 10736 times.
✓ Branch 3 taken 8272 times.
✓ Branch 4 taken 19008 times.
✓ Branch 5 taken 520 times.
19528 for (ValueType value : *source) {
95 const auto bucket = GetBucketIndex(value, shift);
96 19008 (*destination)[count.at(bucket)++] = value;
97 }
98
99 std::swap(source, destination);
100 }
101 }
102
103 size_t NextPowerOfTwo(size_t value) {
104 size_t result = 1;
105
2/2
✓ Branch 0 taken 286 times.
✓ Branch 1 taken 136 times.
422 while (result < value) {
106 286 result <<= 1U;
107 }
108 return result;
109 }
110
111 bool IsGreater(const MergeItem &lhs, const MergeItem &rhs) {
112
2/2
✓ Branch 0 taken 28999 times.
✓ Branch 1 taken 797 times.
29796 if (lhs.is_padding != rhs.is_padding) {
113 return lhs.is_padding;
114 }
115
4/4
✓ Branch 0 taken 27261 times.
✓ Branch 1 taken 1738 times.
✓ Branch 2 taken 13725 times.
✓ Branch 3 taken 13536 times.
28999 return !lhs.is_padding && lhs.value > rhs.value;
116 }
117
118
2/2
✓ Branch 0 taken 28999 times.
✓ Branch 1 taken 797 times.
29796 void CompareExchange(std::vector<MergeItem> &data, size_t left, size_t right) {
119
2/2
✓ Branch 0 taken 520 times.
✓ Branch 1 taken 277 times.
797 if (IsGreater(data[left], data[right])) {
120 std::swap(data[left], data[right]);
121 }
122 29796 }
123
124 7706 void CompareExchangeBlocks(std::vector<MergeItem> &data, size_t first, size_t distance) {
125
2/2
✓ Branch 0 taken 7704 times.
✓ Branch 1 taken 2 times.
7706 if (distance < kMinTbbComparePairs) {
126
2/2
✓ Branch 0 taken 28772 times.
✓ Branch 1 taken 7704 times.
36476 for (size_t i = 0; i < distance; ++i) {
127 28772 CompareExchange(data, first + i, first + distance + i);
128 }
129 return;
130 }
131
132 272 tbb::parallel_for(tbb::blocked_range<size_t>(0, distance), [&](const tbb::blocked_range<size_t> &range) {
133
2/2
✓ Branch 0 taken 1024 times.
✓ Branch 1 taken 270 times.
1294 for (size_t i = range.begin(); i != range.end(); ++i) {
134 1024 CompareExchange(data, first + i, first + distance + i);
135 }
136 270 });
137 }
138
139
1/2
✓ Branch 0 taken 136 times.
✗ Branch 1 not taken.
136 void OddEvenMerge(std::vector<MergeItem> &data) {
140
1/2
✓ Branch 0 taken 136 times.
✗ Branch 1 not taken.
136 if (data.size() < 2) {
141 return;
142 }
143
144 136 auto distance = data.size() / 2;
145 136 CompareExchangeBlocks(data, 0, distance);
146
147
2/2
✓ Branch 0 taken 286 times.
✓ Branch 1 taken 136 times.
422 for (distance /= 2; distance > 0; distance /= 2) {
148 286 const auto step = distance * 2;
149
2/2
✓ Branch 0 taken 7570 times.
✓ Branch 1 taken 286 times.
7856 for (size_t first = distance; (first + distance) < data.size(); first += step) {
150 7570 CompareExchangeBlocks(data, first, distance);
151 }
152 }
153 }
154
155 void CopyBlockToMergeItems(const Block &block, std::vector<MergeItem> &items, size_t offset) {
156
4/4
✓ Branch 0 taken 3729 times.
✓ Branch 1 taken 136 times.
✓ Branch 2 taken 3713 times.
✓ Branch 3 taken 136 times.
7714 for (size_t i = 0; i < block.size(); ++i) {
157 7442 items[offset + i] = {.value = block[i], .is_padding = false};
158 }
159 }
160
161 136 Block ExtractMergedValues(const std::vector<MergeItem> &items, size_t result_size) {
162 136 Block result;
163
1/2
✓ Branch 1 taken 136 times.
✗ Branch 2 not taken.
136 result.reserve(result_size);
164
2/2
✓ Branch 0 taken 8128 times.
✓ Branch 1 taken 136 times.
8264 for (const auto &item : items) {
165
2/2
✓ Branch 0 taken 7442 times.
✓ Branch 1 taken 686 times.
8128 if (!item.is_padding) {
166
1/2
✓ Branch 0 taken 7442 times.
✗ Branch 1 not taken.
7442 result.push_back(item.value);
167 }
168 }
169 136 return result;
170 }
171
172
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 137 times.
139 Block MergeBatcherEvenOdd(const Block &left, const Block &right) {
173
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 137 times.
139 if (left.empty()) {
174 2 return right;
175 }
176
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 136 times.
137 if (right.empty()) {
177 1 return left;
178 }
179
180 const auto half_size = NextPowerOfTwo(std::max(left.size(), right.size()));
181 136 std::vector<MergeItem> items(half_size * 2);
182
183 CopyBlockToMergeItems(left, items, 0);
184 CopyBlockToMergeItems(right, items, half_size);
185
1/2
✓ Branch 1 taken 136 times.
✗ Branch 2 not taken.
136 OddEvenMerge(items);
186
1/2
✓ Branch 1 taken 136 times.
✗ Branch 2 not taken.
136 return ExtractMergedValues(items, left.size() + right.size());
187 }
188
189 BlockRange GetBlockRange(size_t block_index, size_t block_count, size_t total_size) {
190 return {
191 131 .begin = (block_index * total_size) / block_count,
192
1/2
✓ Branch 1 taken 131 times.
✗ Branch 2 not taken.
131 .end = ((block_index + 1) * total_size) / block_count,
193 };
194 }
195
196 size_t GetBlockCount(size_t input_size, size_t parallelism) {
197
2/2
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 19 times.
37 const auto target_blocks = std::max<size_t>(1, parallelism * 2);
198
2/2
✓ Branch 0 taken 35 times.
✓ Branch 1 taken 2 times.
37 return std::max<size_t>(1, std::min(input_size, target_blocks));
199 }
200
201 155 void FillAndSortBlock(const InType &input, Block &block, BlockRange range) {
202 155 block.assign(input.begin() + static_cast<std::ptrdiff_t>(range.begin),
203 155 input.begin() + static_cast<std::ptrdiff_t>(range.end));
204 155 RadixSortDoubles(block);
205 155 }
206
207 void StoreCurrentException(std::exception_ptr &worker_exception, std::mutex &exception_mutex) noexcept {
208 const std::scoped_lock lock(exception_mutex);
209 if (worker_exception == nullptr) {
210 worker_exception = std::current_exception();
211 }
212 }
213
214 37 BlockList MakeSortedBlocks(const InType &input, size_t parallelism) {
215
2/2
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 19 times.
37 const auto block_count = GetBlockCount(input.size(), parallelism);
216 const auto total_size = input.size();
217 37 BlockList blocks(block_count);
218
219
4/4
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 35 times.
✓ Branch 2 taken 29 times.
✓ Branch 3 taken 6 times.
37 if (block_count == 1 || input.size() < kMinParallelElements) {
220
2/2
✓ Branch 0 taken 131 times.
✓ Branch 1 taken 31 times.
162 for (size_t block = 0; block < block_count; ++block) {
221
1/2
✓ Branch 1 taken 131 times.
✗ Branch 2 not taken.
131 FillAndSortBlock(input, blocks[block], GetBlockRange(block, block_count, total_size));
222 }
223 return blocks;
224 }
225
226
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 const auto signed_block_count = static_cast<std::ptrdiff_t>(block_count);
227 const auto omp_threads = static_cast<int>(std::min<size_t>(parallelism, block_count));
228 static_cast<void>(omp_threads);
229
230 std::exception_ptr worker_exception;
231 6 std::mutex exception_mutex;
232
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 #pragma omp parallel for schedule(static) num_threads(omp_threads) default(none) \
233 shared(input, blocks, block_count, total_size, signed_block_count, worker_exception, exception_mutex)
234 for (std::ptrdiff_t block = 0; block < signed_block_count; ++block) {
235 try {
236 const auto block_index = static_cast<size_t>(block);
237 FillAndSortBlock(input, blocks[block_index], GetBlockRange(block_index, block_count, total_size));
238 } catch (...) {
239 StoreCurrentException(worker_exception, exception_mutex);
240 }
241 }
242
243
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 if (worker_exception != nullptr) {
244 std::rethrow_exception(worker_exception);
245 }
246
247 return blocks;
248 }
249
250 template <typename Function>
251 void RunSequentialByIndex(size_t work_count, const Function &function) {
252
2/2
✓ Branch 0 taken 61 times.
✓ Branch 1 taken 61 times.
122 for (size_t index = 0; index < work_count; ++index) {
253 function(index);
254 }
255 }
256
257 template <typename Function>
258 78 void RunThreadChunk(size_t thread_index, size_t work_count, size_t thread_count, const Function &function,
259 std::exception_ptr &worker_exception, std::mutex &exception_mutex) noexcept {
260 try {
261
2/2
✓ Branch 0 taken 78 times.
✓ Branch 1 taken 78 times.
156 for (size_t index = thread_index; index < work_count; index += thread_count) {
262 function(index);
263 }
264 } catch (...) {
265 StoreCurrentException(worker_exception, exception_mutex);
266 }
267 78 }
268
269 34 void JoinWorkersAndRethrow(std::vector<std::thread> &workers, const std::exception_ptr &worker_exception) {
270
2/2
✓ Branch 0 taken 78 times.
✓ Branch 1 taken 34 times.
112 for (auto &worker : workers) {
271 78 worker.join();
272 }
273
274
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 34 times.
34 if (worker_exception != nullptr) {
275 std::rethrow_exception(worker_exception);
276 }
277 34 }
278
279 template <typename Function>
280 95 void RunThreadedByIndex(size_t work_count, size_t max_threads, const Function &function) {
281
3/4
✓ Branch 0 taken 34 times.
✓ Branch 1 taken 61 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 34 times.
95 if (work_count < kMinThreadedTasks || max_threads < kMinThreadedTasks) {
282 RunSequentialByIndex(work_count, function);
283 61 return;
284 }
285
286 34 const auto thread_count = std::min(work_count, max_threads);
287 34 std::vector<std::thread> workers;
288
1/2
✓ Branch 1 taken 34 times.
✗ Branch 2 not taken.
34 workers.reserve(thread_count);
289 std::exception_ptr worker_exception;
290 34 std::mutex exception_mutex;
291
2/2
✓ Branch 0 taken 78 times.
✓ Branch 1 taken 34 times.
112 for (size_t thread_index = 0; thread_index < thread_count; ++thread_index) {
292
1/4
✓ Branch 1 taken 78 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
78 workers.emplace_back([&, thread_index] {
293 78 RunThreadChunk(thread_index, work_count, thread_count, function, worker_exception, exception_mutex);
294 });
295 }
296
297
1/2
✓ Branch 1 taken 34 times.
✗ Branch 2 not taken.
34 JoinWorkersAndRethrow(workers, worker_exception);
298 34 }
299
300 139 void MergeBlockPair(const BlockList &blocks, BlockList &next, size_t pair_index) {
301 139 next[pair_index] = MergeBatcherEvenOdd(blocks[pair_index * 2], blocks[(pair_index * 2) + 1]);
302 139 }
303
304 95 BlockList MergeBlockPairs(const BlockList &blocks, size_t parallelism) {
305 95 const auto pair_count = blocks.size() / 2;
306 95 BlockList next((blocks.size() + 1) / 2);
307
308
4/6
✓ Branch 1 taken 78 times.
✗ Branch 2 not taken.
✓ Branch 5 taken 95 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 9 times.
✓ Branch 8 taken 86 times.
234 RunThreadedByIndex(pair_count, parallelism, [&](size_t pair_index) { MergeBlockPair(blocks, next, pair_index); });
309
310
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 86 times.
95 if ((blocks.size() & 1U) != 0U) {
311
1/2
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
9 next.back() = blocks.back();
312 }
313
314 95 return next;
315 }
316
317 58 Block MergeBlocks(BlockList blocks, size_t parallelism) {
318
2/2
✓ Branch 0 taken 95 times.
✓ Branch 1 taken 58 times.
153 while (blocks.size() > 1) {
319 95 blocks = MergeBlockPairs(blocks, parallelism);
320 }
321
322 58 return std::move(blocks.front());
323 }
324
325
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 37 times.
42 Block SortLocal(const InType &input, size_t parallelism) {
326
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 37 times.
42 if (input.empty()) {
327 5 return {};
328 }
329
330 37 auto blocks = MakeSortedBlocks(input, parallelism);
331
1/2
✓ Branch 1 taken 37 times.
✗ Branch 2 not taken.
37 return MergeBlocks(std::move(blocks), parallelism);
332 37 }
333
334 147 int ToMpiCount(size_t size) {
335
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 147 times.
147 if (size > static_cast<size_t>(std::numeric_limits<int>::max())) {
336 throw std::runtime_error("Input is too large for MPI int counts");
337 }
338 147 return static_cast<int>(size);
339 }
340
341 42 MpiContext GetMpiContext() {
342 42 int initialized = 0;
343 42 MPI_Initialized(&initialized);
344
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 42 times.
42 if (initialized == 0) {
345 return {};
346 }
347
348 42 int finalized = 0;
349 42 MPI_Finalized(&finalized);
350
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 42 times.
42 if (finalized != 0) {
351 return {};
352 }
353
354 42 MpiContext context{.active = true};
355 42 MPI_Comm_rank(MPI_COMM_WORLD, &context.rank);
356 42 MPI_Comm_size(MPI_COMM_WORLD, &context.size);
357 42 return context;
358 }
359
360 42 std::vector<int> BuildCounts(size_t total_size, int parts) {
361 42 std::vector<int> counts(static_cast<size_t>(parts));
362 42 const auto base = total_size / static_cast<size_t>(parts);
363 42 const auto remainder = total_size % static_cast<size_t>(parts);
364
365
2/2
✓ Branch 0 taken 84 times.
✓ Branch 1 taken 42 times.
126 for (int part = 0; part < parts; ++part) {
366 84 const auto part_size = base + (std::cmp_less(part, remainder) ? 1 : 0);
367
1/2
✓ Branch 1 taken 84 times.
✗ Branch 2 not taken.
84 counts[static_cast<size_t>(part)] = ToMpiCount(part_size);
368 }
369
370 42 return counts;
371 }
372
373 63 std::vector<int> BuildDisplacements(const std::vector<int> &counts) {
374 63 std::vector<int> displacements(counts.size());
375
2/2
✓ Branch 0 taken 63 times.
✓ Branch 1 taken 63 times.
126 for (size_t i = 1; i < counts.size(); ++i) {
376 63 displacements[i] = displacements[i - 1] + counts[i - 1];
377 }
378 63 return displacements;
379 }
380
381 42 Block ScatterInput(const InType &input, MpiContext context) {
382
2/4
✓ Branch 0 taken 42 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 42 times.
42 if (!context.active || context.size == 1) {
383 return input;
384 }
385
386
2/2
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 21 times.
42 uint64_t total_size_wire = context.rank == 0 ? static_cast<uint64_t>(input.size()) : 0;
387 42 MPI_Bcast(&total_size_wire, 1, MPI_UINT64_T, 0, MPI_COMM_WORLD);
388 42 const auto total_size = static_cast<size_t>(total_size_wire);
389
390 42 const auto counts = BuildCounts(total_size, context.size);
391
1/2
✓ Branch 1 taken 42 times.
✗ Branch 2 not taken.
42 const auto displacements = BuildDisplacements(counts);
392
1/4
✓ Branch 1 taken 42 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
42 Block local_data(static_cast<size_t>(counts[static_cast<size_t>(context.rank)]));
393
394
3/4
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 21 times.
✓ Branch 3 taken 42 times.
✗ Branch 4 not taken.
63 MPI_Scatterv(context.rank == 0 ? input.data() : nullptr, counts.data(), displacements.data(), MPI_DOUBLE,
395 local_data.data(), counts[static_cast<size_t>(context.rank)], MPI_DOUBLE, 0, MPI_COMM_WORLD);
396 return local_data;
397 }
398
399 21 BlockList SplitGatheredBlocks(const Block &gathered_data, const std::vector<int> &counts) {
400
1/2
✓ Branch 1 taken 21 times.
✗ Branch 2 not taken.
21 BlockList blocks;
401
1/2
✓ Branch 1 taken 21 times.
✗ Branch 2 not taken.
21 blocks.reserve(counts.size());
402
403 size_t offset = 0;
404
2/2
✓ Branch 0 taken 42 times.
✓ Branch 1 taken 21 times.
63 for (int count : counts) {
405 42 const auto block_size = static_cast<size_t>(count);
406
1/2
✓ Branch 1 taken 42 times.
✗ Branch 2 not taken.
42 blocks.emplace_back(gathered_data.begin() + static_cast<std::ptrdiff_t>(offset),
407
1/2
✓ Branch 1 taken 42 times.
✗ Branch 2 not taken.
42 gathered_data.begin() + static_cast<std::ptrdiff_t>(offset + block_size));
408 offset += block_size;
409 }
410
411 21 return blocks;
412 }
413
414 42 Block BroadcastResult(Block result, MpiContext context) {
415
2/2
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 21 times.
42 int result_size = context.rank == 0 ? ToMpiCount(result.size()) : 0;
416 42 MPI_Bcast(&result_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
417 42 result.resize(static_cast<size_t>(result_size));
418 42 MPI_Bcast(result.data(), result_size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
419 42 return result;
420 }
421
422 42 Block GatherAndMerge(const Block &local_sorted, MpiContext context, size_t parallelism) {
423
2/4
✓ Branch 0 taken 42 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 42 times.
42 if (!context.active || context.size == 1) {
424 return local_sorted;
425 }
426
427 42 const auto local_count = ToMpiCount(local_sorted.size());
428
1/2
✓ Branch 2 taken 42 times.
✗ Branch 3 not taken.
42 std::vector<int> counts(static_cast<size_t>(context.size));
429
1/2
✓ Branch 1 taken 42 times.
✗ Branch 2 not taken.
42 MPI_Gather(&local_count, 1, MPI_INT, counts.data(), 1, MPI_INT, 0, MPI_COMM_WORLD);
430
431 42 Block gathered_data;
432 42 std::vector<int> displacements;
433
2/2
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 21 times.
42 if (context.rank == 0) {
434
2/6
✓ Branch 1 taken 21 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 21 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
42 displacements = BuildDisplacements(counts);
435 21 const auto total_count = displacements.back() + counts.back();
436
1/2
✓ Branch 1 taken 21 times.
✗ Branch 2 not taken.
21 gathered_data.resize(static_cast<size_t>(total_count));
437 }
438
439
5/6
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 21 times.
✓ Branch 2 taken 21 times.
✓ Branch 3 taken 21 times.
✓ Branch 5 taken 42 times.
✗ Branch 6 not taken.
84 MPI_Gatherv(local_sorted.data(), local_count, MPI_DOUBLE, context.rank == 0 ? gathered_data.data() : nullptr,
440 counts.data(), context.rank == 0 ? displacements.data() : nullptr, MPI_DOUBLE, 0, MPI_COMM_WORLD);
441
442 42 Block result;
443
2/2
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 21 times.
42 if (context.rank == 0) {
444
2/4
✓ Branch 1 taken 21 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 21 times.
✗ Branch 5 not taken.
42 result = MergeBlocks(SplitGatheredBlocks(gathered_data, counts), parallelism);
445 }
446
447
3/6
✓ Branch 1 taken 42 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 21 times.
✓ Branch 4 taken 21 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
84 return BroadcastResult(std::move(result), context);
448 }
449
450 } // namespace
451
452
1/2
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
48 DoubleSortEvenOddBatcherALL::DoubleSortEvenOddBatcherALL(const InType &in) : input_data_(in) {
453 SetTypeOfTask(GetStaticTypeOfTask());
454
1/2
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
48 GetInput() = in;
455 GetOutput() = {};
456 48 }
457
458 46 bool DoubleSortEvenOddBatcherALL::ValidationImpl() {
459 46 return GetOutput().empty();
460 }
461
462 42 bool DoubleSortEvenOddBatcherALL::PreProcessingImpl() {
463 42 input_data_ = GetInput();
464 result_data_.clear();
465 42 return true;
466 }
467
468 42 bool DoubleSortEvenOddBatcherALL::RunImpl() {
469 42 const auto parallelism = static_cast<size_t>(std::max(1, ppc::util::GetNumThreads()));
470 42 const tbb::global_control control(tbb::global_control::max_allowed_parallelism, parallelism);
471
1/2
✓ Branch 1 taken 42 times.
✗ Branch 2 not taken.
42 const auto mpi_context = GetMpiContext();
472
473
1/2
✓ Branch 1 taken 42 times.
✗ Branch 2 not taken.
42 const auto local_data = ScatterInput(input_data_, mpi_context);
474
1/2
✓ Branch 1 taken 42 times.
✗ Branch 2 not taken.
42 const auto local_sorted = SortLocal(local_data, parallelism);
475
3/6
✓ Branch 1 taken 42 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 37 times.
✓ Branch 5 taken 5 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
84 result_data_ = GatherAndMerge(local_sorted, mpi_context, parallelism);
476 42 return true;
477 }
478
479 40 bool DoubleSortEvenOddBatcherALL::PostProcessingImpl() {
480 40 GetOutput() = result_data_;
481 40 return true;
482 }
483
484 } // namespace gusev_d_double_sort_even_odd_batcher_all_task_threads
485