GCC Code Coverage Report


Directory: ./
File: tasks/gusev_d_double_sort_even_odd_batcher/tbb/src/ops_tbb.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 141 143 98.6%
Functions: 23 23 100.0%
Branches: 108 132 81.8%

Line Branch Exec Source
1 #include "gusev_d_double_sort_even_odd_batcher/tbb/include/ops_tbb.hpp"
2
3 #include <tbb/blocked_range.h>
4 #include <tbb/global_control.h>
5 #include <tbb/parallel_for.h>
6 #include <tbb/parallel_invoke.h>
7 #include <tbb/task_arena.h>
8
9 #include <algorithm>
10 #include <array>
11 #include <bit>
12 #include <cstddef>
13 #include <cstdint>
14 #include <iterator>
15 #include <utility>
16 #include <vector>
17
18 #include "gusev_d_double_sort_even_odd_batcher/tbb/include/common.hpp"
19 #include "util/include/util.hpp"
20
21 namespace gusev_d_double_sort_even_odd_batcher_tbb_task_threads {
22 namespace {
23
24 constexpr int kRadixPasses = 8;
25 constexpr int kBitsPerByte = 8;
26 constexpr size_t kRadixBuckets = 256;
27 constexpr uint64_t kBucketMask = 0xFFULL;
28 constexpr size_t kMinParallelElements = 128;
29 constexpr size_t kMinParallelPairs = 32;
30
31 static_assert((kRadixPasses % 2) == 0, "Radix sort expects the final data to remain in the input buffer");
32
33 using Block = std::vector<ValueType>;
34 using BlockList = std::vector<Block>;
35
36 struct BlockRange {
37 size_t begin = 0;
38 size_t end = 0;
39 };
40
41 uint64_t DoubleToSortableKey(ValueType value) {
42 const auto bits = std::bit_cast<uint64_t>(value);
43 const auto sign_mask = uint64_t{1} << 63;
44
4/4
✓ Branch 0 taken 94504 times.
✓ Branch 1 taken 81912 times.
✓ Branch 2 taken 94504 times.
✓ Branch 3 taken 81912 times.
352832 return (bits & sign_mask) == 0 ? bits ^ sign_mask : ~bits;
45 }
46
47 size_t GetBucketIndex(ValueType value, int shift) {
48 352832 return static_cast<size_t>((DoubleToSortableKey(value) >> shift) & kBucketMask);
49 }
50
51 void BuildPrefixSums(std::array<size_t, kRadixBuckets> &count) {
52 size_t prefix = 0;
53
2/2
✓ Branch 0 taken 331776 times.
✓ Branch 1 taken 1296 times.
333072 for (auto &value : count) {
54 331776 const auto current = value;
55 331776 value = prefix;
56 331776 prefix += current;
57 }
58 }
59
60
2/2
✓ Branch 0 taken 284 times.
✓ Branch 1 taken 162 times.
446 void RadixSortDoubles(OutType &data) {
61
2/2
✓ Branch 0 taken 284 times.
✓ Branch 1 taken 162 times.
446 if (data.size() < 2) {
62 284 return;
63 }
64
65 162 OutType buffer(data.size());
66 auto *src = &data;
67 auto *dst = &buffer;
68
69
2/2
✓ Branch 0 taken 1296 times.
✓ Branch 1 taken 162 times.
1458 for (int byte = 0; byte < kRadixPasses; ++byte) {
70 1296 std::array<size_t, kRadixBuckets> count{};
71 1296 const auto shift = byte * kBitsPerByte;
72
73
4/4
✓ Branch 0 taken 94504 times.
✓ Branch 1 taken 81912 times.
✓ Branch 2 taken 176416 times.
✓ Branch 3 taken 1296 times.
177712 for (ValueType value : *src) {
74 176416 count.at(GetBucketIndex(value, shift))++;
75 }
76 BuildPrefixSums(count);
77
78
4/4
✓ Branch 0 taken 94504 times.
✓ Branch 1 taken 81912 times.
✓ Branch 2 taken 176416 times.
✓ Branch 3 taken 1296 times.
177712 for (ValueType value : *src) {
79 const auto bucket = GetBucketIndex(value, shift);
80 176416 (*dst)[count.at(bucket)++] = value;
81 }
82
83 std::swap(src, dst);
84 }
85 }
86
87
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 740 times.
740 void SplitByGlobalParity(const OutType &source, size_t global_offset, OutType &even_values, OutType &odd_values) {
88 even_values.clear();
89 odd_values.clear();
90 740 even_values.reserve((source.size() + 1) / 2);
91 740 odd_values.reserve(source.size() / 2);
92
93
2/2
✓ Branch 0 taken 27677 times.
✓ Branch 1 taken 740 times.
28417 for (size_t i = 0; i < source.size(); ++i) {
94
2/2
✓ Branch 0 taken 13869 times.
✓ Branch 1 taken 13808 times.
27677 if (((global_offset + i) & 1U) == 0U) {
95 even_values.push_back(source[i]);
96 } else {
97 odd_values.push_back(source[i]);
98 }
99 }
100 740 }
101
102 370 OutType InterleaveParityGroups(size_t total_size, const OutType &even_values, const OutType &odd_values) {
103 370 OutType result(total_size);
104
4/6
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 352 times.
✓ Branch 2 taken 18 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 18 times.
370 if (total_size < kMinParallelElements || even_values.empty() || odd_values.empty()) {
105
2/2
✓ Branch 0 taken 1382 times.
✓ Branch 1 taken 352 times.
1734 for (size_t even_index = 0; even_index < even_values.size(); ++even_index) {
106 1382 result[even_index * 2] = even_values[even_index];
107 }
108
2/2
✓ Branch 0 taken 1330 times.
✓ Branch 1 taken 352 times.
1682 for (size_t odd_index = 0; odd_index < odd_values.size(); ++odd_index) {
109 1330 result[(odd_index * 2) + 1] = odd_values[odd_index];
110 }
111 return result;
112 }
113
114 36 tbb::parallel_invoke([&]() {
115
2/2
✓ Branch 0 taken 12487 times.
✓ Branch 1 taken 18 times.
12505 for (size_t even_index = 0; even_index < even_values.size(); ++even_index) {
116 12487 result[even_index * 2] = even_values[even_index];
117 }
118
1/4
✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
18 }, [&]() {
119
2/2
✓ Branch 0 taken 12478 times.
✓ Branch 1 taken 18 times.
12496 for (size_t odd_index = 0; odd_index < odd_values.size(); ++odd_index) {
120 12478 result[(odd_index * 2) + 1] = odd_values[odd_index];
121 }
122 });
123
124 18 return result;
125 }
126
127
2/2
✓ Branch 0 taken 2190 times.
✓ Branch 1 taken 25353 times.
27543 void RunOddEvenPhase(OutType &data, size_t start) {
128 27543 const auto pair_count = (data.size() - start) / 2;
129
2/2
✓ Branch 0 taken 2190 times.
✓ Branch 1 taken 25353 times.
27543 if (pair_count < kMinParallelPairs) {
130
2/2
✓ Branch 0 taken 17088 times.
✓ Branch 1 taken 2190 times.
19278 for (size_t pair_index = 0; pair_index < pair_count; ++pair_index) {
131
2/2
✓ Branch 0 taken 508 times.
✓ Branch 1 taken 16580 times.
17088 const auto left_index = start + (pair_index * 2);
132
2/2
✓ Branch 0 taken 508 times.
✓ Branch 1 taken 16580 times.
17088 if (data[left_index] > data[left_index + 1]) {
133 std::swap(data[left_index], data[left_index + 1]);
134 }
135 }
136 return;
137 }
138
139 7136866 tbb::parallel_for(tbb::blocked_range<size_t>(0, pair_count), [&](const tbb::blocked_range<size_t> &range) {
140
2/2
✓ Branch 0 taken 35176682 times.
✓ Branch 1 taken 7111513 times.
42288195 for (size_t pair_index = range.begin(); pair_index != range.end(); ++pair_index) {
141 35176682 const auto left_index = start + (pair_index * 2);
142
2/2
✓ Branch 0 taken 5891 times.
✓ Branch 1 taken 35170791 times.
35176682 if (data[left_index] > data[left_index + 1]) {
143 std::swap(data[left_index], data[left_index + 1]);
144 }
145 }
146 7111513 });
147 }
148
149 370 void OddEvenFinalize(OutType &data) {
150
2/2
✓ Branch 0 taken 27677 times.
✓ Branch 1 taken 370 times.
28047 for (size_t phase = 0; phase < data.size(); ++phase) {
151 27677 const auto start = phase & 1U;
152
2/2
✓ Branch 0 taken 27543 times.
✓ Branch 1 taken 134 times.
27677 if (data.size() > start + 1) {
153 27543 RunOddEvenPhase(data, start);
154 }
155 }
156 370 }
157
158
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 370 times.
370 void MergeParityGroups(const OutType &left_even, const OutType &right_even, const OutType &left_odd,
159 const OutType &right_odd, OutType &merged_even, OutType &merged_odd) {
160 merged_even.clear();
161 merged_odd.clear();
162 370 merged_even.reserve(left_even.size() + right_even.size());
163 370 merged_odd.reserve(left_odd.size() + right_odd.size());
164
165
2/2
✓ Branch 0 taken 352 times.
✓ Branch 1 taken 18 times.
370 if ((merged_even.capacity() + merged_odd.capacity()) < kMinParallelElements) {
166 352 std::ranges::merge(left_even, right_even, std::back_inserter(merged_even));
167 352 std::ranges::merge(left_odd, right_odd, std::back_inserter(merged_odd));
168 352 return;
169 }
170
171 18 tbb::parallel_invoke([&]() { std::ranges::merge(left_even, right_even, std::back_inserter(merged_even)); },
172 36 [&]() { std::ranges::merge(left_odd, right_odd, std::back_inserter(merged_odd)); });
173 }
174
175
2/2
✓ Branch 0 taken 352 times.
✓ Branch 1 taken 18 times.
370 void SplitBlocksByParity(const OutType &left, const OutType &right, OutType &left_even, OutType &left_odd,
176 OutType &right_even, OutType &right_odd) {
177
2/2
✓ Branch 0 taken 352 times.
✓ Branch 1 taken 18 times.
370 if ((left.size() + right.size()) < kMinParallelElements) {
178 352 SplitByGlobalParity(left, 0, left_even, left_odd);
179 352 SplitByGlobalParity(right, left.size(), right_even, right_odd);
180 352 return;
181 }
182
183 18 tbb::parallel_invoke([&]() { SplitByGlobalParity(left, 0, left_even, left_odd); },
184 36 [&]() { SplitByGlobalParity(right, left.size(), right_even, right_odd); });
185 }
186
187 370 OutType MergeBatcherEvenOdd(const OutType &left, const OutType &right) {
188 370 OutType left_even;
189 370 OutType left_odd;
190 370 OutType right_even;
191 370 OutType right_odd;
192
193
1/2
✓ Branch 1 taken 370 times.
✗ Branch 2 not taken.
370 SplitBlocksByParity(left, right, left_even, left_odd, right_even, right_odd);
194
195 370 OutType merged_even;
196 370 OutType merged_odd;
197
1/2
✓ Branch 1 taken 370 times.
✗ Branch 2 not taken.
370 MergeParityGroups(left_even, right_even, left_odd, right_odd, merged_even, merged_odd);
198
199
1/2
✓ Branch 1 taken 370 times.
✗ Branch 2 not taken.
370 auto result = InterleaveParityGroups(left.size() + right.size(), merged_even, merged_odd);
200
1/2
✓ Branch 1 taken 370 times.
✗ Branch 2 not taken.
370 OddEvenFinalize(result);
201 370 return result;
202 }
203
204 BlockRange GetBlockRange(size_t block_index, size_t block_count, size_t total_size) {
205 return {
206 446 .begin = (block_index * total_size) / block_count,
207 446 .end = ((block_index + 1) * total_size) / block_count,
208 };
209 }
210
211 size_t GetBlockCount(size_t input_size, size_t parallelism) {
212
2/2
✓ Branch 0 taken 57 times.
✓ Branch 1 taken 19 times.
76 return std::max<size_t>(1, std::min(input_size, parallelism));
213 }
214
215 446 void FillAndSortBlock(const InType &input, Block &block, BlockRange range) {
216 446 block.assign(input.begin() + static_cast<std::ptrdiff_t>(range.begin),
217 446 input.begin() + static_cast<std::ptrdiff_t>(range.end));
218 446 RadixSortDoubles(block);
219 446 }
220
221
2/2
✓ Branch 0 taken 64 times.
✓ Branch 1 taken 12 times.
76 BlockList MakeSortedBlocks(const InType &input, size_t parallelism) {
222 76 const auto block_count = GetBlockCount(input.size(), parallelism);
223 76 const auto total_size = input.size();
224
225 76 BlockList blocks(block_count);
226
4/4
✓ Branch 0 taken 19 times.
✓ Branch 1 taken 57 times.
✓ Branch 2 taken 48 times.
✓ Branch 3 taken 9 times.
76 if (block_count == 1 || input.size() < kMinParallelElements) {
227
2/2
✓ Branch 0 taken 419 times.
✓ Branch 1 taken 67 times.
486 for (size_t block = 0; block < block_count; ++block) {
228
1/2
✓ Branch 1 taken 419 times.
✗ Branch 2 not taken.
419 FillAndSortBlock(input, blocks[block], GetBlockRange(block, block_count, total_size));
229 }
230 return blocks;
231 }
232
233
1/2
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
36 tbb::parallel_for(tbb::blocked_range<size_t>(0, block_count), [&](const tbb::blocked_range<size_t> &range) {
234
2/2
✓ Branch 0 taken 27 times.
✓ Branch 1 taken 27 times.
54 for (size_t block = range.begin(); block != range.end(); ++block) {
235 27 FillAndSortBlock(input, blocks[block], GetBlockRange(block, block_count, total_size));
236 }
237 27 });
238
239 9 return blocks;
240 }
241
242 370 void MergeBlockPair(const BlockList &blocks, BlockList &next, size_t pair_index) {
243 370 next[pair_index] = MergeBatcherEvenOdd(blocks[pair_index * 2], blocks[(pair_index * 2) + 1]);
244 370 }
245
246 123 BlockList MergeBlockPairs(const BlockList &blocks) {
247 123 const auto pair_count = blocks.size() / 2;
248 123 BlockList next((blocks.size() + 1) / 2);
249
2/2
✓ Branch 0 taken 119 times.
✓ Branch 1 taken 4 times.
123 if (pair_count < kMinParallelPairs) {
250
2/2
✓ Branch 0 taken 242 times.
✓ Branch 1 taken 119 times.
361 for (size_t pair = 0; pair < pair_count; ++pair) {
251
1/2
✓ Branch 1 taken 242 times.
✗ Branch 2 not taken.
242 MergeBlockPair(blocks, next, pair);
252 }
253
2/2
✓ Branch 0 taken 47 times.
✓ Branch 1 taken 72 times.
119 if ((blocks.size() & 1U) != 0U) {
254
1/2
✓ Branch 1 taken 47 times.
✗ Branch 2 not taken.
47 next.back() = blocks.back();
255 }
256 return next;
257 }
258
259
2/4
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 4 times.
✗ Branch 4 not taken.
4 tbb::parallel_for(tbb::blocked_range<size_t>(0, pair_count), [&](const tbb::blocked_range<size_t> &range) {
260
4/4
✓ Branch 0 taken 64 times.
✓ Branch 1 taken 64 times.
✓ Branch 2 taken 64 times.
✓ Branch 3 taken 64 times.
256 for (size_t pair = range.begin(); pair != range.end(); ++pair) {
261
1/2
✓ Branch 2 taken 64 times.
✗ Branch 3 not taken.
128 MergeBlockPair(blocks, next, pair);
262 }
263 });
264
265
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if ((blocks.size() & 1U) != 0U) {
266
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 next.back() = blocks.back();
267 }
268
269 return next;
270 }
271
272 76 Block MergeBlocks(BlockList blocks) {
273
2/2
✓ Branch 0 taken 123 times.
✓ Branch 1 taken 76 times.
199 while (blocks.size() > 1) {
274 123 blocks = MergeBlockPairs(blocks);
275 }
276
277 76 return std::move(blocks.front());
278 }
279
280 } // namespace
281
282
1/2
✓ Branch 1 taken 96 times.
✗ Branch 2 not taken.
96 DoubleSortEvenOddBatcherTBB::DoubleSortEvenOddBatcherTBB(const InType &in) {
283 SetTypeOfTask(GetStaticTypeOfTask());
284
1/2
✓ Branch 1 taken 96 times.
✗ Branch 2 not taken.
96 GetInput() = in;
285 GetOutput() = {};
286 96 }
287
288 92 bool DoubleSortEvenOddBatcherTBB::ValidationImpl() {
289 92 return GetOutput().empty();
290 }
291
292 84 bool DoubleSortEvenOddBatcherTBB::PreProcessingImpl() {
293 84 input_data_ = GetInput();
294 result_data_.clear();
295 84 return true;
296 }
297
298
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 76 times.
84 bool DoubleSortEvenOddBatcherTBB::RunImpl() {
299
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 76 times.
84 if (input_data_.empty()) {
300 result_data_.clear();
301 8 return true;
302 }
303
304 76 const auto parallelism = static_cast<size_t>(std::max(1, ppc::util::GetNumThreads()));
305 76 const tbb::global_control control(tbb::global_control::max_allowed_parallelism, parallelism);
306
307
1/2
✓ Branch 1 taken 76 times.
✗ Branch 2 not taken.
76 auto blocks = MakeSortedBlocks(input_data_, parallelism);
308
1/2
✓ Branch 1 taken 76 times.
✗ Branch 2 not taken.
152 result_data_ = MergeBlocks(std::move(blocks));
309 return true;
310 76 }
311
312 80 bool DoubleSortEvenOddBatcherTBB::PostProcessingImpl() {
313 80 GetOutput() = result_data_;
314 80 return true;
315 }
316
317 } // namespace gusev_d_double_sort_even_odd_batcher_tbb_task_threads
318