GCC Code Coverage Report


Directory: ./
File: tasks/gusev_d_double_sort_even_odd_batcher/stl/src/ops_stl.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 129 142 90.8%
Functions: 22 23 95.7%
Branches: 102 128 79.7%

Line Branch Exec Source
1 #include "gusev_d_double_sort_even_odd_batcher/stl/include/ops_stl.hpp"
2
3 #include <algorithm>
4 #include <array>
5 #include <bit>
6 #include <cstddef>
7 #include <cstdint>
8 #include <exception>
9 #include <mutex>
10 #include <thread>
11 #include <utility>
12 #include <vector>
13
14 #include "gusev_d_double_sort_even_odd_batcher/stl/include/common.hpp"
15 #include "util/include/util.hpp"
16
17 namespace gusev_d_double_sort_even_odd_batcher_stl_task_threads {
18 namespace {
19
20 constexpr int kRadixPasses = 8;
21 constexpr int kBitsPerByte = 8;
22 constexpr size_t kRadixBuckets = 256;
23 constexpr uint64_t kBucketMask = 0xFFULL;
24 constexpr size_t kMinParallelElements = 128;
25 constexpr size_t kMinThreadedTasks = 2;
26
27 static_assert((kRadixPasses % 2) == 0, "Radix sort expects the final data to remain in the input buffer");
28
29 using Block = std::vector<ValueType>;
30 using BlockList = std::vector<Block>;
31
32 struct BlockRange {
33 size_t begin = 0;
34 size_t end = 0;
35 };
36
37 31784 struct MergeItem {
38 ValueType value = 0.0;
39 bool is_padding = true;
40 };
41
42 uint64_t DoubleToSortableKey(ValueType value) {
43 const auto bits = std::bit_cast<uint64_t>(value);
44 const auto sign_mask = uint64_t{1} << 63;
45
4/4
✓ Branch 0 taken 89488 times.
✓ Branch 1 taken 67952 times.
✓ Branch 2 taken 89488 times.
✓ Branch 3 taken 67952 times.
314880 return (bits & sign_mask) == 0 ? bits ^ sign_mask : ~bits;
46 }
47
48 size_t GetBucketIndex(ValueType value, int shift) {
49 314880 return static_cast<size_t>((DoubleToSortableKey(value) >> shift) & kBucketMask);
50 }
51
52 void BuildPrefixSums(std::array<size_t, kRadixBuckets> &count) {
53 size_t prefix = 0;
54
2/2
✓ Branch 0 taken 712704 times.
✓ Branch 1 taken 2784 times.
715488 for (auto &value : count) {
55 712704 const auto current = value;
56 712704 value = prefix;
57 712704 prefix += current;
58 }
59 }
60
61
2/2
✓ Branch 0 taken 48 times.
✓ Branch 1 taken 348 times.
396 void RadixSortDoubles(OutType &data) {
62
2/2
✓ Branch 0 taken 48 times.
✓ Branch 1 taken 348 times.
396 if (data.size() < 2) {
63 48 return;
64 }
65
66 348 OutType buffer(data.size());
67 auto *source = &data;
68 auto *destination = &buffer;
69
70
2/2
✓ Branch 0 taken 2784 times.
✓ Branch 1 taken 348 times.
3132 for (int byte = 0; byte < kRadixPasses; ++byte) {
71 2784 std::array<size_t, kRadixBuckets> count{};
72 2784 const auto shift = byte * kBitsPerByte;
73
74
4/4
✓ Branch 0 taken 89488 times.
✓ Branch 1 taken 67952 times.
✓ Branch 2 taken 157440 times.
✓ Branch 3 taken 2784 times.
160224 for (ValueType value : *source) {
75 157440 count.at(GetBucketIndex(value, shift))++;
76 }
77 BuildPrefixSums(count);
78
79
4/4
✓ Branch 0 taken 89488 times.
✓ Branch 1 taken 67952 times.
✓ Branch 2 taken 157440 times.
✓ Branch 3 taken 2784 times.
160224 for (ValueType value : *source) {
80 const auto bucket = GetBucketIndex(value, shift);
81 157440 (*destination)[count.at(bucket)++] = value;
82 }
83
84 std::swap(source, destination);
85 }
86 }
87
88 size_t NextPowerOfTwo(size_t value) {
89 size_t result = 1;
90
2/2
✓ Branch 0 taken 794 times.
✓ Branch 1 taken 244 times.
1038 while (result < value) {
91 794 result <<= 1U;
92 }
93 return result;
94 }
95
96 bool IsGreater(const MergeItem &lhs, const MergeItem &rhs) {
97
2/2
✓ Branch 0 taken 126582 times.
✓ Branch 1 taken 7414 times.
133996 if (lhs.is_padding != rhs.is_padding) {
98 return lhs.is_padding;
99 }
100
4/4
✓ Branch 0 taken 93860 times.
✓ Branch 1 taken 32722 times.
✓ Branch 2 taken 47106 times.
✓ Branch 3 taken 46754 times.
126582 return !lhs.is_padding && lhs.value > rhs.value;
101 }
102
103
2/2
✓ Branch 0 taken 126582 times.
✓ Branch 1 taken 7414 times.
133996 void CompareExchange(std::vector<MergeItem> &data, size_t left, size_t right) {
104
2/2
✓ Branch 0 taken 4640 times.
✓ Branch 1 taken 2774 times.
7414 if (IsGreater(data[left], data[right])) {
105 std::swap(data[left], data[right]);
106 }
107 133996 }
108
109 void CompareExchangeBlocks(std::vector<MergeItem> &data, size_t first, size_t distance) {
110
4/4
✓ Branch 0 taken 15892 times.
✓ Branch 1 taken 244 times.
✓ Branch 2 taken 118104 times.
✓ Branch 3 taken 30502 times.
164742 for (size_t i = 0; i < distance; ++i) {
111 133996 CompareExchange(data, first + i, first + distance + i);
112 }
113 }
114
115
1/2
✓ Branch 0 taken 244 times.
✗ Branch 1 not taken.
244 void OddEvenMerge(std::vector<MergeItem> &data) {
116
1/2
✓ Branch 0 taken 244 times.
✗ Branch 1 not taken.
244 if (data.size() < 2) {
117 return;
118 }
119
120 244 auto distance = data.size() / 2;
121 CompareExchangeBlocks(data, 0, distance);
122
123
2/2
✓ Branch 0 taken 794 times.
✓ Branch 1 taken 244 times.
1038 for (distance /= 2; distance > 0; distance /= 2) {
124 794 const auto step = distance * 2;
125
2/2
✓ Branch 0 taken 30502 times.
✓ Branch 1 taken 794 times.
31296 for (size_t first = distance; (first + distance) < data.size(); first += step) {
126 CompareExchangeBlocks(data, first, distance);
127 }
128 }
129 }
130
131 void CopyBlockToMergeItems(const Block &block, std::vector<MergeItem> &items, size_t offset) {
132
4/4
✓ Branch 0 taken 12390 times.
✓ Branch 1 taken 244 times.
✓ Branch 2 taken 10886 times.
✓ Branch 3 taken 244 times.
23764 for (size_t i = 0; i < block.size(); ++i) {
133 10886 auto &item = items[offset + i];
134 23276 item.value = block[i];
135 23276 item.is_padding = false;
136 }
137 }
138
139 244 Block ExtractMergedValues(const std::vector<MergeItem> &items, size_t result_size) {
140 244 Block result;
141
1/2
✓ Branch 1 taken 244 times.
✗ Branch 2 not taken.
244 result.reserve(result_size);
142
2/2
✓ Branch 0 taken 31784 times.
✓ Branch 1 taken 244 times.
32028 for (const auto &item : items) {
143
2/2
✓ Branch 0 taken 23276 times.
✓ Branch 1 taken 8508 times.
31784 if (!item.is_padding) {
144
1/2
✓ Branch 0 taken 23276 times.
✗ Branch 1 not taken.
23276 result.push_back(item.value);
145 }
146 }
147 244 return result;
148 }
149
150
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 244 times.
244 Block MergeBatcherEvenOdd(const Block &left, const Block &right) {
151
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 244 times.
244 if (left.empty()) {
152 return right;
153 }
154
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 244 times.
244 if (right.empty()) {
155 return left;
156 }
157
158 const auto half_size = NextPowerOfTwo(std::max(left.size(), right.size()));
159 244 std::vector<MergeItem> items(half_size * 2);
160
161 CopyBlockToMergeItems(left, items, 0);
162 CopyBlockToMergeItems(right, items, half_size);
163 244 OddEvenMerge(items);
164
1/2
✓ Branch 1 taken 244 times.
✗ Branch 2 not taken.
244 return ExtractMergedValues(items, left.size() + right.size());
165 }
166
167 BlockRange GetBlockRange(size_t block_index, size_t block_count, size_t total_size) {
168 BlockRange range;
169 396 range.begin = (block_index * total_size) / block_count;
170 396 range.end = ((block_index + 1) * total_size) / block_count;
171 return range;
172 }
173
174 size_t GetBlockCount(size_t input_size, size_t parallelism) {
175
2/2
✓ Branch 0 taken 114 times.
✓ Branch 1 taken 38 times.
152 return std::max<size_t>(1, std::min(input_size, parallelism));
176 }
177
178 152 size_t GetSafeParallelism() {
179 152 const auto requested = static_cast<size_t>(std::max(1, ppc::util::GetNumThreads()));
180 152 const auto available = std::thread::hardware_concurrency();
181
1/2
✓ Branch 0 taken 152 times.
✗ Branch 1 not taken.
152 if (available == 0) {
182 return requested;
183 }
184 152 return std::min(requested, static_cast<size_t>(available));
185 }
186
187 396 void FillAndSortBlock(const InType &input, Block &block, BlockRange range) {
188 396 block.assign(input.begin() + static_cast<std::ptrdiff_t>(range.begin),
189 396 input.begin() + static_cast<std::ptrdiff_t>(range.end));
190 396 RadixSortDoubles(block);
191 396 }
192
193 template <typename Function>
194 void RunSequentialByIndex(size_t work_count, const Function &function) {
195
4/4
✓ Branch 0 taken 152 times.
✓ Branch 1 taken 152 times.
✓ Branch 2 taken 342 times.
✓ Branch 3 taken 134 times.
780 for (size_t index = 0; index < work_count; ++index) {
196 342 function(index);
197 }
198 }
199
200 void StoreCurrentException(std::exception_ptr &worker_exception, std::mutex &exception_mutex) noexcept {
201 const std::scoped_lock lock(exception_mutex);
202 if (worker_exception == nullptr) {
203 worker_exception = std::current_exception();
204 }
205 }
206
207 template <typename Function>
208 292 void RunThreadChunk(size_t thread_index, size_t work_count, size_t thread_count, const Function &function,
209 std::exception_ptr &worker_exception, std::mutex &exception_mutex) noexcept {
210 try {
211
2/2
✓ Branch 0 taken 146 times.
✓ Branch 1 taken 146 times.
584 for (size_t index = thread_index; index < work_count; index += thread_count) {
212
1/2
✓ Branch 1 taken 54 times.
✗ Branch 2 not taken.
108 function(index);
213 }
214 } catch (...) {
215 StoreCurrentException(worker_exception, exception_mutex);
216 }
217 292 }
218
219 64 void JoinWorkersAndRethrow(std::vector<std::thread> &workers, const std::exception_ptr &worker_exception) {
220
2/2
✓ Branch 0 taken 146 times.
✓ Branch 1 taken 64 times.
210 for (auto &worker : workers) {
221 146 worker.join();
222 }
223
224
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 64 times.
64 if (worker_exception != nullptr) {
225 std::rethrow_exception(worker_exception);
226 }
227 64 }
228
229 template <typename Function>
230 700 void RunThreadedByIndex(size_t work_count, size_t max_threads, const Function &function) {
231
4/4
✓ Branch 0 taken 160 times.
✓ Branch 1 taken 190 times.
✓ Branch 2 taken 96 times.
✓ Branch 3 taken 64 times.
700 if (work_count < kMinThreadedTasks || max_threads < kMinThreadedTasks) {
232 RunSequentialByIndex(work_count, function);
233 572 return;
234 }
235
236 128 const auto thread_count = std::min(work_count, max_threads);
237 128 std::vector<std::thread> workers;
238
1/2
✓ Branch 1 taken 64 times.
✗ Branch 2 not taken.
128 workers.reserve(thread_count);
239 std::exception_ptr worker_exception;
240 128 std::mutex exception_mutex;
241 try {
242
2/2
✓ Branch 0 taken 146 times.
✓ Branch 1 taken 64 times.
420 for (size_t thread_index = 0; thread_index < thread_count; ++thread_index) {
243
1/2
✓ Branch 1 taken 146 times.
✗ Branch 2 not taken.
584 workers.emplace_back([&, thread_index] {
244 146 RunThreadChunk(thread_index, work_count, thread_count, function, worker_exception, exception_mutex);
245 });
246 }
247 } catch (...) {
248 StoreCurrentException(worker_exception, exception_mutex);
249 }
250
251
1/2
✓ Branch 1 taken 64 times.
✗ Branch 2 not taken.
128 JoinWorkersAndRethrow(workers, worker_exception);
252 128 }
253
254
2/2
✓ Branch 0 taken 136 times.
✓ Branch 1 taken 16 times.
152 BlockList MakeSortedBlocks(const InType &input, size_t parallelism) {
255 152 const auto block_count = GetBlockCount(input.size(), parallelism);
256 152 const auto total_size = input.size();
257 152 BlockList blocks(block_count);
258
259 396 const auto sort_block = [&](size_t block_index) {
260 396 FillAndSortBlock(input, blocks[block_index], GetBlockRange(block_index, block_count, total_size));
261
2/2
✓ Branch 0 taken 128 times.
✓ Branch 1 taken 24 times.
152 };
262
263
2/2
✓ Branch 0 taken 128 times.
✓ Branch 1 taken 24 times.
152 if (input.size() < kMinParallelElements) {
264
1/2
✓ Branch 1 taken 128 times.
✗ Branch 2 not taken.
128 RunThreadedByIndex(block_count, 1, sort_block);
265 } else {
266
1/2
✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
24 RunThreadedByIndex(block_count, parallelism, sort_block);
267 }
268
269 152 return blocks;
270 }
271
272 244 void MergeBlockPair(const BlockList &blocks, BlockList &next, size_t pair_index) {
273 244 next[pair_index] = MergeBatcherEvenOdd(blocks[pair_index * 2], blocks[(pair_index * 2) + 1]);
274 244 }
275
276 198 BlockList MergeBlockPairs(const BlockList &blocks, size_t parallelism) {
277 198 const auto pair_count = blocks.size() / 2;
278 198 BlockList next((blocks.size() + 1) / 2);
279
280
4/6
✓ Branch 1 taken 92 times.
✗ Branch 2 not taken.
✓ Branch 5 taken 198 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 38 times.
✓ Branch 8 taken 160 times.
442 RunThreadedByIndex(pair_count, parallelism, [&](size_t pair_index) { MergeBlockPair(blocks, next, pair_index); });
281
282
2/2
✓ Branch 0 taken 38 times.
✓ Branch 1 taken 160 times.
198 if ((blocks.size() & 1U) != 0U) {
283
1/2
✓ Branch 1 taken 38 times.
✗ Branch 2 not taken.
38 next.back() = blocks.back();
284 }
285
286 198 return next;
287 }
288
289 152 Block MergeBlocks(BlockList blocks, size_t parallelism) {
290
2/2
✓ Branch 0 taken 198 times.
✓ Branch 1 taken 152 times.
350 while (blocks.size() > 1) {
291 198 blocks = MergeBlockPairs(blocks, parallelism);
292 }
293
294 152 return std::move(blocks.front());
295 }
296
297 } // namespace
298
299
1/2
✓ Branch 1 taken 192 times.
✗ Branch 2 not taken.
192 DoubleSortEvenOddBatcherSTL::DoubleSortEvenOddBatcherSTL(const InType &in) : input_data_(in) {
300 SetTypeOfTask(GetStaticTypeOfTask());
301
1/2
✓ Branch 1 taken 192 times.
✗ Branch 2 not taken.
192 GetInput() = in;
302 GetOutput() = {};
303 192 }
304
305 184 bool DoubleSortEvenOddBatcherSTL::ValidationImpl() {
306 184 return GetOutput().empty();
307 }
308
309 168 bool DoubleSortEvenOddBatcherSTL::PreProcessingImpl() {
310 168 input_data_ = GetInput();
311 result_data_.clear();
312 168 return true;
313 }
314
315
2/2
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 152 times.
168 bool DoubleSortEvenOddBatcherSTL::RunImpl() {
316
2/2
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 152 times.
168 if (input_data_.empty()) {
317 result_data_.clear();
318 16 return true;
319 }
320
321 152 const auto parallelism = GetSafeParallelism();
322 152 auto blocks = MakeSortedBlocks(input_data_, parallelism);
323
1/2
✓ Branch 1 taken 152 times.
✗ Branch 2 not taken.
304 result_data_ = MergeBlocks(std::move(blocks), parallelism);
324 return true;
325 152 }
326
327 160 bool DoubleSortEvenOddBatcherSTL::PostProcessingImpl() {
328 160 GetOutput() = result_data_;
329 160 return true;
330 }
331
332 } // namespace gusev_d_double_sort_even_odd_batcher_stl_task_threads
333