GCC Code Coverage Report


Directory: ./
File: tasks/gusev_d_double_sort_even_odd_batcher_omp/omp/src/ops_omp.cpp
Date: 2026-04-02 17:12:27
Exec Total Coverage
Lines: 120 130 92.3%
Functions: 22 23 95.7%
Branches: 65 88 73.9%

Line Branch Exec Source
1 #include "gusev_d_double_sort_even_odd_batcher_omp/omp/include/ops_omp.hpp"
2
3 #include <omp.h>
4
5 #include <algorithm>
6 #include <array>
7 #include <atomic>
8 #include <bit>
9 #include <cstddef>
10 #include <cstdint>
11 #include <exception>
12 #include <iterator>
13 #include <utility>
14 #include <vector>
15
16 #include "gusev_d_double_sort_even_odd_batcher_omp/common/include/common.hpp"
17
18 namespace gusev_d_double_sort_even_odd_batcher_omp_task_threads {
19 namespace {
20
21 constexpr int kRadixPasses = 8;
22 constexpr int kBitsPerByte = 8;
23 constexpr size_t kRadixBuckets = 256;
24 constexpr uint64_t kBucketMask = 0xFFULL;
25
26 static_assert((kRadixPasses % 2) == 0, "Radix sort expects the final data to remain in the input buffer");
27
28 using Block = std::vector<ValueType>;
29 using BlockList = std::vector<Block>;
30
31 struct BlockRange {
32 size_t begin = 0;
33 size_t end = 0;
34 };
35
36 uint64_t DoubleToSortableKey(ValueType value) {
37 const auto bits = std::bit_cast<uint64_t>(value);
38 const auto sign_mask = uint64_t{1} << 63;
39
4/4
✓ Branch 0 taken 94216 times.
✓ Branch 1 taken 81464 times.
✓ Branch 2 taken 94216 times.
✓ Branch 3 taken 81464 times.
351360 return (bits & sign_mask) == 0 ? bits ^ sign_mask : ~bits;
40 }
41
42 size_t GetBucketIndex(ValueType value, int shift) {
43 351360 return static_cast<size_t>((DoubleToSortableKey(value) >> shift) & kBucketMask);
44 }
45
46 void BuildPrefixSums(std::array<size_t, kRadixBuckets> &count) {
47 size_t prefix = 0;
48
2/2
✓ Branch 0 taken 311296 times.
✓ Branch 1 taken 1216 times.
312512 for (auto &value : count) {
49 311296 const auto current = value;
50 311296 value = prefix;
51 311296 prefix += current;
52 }
53 }
54
55
2/2
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 152 times.
176 void RadixSortDoubles(Block &data) {
56
2/2
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 152 times.
176 if (data.size() < 2) {
57 24 return;
58 }
59
60 152 Block buffer(data.size());
61 auto *src = &data;
62 auto *dst = &buffer;
63
64
2/2
✓ Branch 0 taken 1216 times.
✓ Branch 1 taken 152 times.
1368 for (int byte = 0; byte < kRadixPasses; ++byte) {
65 1216 std::array<size_t, kRadixBuckets> count{};
66 1216 const auto shift = byte * kBitsPerByte;
67
68
4/4
✓ Branch 0 taken 94216 times.
✓ Branch 1 taken 81464 times.
✓ Branch 2 taken 175680 times.
✓ Branch 3 taken 1216 times.
176896 for (ValueType value : *src) {
69 175680 count.at(GetBucketIndex(value, shift))++;
70 }
71 BuildPrefixSums(count);
72
73
4/4
✓ Branch 0 taken 94216 times.
✓ Branch 1 taken 81464 times.
✓ Branch 2 taken 175680 times.
✓ Branch 3 taken 1216 times.
176896 for (ValueType value : *src) {
74 const auto bucket = GetBucketIndex(value, shift);
75 175680 (*dst)[count.at(bucket)++] = value;
76 }
77
78 std::swap(src, dst);
79 }
80 }
81
82
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 216 times.
216 void SplitByGlobalParity(const Block &source, size_t global_offset, Block &even, Block &odd) {
83 even.clear();
84 odd.clear();
85 216 even.reserve((source.size() + 1) / 2);
86 216 odd.reserve(source.size() / 2);
87
88
2/2
✓ Branch 0 taken 25774 times.
✓ Branch 1 taken 216 times.
25990 for (size_t i = 0; i < source.size(); ++i) {
89
2/2
✓ Branch 0 taken 12913 times.
✓ Branch 1 taken 12861 times.
25774 if (((global_offset + i) & 1U) == 0U) {
90 even.push_back(source[i]);
91 } else {
92 odd.push_back(source[i]);
93 }
94 }
95 216 }
96
97 108 Block InterleaveParityGroups(size_t total_size, const Block &even, const Block &odd) {
98 108 Block result(total_size);
99 size_t even_index = 0;
100 size_t odd_index = 0;
101
102
2/2
✓ Branch 0 taken 25774 times.
✓ Branch 1 taken 108 times.
25882 for (size_t i = 0; i < total_size; ++i) {
103
2/2
✓ Branch 0 taken 12913 times.
✓ Branch 1 taken 12861 times.
25774 if ((i & 1U) == 0U) {
104 12913 result[i] = even[even_index++];
105 } else {
106 12861 result[i] = odd[odd_index++];
107 }
108 }
109
110 108 return result;
111 }
112
113 108 void OddEvenFinalize(Block &result) {
114
2/2
✓ Branch 0 taken 25774 times.
✓ Branch 1 taken 108 times.
25882 for (size_t phase = 0; phase < result.size(); ++phase) {
115 25774 const auto start = phase & 1U;
116
2/2
✓ Branch 0 taken 35169105 times.
✓ Branch 1 taken 25774 times.
35194879 for (size_t i = start; i + 1 < result.size(); i += 2) {
117
2/2
✓ Branch 0 taken 6016 times.
✓ Branch 1 taken 35163089 times.
35169105 if (result[i] > result[i + 1]) {
118 std::swap(result[i], result[i + 1]);
119 }
120 }
121 }
122 108 }
123
124 108 void SplitBlocksByParity(const Block &left, const Block &right, Block &left_even, Block &left_odd, Block &right_even,
125 Block &right_odd) {
126 108 SplitByGlobalParity(left, 0, left_even, left_odd);
127 108 SplitByGlobalParity(right, left.size(), right_even, right_odd);
128 108 }
129
130
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 108 times.
108 void MergeParityGroups(const Block &left_even, const Block &right_even, const Block &left_odd, const Block &right_odd,
131 Block &merged_even, Block &merged_odd) {
132 merged_even.clear();
133 merged_odd.clear();
134 108 merged_even.reserve(left_even.size() + right_even.size());
135 108 merged_odd.reserve(left_odd.size() + right_odd.size());
136
137 108 std::ranges::merge(left_even, right_even, std::back_inserter(merged_even));
138 108 std::ranges::merge(left_odd, right_odd, std::back_inserter(merged_odd));
139 108 }
140
141 108 Block MergeBatcherEvenOdd(const Block &left, const Block &right) {
142 108 Block left_even;
143 108 Block left_odd;
144 108 Block right_even;
145 108 Block right_odd;
146
147
1/2
✓ Branch 1 taken 108 times.
✗ Branch 2 not taken.
108 SplitBlocksByParity(left, right, left_even, left_odd, right_even, right_odd);
148
149 108 Block merged_even;
150 108 Block merged_odd;
151
1/2
✓ Branch 1 taken 108 times.
✗ Branch 2 not taken.
108 MergeParityGroups(left_even, right_even, left_odd, right_odd, merged_even, merged_odd);
152
153
1/2
✓ Branch 1 taken 108 times.
✗ Branch 2 not taken.
108 auto result = InterleaveParityGroups(left.size() + right.size(), merged_even, merged_odd);
154 108 OddEvenFinalize(result);
155 108 return result;
156 }
157
158 BlockRange GetBlockRange(size_t block_index, size_t block_count, size_t total_size) {
159 return {
160 176 .begin = (block_index * total_size) / block_count,
161 176 .end = ((block_index + 1) * total_size) / block_count,
162 };
163 }
164
165 68 size_t GetBlockCount(size_t input_size) {
166
2/2
✓ Branch 1 taken 60 times.
✓ Branch 2 taken 8 times.
68 const auto omp_threads = static_cast<size_t>(std::max(1, omp_get_max_threads()));
167
2/2
✓ Branch 0 taken 50 times.
✓ Branch 1 taken 18 times.
68 return std::max<size_t>(1, std::min(input_size, omp_threads));
168 }
169
170 176 void FillAndSortBlock(const std::vector<ValueType> &input, Block &block, BlockRange range) {
171 176 block.assign(input.begin() + static_cast<std::ptrdiff_t>(range.begin),
172 176 input.begin() + static_cast<std::ptrdiff_t>(range.end));
173 176 RadixSortDoubles(block);
174 176 }
175
176 void StoreFirstException(std::atomic_bool &has_exception, std::exception_ptr &exception) {
177 #pragma omp critical
178 if (!has_exception.exchange(true)) {
179 exception = std::current_exception();
180 }
181 }
182
183 template <class Func>
184
1/2
✓ Branch 0 taken 284 times.
✗ Branch 1 not taken.
568 void ExecuteWithExceptionCapture(std::atomic_bool &has_exception, std::exception_ptr &exception, Func &&func) {
185
1/2
✓ Branch 0 taken 284 times.
✗ Branch 1 not taken.
568 if (has_exception.load()) {
186 return;
187 }
188
189 try {
190
1/2
✓ Branch 1 taken 176 times.
✗ Branch 2 not taken.
352 std::forward<Func>(func)();
191 } catch (...) {
192 StoreFirstException(has_exception, exception);
193 }
194 }
195
196
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 158 times.
158 void RethrowIfCaptured(const std::exception_ptr &exception) {
197
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 158 times.
158 if (exception != nullptr) {
198 std::rethrow_exception(exception);
199 }
200 158 }
201
202 68 BlockList MakeSortedBlocks(const std::vector<ValueType> &input) {
203 68 const auto block_count = GetBlockCount(input.size());
204 const auto total_size = input.size();
205 68 const auto signed_block_count = static_cast<std::ptrdiff_t>(block_count);
206
207
1/2
✓ Branch 2 taken 68 times.
✗ Branch 3 not taken.
68 BlockList blocks(block_count);
208 std::exception_ptr exception;
209 68 std::atomic_bool has_exception{false};
210
211 68 #pragma omp parallel for default(none) shared(input, blocks, exception, has_exception) \
212 firstprivate(block_count, signed_block_count, total_size) schedule(static) if (block_count > 1)
213 for (std::ptrdiff_t block = 0; block < signed_block_count; ++block) {
214 176 ExecuteWithExceptionCapture(has_exception, exception, [&]() {
215 176 const auto index = static_cast<size_t>(block);
216 176 FillAndSortBlock(input, blocks[index], GetBlockRange(index, block_count, total_size));
217 176 });
218 }
219
220
1/2
✓ Branch 1 taken 68 times.
✗ Branch 2 not taken.
68 RethrowIfCaptured(exception);
221
222 68 return blocks;
223 }
224
225 108 void MergeBlockPair(const BlockList &blocks, BlockList &next, size_t pair_index) {
226 108 next[pair_index] = MergeBatcherEvenOdd(blocks[pair_index * 2], blocks[(pair_index * 2) + 1]);
227 108 }
228
229 90 BlockList MergeBlockPairs(const BlockList &blocks) {
230 90 const auto signed_pair_count = static_cast<std::ptrdiff_t>(blocks.size() / 2);
231
1/2
✓ Branch 2 taken 90 times.
✗ Branch 3 not taken.
90 BlockList next((blocks.size() + 1) / 2);
232 std::exception_ptr exception;
233 90 std::atomic_bool has_exception{false};
234
235 90 #pragma omp parallel for default(none) shared(blocks, next, exception, has_exception) firstprivate(signed_pair_count) \
236 schedule(static) if (signed_pair_count > 1)
237 for (std::ptrdiff_t pair = 0; pair < signed_pair_count; ++pair) {
238 ExecuteWithExceptionCapture(has_exception, exception,
239
1/2
✓ Branch 1 taken 108 times.
✗ Branch 2 not taken.
108 [&]() { MergeBlockPair(blocks, next, static_cast<size_t>(pair)); });
240 }
241
242
1/2
✓ Branch 1 taken 90 times.
✗ Branch 2 not taken.
90 RethrowIfCaptured(exception);
243
244
2/2
✓ Branch 0 taken 26 times.
✓ Branch 1 taken 64 times.
90 if ((blocks.size() & 1U) != 0U) {
245
1/2
✓ Branch 1 taken 26 times.
✗ Branch 2 not taken.
26 next.back() = blocks.back();
246 }
247
248 90 return next;
249 }
250
251 68 Block MergeBlocks(BlockList blocks) {
252
2/2
✓ Branch 0 taken 90 times.
✓ Branch 1 taken 68 times.
158 while (blocks.size() > 1) {
253 90 blocks = MergeBlockPairs(blocks);
254 }
255
256 68 return std::move(blocks.front());
257 }
258
259 } // namespace
260
261
1/2
✓ Branch 1 taken 88 times.
✗ Branch 2 not taken.
88 DoubleSortEvenOddBatcherOMP::DoubleSortEvenOddBatcherOMP(const InType &in) {
262 SetTypeOfTask(GetStaticTypeOfTask());
263
1/2
✓ Branch 1 taken 88 times.
✗ Branch 2 not taken.
88 GetInput() = in;
264 GetOutput() = {};
265 88 }
266
267 84 bool DoubleSortEvenOddBatcherOMP::ValidationImpl() {
268 84 return GetOutput().empty();
269 }
270
271 76 bool DoubleSortEvenOddBatcherOMP::PreProcessingImpl() {
272 76 input_data_ = GetInput();
273 result_data_.clear();
274 76 return true;
275 }
276
277
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 68 times.
76 bool DoubleSortEvenOddBatcherOMP::RunImpl() {
278
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 68 times.
76 if (input_data_.empty()) {
279 result_data_.clear();
280 8 return true;
281 }
282
283 68 auto blocks = MakeSortedBlocks(input_data_);
284
1/2
✓ Branch 1 taken 68 times.
✗ Branch 2 not taken.
136 result_data_ = MergeBlocks(std::move(blocks));
285 return true;
286 68 }
287
288 72 bool DoubleSortEvenOddBatcherOMP::PostProcessingImpl() {
289 72 GetOutput() = result_data_;
290 72 return true;
291 }
292
293 } // namespace gusev_d_double_sort_even_odd_batcher_omp_task_threads
294