GCC Code Coverage Report


Directory: ./
File: tasks/leonova_a_radix_merge_sort/tbb/src/ops_tbb.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 131 143 91.6%
Functions: 16 19 84.2%
Branches: 85 138 61.6%

Line Branch Exec Source
1 #include "leonova_a_radix_merge_sort/tbb/include/ops_tbb.hpp"
2
3 #include <algorithm>
4 #include <cstddef>
5 #include <cstdint>
6 #include <utility>
7 #include <vector>
8
9 #include "leonova_a_radix_merge_sort/common/include/common.hpp"
10 #include "tbb/parallel_for.h"
11 #include "tbb/task_arena.h"
12 #include "util/include/util.hpp"
13
14 namespace leonova_a_radix_merge_sort {
15
16 12 tbb::task_arena &LeonovaARadixMergeSortTBB::GetTbbArena() {
17
4/6
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 8 times.
✓ Branch 3 taken 4 times.
✗ Branch 4 not taken.
✓ Branch 6 taken 4 times.
✗ Branch 7 not taken.
12 static int num_threads = std::max(1, ppc::util::GetNumThreads());
18
3/4
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 8 times.
✓ Branch 3 taken 4 times.
✗ Branch 4 not taken.
12 static tbb::task_arena arena(num_threads);
19 12 return arena;
20 }
21
22 inline uint64_t LeonovaARadixMergeSortTBB::ToUnsignedValue(int64_t value) {
23 1048580 return static_cast<uint64_t>(value) ^ kSignBitMask;
24 }
25
26 inline std::pair<size_t, size_t> LeonovaARadixMergeSortTBB::GetChunk(size_t tid, size_t num_threads, size_t size) {
27 510 const size_t chunk = (size + num_threads - 1) / num_threads;
28 510 const size_t begin = tid * chunk;
29 510 const size_t end = std::min(begin + chunk, size);
30 return {begin, end};
31 }
32
33 void LeonovaARadixMergeSortTBB::FillUnsignedKeys(const std::vector<int64_t> &arr, size_t left, size_t size,
34 std::vector<uint64_t> &keys, size_t num_threads) {
35 tbb::parallel_for(static_cast<size_t>(0), num_threads, [&](size_t tid) {
36 30 auto [begin, end] = GetChunk(tid, num_threads, size);
37
2/2
✓ Branch 0 taken 1048580 times.
✓ Branch 1 taken 30 times.
1048610 for (size_t index = begin; index < end; ++index) {
38 1048580 keys[index] = ToUnsignedValue(arr[left + index]);
39 }
40 30 });
41 }
42
43 void LeonovaARadixMergeSortTBB::CountBytesParallel(const std::vector<uint64_t> &keys, size_t size, int shift,
44 std::vector<CounterRow> &local_counts, size_t num_threads) {
45 tbb::parallel_for(static_cast<size_t>(0), num_threads, [&](size_t tid) {
46 240 auto [begin, end] = GetChunk(tid, num_threads, size);
47
48 240 auto &row = local_counts[tid];
49
50
2/2
✓ Branch 0 taken 8388640 times.
✓ Branch 1 taken 240 times.
8388880 for (size_t index = begin; index < end; ++index) {
51 8388640 ++row[(keys[index] >> shift) & 0xFFU];
52 }
53 240 });
54 }
55
56
1/2
✓ Branch 0 taken 96 times.
✗ Branch 1 not taken.
96 void LeonovaARadixMergeSortTBB::ReduceCounts(const std::vector<CounterRow> &local_counts, CounterRow &global_counts) {
57 std::ranges::fill(global_counts, 0);
58
59
2/2
✓ Branch 0 taken 240 times.
✓ Branch 1 taken 96 times.
336 for (const auto &row : local_counts) {
60
2/2
✓ Branch 0 taken 61440 times.
✓ Branch 1 taken 240 times.
61680 for (size_t index = 0; index < kNumCounters; ++index) {
61 61440 global_counts[index] += row[index];
62 }
63 }
64 96 }
65
66 96 void LeonovaARadixMergeSortTBB::BuildOffsets(const std::vector<CounterRow> &local_counts,
67 std::vector<CounterRow> &local_offsets, CounterRow &global_counts,
68 size_t num_threads) {
69 96 CounterRow bucket_totals = global_counts;
70
71 size_t prefix = 0;
72
2/2
✓ Branch 0 taken 24576 times.
✓ Branch 1 taken 96 times.
24672 for (size_t index = 0; index < kNumCounters; ++index) {
73 24576 size_t count = bucket_totals[index];
74 24576 bucket_totals[index] = prefix;
75 24576 prefix += count;
76 }
77
78
2/2
✓ Branch 0 taken 240 times.
✓ Branch 1 taken 96 times.
336 for (size_t tndex = 0; tndex < num_threads; ++tndex) {
79 auto &offset_row = local_offsets[tndex];
80 const auto &count_row = local_counts[tndex];
81
82
2/2
✓ Branch 0 taken 61440 times.
✓ Branch 1 taken 240 times.
61680 for (size_t index = 0; index < kNumCounters; ++index) {
83 61440 offset_row[index] = bucket_totals[index];
84 61440 bucket_totals[index] += count_row[index];
85 }
86 }
87 96 }
88
89 void LeonovaARadixMergeSortTBB::ScatterParallel(const std::vector<uint64_t> &keys, const std::vector<int64_t> &arr,
90 size_t left, size_t size, int shift,
91 std::vector<CounterRow> &local_offsets, std::vector<int64_t> &temp_arr,
92 std::vector<uint64_t> &temp_keys, size_t num_threads) {
93 tbb::parallel_for(static_cast<size_t>(0), num_threads, [&](size_t tid) {
94 240 auto [begin, end] = GetChunk(tid, num_threads, size);
95
96 240 auto &offsets = local_offsets[tid];
97
98
2/2
✓ Branch 0 taken 8388640 times.
✓ Branch 1 taken 240 times.
8388880 for (size_t index = begin; index < end; ++index) {
99 8388640 const size_t byte = (keys[index] >> shift) & 0xFFU;
100 8388640 const size_t pos = offsets[byte]++;
101
102 8388640 temp_arr[pos] = arr[left + index];
103 8388640 temp_keys[pos] = keys[index];
104 }
105 240 });
106 }
107
108
1/2
✓ Branch 1 taken 128 times.
✗ Branch 2 not taken.
128 LeonovaARadixMergeSortTBB::LeonovaARadixMergeSortTBB(const InType &in) {
109 SetTypeOfTask(GetStaticTypeOfTask());
110
1/2
✓ Branch 1 taken 128 times.
✗ Branch 2 not taken.
128 GetInput() = in;
111
1/2
✓ Branch 1 taken 128 times.
✗ Branch 2 not taken.
128 GetOutput() = std::vector<int64_t>(GetInput().size());
112 128 }
113
114 256 bool LeonovaARadixMergeSortTBB::ValidationImpl() {
115 256 return !GetInput().empty();
116 }
117
118 128 bool LeonovaARadixMergeSortTBB::PreProcessingImpl() {
119 128 return true;
120 }
121
122 128 bool LeonovaARadixMergeSortTBB::RunImpl() {
123
1/2
✓ Branch 1 taken 128 times.
✗ Branch 2 not taken.
128 if (!ValidationImpl()) {
124 return false;
125 }
126
127 128 GetOutput() = GetInput();
128
129
2/2
✓ Branch 0 taken 120 times.
✓ Branch 1 taken 8 times.
128 if (GetOutput().size() > 1) {
130 120 RadixMergeSort(GetOutput(), 0, GetOutput().size());
131 }
132
133 return true;
134 }
135
136 128 bool LeonovaARadixMergeSortTBB::PostProcessingImpl() {
137 128 return true;
138 }
139
140 112 void LeonovaARadixMergeSortTBB::SequentialRadixSort(std::vector<int64_t> &arr, size_t left, size_t right) {
141 112 const size_t size = right - left;
142
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 112 times.
112 if (size <= 1) {
143 return;
144 }
145
146 112 std::vector<uint64_t> keys(size);
147
1/4
✓ Branch 1 taken 112 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
112 std::vector<int64_t> temp_arr(size);
148
1/4
✓ Branch 1 taken 112 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
112 std::vector<uint64_t> temp_keys(size);
149
150
2/2
✓ Branch 0 taken 3384 times.
✓ Branch 1 taken 112 times.
3496 for (size_t index = 0; index < size; ++index) {
151 3384 keys[index] = ToUnsignedValue(arr[left + index]);
152 }
153
154
2/2
✓ Branch 0 taken 896 times.
✓ Branch 1 taken 112 times.
1008 for (int byte_pos = 0; byte_pos < kNumBytes; ++byte_pos) {
155 896 const int shift = byte_pos * kByteSize;
156
157
1/4
✓ Branch 1 taken 896 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
896 std::vector<size_t> counts(kNumCounters, 0);
158
1/4
✓ Branch 1 taken 896 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
896 std::vector<size_t> offsets(kNumCounters, 0);
159
160
2/2
✓ Branch 0 taken 27072 times.
✓ Branch 1 taken 896 times.
27968 for (size_t index = 0; index < size; ++index) {
161 27072 ++counts[(keys[index] >> shift) & 0xFFU];
162 }
163
164 size_t sum = 0;
165
2/2
✓ Branch 0 taken 229376 times.
✓ Branch 1 taken 896 times.
230272 for (size_t index = 0; index < kNumCounters; ++index) {
166 229376 offsets[index] = sum;
167 229376 sum += counts[index];
168 }
169
170
2/2
✓ Branch 0 taken 27072 times.
✓ Branch 1 taken 896 times.
27968 for (size_t index = 0; index < size; ++index) {
171 27072 size_t byte = (keys[index] >> shift) & 0xFFU;
172 27072 size_t pos = offsets[byte]++;
173
174 27072 temp_arr[pos] = arr[left + index];
175 27072 temp_keys[pos] = keys[index];
176 }
177
178 std::ranges::copy(temp_arr, arr.begin() + static_cast<std::ptrdiff_t>(left));
179 keys.swap(temp_keys);
180 }
181 }
182
183 124 void LeonovaARadixMergeSortTBB::RadixSort(std::vector<int64_t> &arr, size_t left, size_t right) {
184 124 const size_t size = right - left;
185
1/2
✓ Branch 0 taken 124 times.
✗ Branch 1 not taken.
124 if (size <= 1) {
186 112 return;
187 }
188
189
2/2
✓ Branch 0 taken 112 times.
✓ Branch 1 taken 12 times.
124 if (size < kMinParallelSize) {
190 112 SequentialRadixSort(arr, left, right);
191 112 return;
192 }
193
194 12 auto &arena = GetTbbArena();
195
196
3/4
✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.
✓ Branch 2 taken 9 times.
✓ Branch 3 taken 3 times.
12 const size_t num_threads = std::max<size_t>(1, std::min<size_t>(arena.max_concurrency(), size));
197
198 12 std::vector<uint64_t> keys(size);
199
1/4
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
12 std::vector<uint64_t> temp_keys(size);
200
1/4
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
12 std::vector<int64_t> temp_arr(size);
201
202
2/6
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 12 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
12 std::vector<CounterRow> local_counts(num_threads, CounterRow(kNumCounters, 0));
203
2/4
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 12 times.
✗ Branch 5 not taken.
12 std::vector<CounterRow> local_offsets(num_threads, CounterRow(kNumCounters, 0));
204
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 CounterRow global_counts(kNumCounters);
205
206
2/6
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 12 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
12 arena.execute([&] {
207 12 FillUnsignedKeys(arr, left, size, keys, num_threads);
208
209
2/2
✓ Branch 0 taken 96 times.
✓ Branch 1 taken 12 times.
108 for (int byte_pos = 0; byte_pos < kNumBytes; ++byte_pos) {
210 96 const int shift = byte_pos * kByteSize;
211
212
2/2
✓ Branch 0 taken 240 times.
✓ Branch 1 taken 96 times.
336 for (auto &row : local_counts) {
213 std::ranges::fill(row, 0);
214 }
215
216 96 CountBytesParallel(keys, size, shift, local_counts, num_threads);
217 96 ReduceCounts(local_counts, global_counts);
218 96 BuildOffsets(local_counts, local_offsets, global_counts, num_threads);
219
220 96 ScatterParallel(keys, arr, left, size, shift, local_offsets, temp_arr, temp_keys, num_threads);
221
222
1/2
✓ Branch 0 taken 96 times.
✗ Branch 1 not taken.
96 std::ranges::copy(temp_arr, arr.begin() + static_cast<std::ptrdiff_t>(left));
223 96 keys.swap(temp_keys);
224 }
225 12 });
226 12 }
227
228 4 void LeonovaARadixMergeSortTBB::SimpleMerge(std::vector<int64_t> &arr, size_t left, size_t mid, size_t right) {
229 4 std::vector<int64_t> merged(right - left);
230
231 size_t in = left;
232 size_t j = mid;
233 size_t k = 0;
234
235
2/2
✓ Branch 0 taken 262148 times.
✓ Branch 1 taken 4 times.
262152 while (in < mid && j < right) {
236
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 262148 times.
262148 merged[k++] = (arr[in] <= arr[j]) ? arr[in++] : arr[j++];
237 }
238
2/2
✓ Branch 0 taken 262144 times.
✓ Branch 1 taken 4 times.
262148 while (in < mid) {
239 262144 merged[k++] = arr[in++];
240 }
241
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 while (j < right) {
242 merged[k++] = arr[j++];
243 }
244
245 std::ranges::copy(merged, arr.begin() + static_cast<std::ptrdiff_t>(left));
246 4 }
247
248 120 void LeonovaARadixMergeSortTBB::RadixMergeSort(std::vector<int64_t> &arr, size_t left, size_t right) {
249 struct Task {
250 size_t l, r;
251 bool sorted;
252 };
253
254 120 std::vector<Task> stack;
255
1/2
✓ Branch 1 taken 120 times.
✗ Branch 2 not taken.
120 stack.push_back({left, right, false});
256
257
2/2
✓ Branch 0 taken 132 times.
✓ Branch 1 taken 120 times.
252 while (!stack.empty()) {
258
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 132 times.
132 auto [l, r, sorted] = stack.back();
259 stack.pop_back();
260
261 132 size_t size = r - l;
262
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 132 times.
132 if (size <= 1) {
263 continue;
264 }
265
266
2/2
✓ Branch 0 taken 124 times.
✓ Branch 1 taken 8 times.
132 if (size <= kRadixThreshold) {
267
1/2
✓ Branch 1 taken 124 times.
✗ Branch 2 not taken.
124 RadixSort(arr, l, r);
268 124 continue;
269 }
270
271 8 size_t mid = l + (size / 2);
272
273
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (!sorted) {
274
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 stack.push_back({l, r, true});
275
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 stack.push_back({mid, r, false});
276
1/4
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
4 stack.push_back({l, mid, false});
277 } else {
278
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 SimpleMerge(arr, l, mid, r);
279 }
280 }
281 120 }
282
283 } // namespace leonova_a_radix_merge_sort
284