GCC Code Coverage Report


Directory: ./
File: tasks/melnik_i_radix_sort_int/all/src/ops_all.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 131 161 81.4%
Functions: 13 15 86.7%
Branches: 97 172 56.4%

Line Branch Exec Source
1 #include "melnik_i_radix_sort_int/all/include/ops_all.hpp"
2
3 #include <mpi.h>
4
5 #include <algorithm>
6 #include <array>
7 #include <cstddef>
8 #include <cstdint>
9 #include <thread>
10 #include <utility>
11 #include <vector>
12
13 #include "melnik_i_radix_sort_int/common/include/common.hpp"
14 #include "util/include/util.hpp"
15
16 namespace melnik_i_radix_sort_int {
17
18 namespace {
19
20 constexpr int kBitsPerPass = 8;
21 constexpr std::size_t kBuckets = 1U << kBitsPerPass;
22
23 std::pair<std::size_t, std::size_t> ChunkBounds(std::size_t n, int num_ranks, int rank) {
24 70 const auto nz = static_cast<std::size_t>(num_ranks);
25 70 const auto rz = static_cast<std::size_t>(rank);
26 70 const std::size_t base = ((n / nz) * rz) + std::min(rz, n % nz);
27
6/8
✓ Branch 0 taken 22 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 22 times.
✓ Branch 3 taken 6 times.
✓ Branch 4 taken 8 times.
✓ Branch 5 taken 6 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
70 const std::size_t size = (n / nz) + (rz < n % nz ? 1U : 0U);
28 56 return {base, base + size};
29 }
30
31 std::pair<std::size_t, std::size_t> ThreadChunkBounds(std::size_t n, int num_threads, int tid) {
32 return ChunkBounds(n, num_threads, tid);
33 }
34
35 } // namespace
36
37
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 MelnikIRadixSortIntALL::MelnikIRadixSortIntALL(const InType &in) {
38 SetTypeOfTask(GetStaticTypeOfTask());
39
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 GetInput() = in;
40 GetOutput() = {};
41 14 }
42
43 14 bool MelnikIRadixSortIntALL::ValidationImpl() {
44 14 return !GetInput().empty();
45 }
46
47 14 bool MelnikIRadixSortIntALL::PreProcessingImpl() {
48 14 GetOutput() = GetInput();
49 14 return !GetOutput().empty();
50 }
51
52
1/2
✓ Branch 0 taken 14 times.
✗ Branch 1 not taken.
14 bool MelnikIRadixSortIntALL::PostProcessingImpl() {
53 14 return std::ranges::is_sorted(GetOutput());
54 }
55
56 10 void MelnikIRadixSortIntALL::CountingSortByByte(const std::vector<int> &source, std::vector<int> &destination,
57 std::size_t begin, std::size_t end, std::int64_t exp,
58 std::int64_t offset) {
59 10 std::array<std::size_t, kBuckets> count{};
60 count.fill(0);
61
62
2/2
✓ Branch 0 taken 20 times.
✓ Branch 1 taken 10 times.
30 for (std::size_t i = begin; i < end; ++i) {
63 20 const std::int64_t val = static_cast<std::int64_t>(source[i]) + offset;
64
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
20 const auto bucket = static_cast<std::size_t>((val / exp) % static_cast<std::int64_t>(kBuckets));
65 20 ++count.at(bucket);
66 }
67
68 10 std::array<std::size_t, kBuckets> pos{};
69 10 pos.at(0) = begin;
70
2/2
✓ Branch 0 taken 2550 times.
✓ Branch 1 taken 10 times.
2560 for (std::size_t bucket_idx = 1; bucket_idx < kBuckets; ++bucket_idx) {
71 2550 pos.at(bucket_idx) = pos.at(bucket_idx - 1U) + count.at(bucket_idx - 1U);
72 }
73
74
2/2
✓ Branch 0 taken 20 times.
✓ Branch 1 taken 10 times.
30 for (std::size_t i = begin; i < end; ++i) {
75 20 const std::int64_t val = static_cast<std::int64_t>(source[i]) + offset;
76
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
20 const auto bucket = static_cast<std::size_t>((val / exp) % static_cast<std::int64_t>(kBuckets));
77 20 destination[pos.at(bucket)] = source[i];
78 20 ++pos.at(bucket);
79 }
80 10 }
81
82 28 void MelnikIRadixSortIntALL::RadixSortRange(std::vector<int> &data, std::vector<int> &buffer, std::size_t begin,
83 std::size_t end) {
84
2/2
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 10 times.
28 if (end - begin <= 1U) {
85 18 return;
86 }
87
88 const auto range_begin = data.begin() + static_cast<std::ptrdiff_t>(begin);
89 const auto range_end = data.begin() + static_cast<std::ptrdiff_t>(end);
90
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
10 const auto [min_it, max_it] = std::ranges::minmax_element(range_begin, range_end);
91 10 const auto min_val = static_cast<std::int64_t>(*min_it);
92 10 const auto max_val = static_cast<std::int64_t>(*max_it);
93
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
10 const std::int64_t offset = (min_val < 0) ? -min_val : 0;
94 10 const std::int64_t max_shifted = max_val + offset;
95
96 std::vector<int> *src = &data;
97 std::vector<int> *dst = &buffer;
98
99
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
20 for (std::int64_t exp = 1; max_shifted / exp > 0; exp <<= kBitsPerPass) {
100 10 CountingSortByByte(*src, *dst, begin, end, exp, offset);
101 std::swap(src, dst);
102 }
103
104 // If result ended up in buffer, copy it back to data.
105
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
10 if (src != &data) {
106 10 std::copy(src->begin() + static_cast<std::ptrdiff_t>(begin), src->begin() + static_cast<std::ptrdiff_t>(end),
107 data.begin() + static_cast<std::ptrdiff_t>(begin));
108 }
109 }
110
111 19 void MelnikIRadixSortIntALL::MergeRanges(const std::vector<int> &source, std::vector<int> &destination, Range left,
112 Range right, std::size_t write_begin) {
113 19 std::size_t li = left.begin;
114 19 std::size_t ri = right.begin;
115 std::size_t wi = write_begin;
116
117
4/4
✓ Branch 0 taken 49 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 40 times.
✓ Branch 3 taken 9 times.
59 while (li < left.end && ri < right.end) {
118
2/2
✓ Branch 0 taken 26 times.
✓ Branch 1 taken 14 times.
40 if (source[li] <= source[ri]) {
119 26 destination[wi++] = source[li++];
120 } else {
121 14 destination[wi++] = source[ri++];
122 }
123 }
124
125
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 10 times.
19 if (li < left.end) {
126 9 std::copy(source.begin() + static_cast<std::ptrdiff_t>(li), source.begin() + static_cast<std::ptrdiff_t>(left.end),
127 destination.begin() + static_cast<std::ptrdiff_t>(wi));
128
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
10 } else if (ri < right.end) {
129 10 std::copy(source.begin() + static_cast<std::ptrdiff_t>(ri), source.begin() + static_cast<std::ptrdiff_t>(right.end),
130 destination.begin() + static_cast<std::ptrdiff_t>(wi));
131 }
132 19 }
133
134
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 21 times.
21 void MelnikIRadixSortIntALL::MergeSortedRangesParallel(std::vector<int> &data, std::vector<int> &buffer,
135 const std::vector<Range> &ranges, int num_threads) {
136
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 21 times.
21 if (ranges.empty()) {
137 return;
138 }
139
140 21 std::vector<Range> cur = ranges;
141 std::vector<int> *src = &data;
142 std::vector<int> *dst = &buffer;
143
144
2/2
✓ Branch 0 taken 19 times.
✓ Branch 1 taken 21 times.
40 while (cur.size() > 1U) {
145 19 const std::size_t pairs = (cur.size() + 1U) / 2U;
146
1/4
✓ Branch 1 taken 19 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
19 std::vector<Range> next(pairs);
147
148 // Limit concurrency to num_threads, but not more than available pairs.
149
1/2
✓ Branch 0 taken 19 times.
✗ Branch 1 not taken.
19 const int active = std::min(num_threads, static_cast<int>(pairs));
150
151
1/2
✓ Branch 0 taken 19 times.
✗ Branch 1 not taken.
19 if (active <= 1) {
152 19 MergeLevelSequential(next, cur, src, dst, pairs);
153 } else {
154 MergeLevelParallel(next, cur, src, dst, pairs, active);
155 }
156
157 cur = std::move(next);
158 std::swap(src, dst);
159 }
160
161
2/2
✓ Branch 0 taken 19 times.
✓ Branch 1 taken 2 times.
21 if (src != &data) {
162 data.swap(*src);
163 }
164 }
165
166 19 void MelnikIRadixSortIntALL::MergeLevelSequential(std::vector<Range> &next, const std::vector<Range> &cur,
167 std::vector<int> *src, std::vector<int> *dst, std::size_t pairs) {
168
2/2
✓ Branch 0 taken 19 times.
✓ Branch 1 taken 19 times.
38 for (std::size_t pair_idx = 0; pair_idx < pairs; ++pair_idx) {
169
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 19 times.
19 const std::size_t lp = pair_idx * 2U;
170 19 const Range left = cur[lp];
171
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 19 times.
19 if (lp + 1U >= cur.size()) {
172 std::copy(src->begin() + static_cast<std::ptrdiff_t>(left.begin),
173 src->begin() + static_cast<std::ptrdiff_t>(left.end),
174 dst->begin() + static_cast<std::ptrdiff_t>(left.begin));
175 next[pair_idx] = left;
176 continue;
177 }
178 19 const Range right = cur[lp + 1U];
179 19 MergeRanges(*src, *dst, left, right, left.begin);
180 19 next[pair_idx] = Range{.begin = left.begin, .end = right.end};
181 }
182 19 }
183
184 void MelnikIRadixSortIntALL::MergeLevelParallel(std::vector<Range> &next, const std::vector<Range> &cur,
185 std::vector<int> *src, std::vector<int> *dst, std::size_t pairs,
186 int active) {
187 std::vector<std::thread> workers;
188 workers.reserve(static_cast<std::size_t>(active));
189
190 for (int tid = 0; tid < active; ++tid) {
191 workers.emplace_back([&, tid]() {
192 auto [p_begin, p_end] = ThreadChunkBounds(pairs, active, tid);
193 for (std::size_t pair_idx = p_begin; pair_idx < p_end; ++pair_idx) {
194 const std::size_t lp = pair_idx * 2U;
195 const Range left = cur[lp];
196 if (lp + 1U >= cur.size()) {
197 std::copy(src->begin() + static_cast<std::ptrdiff_t>(left.begin),
198 src->begin() + static_cast<std::ptrdiff_t>(left.end),
199 dst->begin() + static_cast<std::ptrdiff_t>(left.begin));
200 next[pair_idx] = left;
201 return;
202 }
203 const Range right = cur[lp + 1U];
204 MergeRanges(*src, *dst, left, right, left.begin);
205 next[pair_idx] = Range{.begin = left.begin, .end = right.end};
206 }
207 });
208 }
209 for (auto &w : workers) {
210 w.join();
211 }
212 }
213
214
1/2
✓ Branch 0 taken 14 times.
✗ Branch 1 not taken.
14 bool MelnikIRadixSortIntALL::RunImpl() {
215 auto &output = GetOutput();
216
1/2
✓ Branch 0 taken 14 times.
✗ Branch 1 not taken.
14 if (output.empty()) {
217 return false;
218 }
219
220 14 int rank = 0;
221 14 int num_ranks = 1;
222 14 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
223 14 MPI_Comm_size(MPI_COMM_WORLD, &num_ranks);
224
225 14 const int num_threads = std::max(1, ppc::util::GetNumThreads());
226 14 const auto total_size = static_cast<int>(output.size());
227
228 14 std::vector<int> send_counts(static_cast<std::size_t>(num_ranks));
229
1/4
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
14 std::vector<int> displacements(static_cast<std::size_t>(num_ranks));
230
2/2
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
14 if (rank == 0) {
231
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 7 times.
21 for (int rank_idx = 0; rank_idx < num_ranks; ++rank_idx) {
232
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 6 times.
14 auto [b, e] = ChunkBounds(static_cast<std::size_t>(total_size), num_ranks, rank_idx);
233 14 send_counts[static_cast<std::size_t>(rank_idx)] = static_cast<int>(e - b);
234 14 displacements[static_cast<std::size_t>(rank_idx)] = static_cast<int>(b);
235 }
236 }
237
238
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 MPI_Bcast(send_counts.data(), num_ranks, MPI_INT, 0, MPI_COMM_WORLD);
239
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 MPI_Bcast(displacements.data(), num_ranks, MPI_INT, 0, MPI_COMM_WORLD);
240
241
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 const int local_count = send_counts[static_cast<std::size_t>(rank)];
242
2/6
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 14 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
14 std::vector<int> local(static_cast<std::size_t>(local_count));
243
244
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 MPI_Scatterv(output.data(), send_counts.data(), displacements.data(), MPI_INT, local.data(), local_count, MPI_INT, 0,
245 MPI_COMM_WORLD);
246
247 const int local_threads = num_threads;
248
249
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 LocalSortChunk(local, local_count, local_threads, num_threads);
250
251
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 MPI_Gatherv(local.data(), local_count, MPI_INT, output.data(), send_counts.data(), displacements.data(), MPI_INT, 0,
252 MPI_COMM_WORLD);
253
254
3/4
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
✓ Branch 2 taken 7 times.
✗ Branch 3 not taken.
14 if (rank == 0 && num_ranks > 1) {
255
1/2
✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
7 GlobalMergeChunks(output, total_size, num_ranks, send_counts, displacements, num_threads);
256 }
257
258
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 MPI_Bcast(output.data(), total_size, MPI_INT, 0, MPI_COMM_WORLD);
259 return true;
260 }
261
262 14 void MelnikIRadixSortIntALL::LocalSortChunk(std::vector<int> &local, int local_count, int local_threads,
263 int num_threads) {
264
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14 times.
14 if (local_count <= 0) {
265 return;
266 }
267 14 std::vector<int> local_buf(static_cast<std::size_t>(local_count));
268
269
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14 times.
14 if (local_threads <= 1) {
270 RadixSortRange(local, local_buf, 0, static_cast<std::size_t>(local_count));
271 } else {
272 14 std::vector<std::thread> workers;
273
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 workers.reserve(static_cast<std::size_t>(local_threads));
274
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 14 times.
42 for (int tid = 0; tid < local_threads; ++tid) {
275
1/2
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
28 workers.emplace_back([&, tid]() {
276
2/2
✓ Branch 0 taken 22 times.
✓ Branch 1 taken 6 times.
28 auto [b, e] = ThreadChunkBounds(static_cast<std::size_t>(local_count), local_threads, tid);
277 28 RadixSortRange(local, local_buf, b, e);
278 28 });
279 }
280
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 14 times.
42 for (auto &w : workers) {
281
1/2
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
28 w.join();
282 }
283
284 14 std::vector<Range> thread_ranges;
285
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 thread_ranges.reserve(static_cast<std::size_t>(local_threads));
286
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 14 times.
42 for (int tid = 0; tid < local_threads; ++tid) {
287
2/2
✓ Branch 0 taken 22 times.
✓ Branch 1 taken 6 times.
28 auto [b, e] = ThreadChunkBounds(static_cast<std::size_t>(local_count), local_threads, tid);
288
2/2
✓ Branch 0 taken 26 times.
✓ Branch 1 taken 2 times.
28 if (b < e) {
289
1/4
✓ Branch 1 taken 26 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
26 thread_ranges.push_back(Range{.begin = b, .end = e});
290 }
291 }
292
293
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 MergeSortedRangesParallel(local, local_buf, thread_ranges, num_threads);
294 14 }
295 }
296
297 7 void MelnikIRadixSortIntALL::GlobalMergeChunks(std::vector<int> &output, int total_size, int num_ranks,
298 const std::vector<int> &send_counts,
299 const std::vector<int> &displacements, int num_threads) {
300 7 std::vector<int> global_buf(static_cast<std::size_t>(total_size));
301 7 std::vector<Range> rank_ranges;
302
1/2
✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
7 rank_ranges.reserve(static_cast<std::size_t>(num_ranks));
303
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 7 times.
21 for (int rank_idx = 0; rank_idx < num_ranks; ++rank_idx) {
304
1/2
✓ Branch 0 taken 14 times.
✗ Branch 1 not taken.
14 const auto b = static_cast<std::size_t>(displacements[static_cast<std::size_t>(rank_idx)]);
305 14 const auto e = b + static_cast<std::size_t>(send_counts[static_cast<std::size_t>(rank_idx)]);
306
1/2
✓ Branch 0 taken 14 times.
✗ Branch 1 not taken.
14 if (b < e) {
307
1/4
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
14 rank_ranges.push_back(Range{.begin = b, .end = e});
308 }
309 }
310
311
1/2
✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
7 MergeSortedRangesParallel(output, global_buf, rank_ranges, num_threads);
312 7 }
313
314 } // namespace melnik_i_radix_sort_int
315