GCC Code Coverage Report


Directory: ./
File: tasks/baldin_a_radix_sort/all/src/ops_all.cpp
Date: 2026-05-11 08:26:31
Exec Total Coverage
Lines: 104 106 98.1%
Functions: 11 11 100.0%
Branches: 60 68 88.2%

Line Branch Exec Source
1 #include "baldin_a_radix_sort/all/include/ops_all.hpp"
2
3 #include <mpi.h>
4
5 #include <algorithm>
6 #include <array>
7 #include <cstddef>
8 #include <cstdint>
9 #include <iterator>
10 #include <vector>
11
12 #include "baldin_a_radix_sort/common/include/common.hpp"
13 #include "util/include/util.hpp"
14
15 namespace baldin_a_radix_sort { // comment for ci rerun
16
17
1/2
✓ Branch 1 taken 26 times.
✗ Branch 2 not taken.
26 BaldinARadixSortALL::BaldinARadixSortALL(const InType &in) {
18 SetTypeOfTask(GetStaticTypeOfTask());
19
1/2
✓ Branch 1 taken 26 times.
✗ Branch 2 not taken.
26 GetInput() = in;
20 GetOutput() = {};
21 26 }
22
23 26 bool BaldinARadixSortALL::ValidationImpl() {
24 26 return true;
25 }
26
27 26 bool BaldinARadixSortALL::PreProcessingImpl() {
28 26 int rank = 0;
29
30
1/2
✓ Branch 1 taken 26 times.
✗ Branch 2 not taken.
26 if (ppc::util::IsUnderMpirun()) {
31 26 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
32 }
33
34
2/2
✓ Branch 0 taken 13 times.
✓ Branch 1 taken 13 times.
26 if (rank == 0) {
35 13 GetOutput() = GetInput();
36 }
37
38 26 return true;
39 }
40
41 namespace {
42
43 104 void CountingSortStep(std::vector<int>::iterator in_begin, std::vector<int>::iterator in_end,
44 std::vector<int>::iterator out_begin, size_t byte_index) {
45 104 size_t shift = byte_index * 8;
46 104 std::array<size_t, 256> count = {0};
47
48
2/2
✓ Branch 0 taken 944 times.
✓ Branch 1 taken 104 times.
1048 for (auto it = in_begin; it != in_end; it++) {
49 944 auto raw_val = static_cast<unsigned int>(*it);
50 944 unsigned int byte_val = (raw_val >> shift) & 0xFF;
51
52
2/2
✓ Branch 0 taken 236 times.
✓ Branch 1 taken 708 times.
944 if (byte_index == sizeof(int) - 1) {
53 236 byte_val ^= 128;
54 }
55 944 count.at(byte_val)++;
56 }
57
58 104 std::array<size_t, 256> prefix{};
59 prefix[0] = 0;
60
2/2
✓ Branch 0 taken 26520 times.
✓ Branch 1 taken 104 times.
26624 for (int i = 1; i < 256; i++) {
61 26520 prefix.at(i) = prefix.at(i - 1) + count.at(i - 1);
62 }
63
64
2/2
✓ Branch 0 taken 944 times.
✓ Branch 1 taken 104 times.
1048 for (auto it = in_begin; it != in_end; it++) {
65 944 auto raw_val = static_cast<unsigned int>(*it);
66 944 unsigned int byte_val = (raw_val >> shift) & 0xFF;
67
68
2/2
✓ Branch 0 taken 236 times.
✓ Branch 1 taken 708 times.
944 if (byte_index == sizeof(int) - 1) {
69 236 byte_val ^= 128;
70 }
71
72 944 *(out_begin + static_cast<int64_t>(prefix.at(byte_val))) = *it;
73 944 prefix.at(byte_val)++;
74 }
75 104 }
76
77
2/2
✓ Branch 0 taken 20 times.
✓ Branch 1 taken 26 times.
46 void RadixSortLocal(std::vector<int>::iterator begin, std::vector<int>::iterator end) {
78 46 size_t n = std::distance(begin, end);
79
2/2
✓ Branch 0 taken 20 times.
✓ Branch 1 taken 26 times.
46 if (n < 2) {
80 20 return;
81 }
82
83 26 std::vector<int> temp(n);
84
85
2/2
✓ Branch 0 taken 104 times.
✓ Branch 1 taken 26 times.
130 for (size_t i = 0; i < sizeof(int); i++) {
86 size_t shift = i;
87
88
2/2
✓ Branch 0 taken 52 times.
✓ Branch 1 taken 52 times.
104 if (i % 2 == 0) {
89 52 CountingSortStep(begin, end, temp.begin(), shift);
90 } else {
91 52 CountingSortStep(temp.begin(), temp.end(), begin, shift);
92 }
93 }
94 }
95
96 } // namespace
97
98 26 void BaldinARadixSortALL::DistributeData(int rank, int size, int &n) {
99 26 bool is_mpi = ppc::util::IsUnderMpirun();
100
101
1/2
✓ Branch 0 taken 26 times.
✗ Branch 1 not taken.
26 if (is_mpi) {
102 26 MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
103 }
104
105
2/2
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 2 times.
26 if (n == 0) {
106 return;
107 }
108
109 24 counts_.resize(size);
110 24 displs_.resize(size);
111 24 int chunk = n / size;
112 24 int rem = n % size;
113 int curr = 0;
114
2/2
✓ Branch 0 taken 48 times.
✓ Branch 1 taken 24 times.
72 for (int i = 0; i < size; i++) {
115
2/2
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 18 times.
78 counts_[i] = chunk + (i < rem ? 1 : 0);
116 48 displs_[i] = curr;
117 48 curr += counts_[i];
118 }
119
120 24 local_data_.resize(counts_[rank]);
121
122
1/2
✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
24 if (is_mpi) {
123
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
36 MPI_Scatterv(rank == 0 ? GetOutput().data() : nullptr, counts_.data(), displs_.data(), MPI_INT, local_data_.data(),
124 counts_[rank], MPI_INT, 0, MPI_COMM_WORLD);
125 } else {
126 local_data_ = GetOutput();
127 }
128 }
129
130 24 void BaldinARadixSortALL::GatherData(int rank) {
131 24 bool is_mpi = ppc::util::IsUnderMpirun();
132
133
1/2
✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
24 if (is_mpi) {
134
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
36 MPI_Gatherv(local_data_.data(), counts_[rank], MPI_INT, rank == 0 ? GetOutput().data() : nullptr, counts_.data(),
135 displs_.data(), MPI_INT, 0, MPI_COMM_WORLD);
136 } else {
137 GetOutput() = local_data_;
138 }
139 24 }
140
141
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 23 times.
24 void BaldinARadixSortALL::LocalSort(int num_threads) {
142 24 int local_n = static_cast<int>(local_data_.size());
143
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 23 times.
24 if (local_n == 0) {
144 1 return;
145 }
146
147 23 std::vector<int> local_offsets(num_threads + 1);
148 23 int l_chunk = local_n / num_threads;
149 23 int l_rem = local_n % num_threads;
150 int l_curr = 0;
151
2/2
✓ Branch 0 taken 46 times.
✓ Branch 1 taken 23 times.
69 for (int i = 0; i < num_threads; i++) {
152
2/2
✓ Branch 0 taken 35 times.
✓ Branch 1 taken 11 times.
46 local_offsets[i] = l_curr;
153
2/2
✓ Branch 0 taken 35 times.
✓ Branch 1 taken 11 times.
81 l_curr += l_chunk + (i < l_rem ? 1 : 0);
154 }
155 23 local_offsets[num_threads] = local_n;
156
157 23 auto &local_data_ref = local_data_;
158
159 23 #pragma omp parallel num_threads(num_threads) default(none) shared(num_threads, local_offsets, local_data_ref)
160 {
161 int tid = omp_get_thread_num();
162 auto begin = local_data_ref.begin() + local_offsets[tid];
163 auto end = local_data_ref.begin() + local_offsets[tid + 1];
164 RadixSortLocal(begin, end);
165 }
166
167
2/2
✓ Branch 0 taken 23 times.
✓ Branch 1 taken 23 times.
46 for (int step = 1; step < num_threads; step *= 2) {
168 23 #pragma omp parallel for num_threads(num_threads) default(none) shared(step, num_threads, local_offsets, local_data_ref)
169 for (int i = 0; i < num_threads; i += 2 * step) {
170 if (i + step < num_threads) {
171 auto begin = local_data_ref.begin() + local_offsets[i];
172 auto middle = local_data_ref.begin() + local_offsets[i + step];
173 int end_idx = std::min(i + (2 * step), num_threads);
174 auto end = local_data_ref.begin() + local_offsets[end_idx];
175
176 std::inplace_merge(begin, middle, end);
177 }
178 }
179 }
180 }
181
182 12 void BaldinARadixSortALL::GlobalMerge(int num_threads, int size, int n) {
183 auto &out = GetOutput();
184
185 12 std::vector<int> global_offsets(size + 1);
186
2/2
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 12 times.
36 for (int i = 0; i < size; i++) {
187 24 global_offsets[i] = displs_[i];
188 }
189 12 global_offsets[size] = n;
190
191
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
24 for (int step = 1; step < size; step *= 2) {
192 12 #pragma omp parallel for num_threads(num_threads) default(none) shared(step, size, global_offsets, out)
193 for (int i = 0; i < size; i += 2 * step) {
194 if (i + step < size) {
195 auto begin = out.begin() + global_offsets[i];
196 auto middle = out.begin() + global_offsets[i + step];
197 int end_idx = std::min(i + (2 * step), size);
198 auto end = out.begin() + global_offsets[end_idx];
199
200 std::inplace_merge(begin, middle, end);
201 }
202 }
203 }
204 12 }
205
206 26 bool BaldinARadixSortALL::RunImpl() {
207 26 int rank = 0;
208 26 int size = 1;
209 26 bool is_mpi = ppc::util::IsUnderMpirun();
210
211
1/2
✓ Branch 0 taken 26 times.
✗ Branch 1 not taken.
26 if (is_mpi) {
212 26 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
213 26 MPI_Comm_size(MPI_COMM_WORLD, &size);
214 }
215
216 26 int n = 0;
217
2/2
✓ Branch 0 taken 13 times.
✓ Branch 1 taken 13 times.
26 if (rank == 0) {
218 13 n = static_cast<int>(GetOutput().size());
219 }
220
221 26 DistributeData(rank, size, n);
222
223
2/2
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 2 times.
26 if (n == 0) {
224 return true;
225 }
226
227 24 int num_threads = ppc::util::GetNumThreads();
228
229 24 LocalSort(num_threads);
230
231 24 GatherData(rank);
232
233
3/4
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
✓ Branch 2 taken 12 times.
✗ Branch 3 not taken.
24 if (rank == 0 && size > 1) {
234 12 GlobalMerge(num_threads, size, n);
235 }
236
237 return true;
238 }
239
240 26 bool BaldinARadixSortALL::PostProcessingImpl() {
241 26 return true;
242 }
243
244 } // namespace baldin_a_radix_sort
245