GCC Code Coverage Report


Directory: ./
File: tasks/dorofeev_i_bitwise_sort_double_eo_batcher_merge/all/src/ops_all.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 135 139 97.1%
Functions: 17 17 100.0%
Branches: 98 140 70.0%

Line Branch Exec Source
1 #include "dorofeev_i_bitwise_sort_double_eo_batcher_merge/all/include/ops_all.hpp"
2
3 #include <mpi.h>
4 #include <tbb/tbb.h>
5
6 #include <algorithm>
7 #include <cstdint>
8 #include <cstring>
9 #include <limits>
10 #include <vector>
11
12 #include "dorofeev_i_bitwise_sort_double_eo_batcher_merge/common/include/common.hpp"
13 #include "util/include/util.hpp"
14
15 namespace dorofeev_i_bitwise_sort_double_eo_batcher_merge {
16
17 namespace {
18
19 uint64_t DoubleToUint(double d) {
20 uint64_t u = 0;
21 std::memcpy(&u, &d, sizeof(double));
22
2/2
✓ Branch 0 taken 1246 times.
✓ Branch 1 taken 1602 times.
2848 if ((u & 0x8000000000000000ULL) != 0) {
23 1246 u = ~u;
24 } else {
25 1602 u |= 0x8000000000000000ULL;
26 }
27 return u;
28 }
29
30 double UintToDouble(uint64_t u) {
31 2848 if ((u & 0x8000000000000000ULL) != 0) {
32 1602 u &= ~0x8000000000000000ULL;
33 } else {
34 1246 u = ~u;
35 }
36 double d = 0.0;
37 std::memcpy(&d, &u, sizeof(double));
38 return d;
39 }
40
41
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
24 void RadixSortDouble(std::vector<double> &arr) {
42
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
24 if (arr.empty()) {
43 return;
44 }
45
46 24 std::vector<uint64_t> uarr(arr.size());
47
2/2
✓ Branch 0 taken 2848 times.
✓ Branch 1 taken 24 times.
2872 for (size_t i = 0; i < arr.size(); ++i) {
48
2/2
✓ Branch 0 taken 1246 times.
✓ Branch 1 taken 1602 times.
5696 uarr[i] = DoubleToUint(arr[i]);
49 }
50
51
1/4
✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
24 std::vector<uint64_t> temp(uarr.size());
52
2/2
✓ Branch 0 taken 192 times.
✓ Branch 1 taken 24 times.
216 for (size_t byte = 0; byte < 8; ++byte) {
53
1/4
✓ Branch 1 taken 192 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
192 std::vector<int> count(256, 0);
54
2/2
✓ Branch 0 taken 22784 times.
✓ Branch 1 taken 192 times.
22976 for (uint64_t val : uarr) {
55 22784 count[(val >> (byte * 8)) & 0xFF]++;
56 }
57
2/2
✓ Branch 0 taken 48960 times.
✓ Branch 1 taken 192 times.
49152 for (size_t i = 1; i < 256; ++i) {
58 48960 count[i] += count[i - 1];
59 }
60
2/2
✓ Branch 0 taken 22784 times.
✓ Branch 1 taken 192 times.
22976 for (int i = static_cast<int>(uarr.size()) - 1; i >= 0; --i) {
61 22784 temp[--count[(uarr[i] >> (byte * 8)) & 0xFF]] = uarr[i];
62 }
63
1/2
✓ Branch 1 taken 192 times.
✗ Branch 2 not taken.
192 uarr = temp;
64 }
65
66
2/2
✓ Branch 0 taken 2848 times.
✓ Branch 1 taken 24 times.
2872 for (size_t i = 0; i < arr.size(); ++i) {
67
2/2
✓ Branch 0 taken 1602 times.
✓ Branch 1 taken 1246 times.
5696 arr[i] = UintToDouble(uarr[i]);
68 }
69 }
70
71 void CompareExchangeBlocks(double *arr, size_t i, size_t step) {
72
4/4
✓ Branch 0 taken 2136 times.
✓ Branch 1 taken 16 times.
✓ Branch 2 taken 14904 times.
✓ Branch 3 taken 4148 times.
21204 for (size_t k = 0; k < step; ++k) {
73
4/4
✓ Branch 0 taken 353 times.
✓ Branch 1 taken 1783 times.
✓ Branch 2 taken 7577 times.
✓ Branch 3 taken 7327 times.
17040 if (arr[i + k] > arr[i + k + step]) {
74 std::swap(arr[i + k], arr[i + k + step]);
75 }
76 }
77 }
78
79 16 void OddEvenMergeIterative(double *arr, size_t start, size_t n) {
80
1/2
✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
16 if (n <= 1) {
81 return;
82 }
83 16 size_t step = n / 2;
84 CompareExchangeBlocks(arr, start, step);
85 16 step /= 2;
86
2/2
✓ Branch 0 taken 92 times.
✓ Branch 1 taken 16 times.
108 for (; step > 0; step /= 2) {
87
2/2
✓ Branch 0 taken 4148 times.
✓ Branch 1 taken 92 times.
4240 for (size_t i = step; i < n - step; i += step * 2) {
88 4148 CompareExchangeBlocks(arr, start + i, step);
89 }
90 }
91 }
92
93 24 void ProcessChunkTBB(double *raw_data, int chunk_idx, size_t chunk_size) {
94 24 size_t start_idx = static_cast<size_t>(chunk_idx) * chunk_size;
95 24 std::vector<double> local_arr(chunk_size);
96
2/2
✓ Branch 0 taken 2848 times.
✓ Branch 1 taken 24 times.
2872 for (size_t j = 0; j < chunk_size; ++j) {
97 2848 local_arr[j] = raw_data[start_idx + j];
98 }
99
1/2
✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
24 RadixSortDouble(local_arr);
100
2/2
✓ Branch 0 taken 2848 times.
✓ Branch 1 taken 24 times.
2872 for (size_t j = 0; j < chunk_size; ++j) {
101 2848 raw_data[start_idx + j] = local_arr[j];
102 }
103 24 }
104
105 12 void ExecuteTBBSortLocal(double *raw_data, size_t total_size, size_t chunk_size, int num_chunks_int) {
106 12 int num_threads = ppc::util::GetNumThreads();
107
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 tbb::task_arena arena(num_threads > 0 ? num_threads : 1);
108
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 arena.execute([&] {
109 24 tbb::parallel_for(tbb::blocked_range<int>(0, num_chunks_int),
110 12 [raw_data, chunk_size](const tbb::blocked_range<int> &r) {
111
2/4
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 for (int i = r.begin(); i != r.end(); ++i) {
112
0/2
✗ Branch 2 not taken.
✗ Branch 3 not taken.
24 ProcessChunkTBB(raw_data, i, chunk_size);
113 }
114 });
115
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
24 for (size_t size = chunk_size; size < total_size; size *= 2) {
116 12 int merges_count = static_cast<int>(total_size / (size * 2));
117 24 tbb::parallel_for(tbb::blocked_range<int>(0, merges_count), [raw_data, size](const tbb::blocked_range<int> &r) {
118
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
24 for (int i = r.begin(); i != r.end(); ++i) {
119 12 OddEvenMergeIterative(raw_data, static_cast<size_t>(i) * 2 * size, 2 * size);
120 }
121 12 });
122 }
123 12 });
124 12 }
125
126 12 void PerformLocalTBBSort(double *data, size_t total_size) {
127
1/2
✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
12 if (total_size == 0) {
128 return;
129 }
130 12 int num_threads = ppc::util::GetNumThreads();
131 12 if (num_threads <= 0) {
132 num_threads = 1;
133 }
134 size_t num_chunks = 1;
135
3/4
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
✓ Branch 2 taken 12 times.
✗ Branch 3 not taken.
24 while (num_chunks * 2 <= static_cast<size_t>(num_threads) && num_chunks * 2 <= total_size) {
136 num_chunks *= 2;
137 }
138 12 ExecuteTBBSortLocal(data, total_size, total_size / num_chunks, static_cast<int>(num_chunks));
139 }
140
141 4 void PerformFinalMergeTBB(double *raw_data, size_t local_chunk_size, size_t pow2) {
142 4 int num_threads = ppc::util::GetNumThreads();
143
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 tbb::task_arena arena(num_threads > 0 ? num_threads : 1);
144
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 arena.execute([&] {
145
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 for (size_t size = local_chunk_size; size < pow2; size *= 2) {
146 4 int merges_count = static_cast<int>(pow2 / (size * 2));
147 8 tbb::parallel_for(tbb::blocked_range<int>(0, merges_count), [raw_data, size](const tbb::blocked_range<int> &r) {
148
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 for (int i = r.begin(); i != r.end(); ++i) {
149 4 OddEvenMergeIterative(raw_data, static_cast<size_t>(i) * 2 * size, 2 * size);
150 }
151 4 });
152 }
153 4 });
154 4 }
155
156 8 void ExecuteMPIHybridSort(std::vector<double> &local_data, size_t pow2, int world_rank, int active_procs) {
157 8 size_t local_chunk_size = pow2 / active_procs;
158 8 int chunk_size_int = static_cast<int>(local_chunk_size);
159
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
8 size_t buffer_size = (world_rank < active_procs) ? local_chunk_size : 0;
160 8 std::vector<double> mpi_buffer(buffer_size, 0.0);
161
162
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (world_rank == 0) {
163
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 MPI_Scatter(local_data.data(), chunk_size_int, MPI_DOUBLE, mpi_buffer.data(), chunk_size_int, MPI_DOUBLE, 0,
164 MPI_COMM_WORLD);
165
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 } else if (world_rank < active_procs) {
166
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 MPI_Scatter(nullptr, 0, MPI_DATATYPE_NULL, mpi_buffer.data(), chunk_size_int, MPI_DOUBLE, 0, MPI_COMM_WORLD);
167 }
168
169
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
8 if (world_rank < active_procs) {
170
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 PerformLocalTBBSort(mpi_buffer.data(), local_chunk_size);
171 }
172
173
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (world_rank == 0) {
174
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 MPI_Gather(mpi_buffer.data(), chunk_size_int, MPI_DOUBLE, local_data.data(), chunk_size_int, MPI_DOUBLE, 0,
175 MPI_COMM_WORLD);
176
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 PerformFinalMergeTBB(local_data.data(), local_chunk_size, pow2);
177
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 } else if (world_rank < active_procs) {
178
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 MPI_Gather(mpi_buffer.data(), chunk_size_int, MPI_DOUBLE, nullptr, 0, MPI_DATATYPE_NULL, 0, MPI_COMM_WORLD);
179 }
180 8 }
181
182 8 void HandleGTestLocalSort(std::vector<double> &local_data, int world_rank) {
183
3/4
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.
8 if (world_rank == 0 || local_data.empty()) {
184 return;
185 }
186 size_t my_orig = local_data.size();
187 size_t my_pow2 = 1;
188
2/2
✓ Branch 0 taken 29 times.
✓ Branch 1 taken 4 times.
33 while (my_pow2 < my_orig) {
189 29 my_pow2 *= 2;
190 }
191
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
4 if (my_pow2 > my_orig) {
192 3 local_data.resize(my_pow2, std::numeric_limits<double>::max());
193 }
194
195 4 PerformLocalTBBSort(local_data.data(), my_pow2);
196
197
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
4 if (my_pow2 > my_orig) {
198 3 local_data.resize(my_orig);
199 }
200 }
201
202 size_t CalculatePaddedSize(size_t original_size) {
203 if (original_size == 0) {
204 return 1;
205 }
206 size_t pow2 = 1;
207
2/2
✓ Branch 0 taken 29 times.
✓ Branch 1 taken 4 times.
33 while (pow2 < original_size) {
208 29 pow2 *= 2;
209 }
210 return pow2;
211 }
212
213 } // namespace
214
215
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 DorofeevIBitwiseSortDoubleEOBatcherMergeALL::DorofeevIBitwiseSortDoubleEOBatcherMergeALL(const InType &in) {
216 SetTypeOfTask(GetStaticTypeOfTask());
217
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 GetInput() = in;
218 8 }
219
220 8 bool DorofeevIBitwiseSortDoubleEOBatcherMergeALL::ValidationImpl() {
221 8 return true;
222 }
223
224 8 bool DorofeevIBitwiseSortDoubleEOBatcherMergeALL::PreProcessingImpl() {
225 8 local_data_ = GetInput();
226 8 return true;
227 }
228
229 8 bool DorofeevIBitwiseSortDoubleEOBatcherMergeALL::RunImpl() {
230 8 int world_size = 0;
231 8 int world_rank = 0;
232 8 MPI_Comm_size(MPI_COMM_WORLD, &world_size);
233 8 MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
234
235
2/4
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 8 times.
8 bool is_pow2_procs = (world_size > 0) && ((world_size & (world_size - 1)) == 0);
236 8 int active_procs = is_pow2_procs ? world_size : 1;
237
238 size_t rank0_original_size = 0;
239 8 size_t pow2 = 1;
240
241
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (world_rank == 0) {
242 rank0_original_size = local_data_.size();
243
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (rank0_original_size == 0) {
244 active_procs = 1;
245 }
246 4 pow2 = CalculatePaddedSize(rank0_original_size);
247
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
4 if (pow2 > rank0_original_size) {
248 3 local_data_.resize(pow2, std::numeric_limits<double>::max());
249 }
250 }
251
252 8 MPI_Bcast(&active_procs, 1, MPI_INT, 0, MPI_COMM_WORLD);
253 8 MPI_Bcast(&pow2, 1, MPI_UNSIGNED_LONG_LONG, 0, MPI_COMM_WORLD);
254
255
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
8 if (active_procs == 1) {
256 if (world_rank == 0 && pow2 > 0) {
257 PerformLocalTBBSort(local_data_.data(), pow2);
258 }
259 } else {
260 8 ExecuteMPIHybridSort(local_data_, pow2, world_rank, active_procs);
261 }
262
263
4/4
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 1 times.
8 if (world_rank == 0 && pow2 > rank0_original_size) {
264 3 local_data_.resize(rank0_original_size);
265 }
266
267 8 HandleGTestLocalSort(local_data_, world_rank);
268
269 8 return true;
270 }
271
272 8 bool DorofeevIBitwiseSortDoubleEOBatcherMergeALL::PostProcessingImpl() {
273 8 GetOutput() = local_data_;
274 8 return true;
275 }
276
277 } // namespace dorofeev_i_bitwise_sort_double_eo_batcher_merge
278