GCC Code Coverage Report


Directory: ./
File: tasks/gonozov_l_bitwise_sorting_double_Batcher_merge/all/src/ops_all.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 136 143 95.1%
Functions: 14 14 100.0%
Branches: 84 128 65.6%

Line Branch Exec Source
1 #include "gonozov_l_bitwise_sorting_double_Batcher_merge/all/include/ops_all.hpp"
2
3 #include <mpi.h>
4 #include <tbb/tbb.h>
5
6 #include <algorithm>
7 #include <cstdint>
8 #include <cstring>
9 #include <limits>
10 #include <utility>
11 #include <vector>
12
13 #include "gonozov_l_bitwise_sorting_double_Batcher_merge/common/include/common.hpp"
14 #include "util/include/util.hpp"
15
16 namespace gonozov_l_bitwise_sorting_double_batcher_merge {
17
18 namespace {
19
20 // ==================== ВСПОМОГАТЕЛЬНЫЕ ФУНКЦИИ ====================
21
22 uint64_t DoubleToSortableInt(double d) {
23 uint64_t bits = 0;
24 std::memcpy(&bits, &d, sizeof(double));
25
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 44 times.
56 if ((bits >> 63) != 0) {
26 12 return ~bits;
27 }
28 44 return bits | 0x8000000000000000ULL;
29 }
30
31 double SortableIntToDouble(uint64_t bits) {
32 56 if ((bits >> 63) != 0) {
33 44 bits &= ~0x8000000000000000ULL;
34 } else {
35 12 bits = ~bits;
36 }
37 double result = 0.0;
38 std::memcpy(&result, &bits, sizeof(double));
39 return result;
40 }
41
42 size_t NextPowerOfTwo(size_t n) {
43 size_t power = 1;
44
2/2
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 8 times.
24 while (power < n) {
45 16 power <<= 1;
46 }
47 return power;
48 }
49
50
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
20 void RadixSortDouble(std::vector<double> &data) {
51
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
20 if (data.empty()) {
52 return;
53 }
54
55 20 std::vector<uint64_t> keys(data.size());
56
2/2
✓ Branch 0 taken 56 times.
✓ Branch 1 taken 20 times.
76 for (size_t i = 0; i < data.size(); ++i) {
57
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 44 times.
112 keys[i] = DoubleToSortableInt(data[i]);
58 }
59
60
1/4
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
20 std::vector<uint64_t> temp(keys.size());
61 constexpr int kRadix = 256;
62
63
2/2
✓ Branch 0 taken 160 times.
✓ Branch 1 taken 20 times.
180 for (int pass = 0; pass < 8; ++pass) {
64
1/4
✓ Branch 1 taken 160 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
160 std::vector<int> count(kRadix, 0);
65 160 const int shift = pass * 8;
66
67
2/2
✓ Branch 0 taken 448 times.
✓ Branch 1 taken 160 times.
608 for (uint64_t key : keys) {
68 448 ++count[(key >> shift) & 0xFF];
69 }
70
71
2/2
✓ Branch 0 taken 40800 times.
✓ Branch 1 taken 160 times.
40960 for (int i = 1; i < kRadix; ++i) {
72 40800 count[i] += count[i - 1];
73 }
74
75
2/2
✓ Branch 0 taken 448 times.
✓ Branch 1 taken 160 times.
608 for (int i = static_cast<int>(keys.size()) - 1; i >= 0; --i) {
76 448 const uint8_t byte = (keys[i] >> shift) & 0xFF;
77 448 temp[--count[byte]] = keys[i];
78 }
79
80 std::swap(keys, temp);
81 }
82
83
2/2
✓ Branch 0 taken 56 times.
✓ Branch 1 taken 20 times.
76 for (size_t i = 0; i < data.size(); ++i) {
84
2/2
✓ Branch 0 taken 44 times.
✓ Branch 1 taken 12 times.
112 data[i] = SortableIntToDouble(keys[i]);
85 }
86 }
87
88 void CompareExchangeBlocks(double *arr, size_t i, size_t step) {
89
4/4
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 8 times.
40 for (size_t k = 0; k < step; ++k) {
90
4/4
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 14 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 3 times.
24 if (arr[i + k] > arr[i + k + step]) {
91 std::swap(arr[i + k], arr[i + k + step]);
92 }
93 }
94 }
95
96 8 void OddEvenMergeIterative(double *arr, size_t start, size_t n) {
97
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
8 if (n <= 1) {
98 return;
99 }
100
101 8 size_t step = n / 2;
102 CompareExchangeBlocks(arr, start, step);
103
104 8 step /= 2;
105
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
16 for (; step > 0; step /= 2) {
106
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
16 for (size_t i = step; i < n - step; i += step * 2) {
107 8 CompareExchangeBlocks(arr, start + i, step);
108 }
109 }
110 }
111
112 16 void SortChunkTBB(double *raw_data, int chunk_index, size_t chunk_size) {
113 16 const size_t start_idx = static_cast<size_t>(chunk_index) * chunk_size;
114 16 std::vector<double> local_chunk(raw_data + start_idx, raw_data + start_idx + chunk_size);
115
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 RadixSortDouble(local_chunk);
116 std::ranges::copy(local_chunk.begin(), local_chunk.end(), raw_data + start_idx);
117 16 }
118
119 // ==================== ФУНКЦИИ ДЛЯ TBB-СОРТИРОВКИ ====================
120
121
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
8 void LocalTbbSort(std::vector<double> &data) {
122
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
8 if (data.empty()) {
123 return;
124 }
125
126 const size_t original_size = data.size();
127 const size_t padded_size = NextPowerOfTwo(original_size);
128
129
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
8 if (padded_size > original_size) {
130 8 data.resize(padded_size, std::numeric_limits<double>::infinity());
131 }
132
133 8 int num_threads = ppc::util::GetNumThreads();
134 8 if (num_threads <= 0) {
135 num_threads = 1;
136 }
137
138 size_t num_chunks = 1;
139
3/4
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 8 times.
✗ Branch 3 not taken.
16 while ((num_chunks * 2 <= static_cast<size_t>(num_threads)) && (num_chunks * 2 <= padded_size)) {
140 num_chunks *= 2;
141 }
142
143 8 const size_t chunk_size = std::max<size_t>(1, padded_size / num_chunks);
144 8 double *raw_data = data.data();
145 8 const int num_chunks_int = static_cast<int>(num_chunks);
146
147 24 tbb::parallel_for(0, num_chunks_int, [&](int chunk_index) { SortChunkTBB(raw_data, chunk_index, chunk_size); });
148
149
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
16 for (size_t current_size = chunk_size; current_size < padded_size; current_size *= 2) {
150 8 const int merges_count = static_cast<int>(padded_size / (current_size * 2));
151 8 tbb::parallel_for(0, merges_count, [&](int merge_index) {
152 8 OddEvenMergeIterative(raw_data, static_cast<size_t>(merge_index) * 2 * current_size, 2 * current_size);
153 });
154 }
155
156
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
8 if (padded_size > original_size) {
157 8 data.resize(original_size);
158 }
159 }
160
161 // ==================== ФУНКЦИИ ДЛЯ MPI-РАСПРЕДЕЛЕНИЯ ====================
162
163 struct DistributionParams {
164 std::vector<int> send_counts;
165 std::vector<int> send_displs;
166 int local_size{};
167 };
168
169 8 DistributionParams ComputeDistributionParams(size_t global_size, int num_processes, int rank) {
170 8 DistributionParams params;
171
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 params.send_counts.resize(static_cast<size_t>(num_processes));
172
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 params.send_displs.resize(static_cast<size_t>(num_processes));
173
174 8 const size_t base_count = global_size / static_cast<size_t>(num_processes);
175 8 const size_t remainder = global_size % static_cast<size_t>(num_processes);
176
177 size_t offset = 0;
178
2/2
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 8 times.
24 for (int proc = 0; proc < num_processes; ++proc) {
179
1/2
✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
16 const size_t count = base_count + (std::cmp_less(static_cast<size_t>(proc), remainder) ? 1ULL : 0ULL);
180 16 params.send_counts[static_cast<size_t>(proc)] = static_cast<int>(count);
181 16 params.send_displs[static_cast<size_t>(proc)] = static_cast<int>(offset);
182 16 offset += count;
183 }
184
185 8 params.local_size = params.send_counts[static_cast<size_t>(rank)];
186 8 return params;
187 }
188
189 8 void DistributeData(const std::vector<double> &source_data, std::vector<double> &local_data,
190 const DistributionParams &params, int rank) {
191 8 local_data.resize(static_cast<size_t>(params.local_size));
192
193
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (rank == 0) {
194 4 MPI_Scatterv(source_data.data(), params.send_counts.data(), params.send_displs.data(), MPI_DOUBLE,
195 4 local_data.data(), params.local_size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
196 } else {
197 4 MPI_Scatterv(nullptr, nullptr, nullptr, MPI_DOUBLE, local_data.data(), params.local_size, MPI_DOUBLE, 0,
198 MPI_COMM_WORLD);
199 }
200 8 }
201
202 // ==================== ФУНКЦИИ ДЛЯ СБОРА РЕЗУЛЬТАТОВ ====================
203 8 void GatherResultsToRoot(std::vector<double> &local_data, int rank, int num_processes) {
204
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (rank == 0) {
205 std::vector<double> all_data = std::move(local_data);
206
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 for (int partner = 1; partner < num_processes; ++partner) {
207 4 int received_size = 0;
208
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 MPI_Recv(&received_size, 1, MPI_INT, partner, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
209
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (received_size > 0) {
210
1/4
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
4 std::vector<double> received_data(static_cast<size_t>(received_size));
211
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 MPI_Recv(received_data.data(), received_size, MPI_DOUBLE, partner, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
212
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 all_data.insert(all_data.end(), received_data.begin(), received_data.end());
213 }
214 }
215 local_data = std::move(all_data);
216 } else {
217 4 const int my_size = static_cast<int>(local_data.size());
218 4 MPI_Send(&my_size, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
219
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (my_size > 0) {
220 4 MPI_Send(local_data.data(), my_size, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD);
221 }
222 local_data.clear();
223 }
224 8 }
225
226 8 void BroadcastResult(std::vector<double> &local_data, int rank) {
227 8 size_t total_size = local_data.size(); // без const
228 8 MPI_Bcast(&total_size, 1, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD);
229
230
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (rank == 0) {
231
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (total_size > 0) {
232 4 MPI_Bcast(local_data.data(), static_cast<int>(total_size), MPI_DOUBLE, 0, MPI_COMM_WORLD);
233 }
234 } else {
235 4 local_data.resize(total_size);
236
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (total_size > 0) {
237 4 MPI_Bcast(local_data.data(), static_cast<int>(total_size), MPI_DOUBLE, 0, MPI_COMM_WORLD);
238 }
239 }
240 8 }
241
242 8 void RunMpiVersion(std::vector<double> &local_data, const std::vector<double> &input_data, int rank,
243 int num_processes) {
244 // 1. Распределение данных
245 8 size_t global_size = input_data.size(); // без const
246 8 MPI_Bcast(&global_size, 1, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD);
247
248
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
8 if (global_size == 0) {
249 local_data.clear();
250 return;
251 }
252
253 8 const DistributionParams params = ComputeDistributionParams(global_size, num_processes, rank);
254
255 // Создаём копию для распределения (MPI требует не const данные)
256 const std::vector<double> &mutable_input = input_data;
257
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 DistributeData(mutable_input, local_data, params, rank);
258
259 // 2. Локальная TBB-сортировка
260
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 LocalTbbSort(local_data);
261
262 // 3. Сбор результатов на процесс 0
263
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 GatherResultsToRoot(local_data, rank, num_processes);
264
265 // 4. Финальная сортировка на процессе 0
266
3/4
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.
8 if ((rank == 0) && (!local_data.empty())) {
267
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 RadixSortDouble(local_data);
268 }
269
270 // 5. Рассылка результата всем процессам
271
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 BroadcastResult(local_data, rank);
272 8 }
273
274 void RunTbbVersion(std::vector<double> &local_data, std::vector<double> &&input_data) {
275 local_data = std::move(input_data);
276 LocalTbbSort(local_data);
277 }
278
279 } // namespace
280
281 // ==================== МЕТОДЫ КЛАССА ====================
282
283
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 GonozovLBitSortBatcherMergeALL::GonozovLBitSortBatcherMergeALL(const InType &in) {
284 SetTypeOfTask(GetStaticTypeOfTask());
285
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 GetInput() = in;
286 8 }
287
288 8 bool GonozovLBitSortBatcherMergeALL::ValidationImpl() {
289 8 return true;
290 }
291
292 8 bool GonozovLBitSortBatcherMergeALL::PreProcessingImpl() {
293 8 local_data_ = GetInput();
294 8 return true;
295 }
296
297 8 bool GonozovLBitSortBatcherMergeALL::RunImpl() {
298 8 int mpi_initialized = 0;
299 8 MPI_Initialized(&mpi_initialized);
300
301 8 int rank = 0;
302 8 int num_processes = 1;
303
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
8 if (mpi_initialized != 0) {
304 8 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
305 8 MPI_Comm_size(MPI_COMM_WORLD, &num_processes);
306 }
307
308
2/4
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 8 times.
8 const bool use_mpi = (mpi_initialized != 0 && num_processes > 1);
309 8 std::vector<double> result_data;
310
311
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
8 if (use_mpi) {
312
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 RunMpiVersion(result_data, local_data_, rank, num_processes);
313 local_data_.clear();
314 local_data_.shrink_to_fit();
315 } else {
316 RunTbbVersion(result_data, std::move(local_data_));
317 }
318
319 GetOutput() = std::move(result_data);
320 8 return true;
321 }
322
323 8 bool GonozovLBitSortBatcherMergeALL::PostProcessingImpl() {
324 8 return true;
325 }
326
327 } // namespace gonozov_l_bitwise_sorting_double_batcher_merge
328 // #include "gonozov_l_bitwise_sorting_double_Batcher_merge/all/include/ops_all.hpp"
329
330 // #include <mpi.h>
331 // #include <tbb/tbb.h>
332
333 // #include <algorithm>
334 // #include <cstdint>
335 // #include <cstring>
336 // #include <limits>
337 // #include <vector>
338
339 // #include "gonozov_l_bitwise_sorting_double_Batcher_merge/common/include/common.hpp"
340 // #include "util/include/util.hpp"
341
342 // namespace gonozov_l_bitwise_sorting_double_batcher_merge {
343
344 // namespace {
345
346 // uint64_t DoubleToSortableInt(double d) {
347 // uint64_t bits = 0;
348 // std::memcpy(&bits, &d, sizeof(double));
349
350 // if ((bits >> 63) != 0) {
351 // return ~bits;
352 // }
353
354 // return bits | 0x8000000000000000ULL;
355 // }
356
357 // double SortableIntToDouble(uint64_t bits) {
358 // if ((bits >> 63) != 0) {
359 // bits &= ~0x8000000000000000ULL;
360 // } else {
361 // bits = ~bits;
362 // }
363
364 // double result = 0.0;
365 // std::memcpy(&result, &bits, sizeof(double));
366
367 // return result;
368 // }
369
370 // size_t NextPowerOfTwo(size_t n) {
371 // size_t power = 1;
372 // while (power < n) {
373 // power <<= 1;
374 // }
375 // return power;
376 // }
377
378 // void RadixSortDouble(std::vector<double> &data) {
379 // if (data.empty()) {
380 // return;
381 // }
382
383 // std::vector<uint64_t> keys(data.size());
384
385 // for (size_t i = 0; i < data.size(); ++i) {
386 // keys[i] = DoubleToSortableInt(data[i]);
387 // }
388
389 // std::vector<uint64_t> temp(keys.size());
390
391 // constexpr int kRadix = 256;
392
393 // for (int pass = 0; pass < 8; ++pass) {
394 // std::vector<int> count(kRadix, 0);
395
396 // int shift = pass * 8;
397
398 // for (uint64_t key : keys) {
399 // count[(key >> shift) & 0xFF]++;
400 // }
401
402 // for (int i = 1; i < kRadix; ++i) {
403 // count[i] += count[i - 1];
404 // }
405
406 // for (int i = static_cast<int>(keys.size()) - 1; i >= 0; --i) {
407 // uint8_t byte = (keys[i] >> shift) & 0xFF;
408 // temp[--count[byte]] = keys[i];
409 // }
410
411 // std::swap(keys, temp);
412 // }
413
414 // for (size_t i = 0; i < data.size(); ++i) {
415 // data[i] = SortableIntToDouble(keys[i]);
416 // }
417 // }
418
419 // void CompareExchangeBlocks(double *arr, size_t i, size_t step) {
420 // for (size_t k = 0; k < step; ++k) {
421 // if (arr[i + k] > arr[i + k + step]) {
422 // std::swap(arr[i + k], arr[i + k + step]);
423 // }
424 // }
425 // }
426
427 // void OddEvenMergeIterative(double *arr, size_t start, size_t n) {
428 // if (n <= 1) {
429 // return;
430 // }
431
432 // size_t step = n / 2;
433
434 // CompareExchangeBlocks(arr, start, step);
435
436 // step /= 2;
437
438 // for (; step > 0; step /= 2) {
439 // for (size_t i = step; i < n - step; i += step * 2) {
440 // CompareExchangeBlocks(arr, start + i, step);
441 // }
442 // }
443 // }
444
445 // void SortChunkTBB(double *raw_data, int chunk_idx, size_t chunk_size) {
446 // size_t start_idx = static_cast<size_t>(chunk_idx) * chunk_size;
447
448 // std::vector<double> local_arr(raw_data + start_idx, raw_data + start_idx + chunk_size);
449
450 // RadixSortDouble(local_arr);
451
452 // std::ranges::copy(local_arr.begin(), local_arr.end(), raw_data + start_idx);
453 // }
454
455 // } // namespace
456
457 // GonozovLBitSortBatcherMergeALL::GonozovLBitSortBatcherMergeALL(const InType &in) {
458 // SetTypeOfTask(GetStaticTypeOfTask());
459
460 // GetInput() = in;
461 // }
462
463 // bool GonozovLBitSortBatcherMergeALL::ValidationImpl() {
464 // return true;
465 // }
466
467 // bool GonozovLBitSortBatcherMergeALL::PreProcessingImpl() {
468 // local_data_ = GetInput();
469
470 // return true;
471 // }
472
473 // bool GonozovLBitSortBatcherMergeALL::RunImpl() {
474 // int mpi_initialized = 0;
475 // MPI_Initialized(&mpi_initialized);
476
477 // int rank = 0, size = 1;
478 // if (mpi_initialized) {
479 // MPI_Comm_rank(MPI_COMM_WORLD, &rank);
480 // MPI_Comm_size(MPI_COMM_WORLD, &size);
481 // }
482
483 // bool use_mpi = (mpi_initialized && size > 1);
484 // std::vector<double> local_data;
485
486 // if (use_mpi) {
487 // // ==================== MPI + TBB ВЕРСИЯ ====================
488
489 // // 1. Распределение данных
490 // size_t global_n = local_data_.size();
491 // MPI_Bcast(&global_n, 1, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD);
492
493 // if (global_n == 0) {
494 // GetOutput() = std::vector<double>();
495 // return true;
496 // }
497
498 // // Вычисляем размер порции для каждого процесса
499 // size_t base_count = global_n / size;
500 // size_t remainder = global_n % size;
501
502 // std::vector<int> send_counts(size), send_displs(size);
503 // size_t offset = 0;
504 // for (int i = 0; i < size; ++i) {
505 // send_counts[i] = static_cast<int>(base_count + (static_cast<size_t>(i) < remainder ? 1 : 0));
506 // send_displs[i] = static_cast<int>(offset);
507 // offset += send_counts[i];
508 // }
509
510 // int local_n = send_counts[rank];
511 // local_data.resize(local_n);
512
513 // // Рассылка данных
514 // if (rank == 0) {
515 // MPI_Scatterv(local_data_.data(), send_counts.data(), send_displs.data(), MPI_DOUBLE, local_data.data(),
516 // local_n,
517 // MPI_DOUBLE, 0, MPI_COMM_WORLD);
518 // local_data_.clear();
519 // local_data_.shrink_to_fit();
520 // } else {
521 // MPI_Scatterv(nullptr, nullptr, nullptr, MPI_DOUBLE, local_data.data(), local_n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
522 // }
523
524 // // 2. Локальная TBB-сортировка
525 // if (!local_data.empty()) {
526 // size_t local_n_original = local_data.size();
527 // size_t local_new_size = NextPowerOfTwo(local_n_original);
528
529 // if (local_new_size > local_n_original) {
530 // local_data.resize(local_new_size, std::numeric_limits<double>::infinity());
531 // }
532
533 // int num_threads = ppc::util::GetNumThreads();
534 // if (num_threads <= 0) {
535 // num_threads = 1;
536 // }
537
538 // size_t num_chunks = 1;
539 // while (num_chunks * 2 <= static_cast<size_t>(num_threads) && num_chunks * 2 <= local_new_size) {
540 // num_chunks *= 2;
541 // }
542
543 // size_t chunk_size = std::max<size_t>(1, local_new_size / num_chunks);
544 // double *raw_data = local_data.data();
545 // int num_chunks_int = static_cast<int>(num_chunks);
546
547 // tbb::parallel_for(0, num_chunks_int, [&](int i) { SortChunkTBB(raw_data, i, chunk_size); });
548
549 // for (size_t cur_size = chunk_size; cur_size < local_new_size; cur_size *= 2) {
550 // int merges_count = static_cast<int>(local_new_size / (cur_size * 2));
551 // tbb::parallel_for(0, merges_count, [&](int i) {
552 // OddEvenMergeIterative(raw_data, static_cast<size_t>(i) * 2 * cur_size, 2 * cur_size);
553 // });
554 // }
555
556 // if (local_new_size > local_n_original) {
557 // local_data.resize(local_n_original);
558 // }
559 // }
560
561 // // 3. Сбор всех данных на процесс 0 (упрощённое слияние)
562 // if (rank == 0) {
563 // std::vector<double> all_data = std::move(local_data);
564 // for (int p = 1; p < size; ++p) {
565 // int recv_size;
566 // MPI_Recv(&recv_size, 1, MPI_INT, p, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
567 // if (recv_size > 0) {
568 // std::vector<double> recv_data(recv_size);
569 // MPI_Recv(recv_data.data(), recv_size, MPI_DOUBLE, p, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
570 // all_data.insert(all_data.end(), recv_data.begin(), recv_data.end());
571 // }
572 // }
573 // local_data = std::move(all_data);
574 // } else {
575 // int my_size = static_cast<int>(local_data.size());
576 // MPI_Send(&my_size, 1, MPI_INT, 0, 0, MPI_COMM_WORLD);
577 // if (my_size > 0) {
578 // MPI_Send(local_data.data(), my_size, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD);
579 // }
580 // local_data.clear();
581 // }
582
583 // // 4. Финальная сортировка на процессе 0
584 // if (rank == 0 && !local_data.empty()) {
585 // RadixSortDouble(local_data);
586 // }
587
588 // // 5. Рассылка результата всем процессам
589 // size_t total_size = local_data.size();
590 // MPI_Bcast(&total_size, 1, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD);
591
592 // if (rank == 0) {
593 // if (total_size > 0) {
594 // MPI_Bcast(local_data.data(), static_cast<int>(total_size), MPI_DOUBLE, 0, MPI_COMM_WORLD);
595 // }
596 // GetOutput() = std::move(local_data);
597 // } else {
598 // local_data.resize(total_size);
599 // if (total_size > 0) {
600 // MPI_Bcast(local_data.data(), static_cast<int>(total_size), MPI_DOUBLE, 0, MPI_COMM_WORLD);
601 // }
602 // GetOutput() = std::move(local_data);
603 // }
604
605 // } else {
606 // // ==================== TBB ВЕРСИЯ (БЕЗ MPI) ====================
607 // local_data = std::move(local_data_);
608
609 // if (!local_data.empty()) {
610 // size_t n = local_data.size();
611 // size_t new_size = NextPowerOfTwo(n);
612
613 // if (new_size > n) {
614 // local_data.resize(new_size, std::numeric_limits<double>::infinity());
615 // }
616
617 // int num_threads = ppc::util::GetNumThreads();
618 // if (num_threads <= 0) {
619 // num_threads = 1;
620 // }
621
622 // size_t num_chunks = 1;
623 // while (num_chunks * 2 <= static_cast<size_t>(num_threads) && num_chunks * 2 <= new_size) {
624 // num_chunks *= 2;
625 // }
626
627 // size_t chunk_size = std::max<size_t>(1, new_size / num_chunks);
628 // double *raw_data = local_data.data();
629 // int num_chunks_int = static_cast<int>(num_chunks);
630
631 // tbb::parallel_for(0, num_chunks_int, [&](int i) { SortChunkTBB(raw_data, i, chunk_size); });
632
633 // for (size_t size_step = chunk_size; size_step < new_size; size_step *= 2) {
634 // int merges_count = static_cast<int>(new_size / (size_step * 2));
635 // tbb::parallel_for(0, merges_count, [&](int i) {
636 // OddEvenMergeIterative(raw_data, static_cast<size_t>(i) * 2 * size_step, 2 * size_step);
637 // });
638 // }
639
640 // if (new_size > n) {
641 // local_data.resize(n);
642 // }
643 // }
644
645 // GetOutput() = std::move(local_data);
646 // }
647
648 // return true;
649 // }
650
651 // bool GonozovLBitSortBatcherMergeALL::PostProcessingImpl() {
652 // return true;
653 // }
654
655 // } // namespace gonozov_l_bitwise_sorting_double_batcher_merge
656
657 // #include "gonozov_l_bitwise_sorting_double_Batcher_merge/all/include/ops_all.hpp"
658
659 // #include <mpi.h>
660 // #include <tbb/tbb.h>
661
662 // #include <algorithm>
663 // #include <cstdint>
664 // #include <cstring>
665 // #include <limits>
666 // #include <vector>
667
668 // #include "gonozov_l_bitwise_sorting_double_Batcher_merge/common/include/common.hpp"
669 // #include "util/include/util.hpp"
670
671 // namespace gonozov_l_bitwise_sorting_double_batcher_merge {
672
673 // namespace {
674
675 // uint64_t DoubleToSortableInt(double d) {
676 // uint64_t bits = 0;
677 // std::memcpy(&bits, &d, sizeof(double));
678
679 // if ((bits >> 63) != 0) {
680 // return ~bits;
681 // }
682
683 // return bits | 0x8000000000000000ULL;
684 // }
685
686 // double SortableIntToDouble(uint64_t bits) {
687 // if ((bits >> 63) != 0) {
688 // bits &= ~0x8000000000000000ULL;
689 // } else {
690 // bits = ~bits;
691 // }
692
693 // double result = 0.0;
694 // std::memcpy(&result, &bits, sizeof(double));
695
696 // return result;
697 // }
698
699 // size_t NextPowerOfTwo(size_t n) {
700 // size_t power = 1;
701 // while (power < n) {
702 // power <<= 1;
703 // }
704 // return power;
705 // }
706
707 // void RadixSortDouble(std::vector<double> &data) {
708 // if (data.empty()) {
709 // return;
710 // }
711
712 // std::vector<uint64_t> keys(data.size());
713
714 // for (size_t i = 0; i < data.size(); ++i) {
715 // keys[i] = DoubleToSortableInt(data[i]);
716 // }
717
718 // std::vector<uint64_t> temp(keys.size());
719
720 // constexpr int kRadix = 256;
721
722 // for (int pass = 0; pass < 8; ++pass) {
723 // std::vector<int> count(kRadix, 0);
724
725 // int shift = pass * 8;
726
727 // for (uint64_t key : keys) {
728 // count[(key >> shift) & 0xFF]++;
729 // }
730
731 // for (int i = 1; i < kRadix; ++i) {
732 // count[i] += count[i - 1];
733 // }
734
735 // for (int i = static_cast<int>(keys.size()) - 1; i >= 0; --i) {
736 // uint8_t byte = (keys[i] >> shift) & 0xFF;
737 // temp[--count[byte]] = keys[i];
738 // }
739
740 // std::swap(keys, temp);
741 // }
742
743 // for (size_t i = 0; i < data.size(); ++i) {
744 // data[i] = SortableIntToDouble(keys[i]);
745 // }
746 // }
747
748 // void CompareExchangeBlocks(double *arr, size_t i, size_t step) {
749 // for (size_t k = 0; k < step; ++k) {
750 // if (arr[i + k] > arr[i + k + step]) {
751 // std::swap(arr[i + k], arr[i + k + step]);
752 // }
753 // }
754 // }
755
756 // void OddEvenMergeIterative(double *arr, size_t start, size_t n) {
757 // if (n <= 1) {
758 // return;
759 // }
760
761 // size_t step = n / 2;
762
763 // CompareExchangeBlocks(arr, start, step);
764
765 // step /= 2;
766
767 // for (; step > 0; step /= 2) {
768 // for (size_t i = step; i < n - step; i += step * 2) {
769 // CompareExchangeBlocks(arr, start + i, step);
770 // }
771 // }
772 // }
773
774 // void SortChunkALL(double *raw_data, int chunk_idx, size_t chunk_size) {
775 // size_t start_idx = static_cast<size_t>(chunk_idx) * chunk_size;
776
777 // std::vector<double> local_arr(raw_data + start_idx, raw_data + start_idx + chunk_size);
778
779 // RadixSortDouble(local_arr);
780
781 // std::ranges::copy(local_arr.begin(), local_arr.end(), raw_data + start_idx);
782 // }
783
784 // } // namespace
785
786 // GonozovLBitSortBatcherMergeALL::GonozovLBitSortBatcherMergeALL(const InType &in) {
787 // SetTypeOfTask(GetStaticTypeOfTask());
788
789 // GetInput() = in;
790 // }
791
792 // bool GonozovLBitSortBatcherMergeALL::ValidationImpl() {
793 // return true;
794 // }
795
796 // bool GonozovLBitSortBatcherMergeALL::PreProcessingImpl() {
797 // local_data_ = GetInput();
798
799 // return true;
800 // }
801
802 // bool GonozovLBitSortBatcherMergeALL::RunImpl() {
803 // if (local_data_.empty()) {
804 // return true;
805 // }
806
807 // size_t n = local_data_.size();
808
809 // size_t new_size = NextPowerOfTwo(n);
810
811 // if (new_size > n) {
812 // local_data_.resize(new_size, std::numeric_limits<double>::infinity());
813 // }
814
815 // int num_threads = ppc::util::GetNumThreads();
816
817 // if (num_threads <= 0) {
818 // num_threads = 1;
819 // }
820
821 // size_t num_chunks = 1;
822
823 // while (num_chunks * 2 <= static_cast<size_t>(num_threads) && num_chunks * 2 <= new_size) {
824 // num_chunks *= 2;
825 // }
826
827 // size_t chunk_size = new_size / num_chunks;
828 // chunk_size = std::max<size_t>(1, chunk_size);
829 // double *raw_data = local_data_.data();
830 // int num_chunks_int = static_cast<int>(num_chunks);
831
832 // tbb::parallel_for(0, num_chunks_int, [&](int i) { SortChunkALL(raw_data, i, chunk_size); });
833
834 // for (size_t size = chunk_size; size < new_size; size *= 2) {
835 // int merges_count = static_cast<int>(new_size / (size * 2));
836
837 // tbb::parallel_for(0, merges_count,
838 // [&](int i) { OddEvenMergeIterative(raw_data, static_cast<size_t>(i) * 2 * size, 2 * size); });
839 // }
840
841 // if (new_size > n) {
842 // local_data_.resize(n);
843 // }
844
845 // MPI_Barrier(MPI_COMM_WORLD);
846 // GetOutput() = local_data_;
847 // return true;
848 // }
849
850 // bool GonozovLBitSortBatcherMergeALL::PostProcessingImpl() {
851 // return true;
852 // }
853
854 // } // namespace gonozov_l_bitwise_sorting_double_batcher_merge
855