GCC Code Coverage Report


Directory: ./
File: tasks/mityaeva_radix/omp/src/sorter_omp.cpp
Date: 2026-04-02 17:12:27
Exec Total Coverage
Lines: 25 33 75.8%
Functions: 2 4 50.0%
Branches: 20 38 52.6%

Line Branch Exec Source
1 #include "mityaeva_radix/omp/include/sorter_omp.hpp"
2
3 #include <omp.h>
4
5 #include <algorithm>
6 #include <cstddef>
7 #include <cstdint>
8 #include <cstring>
9 #include <vector>
10
11 #include "util/include/util.hpp"
12
13 namespace mityaeva_radix {
14
15 uint64_t SorterOmp::DoubleToSortable(uint64_t x) {
16 if ((x & 0x8000000000000000ULL) != 0U) {
17 return ~x;
18 }
19 return x | 0x8000000000000000ULL;
20 }
21
22 uint64_t SorterOmp::SortableToDouble(uint64_t x) {
23 if ((x & 0x8000000000000000ULL) != 0U) {
24 return x & 0x7FFFFFFFFFFFFFFFULL;
25 }
26 return ~x;
27 }
28
29 352 void SorterOmp::CountingPass(std::vector<uint64_t> *current, std::vector<uint64_t> *next, int shift, int radix,
30 int num_threads, size_t data_size) {
31
1/2
✓ Branch 2 taken 352 times.
✗ Branch 3 not taken.
352 std::vector<std::vector<int>> thread_counters(num_threads, std::vector<int>(radix, 0));
32
33 352 #pragma omp parallel default(none) shared(current, shift, radix, thread_counters, data_size, num_threads)
34 {
35 int thread_id = omp_get_thread_num();
36 size_t chunk_size = data_size / static_cast<size_t>(num_threads);
37 size_t start = static_cast<size_t>(thread_id) * chunk_size;
38 size_t end = (thread_id == num_threads - 1) ? data_size : start + chunk_size;
39
40 auto &local_counters = thread_counters[thread_id];
41
42 for (size_t i = start; i < end; i++) {
43 int digit = static_cast<int>(((*current)[i] >> static_cast<size_t>(shift)) & static_cast<size_t>(radix - 1));
44 local_counters[digit]++;
45 }
46 }
47
48
1/2
✓ Branch 1 taken 352 times.
✗ Branch 2 not taken.
352 std::vector<int> prefix_sums(static_cast<size_t>(radix * num_threads), 0);
49
50 int total = 0;
51
2/2
✓ Branch 0 taken 90112 times.
✓ Branch 1 taken 352 times.
90464 for (int digit = 0; digit < radix; digit++) {
52 int digit_sum = 0;
53
2/2
✓ Branch 0 taken 225280 times.
✓ Branch 1 taken 90112 times.
315392 for (int thread_idx = 0; thread_idx < num_threads; thread_idx++) {
54 225280 prefix_sums[(thread_idx * radix) + digit] = total + digit_sum;
55 225280 digit_sum += thread_counters[thread_idx][digit];
56 }
57 90112 total += digit_sum;
58 }
59
60
1/2
✓ Branch 0 taken 352 times.
✗ Branch 1 not taken.
352 #pragma omp parallel default(none) shared(current, next, shift, radix, prefix_sums, data_size, num_threads)
61 {
62 int thread_id = omp_get_thread_num();
63 size_t chunk_size = data_size / static_cast<size_t>(num_threads);
64 size_t start = static_cast<size_t>(thread_id) * chunk_size;
65 size_t end = (thread_id == num_threads - 1) ? data_size : start + chunk_size;
66
67 std::vector<int> local_pos(radix, 0);
68 for (int digit = 0; digit < radix; digit++) {
69 local_pos[digit] = prefix_sums[(thread_id * radix) + digit];
70 }
71
72 for (size_t i = start; i < end; i++) {
73 int digit = static_cast<int>(((*current)[i] >> static_cast<size_t>(shift)) & static_cast<size_t>(radix - 1));
74 auto pos = static_cast<size_t>(local_pos[digit]++);
75 (*next)[pos] = (*current)[i];
76 }
77 }
78 352 }
79
80
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 44 times.
48 void SorterOmp::Sort(std::vector<double> &data) {
81
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 44 times.
48 if (data.size() <= 1) {
82 4 return;
83 }
84
85 44 int num_threads = ppc::util::GetNumThreads();
86 44 omp_set_num_threads(num_threads);
87
88
1/2
✓ Branch 2 taken 44 times.
✗ Branch 3 not taken.
44 std::vector<double> temp(data.size());
89
1/4
✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
44 std::vector<uint64_t> as_uint(data.size());
90
91
1/2
✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
44 #pragma omp parallel for default(none) shared(data, as_uint, num_threads)
92 for (size_t i = 0; i < data.size(); i++) {
93 uint64_t bits = 0;
94 std::memcpy(&bits, &data[i], sizeof(double));
95 as_uint[i] = DoubleToSortable(bits);
96 }
97
98 const int bits_per_pass = 8;
99 const int radix = 1 << bits_per_pass;
100 const int passes = static_cast<int>(sizeof(uint64_t) * 8 / bits_per_pass);
101
102
1/4
✓ Branch 1 taken 44 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
44 std::vector<uint64_t> uint_temp(data.size());
103 44 std::vector<uint64_t> *current = &as_uint;
104 std::vector<uint64_t> *next = &uint_temp;
105
106
2/2
✓ Branch 0 taken 352 times.
✓ Branch 1 taken 44 times.
396 for (int pass = 0; pass < passes; pass++) {
107
1/2
✓ Branch 1 taken 352 times.
✗ Branch 2 not taken.
352 int shift = pass * bits_per_pass;
108
109
1/2
✓ Branch 1 taken 352 times.
✗ Branch 2 not taken.
352 CountingPass(current, next, shift, radix, num_threads, data.size());
110
111 std::swap(current, next);
112 }
113
114
1/2
✓ Branch 0 taken 44 times.
✗ Branch 1 not taken.
44 #pragma omp parallel for default(none) shared(data, current, as_uint, uint_temp)
115 for (size_t i = 0; i < data.size(); i++) {
116 uint64_t bits = 0;
117 if (current == &as_uint) {
118 bits = SortableToDouble(as_uint[i]);
119 } else {
120 bits = SortableToDouble(uint_temp[i]);
121 }
122 std::memcpy(&data[i], &bits, sizeof(double));
123 }
124 }
125
126 } // namespace mityaeva_radix
127