| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "papulina_y_radix_sort/omp/include/ops_omp.hpp" | ||
| 2 | |||
| 3 | #include <omp.h> | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include <cmath> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <cstdint> | ||
| 9 | #include <cstring> | ||
| 10 | #include <span> | ||
| 11 | #include <utility> | ||
| 12 | #include <vector> | ||
| 13 | |||
| 14 | #include "papulina_y_radix_sort/common/include/common.hpp" | ||
| 15 | |||
| 16 | namespace papulina_y_radix_sort { | ||
| 17 | |||
| 18 |
1/2✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
|
28 | PapulinaYRadixSortOMP::PapulinaYRadixSortOMP(const InType &in) { |
| 19 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 20 |
1/2✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
|
28 | GetInput() = in; |
| 21 | 28 | GetOutput() = std::vector<double>(); | |
| 22 | 28 | } | |
| 23 | 28 | bool PapulinaYRadixSortOMP::ValidationImpl() { | |
| 24 | 28 | return true; | |
| 25 | } | ||
| 26 | |||
| 27 | 28 | bool PapulinaYRadixSortOMP::PreProcessingImpl() { | |
| 28 | 28 | return true; | |
| 29 | } | ||
| 30 | 28 | bool PapulinaYRadixSortOMP::RunImpl() { | |
| 31 | double *result = GetInput().data(); | ||
| 32 | 28 | int size = static_cast<int>(GetInput().size()); | |
| 33 | // int threads_count = std::min(omp_get_max_threads(), std::max(4, size / 1000)); | ||
| 34 | 28 | int threads_count = omp_get_max_threads(); | |
| 35 | |||
| 36 | 28 | std::vector<std::span<double>> chunks; | |
| 37 | 28 | std::vector<int> chunks_offsets; | |
| 38 | |||
| 39 | 28 | int chunk_size = size / threads_count; | |
| 40 | 28 | int reminder = size % threads_count; | |
| 41 | |||
| 42 | 28 | int offset = 0; | |
| 43 |
2/2✓ Branch 0 taken 70 times.
✓ Branch 1 taken 28 times.
|
98 | for (int i = 0; i < threads_count; i++) { |
| 44 |
2/2✓ Branch 0 taken 52 times.
✓ Branch 1 taken 18 times.
|
70 | int real_chunk_size = chunk_size + (i < reminder ? 1 : 0); |
| 45 |
1/2✓ Branch 0 taken 70 times.
✗ Branch 1 not taken.
|
70 | if (real_chunk_size > 0) { |
| 46 | chunks_offsets.push_back(offset); | ||
| 47 |
1/2✓ Branch 1 taken 70 times.
✗ Branch 2 not taken.
|
70 | chunks.emplace_back(result + offset, real_chunk_size); |
| 48 | 70 | offset += real_chunk_size; | |
| 49 | } | ||
| 50 | } | ||
| 51 | 28 | threads_count = static_cast<int>( | |
| 52 | chunks.size()); // тк возможно chunks.size() < threads_count(каким-то потокам ничего не распределилось из данных) | ||
| 53 | |||
| 54 | 28 | #pragma omp parallel for default(none) shared(result, chunks, threads_count) num_threads(threads_count) | |
| 55 | for (int i = 0; i < threads_count; i++) { | ||
| 56 | RadixSort(chunks[i].data(), static_cast<int>(chunks[i].size())); | ||
| 57 | } | ||
| 58 | |||
| 59 |
1/2✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
|
28 | MergeChunks(chunks, result); |
| 60 |
1/4✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
28 | GetOutput() = std::vector<double>(size); |
| 61 |
2/2✓ Branch 0 taken 200 times.
✓ Branch 1 taken 28 times.
|
228 | for (int i = 0; i < size; i++) { |
| 62 | 200 | GetOutput()[i] = result[i]; | |
| 63 | // std::cout << result [i] << " "; | ||
| 64 | } | ||
| 65 | // std::cout << std::endl; | ||
| 66 | 28 | return true; | |
| 67 | } | ||
| 68 | 42 | std::vector<double> PapulinaYRadixSortOMP::Merge(const std::vector<double> &a, const std::vector<double> &b) { | |
| 69 |
1/2✓ Branch 1 taken 42 times.
✗ Branch 2 not taken.
|
42 | std::vector<double> result; |
| 70 |
1/2✓ Branch 1 taken 42 times.
✗ Branch 2 not taken.
|
42 | result.reserve(a.size() + b.size()); |
| 71 | size_t i = 0; | ||
| 72 | size_t j = 0; | ||
| 73 |
4/4✓ Branch 0 taken 185 times.
✓ Branch 1 taken 16 times.
✓ Branch 2 taken 26 times.
✓ Branch 3 taken 159 times.
|
201 | while (i < a.size() && j < b.size()) { |
| 74 |
2/2✓ Branch 0 taken 95 times.
✓ Branch 1 taken 64 times.
|
159 | if (a[i] <= b[j]) { |
| 75 | result.push_back(a[i]); | ||
| 76 | 95 | ++i; | |
| 77 | } else { | ||
| 78 | result.push_back(b[j]); | ||
| 79 | 64 | ++j; | |
| 80 | } | ||
| 81 | } | ||
| 82 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 42 times.
|
82 | while (i < a.size()) { |
| 83 | result.push_back(a[i]); | ||
| 84 | 40 | ++i; | |
| 85 | } | ||
| 86 |
2/2✓ Branch 0 taken 35 times.
✓ Branch 1 taken 42 times.
|
77 | while (j < b.size()) { |
| 87 | result.push_back(b[j]); | ||
| 88 | 35 | ++j; | |
| 89 | } | ||
| 90 | 42 | return result; | |
| 91 | } | ||
| 92 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 21 times.
|
28 | void PapulinaYRadixSortOMP::MergeChunks(std::vector<std::span<double>> &chunks, double *result) { |
| 93 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 21 times.
|
28 | if (chunks.size() <= 1) { |
| 94 | 7 | return; | |
| 95 | } | ||
| 96 | |||
| 97 | 21 | int n = static_cast<int>(GetInput().size()); | |
| 98 | |||
| 99 | 21 | std::vector<std::vector<double>> chunks_copy; | |
| 100 |
1/2✓ Branch 1 taken 21 times.
✗ Branch 2 not taken.
|
21 | chunks_copy.reserve(chunks.size()); |
| 101 |
2/2✓ Branch 0 taken 63 times.
✓ Branch 1 taken 21 times.
|
84 | for (auto &chunk : chunks) { |
| 102 |
1/2✓ Branch 1 taken 63 times.
✗ Branch 2 not taken.
|
63 | chunks_copy.emplace_back(chunk.begin(), chunk.end()); |
| 103 | } | ||
| 104 |
2/2✓ Branch 0 taken 35 times.
✓ Branch 1 taken 21 times.
|
56 | while (chunks_copy.size() > 1) { |
| 105 | 35 | size_t pair_count = chunks_copy.size() / 2; | |
| 106 |
1/2✓ Branch 1 taken 35 times.
✗ Branch 2 not taken.
|
35 | std::vector<std::vector<double>> next((chunks_copy.size() + 1) / 2); |
| 107 | |||
| 108 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 28 times.
|
35 | #pragma omp parallel for default(none) shared(chunks_copy, next, pair_count) |
| 109 | for (size_t i = 0; i < pair_count; ++i) { | ||
| 110 | size_t idx = i; | ||
| 111 | next[idx] = Merge(chunks_copy[2 * idx], chunks_copy[(2 * idx) + 1]); | ||
| 112 | } | ||
| 113 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 28 times.
|
35 | if (chunks_copy.size() % 2 != 0) { |
| 114 | next.back() = std::move(chunks_copy.back()); | ||
| 115 | } | ||
| 116 | 35 | chunks_copy = std::move(next); | |
| 117 | 35 | } | |
| 118 | const auto &final_result = chunks_copy[0]; | ||
| 119 |
2/2✓ Branch 0 taken 150 times.
✓ Branch 1 taken 21 times.
|
171 | for (int i = 0; i < n; ++i) { |
| 120 | 150 | result[i] = final_result[i]; | |
| 121 | } | ||
| 122 | 21 | } | |
| 123 | 70 | void PapulinaYRadixSortOMP::RadixSort(double *arr, int size) { | |
| 124 | 70 | std::vector<uint64_t> bytes(size); | |
| 125 |
1/4✓ Branch 1 taken 70 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
70 | std::vector<uint64_t> out(size); |
| 126 | |||
| 127 |
2/2✓ Branch 0 taken 200 times.
✓ Branch 1 taken 70 times.
|
270 | for (int i = 0; i < size; i++) { |
| 128 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 160 times.
|
400 | bytes[i] = InBytes(arr[i]); |
| 129 | } | ||
| 130 | |||
| 131 | 70 | SortByByte(bytes.data(), out.data(), 0, size); | |
| 132 | 70 | SortByByte(out.data(), bytes.data(), 1, size); | |
| 133 | 70 | SortByByte(bytes.data(), out.data(), 2, size); | |
| 134 | 70 | SortByByte(out.data(), bytes.data(), 3, size); | |
| 135 | 70 | SortByByte(bytes.data(), out.data(), 4, size); | |
| 136 | 70 | SortByByte(out.data(), bytes.data(), 5, size); | |
| 137 | 70 | SortByByte(bytes.data(), out.data(), 6, size); | |
| 138 | 70 | SortByByte(out.data(), bytes.data(), 7, size); | |
| 139 | |||
| 140 |
2/2✓ Branch 0 taken 200 times.
✓ Branch 1 taken 70 times.
|
270 | for (int i = 0; i < size; i++) { |
| 141 |
2/2✓ Branch 0 taken 160 times.
✓ Branch 1 taken 40 times.
|
400 | arr[i] = FromBytes(bytes[i]); |
| 142 | } | ||
| 143 | 70 | } | |
| 144 | 28 | bool PapulinaYRadixSortOMP::PostProcessingImpl() { | |
| 145 | 28 | return true; | |
| 146 | } | ||
| 147 | 560 | void PapulinaYRadixSortOMP::SortByByte(uint64_t *bytes, uint64_t *out, int byte, int size) { | |
| 148 | auto *byte_view = reinterpret_cast<unsigned char *>(bytes); // просматриваем как массив байтов | ||
| 149 | 560 | std::array<int, 256> counter = {0}; | |
| 150 | |||
| 151 |
2/2✓ Branch 0 taken 1600 times.
✓ Branch 1 taken 560 times.
|
2160 | for (int i = 0; i < size; i++) { |
| 152 | 1600 | int index = byte_view[(8 * i) + byte]; | |
| 153 | 1600 | *(counter.data() + index) += 1; | |
| 154 | } | ||
| 155 | int tmp = 0; | ||
| 156 | int j = 0; | ||
| 157 |
1/2✓ Branch 0 taken 45344 times.
✗ Branch 1 not taken.
|
45344 | for (; j < 256; j++) { |
| 158 |
2/2✓ Branch 0 taken 560 times.
✓ Branch 1 taken 44784 times.
|
45344 | if (*(counter.data() + j) != 0) { |
| 159 | tmp = *(counter.data() + j); | ||
| 160 | 560 | *(counter.data() + j) = 0; | |
| 161 | 560 | j++; | |
| 162 | 560 | break; | |
| 163 | } | ||
| 164 | } | ||
| 165 |
2/2✓ Branch 0 taken 98016 times.
✓ Branch 1 taken 560 times.
|
98576 | for (; j < 256; j++) { |
| 166 | 98016 | int a = *(counter.data() + j); | |
| 167 | 98016 | *(counter.data() + j) = tmp; | |
| 168 | 98016 | tmp += a; | |
| 169 | } | ||
| 170 |
2/2✓ Branch 0 taken 1600 times.
✓ Branch 1 taken 560 times.
|
2160 | for (int i = 0; i < size; i++) { |
| 171 | 1600 | int index = byte_view[(8 * i) + byte]; | |
| 172 | 1600 | out[*(counter.data() + index)] = bytes[i]; | |
| 173 | 1600 | *(counter.data() + index) += 1; | |
| 174 | } | ||
| 175 | 560 | } | |
| 176 | ✗ | uint64_t PapulinaYRadixSortOMP::InBytes(double d) { | |
| 177 | uint64_t bits = 0; | ||
| 178 | memcpy(&bits, &d, sizeof(double)); | ||
| 179 |
2/4✗ Branch 0 not taken.
✗ Branch 1 not taken.
✓ Branch 2 taken 40 times.
✓ Branch 3 taken 160 times.
|
200 | if ((bits & kMask) != 0) { |
| 180 | 40 | bits = ~bits; | |
| 181 | } else { | ||
| 182 | 160 | bits = bits ^ kMask; | |
| 183 | } | ||
| 184 | ✗ | return bits; | |
| 185 | } | ||
| 186 | ✗ | double PapulinaYRadixSortOMP::FromBytes(uint64_t bits) { | |
| 187 | double d = NAN; | ||
| 188 |
2/4✗ Branch 0 not taken.
✗ Branch 1 not taken.
✓ Branch 2 taken 160 times.
✓ Branch 3 taken 40 times.
|
200 | if ((bits & kMask) != 0) { |
| 189 | 160 | bits = bits ^ kMask; | |
| 190 | } else { | ||
| 191 | 40 | bits = ~bits; | |
| 192 | } | ||
| 193 | memcpy(&d, &bits, sizeof(double)); | ||
| 194 | ✗ | return d; | |
| 195 | } | ||
| 196 | } // namespace papulina_y_radix_sort | ||
| 197 |