| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "sakharov_a_shell_sorting_with_merging_butcher/omp/include/ops_omp.hpp" | ||
| 2 | |||
| 3 | #include <omp.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cstddef> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "sakharov_a_shell_sorting_with_merging_butcher/common/include/common.hpp" | ||
| 10 | |||
| 11 | namespace sakharov_a_shell_sorting_with_merging_butcher { | ||
| 12 | |||
| 13 | namespace { | ||
| 14 | |||
| 15 | constexpr std::size_t kMinParallelChunkSize = 1U << 14; | ||
| 16 | |||
| 17 | 16 | std::vector<std::size_t> BuildChunkBounds(std::size_t size, int requested_chunks) { | |
| 18 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
|
16 | if (size == 0) { |
| 19 | ✗ | return {0}; | |
| 20 | } | ||
| 21 | |||
| 22 |
1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
|
16 | const std::size_t max_chunks_by_size = std::max<std::size_t>(1, size / kMinParallelChunkSize); |
| 23 |
3/4✓ Branch 0 taken 12 times.
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 16 times.
|
28 | const int chunks = std::max(1, std::min<int>(requested_chunks, static_cast<int>(max_chunks_by_size))); |
| 24 | |||
| 25 | 16 | std::vector<std::size_t> bounds; | |
| 26 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | bounds.reserve(static_cast<std::size_t>(chunks) + 1); |
| 27 | 16 | const std::size_t base_chunk = size / static_cast<std::size_t>(chunks); | |
| 28 | 16 | const std::size_t remainder = size % static_cast<std::size_t>(chunks); | |
| 29 | const auto chunk_count = static_cast<std::size_t>(chunks); | ||
| 30 | |||
| 31 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | bounds.push_back(0); |
| 32 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 16 times.
|
32 | for (std::size_t chunk = 0; chunk < chunk_count; ++chunk) { |
| 33 |
2/4✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 16 times.
✗ Branch 4 not taken.
|
32 | const std::size_t chunk_size = base_chunk + (chunk < remainder ? 1 : 0); |
| 34 |
1/4✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
16 | bounds.push_back(bounds.back() + chunk_size); |
| 35 | } | ||
| 36 | |||
| 37 | return bounds; | ||
| 38 | } | ||
| 39 | |||
| 40 | 16 | void ParallelSortChunks(std::vector<int> &data, const std::vector<std::size_t> &bounds) { | |
| 41 | 16 | const int chunk_count = static_cast<int>(bounds.size()) - 1; | |
| 42 | |||
| 43 | 16 | #pragma omp parallel for default(none) shared(data, bounds, chunk_count) schedule(static) | |
| 44 | for (int chunk = 0; chunk < chunk_count; ++chunk) { | ||
| 45 | auto begin = data.begin() + static_cast<std::ptrdiff_t>(bounds[static_cast<std::size_t>(chunk)]); | ||
| 46 | auto end = data.begin() + static_cast<std::ptrdiff_t>(bounds[static_cast<std::size_t>(chunk) + 1]); | ||
| 47 | std::sort(begin, end); | ||
| 48 | } | ||
| 49 | 16 | } | |
| 50 | |||
| 51 | ✗ | void ParallelMergePass(const std::vector<int> &source, std::vector<int> &destination, | |
| 52 | const std::vector<std::size_t> &bounds, std::size_t width) { | ||
| 53 | ✗ | const std::size_t chunk_count = bounds.size() - 1; | |
| 54 | |||
| 55 | ✗ | #pragma omp parallel for default(none) shared(source, destination, bounds, width, chunk_count) schedule(static) | |
| 56 | for (std::size_t left_chunk = 0; left_chunk < chunk_count; left_chunk += 2 * width) { | ||
| 57 | auto left = left_chunk; | ||
| 58 | const std::size_t mid = std::min(left + width, chunk_count); | ||
| 59 | const std::size_t right = std::min(left + (2 * width), chunk_count); | ||
| 60 | |||
| 61 | const std::size_t begin_index = bounds[left]; | ||
| 62 | const std::size_t middle_index = bounds[mid]; | ||
| 63 | const std::size_t end_index = bounds[right]; | ||
| 64 | |||
| 65 | auto output_begin = destination.begin() + static_cast<std::ptrdiff_t>(begin_index); | ||
| 66 | if (mid == right) { | ||
| 67 | std::copy(source.begin() + static_cast<std::ptrdiff_t>(begin_index), | ||
| 68 | source.begin() + static_cast<std::ptrdiff_t>(end_index), output_begin); | ||
| 69 | } else { | ||
| 70 | std::merge(source.begin() + static_cast<std::ptrdiff_t>(begin_index), | ||
| 71 | source.begin() + static_cast<std::ptrdiff_t>(middle_index), | ||
| 72 | source.begin() + static_cast<std::ptrdiff_t>(middle_index), | ||
| 73 | source.begin() + static_cast<std::ptrdiff_t>(end_index), output_begin); | ||
| 74 | } | ||
| 75 | } | ||
| 76 | ✗ | } | |
| 77 | |||
| 78 | 16 | std::vector<int> ParallelSortAndMerge(const std::vector<int> &input) { | |
| 79 | 16 | std::vector<int> source = input; | |
| 80 |
1/2✓ Branch 2 taken 16 times.
✗ Branch 3 not taken.
|
16 | const auto bounds = BuildChunkBounds(source.size(), omp_get_max_threads()); |
| 81 | 16 | const std::size_t chunk_count = bounds.size() - 1; | |
| 82 | |||
| 83 | 16 | ParallelSortChunks(source, bounds); | |
| 84 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
|
16 | if (chunk_count == 1) { |
| 85 | return source; | ||
| 86 | } | ||
| 87 | |||
| 88 | ✗ | std::vector<int> destination(source.size()); | |
| 89 | ✗ | for (std::size_t width = 1; width < chunk_count; width *= 2) { | |
| 90 | ✗ | ParallelMergePass(source, destination, bounds, width); | |
| 91 | source.swap(destination); | ||
| 92 | } | ||
| 93 | |||
| 94 | return source; | ||
| 95 | } | ||
| 96 | |||
| 97 | } // namespace | ||
| 98 | |||
| 99 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | SakharovAShellButcherOMP::SakharovAShellButcherOMP(const InType &in) { |
| 100 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 101 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | GetInput() = in; |
| 102 | 20 | GetOutput() = OutType(); | |
| 103 | 20 | } | |
| 104 | |||
| 105 | 20 | bool SakharovAShellButcherOMP::ValidationImpl() { | |
| 106 | 20 | return IsValidInput(GetInput()); | |
| 107 | } | ||
| 108 | |||
| 109 | 20 | bool SakharovAShellButcherOMP::PreProcessingImpl() { | |
| 110 | 20 | GetOutput().assign(GetInput().size(), 0); | |
| 111 | 20 | return true; | |
| 112 | } | ||
| 113 | |||
| 114 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 16 times.
|
20 | bool SakharovAShellButcherOMP::RunImpl() { |
| 115 | const auto &input = GetInput(); | ||
| 116 | |||
| 117 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 16 times.
|
20 | if (input.empty()) { |
| 118 | GetOutput().clear(); | ||
| 119 | 4 | return true; | |
| 120 | } | ||
| 121 | |||
| 122 | 16 | GetOutput() = ParallelSortAndMerge(input); | |
| 123 | 16 | return true; | |
| 124 | } | ||
| 125 | |||
| 126 | 20 | bool SakharovAShellButcherOMP::PostProcessingImpl() { | |
| 127 | 20 | return true; | |
| 128 | } | ||
| 129 | |||
| 130 | } // namespace sakharov_a_shell_sorting_with_merging_butcher | ||
| 131 |