| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "sakharov_a_shell_sorting_with_merging_butcher/tbb/include/ops_tbb.hpp" | ||
| 2 | |||
| 3 | #include <tbb/blocked_range.h> | ||
| 4 | #include <tbb/parallel_for.h> | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "sakharov_a_shell_sorting_with_merging_butcher/common/include/common.hpp" | ||
| 11 | #include "util/include/util.hpp" | ||
| 12 | |||
| 13 | namespace sakharov_a_shell_sorting_with_merging_butcher { | ||
| 14 | |||
| 15 | namespace { | ||
| 16 | |||
| 17 | constexpr std::size_t kMinParallelChunkSize = 1U << 14; | ||
| 18 | |||
| 19 | 16 | std::vector<std::size_t> BuildChunkBounds(std::size_t size, int requested_chunks) { | |
| 20 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
|
16 | if (size == 0) { |
| 21 | ✗ | return {0}; | |
| 22 | } | ||
| 23 | |||
| 24 |
1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
|
16 | const std::size_t max_chunks_by_size = std::max<std::size_t>(1, size / kMinParallelChunkSize); |
| 25 |
3/4✓ Branch 0 taken 12 times.
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 16 times.
|
28 | const int chunks = std::max(1, std::min<int>(requested_chunks, static_cast<int>(max_chunks_by_size))); |
| 26 | |||
| 27 | 16 | std::vector<std::size_t> bounds; | |
| 28 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | bounds.reserve(static_cast<std::size_t>(chunks) + 1); |
| 29 | |||
| 30 | 16 | const std::size_t base_chunk = size / static_cast<std::size_t>(chunks); | |
| 31 | 16 | const std::size_t remainder = size % static_cast<std::size_t>(chunks); | |
| 32 | const auto chunk_count = static_cast<std::size_t>(chunks); | ||
| 33 | |||
| 34 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | bounds.push_back(0); |
| 35 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 16 times.
|
32 | for (std::size_t chunk = 0; chunk < chunk_count; ++chunk) { |
| 36 |
2/4✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 16 times.
✗ Branch 4 not taken.
|
32 | const std::size_t chunk_size = base_chunk + (chunk < remainder ? 1 : 0); |
| 37 |
1/4✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
16 | bounds.push_back(bounds.back() + chunk_size); |
| 38 | } | ||
| 39 | |||
| 40 | return bounds; | ||
| 41 | } | ||
| 42 | |||
| 43 | 16 | void ParallelSortChunks(std::vector<int> &data, const std::vector<std::size_t> &bounds) { | |
| 44 | 16 | const auto chunk_count = bounds.size() - 1; | |
| 45 | |||
| 46 | 32 | tbb::parallel_for(tbb::blocked_range<std::size_t>(0, chunk_count), [&](const tbb::blocked_range<std::size_t> &range) { | |
| 47 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 16 times.
|
32 | for (std::size_t chunk = range.begin(); chunk != range.end(); ++chunk) { |
| 48 | 16 | auto begin = data.begin() + static_cast<std::ptrdiff_t>(bounds[chunk]); | |
| 49 | 16 | auto end = data.begin() + static_cast<std::ptrdiff_t>(bounds[chunk + 1]); | |
| 50 | 16 | std::sort(begin, end); | |
| 51 | } | ||
| 52 | 16 | }); | |
| 53 | 16 | } | |
| 54 | |||
| 55 | ✗ | void ParallelMergePass(const std::vector<int> &source, std::vector<int> &destination, | |
| 56 | const std::vector<std::size_t> &bounds, std::size_t width) { | ||
| 57 | ✗ | const std::size_t chunk_count = bounds.size() - 1; | |
| 58 | ✗ | const std::size_t merge_count = (chunk_count + (2 * width) - 1) / (2 * width); | |
| 59 | |||
| 60 | ✗ | tbb::parallel_for(tbb::blocked_range<std::size_t>(0, merge_count), [&](const tbb::blocked_range<std::size_t> &range) { | |
| 61 | ✗ | for (std::size_t merge_index = range.begin(); merge_index != range.end(); ++merge_index) { | |
| 62 | ✗ | const std::size_t left_chunk = merge_index * 2 * width; | |
| 63 | ✗ | const std::size_t mid = std::min(left_chunk + width, chunk_count); | |
| 64 | ✗ | const std::size_t right = std::min(left_chunk + (2 * width), chunk_count); | |
| 65 | |||
| 66 | ✗ | const std::size_t begin_index = bounds[left_chunk]; | |
| 67 | ✗ | const std::size_t middle_index = bounds[mid]; | |
| 68 | ✗ | const std::size_t end_index = bounds[right]; | |
| 69 | |||
| 70 | ✗ | auto output_begin = destination.begin() + static_cast<std::ptrdiff_t>(begin_index); | |
| 71 | ✗ | if (mid == right) { | |
| 72 | ✗ | std::copy(source.begin() + static_cast<std::ptrdiff_t>(begin_index), | |
| 73 | ✗ | source.begin() + static_cast<std::ptrdiff_t>(end_index), output_begin); | |
| 74 | } else { | ||
| 75 | ✗ | std::merge(source.begin() + static_cast<std::ptrdiff_t>(begin_index), | |
| 76 | source.begin() + static_cast<std::ptrdiff_t>(middle_index), | ||
| 77 | source.begin() + static_cast<std::ptrdiff_t>(middle_index), | ||
| 78 | ✗ | source.begin() + static_cast<std::ptrdiff_t>(end_index), output_begin); | |
| 79 | } | ||
| 80 | } | ||
| 81 | ✗ | }); | |
| 82 | ✗ | } | |
| 83 | |||
| 84 | 16 | std::vector<int> ParallelSortAndMerge(const std::vector<int> &input) { | |
| 85 | 16 | std::vector<int> source = input; | |
| 86 |
3/8✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 16 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 16 times.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
|
16 | const auto bounds = BuildChunkBounds(source.size(), std::max(1, ppc::util::GetNumThreads())); |
| 87 | 16 | const std::size_t chunk_count = bounds.size() - 1; | |
| 88 | |||
| 89 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | ParallelSortChunks(source, bounds); |
| 90 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
|
16 | if (chunk_count == 1) { |
| 91 | return source; | ||
| 92 | } | ||
| 93 | |||
| 94 | ✗ | std::vector<int> destination(source.size()); | |
| 95 | ✗ | for (std::size_t width = 1; width < chunk_count; width *= 2) { | |
| 96 | ✗ | ParallelMergePass(source, destination, bounds, width); | |
| 97 | source.swap(destination); | ||
| 98 | } | ||
| 99 | |||
| 100 | return source; | ||
| 101 | } | ||
| 102 | |||
| 103 | } // namespace | ||
| 104 | |||
| 105 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | SakharovAShellButcherTBB::SakharovAShellButcherTBB(const InType &in) { |
| 106 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 107 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | GetInput() = in; |
| 108 | 20 | GetOutput() = OutType(); | |
| 109 | 20 | } | |
| 110 | |||
| 111 | 20 | bool SakharovAShellButcherTBB::ValidationImpl() { | |
| 112 | 20 | return IsValidInput(GetInput()); | |
| 113 | } | ||
| 114 | |||
| 115 | 20 | bool SakharovAShellButcherTBB::PreProcessingImpl() { | |
| 116 | 20 | GetOutput().assign(GetInput().size(), 0); | |
| 117 | 20 | return true; | |
| 118 | } | ||
| 119 | |||
| 120 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 16 times.
|
20 | bool SakharovAShellButcherTBB::RunImpl() { |
| 121 | const auto &input = GetInput(); | ||
| 122 | |||
| 123 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 16 times.
|
20 | if (input.empty()) { |
| 124 | GetOutput().clear(); | ||
| 125 | 4 | return true; | |
| 126 | } | ||
| 127 | |||
| 128 | 16 | GetOutput() = ParallelSortAndMerge(input); | |
| 129 | 16 | return true; | |
| 130 | } | ||
| 131 | |||
| 132 | 20 | bool SakharovAShellButcherTBB::PostProcessingImpl() { | |
| 133 | 20 | return true; | |
| 134 | } | ||
| 135 | |||
| 136 | } // namespace sakharov_a_shell_sorting_with_merging_butcher | ||
| 137 |