GCC Code Coverage Report

Directory:	./
File:	tasks/sakharov_a_shell_sorting_with_merging_butcher/all/src/ops_all.cpp
Date:	2026-06-04 20:25:32

	Exec	Total	Coverage
Lines:	146	160	91.2%
Functions:	22	24	91.7%
Branches:	101	166	60.8%

  
      Line
      Branch
      Exec
      Source
    
      #include "sakharov_a_shell_sorting_with_merging_butcher/all/include/ops_all.hpp"
    
      #include <mpi.h>
    
      #include <omp.h>
    
      #include <tbb/blocked_range.h>
    
      #include <tbb/parallel_for.h>
    
      #include <algorithm>
    
      #include <cstddef>
    
      #include <functional>
    
      #include <thread>
    
      #include <utility>
    
      #include <vector>
    
      #include "sakharov_a_shell_sorting_with_merging_butcher/common/include/common.hpp"
    
      #include "util/include/util.hpp"
    
      namespace sakharov_a_shell_sorting_with_merging_butcher {
    
      namespace {
    
      constexpr std::size_t kMinParallelChunkSize = 1U << 14;
    
      7
      std::vector<std::size_t> BuildChunkBounds(std::size_t size, int requested_chunks) {
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 7 times.

      7
        if (size == 0) {
    
      ✗
          return {0};
    
        }
    
        1/2✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.

      7
        const std::size_t max_chunks_by_size = std::max<std::size_t>(1, size / kMinParallelChunkSize);
    
        2/4✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 7 times.

      14
        const int chunks = std::max(1, std::min<int>(requested_chunks, static_cast<int>(max_chunks_by_size)));
    
      7
        std::vector<std::size_t> bounds;
    
        1/2✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.

      7
        bounds.reserve(static_cast<std::size_t>(chunks) + 1);
    
      7
        const std::size_t base_chunk = size / static_cast<std::size_t>(chunks);
    
      7
        const std::size_t remainder = size % static_cast<std::size_t>(chunks);
    
        const auto chunk_count = static_cast<std::size_t>(chunks);
    
        1/2✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.

      7
        bounds.push_back(0);
    
        2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.

      14
        for (std::size_t chunk = 0; chunk < chunk_count; ++chunk) {
    
        2/4✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 7 times.
✗ Branch 4 not taken.

      14
          const std::size_t chunk_size = base_chunk + (chunk < remainder ? 1 : 0);
    
        1/4✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      7
          bounds.push_back(bounds.back() + chunk_size);
    
        }
    
        return bounds;
    
      }
    
      5
      std::vector<std::size_t> BuildBoundsFromCounts(const std::vector<int> &counts) {
    
        1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.

      5
        std::vector<std::size_t> bounds;
    
        1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.

      5
        bounds.reserve(counts.size() + 1);
    
        1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.

      5
        bounds.push_back(0);
    
        3/4✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 10 times.
✓ Branch 4 taken 5 times.

      15
        for (int count : counts) {
    
        1/4✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      10
          bounds.push_back(bounds.back() + static_cast<std::size_t>(count));
    
        }
    
      5
        return bounds;
    
      }
    
      5
      std::vector<int> BuildMpiChunkCounts(int data_size, int process_count) {
    
      5
        std::vector<int> counts(static_cast<std::size_t>(process_count), 0);
    
      5
        const int base = data_size / process_count;
    
      5
        const int remainder = data_size % process_count;
    
        2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 5 times.

      15
        for (int rank = 0; rank < process_count; ++rank) {
    
        2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2 times.

      18
          counts[static_cast<std::size_t>(rank)] = base + (rank < remainder ? 1 : 0);
    
        }
    
      5
        return counts;
    
      }
    
      5
      std::vector<int> BuildDisplacements(const std::vector<int> &counts) {
    
      5
        std::vector<int> displacements(counts.size(), 0);
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      10
        for (std::size_t i = 1; i < counts.size(); ++i) {
    
      5
          displacements[i] = displacements[i - 1] + counts[i - 1];
    
        }
    
      5
        return displacements;
    
      }
    
      7
      void SortChunksOpenMP(std::vector<int> &data, const std::vector<std::size_t> &bounds, int thread_count) {
    
      7
        const int chunk_count = static_cast<int>(bounds.size()) - 1;
    
      7
      #pragma omp parallel for default(none) shared(data, bounds, chunk_count) num_threads(thread_count) schedule(static)
    
        for (int chunk = 0; chunk < chunk_count; ++chunk) {
    
          const auto index = static_cast<std::size_t>(chunk);
    
          auto begin = data.begin() + static_cast<std::ptrdiff_t>(bounds[index]);
    
          auto end = data.begin() + static_cast<std::ptrdiff_t>(bounds[index + 1]);
    
          std::sort(begin, end);
    
        }
    
      7
      }
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.

      5
      void MergeRange(const std::vector<int> &source, std::vector<int> &destination, const std::vector<std::size_t> &bounds,
    
                      std::size_t width, std::size_t merge_index) {
    
      5
        const std::size_t chunk_count = bounds.size() - 1;
    
      5
        const std::size_t left_chunk = merge_index * 2 * width;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.

      5
        const std::size_t mid = std::min(left_chunk + width, chunk_count);
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.

      5
        const std::size_t right = std::min(left_chunk + (2 * width), chunk_count);
    
      5
        const std::size_t begin_index = bounds[left_chunk];
    
      5
        const std::size_t middle_index = bounds[mid];
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.

      5
        const std::size_t end_index = bounds[right];
    
        auto output_begin = destination.begin() + static_cast<std::ptrdiff_t>(begin_index);
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.

      5
        if (mid == right) {
    
      ✗
          std::copy(source.begin() + static_cast<std::ptrdiff_t>(begin_index),
    
                    source.begin() + static_cast<std::ptrdiff_t>(end_index), output_begin);
    
        } else {
    
      5
          std::merge(source.begin() + static_cast<std::ptrdiff_t>(begin_index),
    
                     source.begin() + static_cast<std::ptrdiff_t>(middle_index),
    
                     source.begin() + static_cast<std::ptrdiff_t>(middle_index),
    
                     source.begin() + static_cast<std::ptrdiff_t>(end_index), output_begin);
    
        }
    
      5
      }
    
      ✗
      void MergePassTBB(const std::vector<int> &source, std::vector<int> &destination, const std::vector<std::size_t> &bounds,
    
                        std::size_t width) {
    
      ✗
        const std::size_t chunk_count = bounds.size() - 1;
    
      ✗
        const std::size_t merge_count = (chunk_count + (2 * width) - 1) / (2 * width);
    
      ✗
        tbb::parallel_for(tbb::blocked_range<std::size_t>(0, merge_count), [&](const tbb::blocked_range<std::size_t> &range) {
    
      ✗
          for (std::size_t merge_index = range.begin(); merge_index != range.end(); ++merge_index) {
    
      ✗
            MergeRange(source, destination, bounds, width, merge_index);
    
          }
    
      ✗
        });
    
      ✗
      }
    
      5
      void MergePassThreadWorker(const std::vector<int> &source, std::vector<int> &destination,
    
                                 const std::vector<std::size_t> &bounds, std::size_t width, std::size_t begin_merge,
    
                                 std::size_t end_merge) {
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      10
        for (std::size_t merge_index = begin_merge; merge_index < end_merge; ++merge_index) {
    
      5
          MergeRange(source, destination, bounds, width, merge_index);
    
        }
    
      5
      }
    
        1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.

      5
      void MergePassSTLThreads(const std::vector<int> &source, std::vector<int> &destination,
    
                               const std::vector<std::size_t> &bounds, std::size_t width, int requested_threads) {
    
      5
        const std::size_t chunk_count = bounds.size() - 1;
    
        1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.

      5
        const std::size_t merge_count = (chunk_count + (2 * width) - 1) / (2 * width);
    
        const auto worker_count =
    
        1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.

      5
            std::min<std::size_t>(merge_count, static_cast<std::size_t>(std::max(1, requested_threads)));
    
      5
        std::vector<std::thread> threads;
    
        1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.

      5
        threads.reserve(worker_count);
    
      5
        const std::size_t base_chunk = merge_count / worker_count;
    
      5
        const std::size_t remainder = merge_count % worker_count;
    
      5
        std::size_t current = 0;
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      10
        for (std::size_t worker = 0; worker < worker_count; ++worker) {
    
        1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.

      5
          const std::size_t count = base_chunk + (worker < remainder ? 1 : 0);
    
      5
          const std::size_t next = current + count;
    
        1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.

      5
          threads.emplace_back(MergePassThreadWorker, std::cref(source), std::ref(destination), std::cref(bounds), width,
    
                               current, next);
    
      5
          current = next;
    
        }
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      10
        for (auto &thread : threads) {
    
        1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.

      5
          thread.join();
    
        }
    
      5
      }
    
        1/2✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.

      7
      std::vector<int> MergeChunksTBB(std::vector<int> source, const std::vector<std::size_t> &bounds) {
    
      7
        const std::size_t chunk_count = bounds.size() - 1;
    
        1/2✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.

      7
        if (chunk_count <= 1) {
    
          return source;
    
        }
    
      ✗
        std::vector<int> destination(source.size());
    
      ✗
        for (std::size_t width = 1; width < chunk_count; width *= 2) {
    
      ✗
          MergePassTBB(source, destination, bounds, width);
    
          source.swap(destination);
    
        }
    
        return source;
    
      }
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.

      5
      std::vector<int> MergeChunksSTLThreads(std::vector<int> source, const std::vector<std::size_t> &bounds,
    
                                             int thread_count) {
    
      5
        const std::size_t chunk_count = bounds.size() - 1;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.

      5
        if (chunk_count <= 1) {
    
          return source;
    
        }
    
      5
        std::vector<int> destination(source.size());
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      10
        for (std::size_t width = 1; width < chunk_count; width *= 2) {
    
        1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.

      5
          MergePassSTLThreads(source, destination, bounds, width, thread_count);
    
          source.swap(destination);
    
        }
    
        return source;
    
      }
    
        2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 7 times.

      10
      std::vector<int> SortLocalPart(std::vector<int> data) {
    
        2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 7 times.

      10
        if (data.empty()) {
    
          return data;
    
        }
    
      7
        const int thread_count = std::max(1, ppc::util::GetNumThreads());
    
      7
        const auto bounds = BuildChunkBounds(data.size(), thread_count);
    
      7
        SortChunksOpenMP(data, bounds, thread_count);
    
        2/6✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 7 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.

      14
        return MergeChunksTBB(std::move(data), bounds);
    
      }
    
      struct MpiRootData {
    
        int input_size{0};
    
        std::vector<int> counts;
    
        std::vector<int> displacements;
    
      };
    
      const int *RootBuffer(const std::vector<int> &buffer, int rank) {
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      10
        if (rank != 0) {
    
          return nullptr;
    
        }
    
      10
        return buffer.data();
    
      }
    
      const int *RootInputBuffer(const std::vector<int> &input, int rank) {
    
        2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.

      5
        if (rank != 0 || input.empty()) {
    
          return nullptr;
    
        }
    
        return input.data();
    
      }
    
      int *BufferOrNull(std::vector<int> &buffer) {
    
        8/8✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 4 times.
✓ Branch 4 taken 3 times.
✓ Branch 5 taken 7 times.
✓ Branch 6 taken 3 times.
✓ Branch 7 taken 7 times.

      36
        if (buffer.empty()) {
    
      14
          return nullptr;
    
        }
    
        return buffer.data();
    
      }
    
      10
      MpiRootData BuildRootData(const std::vector<int> &input, int rank, int process_count) {
    
      10
        MpiRootData root_data;
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      10
        if (rank == 0) {
    
      5
          root_data.input_size = static_cast<int>(input.size());
    
        1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.

      5
          root_data.counts = BuildMpiChunkCounts(root_data.input_size, process_count);
    
        1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.

      10
          root_data.displacements = BuildDisplacements(root_data.counts);
    
        }
    
      10
        return root_data;
    
      ✗
      }
    
      10
      std::vector<int> ScatterInput(const std::vector<int> &input, const MpiRootData &root_data, int rank) {
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      10
        int local_size = 0;
    
      10
        MPI_Scatter(RootBuffer(root_data.counts, rank), 1, MPI_INT, &local_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
    
      10
        std::vector<int> local_data(static_cast<std::size_t>(local_size));
    
        3/4✓ Branch 0 taken 3 times.
✓ Branch 1 taken 7 times.
✓ Branch 3 taken 10 times.
✗ Branch 4 not taken.

      20
        MPI_Scatterv(RootInputBuffer(input, rank), RootBuffer(root_data.counts, rank),
    
                     RootBuffer(root_data.displacements, rank), MPI_INT, BufferOrNull(local_data), local_size, MPI_INT, 0,
    
                     MPI_COMM_WORLD);
    
      10
        return local_data;
    
      }
    
      10
      std::vector<int> GatherSortedData(std::vector<int> &local_data, const MpiRootData &root_data, int rank) {
    
      10
        std::vector<int> gathered_data;
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      10
        if (rank == 0) {
    
        1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.

      5
          gathered_data.resize(static_cast<std::size_t>(root_data.input_size));
    
        }
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      10
        const auto local_size = static_cast<int>(local_data.size());
    
        1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.

      10
        MPI_Gatherv(BufferOrNull(local_data), local_size, MPI_INT, BufferOrNull(gathered_data),
    
                    RootBuffer(root_data.counts, rank), RootBuffer(root_data.displacements, rank), MPI_INT, 0,
    
                    MPI_COMM_WORLD);
    
      10
        return gathered_data;
    
      }
    
      10
      std::vector<int> MergeRootData(std::vector<int> gathered_data, const MpiRootData &root_data, int rank) {
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      10
        if (rank != 0) {
    
      5
          return {};
    
        }
    
      5
        const auto bounds = BuildBoundsFromCounts(root_data.counts);
    
        3/8✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 5 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 5 times.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.

      10
        return MergeChunksSTLThreads(std::move(gathered_data), bounds, std::max(1, ppc::util::GetNumThreads()));
    
      }
    
      10
      std::vector<int> BroadcastResult(std::vector<int> result, int rank) {
    
      10
        int result_size = 0;
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      10
        if (rank == 0) {
    
      5
          result_size = static_cast<int>(result.size());
    
        }
    
      10
        MPI_Bcast(&result_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
    
      10
        result.resize(static_cast<std::size_t>(result_size));
    
        2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.

      12
        MPI_Bcast(BufferOrNull(result), result_size, MPI_INT, 0, MPI_COMM_WORLD);
    
      10
        return result;
    
      }
    
      10
      std::vector<int> RunMpiSort(const std::vector<int> &input, int rank, int process_count) {
    
      10
        const auto root_data = BuildRootData(input, rank, process_count);
    
        1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.

      10
        auto local_data = ScatterInput(input, root_data, rank);
    
        1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.

      10
        local_data = SortLocalPart(std::move(local_data));
    
        1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.

      10
        auto gathered_data = GatherSortedData(local_data, root_data, rank);
    
        2/4✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 10 times.
✗ Branch 5 not taken.

      20
        auto result = MergeRootData(std::move(gathered_data), root_data, rank);
    
        3/6✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 7 times.
✓ Branch 4 taken 3 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.

      30
        return BroadcastResult(std::move(result), rank);
    
      10
      }
    
      }  // namespace
    
        1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.

      10
      SakharovAShellButcherALL::SakharovAShellButcherALL(const InType &in) {
    
        SetTypeOfTask(GetStaticTypeOfTask());
    
        1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.

      10
        GetInput() = in;
    
      10
        GetOutput() = OutType();
    
      10
      }
    
      10
      bool SakharovAShellButcherALL::ValidationImpl() {
    
      10
        return IsValidInput(GetInput());
    
      }
    
      10
      bool SakharovAShellButcherALL::PreProcessingImpl() {
    
      10
        GetOutput().assign(GetInput().size(), 0);
    
      10
        return true;
    
      }
    
      10
      bool SakharovAShellButcherALL::RunImpl() {
    
      10
        int rank = 0;
    
      10
        int process_count = 1;
    
      10
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    
      10
        MPI_Comm_size(MPI_COMM_WORLD, &process_count);
    
      10
        GetOutput() = RunMpiSort(GetInput(), rank, process_count);
    
      10
        return true;
    
      }
    
      10
      bool SakharovAShellButcherALL::PostProcessingImpl() {
    
      10
        return true;
    
      }
    
      }  // namespace sakharov_a_shell_sorting_with_merging_butcher

Line	Branch	Exec	Source
1			#include "sakharov_a_shell_sorting_with_merging_butcher/all/include/ops_all.hpp"
2
3			#include <mpi.h>
4			#include <omp.h>
5			#include <tbb/blocked_range.h>
6			#include <tbb/parallel_for.h>
7
8			#include <algorithm>
9			#include <cstddef>
10			#include <functional>
11			#include <thread>
12			#include <utility>
13			#include <vector>
14
15			#include "sakharov_a_shell_sorting_with_merging_butcher/common/include/common.hpp"
16			#include "util/include/util.hpp"
17
18			namespace sakharov_a_shell_sorting_with_merging_butcher {
19
20			namespace {
21
22			constexpr std::size_t kMinParallelChunkSize = 1U << 14;
23
24		7	std::vector<std::size_t> BuildChunkBounds(std::size_t size, int requested_chunks) {
25	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 7 times.	7	if (size == 0) {
26		✗	return {0};
27			}
28
29	1/2 ✓ Branch 0 taken 7 times. ✗ Branch 1 not taken.	7	const std::size_t max_chunks_by_size = std::max<std::size_t>(1, size / kMinParallelChunkSize);
30	2/4 ✓ Branch 0 taken 7 times. ✗ Branch 1 not taken. ✗ Branch 2 not taken. ✓ Branch 3 taken 7 times.	14	const int chunks = std::max(1, std::min<int>(requested_chunks, static_cast<int>(max_chunks_by_size)));
31
32		7	std::vector<std::size_t> bounds;
33	1/2 ✓ Branch 1 taken 7 times. ✗ Branch 2 not taken.	7	bounds.reserve(static_cast<std::size_t>(chunks) + 1);
34
35		7	const std::size_t base_chunk = size / static_cast<std::size_t>(chunks);
36		7	const std::size_t remainder = size % static_cast<std::size_t>(chunks);
37			const auto chunk_count = static_cast<std::size_t>(chunks);
38
39	1/2 ✓ Branch 1 taken 7 times. ✗ Branch 2 not taken.	7	bounds.push_back(0);
40	2/2 ✓ Branch 0 taken 7 times. ✓ Branch 1 taken 7 times.	14	for (std::size_t chunk = 0; chunk < chunk_count; ++chunk) {
41	2/4 ✓ Branch 0 taken 7 times. ✗ Branch 1 not taken. ✓ Branch 3 taken 7 times. ✗ Branch 4 not taken.	14	const std::size_t chunk_size = base_chunk + (chunk < remainder ? 1 : 0);
42	1/4 ✓ Branch 1 taken 7 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	7	bounds.push_back(bounds.back() + chunk_size);
43			}
44
45			return bounds;
46			}
47
48		5	std::vector<std::size_t> BuildBoundsFromCounts(const std::vector<int> &counts) {
49	1/2 ✓ Branch 1 taken 5 times. ✗ Branch 2 not taken.	5	std::vector<std::size_t> bounds;
50	1/2 ✓ Branch 1 taken 5 times. ✗ Branch 2 not taken.	5	bounds.reserve(counts.size() + 1);
51	1/2 ✓ Branch 1 taken 5 times. ✗ Branch 2 not taken.	5	bounds.push_back(0);
52	3/4 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken. ✓ Branch 3 taken 10 times. ✓ Branch 4 taken 5 times.	15	for (int count : counts) {
53	1/4 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	10	bounds.push_back(bounds.back() + static_cast<std::size_t>(count));
54			}
55		5	return bounds;
56			}
57
58		5	std::vector<int> BuildMpiChunkCounts(int data_size, int process_count) {
59		5	std::vector<int> counts(static_cast<std::size_t>(process_count), 0);
60		5	const int base = data_size / process_count;
61		5	const int remainder = data_size % process_count;
62
63	2/2 ✓ Branch 0 taken 10 times. ✓ Branch 1 taken 5 times.	15	for (int rank = 0; rank < process_count; ++rank) {
64	2/2 ✓ Branch 0 taken 8 times. ✓ Branch 1 taken 2 times.	18	counts[static_cast<std::size_t>(rank)] = base + (rank < remainder ? 1 : 0);
65			}
66
67		5	return counts;
68			}
69
70		5	std::vector<int> BuildDisplacements(const std::vector<int> &counts) {
71		5	std::vector<int> displacements(counts.size(), 0);
72	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	10	for (std::size_t i = 1; i < counts.size(); ++i) {
73		5	displacements[i] = displacements[i - 1] + counts[i - 1];
74			}
75		5	return displacements;
76			}
77
78		7	void SortChunksOpenMP(std::vector<int> &data, const std::vector<std::size_t> &bounds, int thread_count) {
79		7	const int chunk_count = static_cast<int>(bounds.size()) - 1;
80
81		7	#pragma omp parallel for default(none) shared(data, bounds, chunk_count) num_threads(thread_count) schedule(static)
82			for (int chunk = 0; chunk < chunk_count; ++chunk) {
83			const auto index = static_cast<std::size_t>(chunk);
84			auto begin = data.begin() + static_cast<std::ptrdiff_t>(bounds[index]);
85			auto end = data.begin() + static_cast<std::ptrdiff_t>(bounds[index + 1]);
86			std::sort(begin, end);
87			}
88		7	}
89
90	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 5 times.	5	void MergeRange(const std::vector<int> &source, std::vector<int> &destination, const std::vector<std::size_t> &bounds,
91			std::size_t width, std::size_t merge_index) {
92		5	const std::size_t chunk_count = bounds.size() - 1;
93		5	const std::size_t left_chunk = merge_index * 2 * width;
94	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 5 times.	5	const std::size_t mid = std::min(left_chunk + width, chunk_count);
95	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 5 times.	5	const std::size_t right = std::min(left_chunk + (2 * width), chunk_count);
96
97		5	const std::size_t begin_index = bounds[left_chunk];
98		5	const std::size_t middle_index = bounds[mid];
99	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 5 times.	5	const std::size_t end_index = bounds[right];
100
101			auto output_begin = destination.begin() + static_cast<std::ptrdiff_t>(begin_index);
102	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 5 times.	5	if (mid == right) {
103		✗	std::copy(source.begin() + static_cast<std::ptrdiff_t>(begin_index),
104			source.begin() + static_cast<std::ptrdiff_t>(end_index), output_begin);
105			} else {
106		5	std::merge(source.begin() + static_cast<std::ptrdiff_t>(begin_index),
107			source.begin() + static_cast<std::ptrdiff_t>(middle_index),
108			source.begin() + static_cast<std::ptrdiff_t>(middle_index),
109			source.begin() + static_cast<std::ptrdiff_t>(end_index), output_begin);
110			}
111		5	}
112
113		✗	void MergePassTBB(const std::vector<int> &source, std::vector<int> &destination, const std::vector<std::size_t> &bounds,
114			std::size_t width) {
115		✗	const std::size_t chunk_count = bounds.size() - 1;
116		✗	const std::size_t merge_count = (chunk_count + (2 * width) - 1) / (2 * width);
117
118		✗	tbb::parallel_for(tbb::blocked_range<std::size_t>(0, merge_count), [&](const tbb::blocked_range<std::size_t> &range) {
119		✗	for (std::size_t merge_index = range.begin(); merge_index != range.end(); ++merge_index) {
120		✗	MergeRange(source, destination, bounds, width, merge_index);
121			}
122		✗	});
123		✗	}
124
125		5	void MergePassThreadWorker(const std::vector<int> &source, std::vector<int> &destination,
126			const std::vector<std::size_t> &bounds, std::size_t width, std::size_t begin_merge,
127			std::size_t end_merge) {
128	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	10	for (std::size_t merge_index = begin_merge; merge_index < end_merge; ++merge_index) {
129		5	MergeRange(source, destination, bounds, width, merge_index);
130			}
131		5	}
132
133	1/2 ✓ Branch 1 taken 5 times. ✗ Branch 2 not taken.	5	void MergePassSTLThreads(const std::vector<int> &source, std::vector<int> &destination,
134			const std::vector<std::size_t> &bounds, std::size_t width, int requested_threads) {
135		5	const std::size_t chunk_count = bounds.size() - 1;
136	1/2 ✓ Branch 1 taken 5 times. ✗ Branch 2 not taken.	5	const std::size_t merge_count = (chunk_count + (2 * width) - 1) / (2 * width);
137			const auto worker_count =
138	1/2 ✓ Branch 1 taken 5 times. ✗ Branch 2 not taken.	5	std::min<std::size_t>(merge_count, static_cast<std::size_t>(std::max(1, requested_threads)));
139
140		5	std::vector<std::thread> threads;
141	1/2 ✓ Branch 1 taken 5 times. ✗ Branch 2 not taken.	5	threads.reserve(worker_count);
142
143		5	const std::size_t base_chunk = merge_count / worker_count;
144		5	const std::size_t remainder = merge_count % worker_count;
145		5	std::size_t current = 0;
146
147	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	10	for (std::size_t worker = 0; worker < worker_count; ++worker) {
148	1/2 ✓ Branch 0 taken 5 times. ✗ Branch 1 not taken.	5	const std::size_t count = base_chunk + (worker < remainder ? 1 : 0);
149		5	const std::size_t next = current + count;
150	1/2 ✓ Branch 1 taken 5 times. ✗ Branch 2 not taken.	5	threads.emplace_back(MergePassThreadWorker, std::cref(source), std::ref(destination), std::cref(bounds), width,
151			current, next);
152		5	current = next;
153			}
154
155	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	10	for (auto &thread : threads) {
156	1/2 ✓ Branch 1 taken 5 times. ✗ Branch 2 not taken.	5	thread.join();
157			}
158		5	}
159
160	1/2 ✓ Branch 0 taken 7 times. ✗ Branch 1 not taken.	7	std::vector<int> MergeChunksTBB(std::vector<int> source, const std::vector<std::size_t> &bounds) {
161		7	const std::size_t chunk_count = bounds.size() - 1;
162	1/2 ✓ Branch 0 taken 7 times. ✗ Branch 1 not taken.	7	if (chunk_count <= 1) {
163			return source;
164			}
165
166		✗	std::vector<int> destination(source.size());
167		✗	for (std::size_t width = 1; width < chunk_count; width *= 2) {
168		✗	MergePassTBB(source, destination, bounds, width);
169			source.swap(destination);
170			}
171
172			return source;
173			}
174
175	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 5 times.	5	std::vector<int> MergeChunksSTLThreads(std::vector<int> source, const std::vector<std::size_t> &bounds,
176			int thread_count) {
177		5	const std::size_t chunk_count = bounds.size() - 1;
178	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 5 times.	5	if (chunk_count <= 1) {
179			return source;
180			}
181
182		5	std::vector<int> destination(source.size());
183	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	10	for (std::size_t width = 1; width < chunk_count; width *= 2) {
184	1/2 ✓ Branch 1 taken 5 times. ✗ Branch 2 not taken.	5	MergePassSTLThreads(source, destination, bounds, width, thread_count);
185			source.swap(destination);
186			}
187
188			return source;
189			}
190
191	2/2 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 7 times.	10	std::vector<int> SortLocalPart(std::vector<int> data) {
192	2/2 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 7 times.	10	if (data.empty()) {
193			return data;
194			}
195
196		7	const int thread_count = std::max(1, ppc::util::GetNumThreads());
197		7	const auto bounds = BuildChunkBounds(data.size(), thread_count);
198
199		7	SortChunksOpenMP(data, bounds, thread_count);
200	2/6 ✓ Branch 1 taken 7 times. ✗ Branch 2 not taken. ✓ Branch 3 taken 7 times. ✗ Branch 4 not taken. ✗ Branch 5 not taken. ✗ Branch 6 not taken.	14	return MergeChunksTBB(std::move(data), bounds);
201			}
202
203			struct MpiRootData {
204			int input_size{0};
205			std::vector<int> counts;
206			std::vector<int> displacements;
207			};
208
209			const int *RootBuffer(const std::vector<int> &buffer, int rank) {
210	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	10	if (rank != 0) {
211			return nullptr;
212			}
213		10	return buffer.data();
214			}
215
216			const int *RootInputBuffer(const std::vector<int> &input, int rank) {
217	2/2 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 4 times.	5	if (rank != 0 \|\| input.empty()) {
218			return nullptr;
219			}
220			return input.data();
221			}
222
223			int *BufferOrNull(std::vector<int> &buffer) {
224	8/8 ✓ Branch 0 taken 2 times. ✓ Branch 1 taken 8 times. ✓ Branch 2 taken 6 times. ✓ Branch 3 taken 4 times. ✓ Branch 4 taken 3 times. ✓ Branch 5 taken 7 times. ✓ Branch 6 taken 3 times. ✓ Branch 7 taken 7 times.	36	if (buffer.empty()) {
225		14	return nullptr;
226			}
227			return buffer.data();
228			}
229
230		10	MpiRootData BuildRootData(const std::vector<int> &input, int rank, int process_count) {
231		10	MpiRootData root_data;
232	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	10	if (rank == 0) {
233		5	root_data.input_size = static_cast<int>(input.size());
234	1/2 ✓ Branch 1 taken 5 times. ✗ Branch 2 not taken.	5	root_data.counts = BuildMpiChunkCounts(root_data.input_size, process_count);
235	1/2 ✓ Branch 1 taken 5 times. ✗ Branch 2 not taken.	10	root_data.displacements = BuildDisplacements(root_data.counts);
236			}
237		10	return root_data;
238		✗	}
239
240		10	std::vector<int> ScatterInput(const std::vector<int> &input, const MpiRootData &root_data, int rank) {
241	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	10	int local_size = 0;
242		10	MPI_Scatter(RootBuffer(root_data.counts, rank), 1, MPI_INT, &local_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
243
244		10	std::vector<int> local_data(static_cast<std::size_t>(local_size));
245	3/4 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 7 times. ✓ Branch 3 taken 10 times. ✗ Branch 4 not taken.	20	MPI_Scatterv(RootInputBuffer(input, rank), RootBuffer(root_data.counts, rank),
246			RootBuffer(root_data.displacements, rank), MPI_INT, BufferOrNull(local_data), local_size, MPI_INT, 0,
247			MPI_COMM_WORLD);
248
249		10	return local_data;
250			}
251
252		10	std::vector<int> GatherSortedData(std::vector<int> &local_data, const MpiRootData &root_data, int rank) {
253		10	std::vector<int> gathered_data;
254	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	10	if (rank == 0) {
255	1/2 ✓ Branch 1 taken 5 times. ✗ Branch 2 not taken.	5	gathered_data.resize(static_cast<std::size_t>(root_data.input_size));
256			}
257
258	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	10	const auto local_size = static_cast<int>(local_data.size());
259	1/2 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken.	10	MPI_Gatherv(BufferOrNull(local_data), local_size, MPI_INT, BufferOrNull(gathered_data),
260			RootBuffer(root_data.counts, rank), RootBuffer(root_data.displacements, rank), MPI_INT, 0,
261			MPI_COMM_WORLD);
262
263		10	return gathered_data;
264			}
265
266		10	std::vector<int> MergeRootData(std::vector<int> gathered_data, const MpiRootData &root_data, int rank) {
267	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	10	if (rank != 0) {
268		5	return {};
269			}
270
271		5	const auto bounds = BuildBoundsFromCounts(root_data.counts);
272	3/8 ✓ Branch 1 taken 5 times. ✗ Branch 2 not taken. ✓ Branch 4 taken 5 times. ✗ Branch 5 not taken. ✓ Branch 6 taken 5 times. ✗ Branch 7 not taken. ✗ Branch 8 not taken. ✗ Branch 9 not taken.	10	return MergeChunksSTLThreads(std::move(gathered_data), bounds, std::max(1, ppc::util::GetNumThreads()));
273			}
274
275		10	std::vector<int> BroadcastResult(std::vector<int> result, int rank) {
276		10	int result_size = 0;
277	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	10	if (rank == 0) {
278		5	result_size = static_cast<int>(result.size());
279			}
280		10	MPI_Bcast(&result_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
281		10	result.resize(static_cast<std::size_t>(result_size));
282	2/2 ✓ Branch 0 taken 2 times. ✓ Branch 1 taken 8 times.	12	MPI_Bcast(BufferOrNull(result), result_size, MPI_INT, 0, MPI_COMM_WORLD);
283
284		10	return result;
285			}
286
287		10	std::vector<int> RunMpiSort(const std::vector<int> &input, int rank, int process_count) {
288		10	const auto root_data = BuildRootData(input, rank, process_count);
289	1/2 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken.	10	auto local_data = ScatterInput(input, root_data, rank);
290	1/2 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken.	10	local_data = SortLocalPart(std::move(local_data));
291	1/2 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken.	10	auto gathered_data = GatherSortedData(local_data, root_data, rank);
292	2/4 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken. ✓ Branch 4 taken 10 times. ✗ Branch 5 not taken.	20	auto result = MergeRootData(std::move(gathered_data), root_data, rank);
293	3/6 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken. ✓ Branch 3 taken 7 times. ✓ Branch 4 taken 3 times. ✗ Branch 5 not taken. ✗ Branch 6 not taken.	30	return BroadcastResult(std::move(result), rank);
294		10	}
295
296			} // namespace
297
298	1/2 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken.	10	SakharovAShellButcherALL::SakharovAShellButcherALL(const InType &in) {
299			SetTypeOfTask(GetStaticTypeOfTask());
300	1/2 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken.	10	GetInput() = in;
301		10	GetOutput() = OutType();
302		10	}
303
304		10	bool SakharovAShellButcherALL::ValidationImpl() {
305		10	return IsValidInput(GetInput());
306			}
307
308		10	bool SakharovAShellButcherALL::PreProcessingImpl() {
309		10	GetOutput().assign(GetInput().size(), 0);
310		10	return true;
311			}
312
313		10	bool SakharovAShellButcherALL::RunImpl() {
314		10	int rank = 0;
315		10	int process_count = 1;
316		10	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
317		10	MPI_Comm_size(MPI_COMM_WORLD, &process_count);
318
319		10	GetOutput() = RunMpiSort(GetInput(), rank, process_count);
320		10	return true;
321			}
322
323		10	bool SakharovAShellButcherALL::PostProcessingImpl() {
324		10	return true;
325			}
326
327			} // namespace sakharov_a_shell_sorting_with_merging_butcher
328