GCC Code Coverage Report

Directory:	./
File:	tasks/dorofeev_i_bitwise_sort_double_eo_batcher_merge/all/src/ops_all.cpp
Date:	2026-06-04 20:25:32

	Exec	Total	Coverage
Lines:	135	139	97.1%
Functions:	17	17	100.0%
Branches:	98	140	70.0%

  
      Line
      Branch
      Exec
      Source
    
      #include "dorofeev_i_bitwise_sort_double_eo_batcher_merge/all/include/ops_all.hpp"
    
      #include <mpi.h>
    
      #include <tbb/tbb.h>
    
      #include <algorithm>
    
      #include <cstdint>
    
      #include <cstring>
    
      #include <limits>
    
      #include <vector>
    
      #include "dorofeev_i_bitwise_sort_double_eo_batcher_merge/common/include/common.hpp"
    
      #include "util/include/util.hpp"
    
      namespace dorofeev_i_bitwise_sort_double_eo_batcher_merge {
    
      namespace {
    
      uint64_t DoubleToUint(double d) {
    
        uint64_t u = 0;
    
        std::memcpy(&u, &d, sizeof(double));
    
        2/2✓ Branch 0 taken 1246 times.
✓ Branch 1 taken 1602 times.

      2848
        if ((u & 0x8000000000000000ULL) != 0) {
    
      1246
          u = ~u;
    
        } else {
    
      1602
          u |= 0x8000000000000000ULL;
    
        }
    
        return u;
    
      }
    
      double UintToDouble(uint64_t u) {
    
      2848
        if ((u & 0x8000000000000000ULL) != 0) {
    
      1602
          u &= ~0x8000000000000000ULL;
    
        } else {
    
      1246
          u = ~u;
    
        }
    
        double d = 0.0;
    
        std::memcpy(&d, &u, sizeof(double));
    
        return d;
    
      }
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.

      24
      void RadixSortDouble(std::vector<double> &arr) {
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.

      24
        if (arr.empty()) {
    
      ✗
          return;
    
        }
    
      24
        std::vector<uint64_t> uarr(arr.size());
    
        2/2✓ Branch 0 taken 2848 times.
✓ Branch 1 taken 24 times.

      2872
        for (size_t i = 0; i < arr.size(); ++i) {
    
        2/2✓ Branch 0 taken 1246 times.
✓ Branch 1 taken 1602 times.

      5696
          uarr[i] = DoubleToUint(arr[i]);
    
        }
    
        1/4✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      24
        std::vector<uint64_t> temp(uarr.size());
    
        2/2✓ Branch 0 taken 192 times.
✓ Branch 1 taken 24 times.

      216
        for (size_t byte = 0; byte < 8; ++byte) {
    
        1/4✓ Branch 1 taken 192 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      192
          std::vector<int> count(256, 0);
    
        2/2✓ Branch 0 taken 22784 times.
✓ Branch 1 taken 192 times.

      22976
          for (uint64_t val : uarr) {
    
      22784
            count[(val >> (byte * 8)) & 0xFF]++;
    
          }
    
        2/2✓ Branch 0 taken 48960 times.
✓ Branch 1 taken 192 times.

      49152
          for (size_t i = 1; i < 256; ++i) {
    
      48960
            count[i] += count[i - 1];
    
          }
    
        2/2✓ Branch 0 taken 22784 times.
✓ Branch 1 taken 192 times.

      22976
          for (int i = static_cast<int>(uarr.size()) - 1; i >= 0; --i) {
    
      22784
            temp[--count[(uarr[i] >> (byte * 8)) & 0xFF]] = uarr[i];
    
          }
    
        1/2✓ Branch 1 taken 192 times.
✗ Branch 2 not taken.

      192
          uarr = temp;
    
        }
    
        2/2✓ Branch 0 taken 2848 times.
✓ Branch 1 taken 24 times.

      2872
        for (size_t i = 0; i < arr.size(); ++i) {
    
        2/2✓ Branch 0 taken 1602 times.
✓ Branch 1 taken 1246 times.

      5696
          arr[i] = UintToDouble(uarr[i]);
    
        }
    
      }
    
      void CompareExchangeBlocks(double *arr, size_t i, size_t step) {
    
        4/4✓ Branch 0 taken 2136 times.
✓ Branch 1 taken 16 times.
✓ Branch 2 taken 14904 times.
✓ Branch 3 taken 4148 times.

      21204
        for (size_t k = 0; k < step; ++k) {
    
        4/4✓ Branch 0 taken 353 times.
✓ Branch 1 taken 1783 times.
✓ Branch 2 taken 7577 times.
✓ Branch 3 taken 7327 times.

      17040
          if (arr[i + k] > arr[i + k + step]) {
    
            std::swap(arr[i + k], arr[i + k + step]);
    
          }
    
        }
    
      }
    
      16
      void OddEvenMergeIterative(double *arr, size_t start, size_t n) {
    
        1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.

      16
        if (n <= 1) {
    
          return;
    
        }
    
      16
        size_t step = n / 2;
    
        CompareExchangeBlocks(arr, start, step);
    
      16
        step /= 2;
    
        2/2✓ Branch 0 taken 92 times.
✓ Branch 1 taken 16 times.

      108
        for (; step > 0; step /= 2) {
    
        2/2✓ Branch 0 taken 4148 times.
✓ Branch 1 taken 92 times.

      4240
          for (size_t i = step; i < n - step; i += step * 2) {
    
      4148
            CompareExchangeBlocks(arr, start + i, step);
    
          }
    
        }
    
      }
    
      24
      void ProcessChunkTBB(double *raw_data, int chunk_idx, size_t chunk_size) {
    
      24
        size_t start_idx = static_cast<size_t>(chunk_idx) * chunk_size;
    
      24
        std::vector<double> local_arr(chunk_size);
    
        2/2✓ Branch 0 taken 2848 times.
✓ Branch 1 taken 24 times.

      2872
        for (size_t j = 0; j < chunk_size; ++j) {
    
      2848
          local_arr[j] = raw_data[start_idx + j];
    
        }
    
        1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.

      24
        RadixSortDouble(local_arr);
    
        2/2✓ Branch 0 taken 2848 times.
✓ Branch 1 taken 24 times.

      2872
        for (size_t j = 0; j < chunk_size; ++j) {
    
      2848
          raw_data[start_idx + j] = local_arr[j];
    
        }
    
      24
      }
    
      12
      void ExecuteTBBSortLocal(double *raw_data, size_t total_size, size_t chunk_size, int num_chunks_int) {
    
      12
        int num_threads = ppc::util::GetNumThreads();
    
        1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.

      12
        tbb::task_arena arena(num_threads > 0 ? num_threads : 1);
    
        1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.

      12
        arena.execute([&] {
    
      24
          tbb::parallel_for(tbb::blocked_range<int>(0, num_chunks_int),
    
      12
                            [raw_data, chunk_size](const tbb::blocked_range<int> &r) {
    
        2/4✓ Branch 0 taken 24 times.
✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.

      48
            for (int i = r.begin(); i != r.end(); ++i) {
    
        0/2✗ Branch 2 not taken.
✗ Branch 3 not taken.

      24
              ProcessChunkTBB(raw_data, i, chunk_size);
    
            }
    
          });
    
        2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.

      24
          for (size_t size = chunk_size; size < total_size; size *= 2) {
    
      12
            int merges_count = static_cast<int>(total_size / (size * 2));
    
      24
            tbb::parallel_for(tbb::blocked_range<int>(0, merges_count), [raw_data, size](const tbb::blocked_range<int> &r) {
    
        2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.

      24
              for (int i = r.begin(); i != r.end(); ++i) {
    
      12
                OddEvenMergeIterative(raw_data, static_cast<size_t>(i) * 2 * size, 2 * size);
    
              }
    
      12
            });
    
          }
    
      12
        });
    
      12
      }
    
      12
      void PerformLocalTBBSort(double *data, size_t total_size) {
    
        1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.

      12
        if (total_size == 0) {
    
          return;
    
        }
    
      12
        int num_threads = ppc::util::GetNumThreads();
    
      12
        if (num_threads <= 0) {
    
          num_threads = 1;
    
        }
    
        size_t num_chunks = 1;
    
        3/4✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
✓ Branch 2 taken 12 times.
✗ Branch 3 not taken.

      24
        while (num_chunks * 2 <= static_cast<size_t>(num_threads) && num_chunks * 2 <= total_size) {
    
          num_chunks *= 2;
    
        }
    
      12
        ExecuteTBBSortLocal(data, total_size, total_size / num_chunks, static_cast<int>(num_chunks));
    
      }
    
      4
      void PerformFinalMergeTBB(double *raw_data, size_t local_chunk_size, size_t pow2) {
    
      4
        int num_threads = ppc::util::GetNumThreads();
    
        1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

      4
        tbb::task_arena arena(num_threads > 0 ? num_threads : 1);
    
        1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

      4
        arena.execute([&] {
    
        2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.

      8
          for (size_t size = local_chunk_size; size < pow2; size *= 2) {
    
      4
            int merges_count = static_cast<int>(pow2 / (size * 2));
    
      8
            tbb::parallel_for(tbb::blocked_range<int>(0, merges_count), [raw_data, size](const tbb::blocked_range<int> &r) {
    
        2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.

      8
              for (int i = r.begin(); i != r.end(); ++i) {
    
      4
                OddEvenMergeIterative(raw_data, static_cast<size_t>(i) * 2 * size, 2 * size);
    
              }
    
      4
            });
    
          }
    
      4
        });
    
      4
      }
    
      8
      void ExecuteMPIHybridSort(std::vector<double> &local_data, size_t pow2, int world_rank, int active_procs) {
    
      8
        size_t local_chunk_size = pow2 / active_procs;
    
      8
        int chunk_size_int = static_cast<int>(local_chunk_size);
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.

      8
        size_t buffer_size = (world_rank < active_procs) ? local_chunk_size : 0;
    
      8
        std::vector<double> mpi_buffer(buffer_size, 0.0);
    
        2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.

      8
        if (world_rank == 0) {
    
        1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

      4
          MPI_Scatter(local_data.data(), chunk_size_int, MPI_DOUBLE, mpi_buffer.data(), chunk_size_int, MPI_DOUBLE, 0,
    
                      MPI_COMM_WORLD);
    
        1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.

      4
        } else if (world_rank < active_procs) {
    
        1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

      4
          MPI_Scatter(nullptr, 0, MPI_DATATYPE_NULL, mpi_buffer.data(), chunk_size_int, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    
        }
    
        1/2✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.

      8
        if (world_rank < active_procs) {
    
        1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.

      8
          PerformLocalTBBSort(mpi_buffer.data(), local_chunk_size);
    
        }
    
        2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.

      8
        if (world_rank == 0) {
    
        1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

      4
          MPI_Gather(mpi_buffer.data(), chunk_size_int, MPI_DOUBLE, local_data.data(), chunk_size_int, MPI_DOUBLE, 0,
    
                     MPI_COMM_WORLD);
    
        1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

      4
          PerformFinalMergeTBB(local_data.data(), local_chunk_size, pow2);
    
        1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.

      4
        } else if (world_rank < active_procs) {
    
        1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

      4
          MPI_Gather(mpi_buffer.data(), chunk_size_int, MPI_DOUBLE, nullptr, 0, MPI_DATATYPE_NULL, 0, MPI_COMM_WORLD);
    
        }
    
      8
      }
    
      8
      void HandleGTestLocalSort(std::vector<double> &local_data, int world_rank) {
    
        3/4✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.

      8
        if (world_rank == 0 || local_data.empty()) {
    
          return;
    
        }
    
        size_t my_orig = local_data.size();
    
        size_t my_pow2 = 1;
    
        2/2✓ Branch 0 taken 29 times.
✓ Branch 1 taken 4 times.

      33
        while (my_pow2 < my_orig) {
    
      29
          my_pow2 *= 2;
    
        }
    
        2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.

      4
        if (my_pow2 > my_orig) {
    
      3
          local_data.resize(my_pow2, std::numeric_limits<double>::max());
    
        }
    
      4
        PerformLocalTBBSort(local_data.data(), my_pow2);
    
        2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.

      4
        if (my_pow2 > my_orig) {
    
      3
          local_data.resize(my_orig);
    
        }
    
      }
    
      size_t CalculatePaddedSize(size_t original_size) {
    
        if (original_size == 0) {
    
          return 1;
    
        }
    
        size_t pow2 = 1;
    
        2/2✓ Branch 0 taken 29 times.
✓ Branch 1 taken 4 times.

      33
        while (pow2 < original_size) {
    
      29
          pow2 *= 2;
    
        }
    
        return pow2;
    
      }
    
      }  // namespace
    
        1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.

      8
      DorofeevIBitwiseSortDoubleEOBatcherMergeALL::DorofeevIBitwiseSortDoubleEOBatcherMergeALL(const InType &in) {
    
        SetTypeOfTask(GetStaticTypeOfTask());
    
        1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.

      8
        GetInput() = in;
    
      8
      }
    
      8
      bool DorofeevIBitwiseSortDoubleEOBatcherMergeALL::ValidationImpl() {
    
      8
        return true;
    
      }
    
      8
      bool DorofeevIBitwiseSortDoubleEOBatcherMergeALL::PreProcessingImpl() {
    
      8
        local_data_ = GetInput();
    
      8
        return true;
    
      }
    
      8
      bool DorofeevIBitwiseSortDoubleEOBatcherMergeALL::RunImpl() {
    
      8
        int world_size = 0;
    
      8
        int world_rank = 0;
    
      8
        MPI_Comm_size(MPI_COMM_WORLD, &world_size);
    
      8
        MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    
        2/4✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 8 times.

      8
        bool is_pow2_procs = (world_size > 0) && ((world_size & (world_size - 1)) == 0);
    
      8
        int active_procs = is_pow2_procs ? world_size : 1;
    
        size_t rank0_original_size = 0;
    
      8
        size_t pow2 = 1;
    
        2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.

      8
        if (world_rank == 0) {
    
          rank0_original_size = local_data_.size();
    
        1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.

      4
          if (rank0_original_size == 0) {
    
      ✗
            active_procs = 1;
    
          }
    
      4
          pow2 = CalculatePaddedSize(rank0_original_size);
    
        2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.

      4
          if (pow2 > rank0_original_size) {
    
      3
            local_data_.resize(pow2, std::numeric_limits<double>::max());
    
          }
    
        }
    
      8
        MPI_Bcast(&active_procs, 1, MPI_INT, 0, MPI_COMM_WORLD);
    
      8
        MPI_Bcast(&pow2, 1, MPI_UNSIGNED_LONG_LONG, 0, MPI_COMM_WORLD);
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.

      8
        if (active_procs == 1) {
    
      ✗
          if (world_rank == 0 && pow2 > 0) {
    
      ✗
            PerformLocalTBBSort(local_data_.data(), pow2);
    
          }
    
        } else {
    
      8
          ExecuteMPIHybridSort(local_data_, pow2, world_rank, active_procs);
    
        }
    
        4/4✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 1 times.

      8
        if (world_rank == 0 && pow2 > rank0_original_size) {
    
      3
          local_data_.resize(rank0_original_size);
    
        }
    
      8
        HandleGTestLocalSort(local_data_, world_rank);
    
      8
        return true;
    
      }
    
      8
      bool DorofeevIBitwiseSortDoubleEOBatcherMergeALL::PostProcessingImpl() {
    
      8
        GetOutput() = local_data_;
    
      8
        return true;
    
      }
    
      }  // namespace dorofeev_i_bitwise_sort_double_eo_batcher_merge

Line	Branch	Exec	Source
1			#include "dorofeev_i_bitwise_sort_double_eo_batcher_merge/all/include/ops_all.hpp"
2
3			#include <mpi.h>
4			#include <tbb/tbb.h>
5
6			#include <algorithm>
7			#include <cstdint>
8			#include <cstring>
9			#include <limits>
10			#include <vector>
11
12			#include "dorofeev_i_bitwise_sort_double_eo_batcher_merge/common/include/common.hpp"
13			#include "util/include/util.hpp"
14
15			namespace dorofeev_i_bitwise_sort_double_eo_batcher_merge {
16
17			namespace {
18
19			uint64_t DoubleToUint(double d) {
20			uint64_t u = 0;
21			std::memcpy(&u, &d, sizeof(double));
22	2/2 ✓ Branch 0 taken 1246 times. ✓ Branch 1 taken 1602 times.	2848	if ((u & 0x8000000000000000ULL) != 0) {
23		1246	u = ~u;
24			} else {
25		1602	u \|= 0x8000000000000000ULL;
26			}
27			return u;
28			}
29
30			double UintToDouble(uint64_t u) {
31		2848	if ((u & 0x8000000000000000ULL) != 0) {
32		1602	u &= ~0x8000000000000000ULL;
33			} else {
34		1246	u = ~u;
35			}
36			double d = 0.0;
37			std::memcpy(&d, &u, sizeof(double));
38			return d;
39			}
40
41	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 24 times.	24	void RadixSortDouble(std::vector<double> &arr) {
42	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 24 times.	24	if (arr.empty()) {
43		✗	return;
44			}
45
46		24	std::vector<uint64_t> uarr(arr.size());
47	2/2 ✓ Branch 0 taken 2848 times. ✓ Branch 1 taken 24 times.	2872	for (size_t i = 0; i < arr.size(); ++i) {
48	2/2 ✓ Branch 0 taken 1246 times. ✓ Branch 1 taken 1602 times.	5696	uarr[i] = DoubleToUint(arr[i]);
49			}
50
51	1/4 ✓ Branch 1 taken 24 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	24	std::vector<uint64_t> temp(uarr.size());
52	2/2 ✓ Branch 0 taken 192 times. ✓ Branch 1 taken 24 times.	216	for (size_t byte = 0; byte < 8; ++byte) {
53	1/4 ✓ Branch 1 taken 192 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	192	std::vector<int> count(256, 0);
54	2/2 ✓ Branch 0 taken 22784 times. ✓ Branch 1 taken 192 times.	22976	for (uint64_t val : uarr) {
55		22784	count[(val >> (byte * 8)) & 0xFF]++;
56			}
57	2/2 ✓ Branch 0 taken 48960 times. ✓ Branch 1 taken 192 times.	49152	for (size_t i = 1; i < 256; ++i) {
58		48960	count[i] += count[i - 1];
59			}
60	2/2 ✓ Branch 0 taken 22784 times. ✓ Branch 1 taken 192 times.	22976	for (int i = static_cast<int>(uarr.size()) - 1; i >= 0; --i) {
61		22784	temp[--count[(uarr[i] >> (byte * 8)) & 0xFF]] = uarr[i];
62			}
63	1/2 ✓ Branch 1 taken 192 times. ✗ Branch 2 not taken.	192	uarr = temp;
64			}
65
66	2/2 ✓ Branch 0 taken 2848 times. ✓ Branch 1 taken 24 times.	2872	for (size_t i = 0; i < arr.size(); ++i) {
67	2/2 ✓ Branch 0 taken 1602 times. ✓ Branch 1 taken 1246 times.	5696	arr[i] = UintToDouble(uarr[i]);
68			}
69			}
70
71			void CompareExchangeBlocks(double *arr, size_t i, size_t step) {
72	4/4 ✓ Branch 0 taken 2136 times. ✓ Branch 1 taken 16 times. ✓ Branch 2 taken 14904 times. ✓ Branch 3 taken 4148 times.	21204	for (size_t k = 0; k < step; ++k) {
73	4/4 ✓ Branch 0 taken 353 times. ✓ Branch 1 taken 1783 times. ✓ Branch 2 taken 7577 times. ✓ Branch 3 taken 7327 times.	17040	if (arr[i + k] > arr[i + k + step]) {
74			std::swap(arr[i + k], arr[i + k + step]);
75			}
76			}
77			}
78
79		16	void OddEvenMergeIterative(double *arr, size_t start, size_t n) {
80	1/2 ✓ Branch 0 taken 16 times. ✗ Branch 1 not taken.	16	if (n <= 1) {
81			return;
82			}
83		16	size_t step = n / 2;
84			CompareExchangeBlocks(arr, start, step);
85		16	step /= 2;
86	2/2 ✓ Branch 0 taken 92 times. ✓ Branch 1 taken 16 times.	108	for (; step > 0; step /= 2) {
87	2/2 ✓ Branch 0 taken 4148 times. ✓ Branch 1 taken 92 times.	4240	for (size_t i = step; i < n - step; i += step * 2) {
88		4148	CompareExchangeBlocks(arr, start + i, step);
89			}
90			}
91			}
92
93		24	void ProcessChunkTBB(double *raw_data, int chunk_idx, size_t chunk_size) {
94		24	size_t start_idx = static_cast<size_t>(chunk_idx) * chunk_size;
95		24	std::vector<double> local_arr(chunk_size);
96	2/2 ✓ Branch 0 taken 2848 times. ✓ Branch 1 taken 24 times.	2872	for (size_t j = 0; j < chunk_size; ++j) {
97		2848	local_arr[j] = raw_data[start_idx + j];
98			}
99	1/2 ✓ Branch 1 taken 24 times. ✗ Branch 2 not taken.	24	RadixSortDouble(local_arr);
100	2/2 ✓ Branch 0 taken 2848 times. ✓ Branch 1 taken 24 times.	2872	for (size_t j = 0; j < chunk_size; ++j) {
101		2848	raw_data[start_idx + j] = local_arr[j];
102			}
103		24	}
104
105		12	void ExecuteTBBSortLocal(double *raw_data, size_t total_size, size_t chunk_size, int num_chunks_int) {
106		12	int num_threads = ppc::util::GetNumThreads();
107	1/2 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken.	12	tbb::task_arena arena(num_threads > 0 ? num_threads : 1);
108	1/2 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken.	12	arena.execute([&] {
109		24	tbb::parallel_for(tbb::blocked_range<int>(0, num_chunks_int),
110		12	[raw_data, chunk_size](const tbb::blocked_range<int> &r) {
111	2/4 ✓ Branch 0 taken 24 times. ✓ Branch 1 taken 24 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken.	48	for (int i = r.begin(); i != r.end(); ++i) {
112	0/2 ✗ Branch 2 not taken. ✗ Branch 3 not taken.	24	ProcessChunkTBB(raw_data, i, chunk_size);
113			}
114			});
115	2/2 ✓ Branch 0 taken 12 times. ✓ Branch 1 taken 12 times.	24	for (size_t size = chunk_size; size < total_size; size *= 2) {
116		12	int merges_count = static_cast<int>(total_size / (size * 2));
117		24	tbb::parallel_for(tbb::blocked_range<int>(0, merges_count), [raw_data, size](const tbb::blocked_range<int> &r) {
118	2/2 ✓ Branch 0 taken 12 times. ✓ Branch 1 taken 12 times.	24	for (int i = r.begin(); i != r.end(); ++i) {
119		12	OddEvenMergeIterative(raw_data, static_cast<size_t>(i) * 2 * size, 2 * size);
120			}
121		12	});
122			}
123		12	});
124		12	}
125
126		12	void PerformLocalTBBSort(double *data, size_t total_size) {
127	1/2 ✓ Branch 0 taken 12 times. ✗ Branch 1 not taken.	12	if (total_size == 0) {
128			return;
129			}
130		12	int num_threads = ppc::util::GetNumThreads();
131		12	if (num_threads <= 0) {
132			num_threads = 1;
133			}
134			size_t num_chunks = 1;
135	3/4 ✓ Branch 0 taken 12 times. ✓ Branch 1 taken 12 times. ✓ Branch 2 taken 12 times. ✗ Branch 3 not taken.	24	while (num_chunks * 2 <= static_cast<size_t>(num_threads) && num_chunks * 2 <= total_size) {
136			num_chunks *= 2;
137			}
138		12	ExecuteTBBSortLocal(data, total_size, total_size / num_chunks, static_cast<int>(num_chunks));
139			}
140
141		4	void PerformFinalMergeTBB(double *raw_data, size_t local_chunk_size, size_t pow2) {
142		4	int num_threads = ppc::util::GetNumThreads();
143	1/2 ✓ Branch 1 taken 4 times. ✗ Branch 2 not taken.	4	tbb::task_arena arena(num_threads > 0 ? num_threads : 1);
144	1/2 ✓ Branch 1 taken 4 times. ✗ Branch 2 not taken.	4	arena.execute([&] {
145	2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 4 times.	8	for (size_t size = local_chunk_size; size < pow2; size *= 2) {
146		4	int merges_count = static_cast<int>(pow2 / (size * 2));
147		8	tbb::parallel_for(tbb::blocked_range<int>(0, merges_count), [raw_data, size](const tbb::blocked_range<int> &r) {
148	2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 4 times.	8	for (int i = r.begin(); i != r.end(); ++i) {
149		4	OddEvenMergeIterative(raw_data, static_cast<size_t>(i) * 2 * size, 2 * size);
150			}
151		4	});
152			}
153		4	});
154		4	}
155
156		8	void ExecuteMPIHybridSort(std::vector<double> &local_data, size_t pow2, int world_rank, int active_procs) {
157		8	size_t local_chunk_size = pow2 / active_procs;
158		8	int chunk_size_int = static_cast<int>(local_chunk_size);
159	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 8 times.	8	size_t buffer_size = (world_rank < active_procs) ? local_chunk_size : 0;
160		8	std::vector<double> mpi_buffer(buffer_size, 0.0);
161
162	2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 4 times.	8	if (world_rank == 0) {
163	1/2 ✓ Branch 1 taken 4 times. ✗ Branch 2 not taken.	4	MPI_Scatter(local_data.data(), chunk_size_int, MPI_DOUBLE, mpi_buffer.data(), chunk_size_int, MPI_DOUBLE, 0,
164			MPI_COMM_WORLD);
165	1/2 ✓ Branch 0 taken 4 times. ✗ Branch 1 not taken.	4	} else if (world_rank < active_procs) {
166	1/2 ✓ Branch 1 taken 4 times. ✗ Branch 2 not taken.	4	MPI_Scatter(nullptr, 0, MPI_DATATYPE_NULL, mpi_buffer.data(), chunk_size_int, MPI_DOUBLE, 0, MPI_COMM_WORLD);
167			}
168
169	1/2 ✓ Branch 0 taken 8 times. ✗ Branch 1 not taken.	8	if (world_rank < active_procs) {
170	1/2 ✓ Branch 1 taken 8 times. ✗ Branch 2 not taken.	8	PerformLocalTBBSort(mpi_buffer.data(), local_chunk_size);
171			}
172
173	2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 4 times.	8	if (world_rank == 0) {
174	1/2 ✓ Branch 1 taken 4 times. ✗ Branch 2 not taken.	4	MPI_Gather(mpi_buffer.data(), chunk_size_int, MPI_DOUBLE, local_data.data(), chunk_size_int, MPI_DOUBLE, 0,
175			MPI_COMM_WORLD);
176	1/2 ✓ Branch 1 taken 4 times. ✗ Branch 2 not taken.	4	PerformFinalMergeTBB(local_data.data(), local_chunk_size, pow2);
177	1/2 ✓ Branch 0 taken 4 times. ✗ Branch 1 not taken.	4	} else if (world_rank < active_procs) {
178	1/2 ✓ Branch 1 taken 4 times. ✗ Branch 2 not taken.	4	MPI_Gather(mpi_buffer.data(), chunk_size_int, MPI_DOUBLE, nullptr, 0, MPI_DATATYPE_NULL, 0, MPI_COMM_WORLD);
179			}
180		8	}
181
182		8	void HandleGTestLocalSort(std::vector<double> &local_data, int world_rank) {
183	3/4 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 4 times. ✓ Branch 2 taken 4 times. ✗ Branch 3 not taken.	8	if (world_rank == 0 \|\| local_data.empty()) {
184			return;
185			}
186			size_t my_orig = local_data.size();
187			size_t my_pow2 = 1;
188	2/2 ✓ Branch 0 taken 29 times. ✓ Branch 1 taken 4 times.	33	while (my_pow2 < my_orig) {
189		29	my_pow2 *= 2;
190			}
191	2/2 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 1 times.	4	if (my_pow2 > my_orig) {
192		3	local_data.resize(my_pow2, std::numeric_limits<double>::max());
193			}
194
195		4	PerformLocalTBBSort(local_data.data(), my_pow2);
196
197	2/2 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 1 times.	4	if (my_pow2 > my_orig) {
198		3	local_data.resize(my_orig);
199			}
200			}
201
202			size_t CalculatePaddedSize(size_t original_size) {
203			if (original_size == 0) {
204			return 1;
205			}
206			size_t pow2 = 1;
207	2/2 ✓ Branch 0 taken 29 times. ✓ Branch 1 taken 4 times.	33	while (pow2 < original_size) {
208		29	pow2 *= 2;
209			}
210			return pow2;
211			}
212
213			} // namespace
214
215	1/2 ✓ Branch 1 taken 8 times. ✗ Branch 2 not taken.	8	DorofeevIBitwiseSortDoubleEOBatcherMergeALL::DorofeevIBitwiseSortDoubleEOBatcherMergeALL(const InType &in) {
216			SetTypeOfTask(GetStaticTypeOfTask());
217	1/2 ✓ Branch 1 taken 8 times. ✗ Branch 2 not taken.	8	GetInput() = in;
218		8	}
219
220		8	bool DorofeevIBitwiseSortDoubleEOBatcherMergeALL::ValidationImpl() {
221		8	return true;
222			}
223
224		8	bool DorofeevIBitwiseSortDoubleEOBatcherMergeALL::PreProcessingImpl() {
225		8	local_data_ = GetInput();
226		8	return true;
227			}
228
229		8	bool DorofeevIBitwiseSortDoubleEOBatcherMergeALL::RunImpl() {
230		8	int world_size = 0;
231		8	int world_rank = 0;
232		8	MPI_Comm_size(MPI_COMM_WORLD, &world_size);
233		8	MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
234
235	2/4 ✓ Branch 0 taken 8 times. ✗ Branch 1 not taken. ✗ Branch 2 not taken. ✓ Branch 3 taken 8 times.	8	bool is_pow2_procs = (world_size > 0) && ((world_size & (world_size - 1)) == 0);
236		8	int active_procs = is_pow2_procs ? world_size : 1;
237
238			size_t rank0_original_size = 0;
239		8	size_t pow2 = 1;
240
241	2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 4 times.	8	if (world_rank == 0) {
242			rank0_original_size = local_data_.size();
243	1/2 ✓ Branch 0 taken 4 times. ✗ Branch 1 not taken.	4	if (rank0_original_size == 0) {
244		✗	active_procs = 1;
245			}
246		4	pow2 = CalculatePaddedSize(rank0_original_size);
247	2/2 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 1 times.	4	if (pow2 > rank0_original_size) {
248		3	local_data_.resize(pow2, std::numeric_limits<double>::max());
249			}
250			}
251
252		8	MPI_Bcast(&active_procs, 1, MPI_INT, 0, MPI_COMM_WORLD);
253		8	MPI_Bcast(&pow2, 1, MPI_UNSIGNED_LONG_LONG, 0, MPI_COMM_WORLD);
254
255	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 8 times.	8	if (active_procs == 1) {
256		✗	if (world_rank == 0 && pow2 > 0) {
257		✗	PerformLocalTBBSort(local_data_.data(), pow2);
258			}
259			} else {
260		8	ExecuteMPIHybridSort(local_data_, pow2, world_rank, active_procs);
261			}
262
263	4/4 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 4 times. ✓ Branch 2 taken 3 times. ✓ Branch 3 taken 1 times.	8	if (world_rank == 0 && pow2 > rank0_original_size) {
264		3	local_data_.resize(rank0_original_size);
265			}
266
267		8	HandleGTestLocalSort(local_data_, world_rank);
268
269		8	return true;
270			}
271
272		8	bool DorofeevIBitwiseSortDoubleEOBatcherMergeALL::PostProcessingImpl() {
273		8	GetOutput() = local_data_;
274		8	return true;
275			}
276
277			} // namespace dorofeev_i_bitwise_sort_double_eo_batcher_merge
278