GCC Code Coverage Report

Directory:	./
File:	tasks/nikolaev_d_block_linear_image_filtering/all/src/ops_all.cpp
Date:	2026-06-04 20:25:32

	Exec	Total	Coverage
Lines:	129	140	92.1%
Functions:	13	14	92.9%
Branches:	69	110	62.7%

  
      Line
      Branch
      Exec
      Source
    
      #include "nikolaev_d_block_linear_image_filtering/all/include/ops_all.hpp"
    
      #include <mpi.h>
    
      #include <tbb/blocked_range2d.h>
    
      #include <tbb/parallel_for.h>
    
      #include <algorithm>
    
      #include <array>
    
      #include <cstddef>
    
      #include <cstdint>
    
      #include <utility>
    
      #include <vector>
    
      #include "nikolaev_d_block_linear_image_filtering/common/include/common.hpp"
    
      namespace nikolaev_d_block_linear_image_filtering {
    
      namespace {
    
      constexpr int kBlockSize = 32;
    
      struct GridSlicing {
    
        std::vector<int> rows_per_proc;
    
        std::vector<int> row_displs;
    
        int halo_top{0};
    
        int halo_bottom{0};
    
        int buffer_height{0};
    
      };
    
      54
      int ApplyKernel(const std::vector<uint8_t> &img, int row, int col, int channel, int width, int height, int rank,
    
                      const GridSlicing &dist, const std::array<std::array<int, 3>, 3> &kernel) {
    
        int sum = 0;
    
      54
        int input_start_row = dist.row_displs[rank] - dist.halo_top;
    
        2/2✓ Branch 0 taken 162 times.
✓ Branch 1 taken 54 times.

      216
        for (size_t kr = 0; kr < 3; ++kr) {
    
      162
          int target_global_row = dist.row_displs[rank] + row + (static_cast<int>(kr) - 1);
    
      162
          int clamped_global_row = std::clamp(target_global_row, 0, height - 1);
    
      162
          int buffer_row = clamped_global_row - input_start_row;
    
        2/2✓ Branch 0 taken 486 times.
✓ Branch 1 taken 162 times.

      648
          for (size_t kc = 0; kc < 3; ++kc) {
    
      486
            int target_col = col + (static_cast<int>(kc) - 1);
    
      486
            int clamped_col = std::clamp(target_col, 0, width - 1);
    
      486
            size_t idx = (((static_cast<size_t>(buffer_row) * width) + clamped_col) * 3) + channel;
    
      486
            sum += (static_cast<int>(img[idx]) * kernel.at(kr).at(kc));
    
          }
    
        }
    
      54
        return sum;
    
      }
    
      ✗
      void ProcessFullBlock(const std::vector<uint8_t> &input, std::vector<uint8_t> &output, int width, int height, int rank,
    
                            const GridSlicing &dist, int start_row, int start_col) {
    
        static constexpr std::array<std::array<int, 3>, 3> kKernel = {{{1, 2, 1}, {2, 4, 2}, {1, 2, 1}}};
    
      ✗
        for (int row = start_row; row < start_row + kBlockSize; ++row) {
    
      ✗
          for (int col = start_col; col < start_col + kBlockSize; ++col) {
    
      ✗
            for (int channel = 0; channel < 3; ++channel) {
    
      ✗
              int sum = ApplyKernel(input, row, col, channel, width, height, rank, dist, kKernel);
    
      ✗
              int result_value = (sum + 8) / 16;
    
              result_value = std::clamp(result_value, 0, 255);
    
      ✗
              auto idx = ((static_cast<size_t>(row) * width + col) * 3) + channel;
    
      ✗
              output[idx] = static_cast<uint8_t>(result_value);
    
            }
    
          }
    
        }
    
      ✗
      }
    
      5
      void ProcessPartBlock(const std::vector<uint8_t> &input, std::vector<uint8_t> &output, int width, int height,
    
                            int local_rows, int rank, const GridSlicing &dist, int start_row, int start_col) {
    
        static constexpr std::array<std::array<int, 3>, 3> kKernel = {{{1, 2, 1}, {2, 4, 2}, {1, 2, 1}}};
    
      5
        const int end_row = std::min(local_rows, start_row + kBlockSize);
    
      5
        const int end_col = std::min(width, start_col + kBlockSize);
    
        2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 5 times.

      13
        for (int row = start_row; row < end_row; ++row) {
    
        2/2✓ Branch 0 taken 18 times.
✓ Branch 1 taken 8 times.

      26
          for (int col = start_col; col < end_col; ++col) {
    
        2/2✓ Branch 0 taken 54 times.
✓ Branch 1 taken 18 times.

      72
            for (int channel = 0; channel < 3; ++channel) {
    
      54
              int sum = ApplyKernel(input, row, col, channel, width, height, rank, dist, kKernel);
    
        1/2✓ Branch 0 taken 54 times.
✗ Branch 1 not taken.

      54
              int result_value = (sum + 8) / 16;
    
              result_value = std::clamp(result_value, 0, 255);
    
      54
              auto idx = ((static_cast<size_t>(row) * width + col) * 3) + channel;
    
      54
              output[idx] = static_cast<uint8_t>(result_value);
    
            }
    
          }
    
        }
    
      5
      }
    
      5
      void ProcessGridRange(const tbb::blocked_range2d<int> &r, int local_block_rows, int num_col_blocks, int local_rows,
    
                            int width, int height, int rank, const GridSlicing &dist, const std::vector<uint8_t> &local_input,
    
                            std::vector<uint8_t> &local_output) {
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      10
        for (int bi = r.rows().begin(); bi < r.rows().end(); ++bi) {
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      10
          for (int bj = r.cols().begin(); bj < r.cols().end(); ++bj) {
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.

      5
            if (bi < local_block_rows && bj < num_col_blocks) {
    
      ✗
              ProcessFullBlock(local_input, local_output, width, height, rank, dist, bi * kBlockSize, bj * kBlockSize);
    
            } else {
    
      5
              ProcessPartBlock(local_input, local_output, width, height, local_rows, rank, dist, bi * kBlockSize,
    
                               bj * kBlockSize);
    
            }
    
          }
    
        }
    
      5
      }
    
      10
      GridSlicing BuildGridSlicing(int rank, int world_size, int height) {
    
      10
        const int total_block_rows = height / kBlockSize;
    
      10
        const int height_remainder = height % kBlockSize;
    
      10
        std::vector<int> block_rows_per_proc(world_size);
    
      10
        const int base_blocks = total_block_rows / world_size;
    
      10
        const int extra_blocks = total_block_rows % world_size;
    
        2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 10 times.

      30
        for (int proc = 0; proc < world_size; ++proc) {
    
        1/2✓ Branch 0 taken 20 times.
✗ Branch 1 not taken.

      40
          block_rows_per_proc[proc] = base_blocks + (proc < extra_blocks ? 1 : 0);
    
        }
    
      10
        GridSlicing dist;
    
        1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.

      10
        dist.rows_per_proc.resize(world_size);
    
        1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.

      10
        dist.row_displs.resize(world_size);
    
        int pixel_offset = 0;
    
        2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 10 times.

      30
        for (int proc = 0; proc < world_size; ++proc) {
    
        2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.

      20
          int rows = block_rows_per_proc[proc] * kBlockSize;
    
        2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.

      20
          if (proc == world_size - 1) {
    
      10
            rows += height_remainder;
    
          }
    
      20
          dist.rows_per_proc[proc] = rows;
    
      20
          dist.row_displs[proc] = pixel_offset;
    
      20
          pixel_offset += rows;
    
        }
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      10
        if (dist.rows_per_proc[rank] > 0) {
    
        1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.

      5
          dist.halo_top = (dist.row_displs[rank] > 0) ? 1 : 0;
    
        1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.

      10
          dist.halo_bottom = (dist.row_displs[rank] + dist.rows_per_proc[rank] < height) ? 1 : 0;
    
        }
    
        1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.

      10
        dist.buffer_height = dist.rows_per_proc[rank] + dist.halo_top + dist.halo_bottom;
    
      10
        return dist;
    
      ✗
      }
    
      std::pair<int, int> HaloFor(int proc, const GridSlicing &dist, int height) {
    
        2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.

      20
        if (dist.rows_per_proc[proc] == 0) {
    
          return {0, 0};
    
        }
    
        1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.

      10
        int top = (dist.row_displs[proc] > 0) ? 1 : 0;
    
        1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.

      10
        int bot = (dist.row_displs[proc] + dist.rows_per_proc[proc] < height) ? 1 : 0;
    
        return {top, bot};
    
      }
    
      10
      void ScatterWithHalo(int rank, int world_size, int width, int height, const GridSlicing &dist,
    
                           const uint8_t *full_image, std::vector<uint8_t> &local_input) {
    
      10
        std::vector<int> scatter_counts(world_size);
    
        1/4✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      10
        std::vector<int> scatter_displs(world_size);
    
        2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 10 times.

      30
        for (int proc = 0; proc < world_size; ++proc) {
    
          auto [proc_top, proc_bot] = HaloFor(proc, dist, height);
    
      20
          int proc_buffer_rows = dist.rows_per_proc[proc] + proc_top + proc_bot;
    
      20
          scatter_counts[proc] = proc_buffer_rows * width * 3;
    
      20
          scatter_displs[proc] = (dist.row_displs[proc] - proc_top) * width * 3;
    
        }
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      10
        const uint8_t *send_buf = (rank == 0) ? full_image : nullptr;
    
        1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.

      10
        MPI_Scatterv(send_buf, scatter_counts.data(), scatter_displs.data(), MPI_UNSIGNED_CHAR, local_input.data(),
    
                     static_cast<int>(local_input.size()), MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD);
    
      10
      }
    
      10
      void RunLocalTBB(int rank, int world_size, int width, int height, const GridSlicing &dist,
    
                       const std::vector<uint8_t> &local_input, std::vector<uint8_t> &local_output) {
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      10
        const int local_rows = dist.rows_per_proc[rank];
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      10
        if (local_rows == 0) {
    
      5
          return;
    
        }
    
      5
        const int total_block_rows = height / kBlockSize;
    
      5
        const int height_remainder = height % kBlockSize;
    
      5
        const int num_col_blocks = width / kBlockSize;
    
      5
        const bool width_has_remainder = (width % kBlockSize) != 0;
    
      5
        const int local_block_rows =
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.

      5
            (rank < total_block_rows % world_size) ? ((total_block_rows / world_size) + 1) : (total_block_rows / world_size);
    
      5
        const bool is_last = (rank == world_size - 1);
    
      5
        const int total_local_bi = local_block_rows + static_cast<int>(is_last && height_remainder > 0);
    
      5
        const int total_local_bj = num_col_blocks + static_cast<int>(width_has_remainder);
    
      5
        tbb::parallel_for(tbb::blocked_range2d<int>(0, total_local_bi, 0, total_local_bj),
    
      5
                          [&](const tbb::blocked_range2d<int> &r) {
    
      5
          ProcessGridRange(r, local_block_rows, num_col_blocks, local_rows, width, height, rank, dist, local_input,
    
                           local_output);
    
      5
        });
    
      }
    
      10
      void GatherAndBroadcast(int world_size, int width, int height, const GridSlicing &dist,
    
                              const std::vector<uint8_t> &local_output, std::vector<uint8_t> &result) {
    
      10
        std::vector<int> recv_counts(world_size);
    
        1/4✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      10
        std::vector<int> recv_displs(world_size);
    
        2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 10 times.

      30
        for (int proc = 0; proc < world_size; ++proc) {
    
      20
          recv_counts[proc] = dist.rows_per_proc[proc] * width * 3;
    
      20
          recv_displs[proc] = dist.row_displs[proc] * width * 3;
    
        }
    
        2/6✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 10 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.

      10
        result.assign(static_cast<size_t>(height) * width * 3, 0);
    
        1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.

      10
        MPI_Gatherv(local_output.data(), static_cast<int>(local_output.size()), MPI_UNSIGNED_CHAR, result.data(),
    
                    recv_counts.data(), recv_displs.data(), MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD);
    
        1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.

      10
        MPI_Bcast(result.data(), static_cast<int>(result.size()), MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD);
    
      10
      }
    
      }  // namespace
    
        1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.

      10
      NikolaevDBlockLinearImageFilteringALL::NikolaevDBlockLinearImageFilteringALL(const InType &in) {
    
        SetTypeOfTask(GetStaticTypeOfTask());
    
      10
        int rank = 0;
    
        1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.

      10
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      10
        if (rank == 0) {
    
          GetInput() = in;
    
        }
    
      10
        GetOutput() = std::vector<uint8_t>();
    
      10
      }
    
      10
      bool NikolaevDBlockLinearImageFilteringALL::ValidationImpl() {
    
      10
        int rank = 0;
    
      10
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      10
        if (rank != 0) {
    
          return true;
    
        }
    
      5
        return std::get<0>(GetInput()) * std::get<1>(GetInput()) * 3 == static_cast<int>(std::get<2>(GetInput()).size());
    
      }
    
      10
      bool NikolaevDBlockLinearImageFilteringALL::PreProcessingImpl() {
    
      10
        return true;
    
      }
    
      10
      bool NikolaevDBlockLinearImageFilteringALL::RunImpl() {
    
      10
        int rank = 0;
    
      10
        int world_size = 1;
    
      10
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    
      10
        MPI_Comm_size(MPI_COMM_WORLD, &world_size);
    
      10
        std::array<int, 2> dims{};
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      10
        if (rank == 0) {
    
      5
          dims[0] = std::get<0>(GetInput());
    
      5
          dims[1] = std::get<1>(GetInput());
    
        }
    
      10
        MPI_Bcast(dims.data(), 2, MPI_INT, 0, MPI_COMM_WORLD);
    
      10
        const int width = dims[0];
    
      10
        const int height = dims[1];
    
      10
        const GridSlicing dist = BuildGridSlicing(rank, world_size, height);
    
        1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.

      10
        std::vector<uint8_t> local_input(static_cast<size_t>(dist.buffer_height) * width * 3);
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      10
        const uint8_t *full_image = (rank == 0) ? std::get<2>(GetInput()).data() : nullptr;
    
        1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.

      10
        ScatterWithHalo(rank, world_size, width, height, dist, full_image, local_input);
    
        1/4✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      10
        std::vector<uint8_t> local_output(static_cast<size_t>(dist.rows_per_proc[rank]) * width * 3);
    
        1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.

      10
        RunLocalTBB(rank, world_size, width, height, dist, local_input, local_output);
    
      10
        std::vector<uint8_t> result;
    
        1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.

      10
        GatherAndBroadcast(world_size, width, height, dist, local_output, result);
    
        GetOutput() = std::move(result);
    
      10
        return true;
    
      10
      }
    
      10
      bool NikolaevDBlockLinearImageFilteringALL::PostProcessingImpl() {
    
      10
        return true;
    
      }
    
      }  // namespace nikolaev_d_block_linear_image_filtering

Line	Branch	Exec	Source
1			#include "nikolaev_d_block_linear_image_filtering/all/include/ops_all.hpp"
2
3			#include <mpi.h>
4			#include <tbb/blocked_range2d.h>
5			#include <tbb/parallel_for.h>
6
7			#include <algorithm>
8			#include <array>
9			#include <cstddef>
10			#include <cstdint>
11			#include <utility>
12			#include <vector>
13
14			#include "nikolaev_d_block_linear_image_filtering/common/include/common.hpp"
15
16			namespace nikolaev_d_block_linear_image_filtering {
17
18			namespace {
19
20			constexpr int kBlockSize = 32;
21
22			struct GridSlicing {
23			std::vector<int> rows_per_proc;
24			std::vector<int> row_displs;
25			int halo_top{0};
26			int halo_bottom{0};
27			int buffer_height{0};
28			};
29
30		54	int ApplyKernel(const std::vector<uint8_t> &img, int row, int col, int channel, int width, int height, int rank,
31			const GridSlicing &dist, const std::array<std::array<int, 3>, 3> &kernel) {
32			int sum = 0;
33		54	int input_start_row = dist.row_displs[rank] - dist.halo_top;
34
35	2/2 ✓ Branch 0 taken 162 times. ✓ Branch 1 taken 54 times.	216	for (size_t kr = 0; kr < 3; ++kr) {
36		162	int target_global_row = dist.row_displs[rank] + row + (static_cast<int>(kr) - 1);
37		162	int clamped_global_row = std::clamp(target_global_row, 0, height - 1);
38		162	int buffer_row = clamped_global_row - input_start_row;
39
40	2/2 ✓ Branch 0 taken 486 times. ✓ Branch 1 taken 162 times.	648	for (size_t kc = 0; kc < 3; ++kc) {
41		486	int target_col = col + (static_cast<int>(kc) - 1);
42		486	int clamped_col = std::clamp(target_col, 0, width - 1);
43
44		486	size_t idx = (((static_cast<size_t>(buffer_row) * width) + clamped_col) * 3) + channel;
45		486	sum += (static_cast<int>(img[idx]) * kernel.at(kr).at(kc));
46			}
47			}
48		54	return sum;
49			}
50
51		✗	void ProcessFullBlock(const std::vector<uint8_t> &input, std::vector<uint8_t> &output, int width, int height, int rank,
52			const GridSlicing &dist, int start_row, int start_col) {
53			static constexpr std::array<std::array<int, 3>, 3> kKernel = {{{1, 2, 1}, {2, 4, 2}, {1, 2, 1}}};
54
55		✗	for (int row = start_row; row < start_row + kBlockSize; ++row) {
56		✗	for (int col = start_col; col < start_col + kBlockSize; ++col) {
57		✗	for (int channel = 0; channel < 3; ++channel) {
58		✗	int sum = ApplyKernel(input, row, col, channel, width, height, rank, dist, kKernel);
59		✗	int result_value = (sum + 8) / 16;
60			result_value = std::clamp(result_value, 0, 255);
61		✗	auto idx = ((static_cast<size_t>(row) * width + col) * 3) + channel;
62		✗	output[idx] = static_cast<uint8_t>(result_value);
63			}
64			}
65			}
66		✗	}
67
68		5	void ProcessPartBlock(const std::vector<uint8_t> &input, std::vector<uint8_t> &output, int width, int height,
69			int local_rows, int rank, const GridSlicing &dist, int start_row, int start_col) {
70			static constexpr std::array<std::array<int, 3>, 3> kKernel = {{{1, 2, 1}, {2, 4, 2}, {1, 2, 1}}};
71
72		5	const int end_row = std::min(local_rows, start_row + kBlockSize);
73		5	const int end_col = std::min(width, start_col + kBlockSize);
74
75	2/2 ✓ Branch 0 taken 8 times. ✓ Branch 1 taken 5 times.	13	for (int row = start_row; row < end_row; ++row) {
76	2/2 ✓ Branch 0 taken 18 times. ✓ Branch 1 taken 8 times.	26	for (int col = start_col; col < end_col; ++col) {
77	2/2 ✓ Branch 0 taken 54 times. ✓ Branch 1 taken 18 times.	72	for (int channel = 0; channel < 3; ++channel) {
78		54	int sum = ApplyKernel(input, row, col, channel, width, height, rank, dist, kKernel);
79	1/2 ✓ Branch 0 taken 54 times. ✗ Branch 1 not taken.	54	int result_value = (sum + 8) / 16;
80			result_value = std::clamp(result_value, 0, 255);
81		54	auto idx = ((static_cast<size_t>(row) * width + col) * 3) + channel;
82		54	output[idx] = static_cast<uint8_t>(result_value);
83			}
84			}
85			}
86		5	}
87
88		5	void ProcessGridRange(const tbb::blocked_range2d<int> &r, int local_block_rows, int num_col_blocks, int local_rows,
89			int width, int height, int rank, const GridSlicing &dist, const std::vector<uint8_t> &local_input,
90			std::vector<uint8_t> &local_output) {
91	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	10	for (int bi = r.rows().begin(); bi < r.rows().end(); ++bi) {
92	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	10	for (int bj = r.cols().begin(); bj < r.cols().end(); ++bj) {
93	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 5 times.	5	if (bi < local_block_rows && bj < num_col_blocks) {
94		✗	ProcessFullBlock(local_input, local_output, width, height, rank, dist, bi * kBlockSize, bj * kBlockSize);
95			} else {
96		5	ProcessPartBlock(local_input, local_output, width, height, local_rows, rank, dist, bi * kBlockSize,
97			bj * kBlockSize);
98			}
99			}
100			}
101		5	}
102
103		10	GridSlicing BuildGridSlicing(int rank, int world_size, int height) {
104		10	const int total_block_rows = height / kBlockSize;
105		10	const int height_remainder = height % kBlockSize;
106
107		10	std::vector<int> block_rows_per_proc(world_size);
108		10	const int base_blocks = total_block_rows / world_size;
109		10	const int extra_blocks = total_block_rows % world_size;
110	2/2 ✓ Branch 0 taken 20 times. ✓ Branch 1 taken 10 times.	30	for (int proc = 0; proc < world_size; ++proc) {
111	1/2 ✓ Branch 0 taken 20 times. ✗ Branch 1 not taken.	40	block_rows_per_proc[proc] = base_blocks + (proc < extra_blocks ? 1 : 0);
112			}
113
114		10	GridSlicing dist;
115	1/2 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken.	10	dist.rows_per_proc.resize(world_size);
116	1/2 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken.	10	dist.row_displs.resize(world_size);
117			int pixel_offset = 0;
118	2/2 ✓ Branch 0 taken 20 times. ✓ Branch 1 taken 10 times.	30	for (int proc = 0; proc < world_size; ++proc) {
119	2/2 ✓ Branch 0 taken 10 times. ✓ Branch 1 taken 10 times.	20	int rows = block_rows_per_proc[proc] * kBlockSize;
120	2/2 ✓ Branch 0 taken 10 times. ✓ Branch 1 taken 10 times.	20	if (proc == world_size - 1) {
121		10	rows += height_remainder;
122			}
123		20	dist.rows_per_proc[proc] = rows;
124		20	dist.row_displs[proc] = pixel_offset;
125		20	pixel_offset += rows;
126			}
127
128	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	10	if (dist.rows_per_proc[rank] > 0) {
129	1/2 ✓ Branch 0 taken 5 times. ✗ Branch 1 not taken.	5	dist.halo_top = (dist.row_displs[rank] > 0) ? 1 : 0;
130	1/2 ✓ Branch 0 taken 5 times. ✗ Branch 1 not taken.	10	dist.halo_bottom = (dist.row_displs[rank] + dist.rows_per_proc[rank] < height) ? 1 : 0;
131			}
132	1/2 ✓ Branch 0 taken 10 times. ✗ Branch 1 not taken.	10	dist.buffer_height = dist.rows_per_proc[rank] + dist.halo_top + dist.halo_bottom;
133
134		10	return dist;
135		✗	}
136
137			std::pair<int, int> HaloFor(int proc, const GridSlicing &dist, int height) {
138	2/2 ✓ Branch 0 taken 10 times. ✓ Branch 1 taken 10 times.	20	if (dist.rows_per_proc[proc] == 0) {
139			return {0, 0};
140			}
141	1/2 ✓ Branch 0 taken 10 times. ✗ Branch 1 not taken.	10	int top = (dist.row_displs[proc] > 0) ? 1 : 0;
142	1/2 ✓ Branch 0 taken 10 times. ✗ Branch 1 not taken.	10	int bot = (dist.row_displs[proc] + dist.rows_per_proc[proc] < height) ? 1 : 0;
143			return {top, bot};
144			}
145
146		10	void ScatterWithHalo(int rank, int world_size, int width, int height, const GridSlicing &dist,
147			const uint8_t *full_image, std::vector<uint8_t> &local_input) {
148		10	std::vector<int> scatter_counts(world_size);
149	1/4 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	10	std::vector<int> scatter_displs(world_size);
150	2/2 ✓ Branch 0 taken 20 times. ✓ Branch 1 taken 10 times.	30	for (int proc = 0; proc < world_size; ++proc) {
151			auto [proc_top, proc_bot] = HaloFor(proc, dist, height);
152		20	int proc_buffer_rows = dist.rows_per_proc[proc] + proc_top + proc_bot;
153		20	scatter_counts[proc] = proc_buffer_rows * width * 3;
154		20	scatter_displs[proc] = (dist.row_displs[proc] - proc_top) * width * 3;
155			}
156
157	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	10	const uint8_t *send_buf = (rank == 0) ? full_image : nullptr;
158	1/2 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken.	10	MPI_Scatterv(send_buf, scatter_counts.data(), scatter_displs.data(), MPI_UNSIGNED_CHAR, local_input.data(),
159			static_cast<int>(local_input.size()), MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD);
160		10	}
161
162		10	void RunLocalTBB(int rank, int world_size, int width, int height, const GridSlicing &dist,
163			const std::vector<uint8_t> &local_input, std::vector<uint8_t> &local_output) {
164	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	10	const int local_rows = dist.rows_per_proc[rank];
165	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	10	if (local_rows == 0) {
166		5	return;
167			}
168
169		5	const int total_block_rows = height / kBlockSize;
170		5	const int height_remainder = height % kBlockSize;
171		5	const int num_col_blocks = width / kBlockSize;
172		5	const bool width_has_remainder = (width % kBlockSize) != 0;
173
174		5	const int local_block_rows =
175	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 5 times.	5	(rank < total_block_rows % world_size) ? ((total_block_rows / world_size) + 1) : (total_block_rows / world_size);
176		5	const bool is_last = (rank == world_size - 1);
177
178		5	const int total_local_bi = local_block_rows + static_cast<int>(is_last && height_remainder > 0);
179		5	const int total_local_bj = num_col_blocks + static_cast<int>(width_has_remainder);
180
181		5	tbb::parallel_for(tbb::blocked_range2d<int>(0, total_local_bi, 0, total_local_bj),
182		5	[&](const tbb::blocked_range2d<int> &r) {
183		5	ProcessGridRange(r, local_block_rows, num_col_blocks, local_rows, width, height, rank, dist, local_input,
184			local_output);
185		5	});
186			}
187
188		10	void GatherAndBroadcast(int world_size, int width, int height, const GridSlicing &dist,
189			const std::vector<uint8_t> &local_output, std::vector<uint8_t> &result) {
190		10	std::vector<int> recv_counts(world_size);
191	1/4 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	10	std::vector<int> recv_displs(world_size);
192	2/2 ✓ Branch 0 taken 20 times. ✓ Branch 1 taken 10 times.	30	for (int proc = 0; proc < world_size; ++proc) {
193		20	recv_counts[proc] = dist.rows_per_proc[proc] * width * 3;
194		20	recv_displs[proc] = dist.row_displs[proc] * width * 3;
195			}
196	2/6 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken. ✓ Branch 4 taken 10 times. ✗ Branch 5 not taken. ✗ Branch 6 not taken. ✗ Branch 7 not taken.	10	result.assign(static_cast<size_t>(height) * width * 3, 0);
197	1/2 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken.	10	MPI_Gatherv(local_output.data(), static_cast<int>(local_output.size()), MPI_UNSIGNED_CHAR, result.data(),
198			recv_counts.data(), recv_displs.data(), MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD);
199	1/2 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken.	10	MPI_Bcast(result.data(), static_cast<int>(result.size()), MPI_UNSIGNED_CHAR, 0, MPI_COMM_WORLD);
200		10	}
201
202			} // namespace
203
204	1/2 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken.	10	NikolaevDBlockLinearImageFilteringALL::NikolaevDBlockLinearImageFilteringALL(const InType &in) {
205			SetTypeOfTask(GetStaticTypeOfTask());
206		10	int rank = 0;
207	1/2 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken.	10	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
208	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	10	if (rank == 0) {
209			GetInput() = in;
210			}
211		10	GetOutput() = std::vector<uint8_t>();
212		10	}
213
214		10	bool NikolaevDBlockLinearImageFilteringALL::ValidationImpl() {
215		10	int rank = 0;
216		10	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
217	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	10	if (rank != 0) {
218			return true;
219			}
220		5	return std::get<0>(GetInput()) * std::get<1>(GetInput()) * 3 == static_cast<int>(std::get<2>(GetInput()).size());
221			}
222
223		10	bool NikolaevDBlockLinearImageFilteringALL::PreProcessingImpl() {
224		10	return true;
225			}
226
227		10	bool NikolaevDBlockLinearImageFilteringALL::RunImpl() {
228		10	int rank = 0;
229		10	int world_size = 1;
230		10	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
231		10	MPI_Comm_size(MPI_COMM_WORLD, &world_size);
232
233		10	std::array<int, 2> dims{};
234	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	10	if (rank == 0) {
235		5	dims[0] = std::get<0>(GetInput());
236		5	dims[1] = std::get<1>(GetInput());
237			}
238		10	MPI_Bcast(dims.data(), 2, MPI_INT, 0, MPI_COMM_WORLD);
239		10	const int width = dims[0];
240		10	const int height = dims[1];
241
242		10	const GridSlicing dist = BuildGridSlicing(rank, world_size, height);
243
244	1/2 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken.	10	std::vector<uint8_t> local_input(static_cast<size_t>(dist.buffer_height) * width * 3);
245	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	10	const uint8_t *full_image = (rank == 0) ? std::get<2>(GetInput()).data() : nullptr;
246	1/2 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken.	10	ScatterWithHalo(rank, world_size, width, height, dist, full_image, local_input);
247
248	1/4 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	10	std::vector<uint8_t> local_output(static_cast<size_t>(dist.rows_per_proc[rank]) * width * 3);
249
250	1/2 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken.	10	RunLocalTBB(rank, world_size, width, height, dist, local_input, local_output);
251
252		10	std::vector<uint8_t> result;
253	1/2 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken.	10	GatherAndBroadcast(world_size, width, height, dist, local_output, result);
254			GetOutput() = std::move(result);
255		10	return true;
256		10	}
257
258		10	bool NikolaevDBlockLinearImageFilteringALL::PostProcessingImpl() {
259		10	return true;
260			}
261
262			} // namespace nikolaev_d_block_linear_image_filtering
263