GCC Code Coverage Report

Directory:	./
File:	tasks/timur_a_cannon/all/src/ops_all.cpp
Date:	2026-06-04 20:25:32

	Exec	Total	Coverage
Lines:	86	86	100.0%
Functions:	12	12	100.0%
Branches:	58	94	61.7%

  
      Line
      Branch
      Exec
      Source
    
      #include "timur_a_cannon/all/include/ops_all.hpp"
    
      #include <mpi.h>
    
      #include <omp.h>
    
      #include <algorithm>
    
      #include <cstddef>
    
      #include <tuple>
    
      #include <utility>
    
      #include <vector>
    
      namespace timur_a_cannon {
    
      namespace {
    
      using Matrix = std::vector<std::vector<double>>;
    
      76
      void CopyBlocksForStep(const Matrix &src_a, const Matrix &src_b, int b_size, int global_i, int shift, int j,
    
                             Matrix &block_a, Matrix &block_b) {
    
        2/2✓ Branch 0 taken 196 times.
✓ Branch 1 taken 76 times.

      272
        for (int row = 0; row < b_size; ++row) {
    
        2/2✓ Branch 0 taken 556 times.
✓ Branch 1 taken 196 times.

      752
          for (int col = 0; col < b_size; ++col) {
    
      556
            block_a[row][col] = src_a[(global_i * b_size) + row][(shift * b_size) + col];
    
      556
            block_b[row][col] = src_b[(shift * b_size) + row][(j * b_size) + col];
    
          }
    
        }
    
      76
      }
    
      34
      void ScatterBlockIntoResult(Matrix &local_result, const Matrix &block_c, int local_i, int j, int b_size) {
    
        2/2✓ Branch 0 taken 86 times.
✓ Branch 1 taken 34 times.

      120
        for (int row = 0; row < b_size; ++row) {
    
        2/2✓ Branch 0 taken 242 times.
✓ Branch 1 taken 86 times.

      328
          for (int col = 0; col < b_size; ++col) {
    
      242
            local_result[(local_i * b_size) + row][(j * b_size) + col] = block_c[row][col];
    
          }
    
        }
    
      34
      }
    
        2/2✓ Branch 0 taken 47 times.
✓ Branch 1 taken 1 times.

      48
      std::vector<double> FlattenMatrix(const Matrix &matrix) {
    
        const std::size_t rows = matrix.size();
    
        2/2✓ Branch 0 taken 47 times.
✓ Branch 1 taken 1 times.

      48
        const std::size_t cols = rows == 0 ? 0 : matrix[0].size();
    
      48
        std::vector<double> flat(rows * cols);
    
        2/2✓ Branch 0 taken 200 times.
✓ Branch 1 taken 48 times.

      248
        for (std::size_t row = 0; row < rows; ++row) {
    
      200
          std::copy(matrix[row].begin(), matrix[row].end(), flat.begin() + static_cast<std::ptrdiff_t>(row * cols));
    
        }
    
      48
        return flat;
    
      }
    
      48
      Matrix UnflattenMatrix(const std::vector<double> &flat, std::size_t rows, std::size_t cols) {
    
        1/2✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.

      48
        Matrix matrix(rows, std::vector<double>(cols));
    
        2/2✓ Branch 0 taken 240 times.
✓ Branch 1 taken 48 times.

      288
        for (std::size_t row = 0; row < rows; ++row) {
    
      240
          const std::ptrdiff_t begin_idx = (static_cast<std::ptrdiff_t>(row) * static_cast<std::ptrdiff_t>(cols));
    
      240
          const std::ptrdiff_t end_idx = (static_cast<std::ptrdiff_t>(row + 1) * static_cast<std::ptrdiff_t>(cols));
    
      240
          std::copy(flat.begin() + begin_idx, flat.begin() + end_idx, matrix[row].begin());
    
        }
    
      48
        return matrix;
    
      }
    
      16
      std::pair<std::vector<int>, std::vector<int>> BuildGatherLayout(int size, int base_block_rows, int extra_block_rows,
    
                                                                      int b_size, int n) {
    
      16
        std::vector<int> recv_counts(size);
    
        1/4✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      16
        std::vector<int> displs(size);
    
        int offset = 0;
    
        2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 16 times.

      48
        for (int proc = 0; proc < size; ++proc) {
    
        2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 4 times.

      32
          const int proc_block_rows = base_block_rows + (proc < extra_block_rows ? 1 : 0);
    
      32
          recv_counts[proc] = proc_block_rows * b_size * n;
    
      32
          displs[proc] = offset;
    
      32
          offset += recv_counts[proc];
    
        }
    
        1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.

      32
        return {recv_counts, displs};
    
      }
    
      }  // namespace
    
      16
      TimurACannonMatrixMultiplicationALL::TimurACannonMatrixMultiplicationALL(
    
        1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.

      16
          const std::tuple<int, std::vector<std::vector<double>>, std::vector<std::vector<double>>> &in) {
    
        SetTypeOfTask(GetStaticTypeOfTask());
    
        GetInput() = in;
    
      16
      }
    
      16
      bool TimurACannonMatrixMultiplicationALL::ValidationImpl() {
    
        const auto &input = GetInput();
    
      16
        const int b_size = std::get<0>(input);
    
        const auto &mat_a = std::get<1>(input);
    
        const auto &mat_b = std::get<2>(input);
    
        3/6✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 16 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 16 times.
✗ Branch 5 not taken.

      16
        if (b_size <= 0 || mat_a.empty() || mat_b.empty()) {
    
          return false;
    
        }
    
        const std::size_t n = mat_a.size();
    
        2/4✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 16 times.
✗ Branch 3 not taken.

      16
        if (mat_b.size() != n || (n % static_cast<std::size_t>(b_size) != 0)) {
    
          return false;
    
        }
    
        const auto is_square_n = [n](const Matrix &matrix) {
    
          return std::ranges::all_of(matrix, [n](const std::vector<double> &row) { return row.size() == n; });
    
        };
    
        1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.

      16
        return is_square_n(mat_a) && is_square_n(mat_b);
    
      }
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.

      16
      bool TimurACannonMatrixMultiplicationALL::PreProcessingImpl() {
    
        GetOutput().clear();
    
      16
        return true;
    
      }
    
      76
      void TimurACannonMatrixMultiplicationALL::BlockMultiplyAccumulate(const std::vector<std::vector<double>> &a,
    
                                                                        const std::vector<std::vector<double>> &b,
    
                                                                        std::vector<std::vector<double>> &c, int b_size) {
    
        2/2✓ Branch 0 taken 196 times.
✓ Branch 1 taken 76 times.

      272
        for (int i = 0; i < b_size; ++i) {
    
        2/2✓ Branch 0 taken 556 times.
✓ Branch 1 taken 196 times.

      752
          for (int k = 0; k < b_size; ++k) {
    
      556
            const double temp = a[i][k];
    
        2/2✓ Branch 0 taken 1684 times.
✓ Branch 1 taken 556 times.

      2240
            for (int j = 0; j < b_size; ++j) {
    
      1684
              c[i][j] += temp * b[k][j];
    
            }
    
          }
    
        }
    
      76
      }
    
      16
      std::vector<std::vector<double>> TimurACannonMatrixMultiplicationALL::ComputeLocalResult(const Matrix &src_a,
    
                                                                                               const Matrix &src_b,
    
                                                                                               int b_size, int grid_sz,
    
                                                                                               int block_row_start,
    
                                                                                               int local_block_rows, int n) {
    
      16
        Matrix local_result(static_cast<std::size_t>(local_block_rows) * static_cast<std::size_t>(b_size),
    
        1/2✓ Branch 2 taken 16 times.
✗ Branch 3 not taken.

      16
                            std::vector<double>(static_cast<std::size_t>(n), 0.0));
    
      16
      #pragma omp parallel for default(none) \
    
          shared(local_result, src_a, src_b, b_size, grid_sz, block_row_start, local_block_rows)
    
        for (int local_i = 0; local_i < local_block_rows; ++local_i) {
    
          for (int j = 0; j < grid_sz; ++j) {
    
            Matrix block_c(b_size, std::vector<double>(b_size, 0.0));
    
            const int global_i = block_row_start + local_i;
    
            for (int step = 0; step < grid_sz; ++step) {
    
              const int shift = (global_i + j + step) % grid_sz;
    
              Matrix block_a(b_size, std::vector<double>(b_size));
    
              Matrix block_b(b_size, std::vector<double>(b_size));
    
              CopyBlocksForStep(src_a, src_b, b_size, global_i, shift, j, block_a, block_b);
    
              BlockMultiplyAccumulate(block_a, block_b, block_c, b_size);
    
            }
    
            ScatterBlockIntoResult(local_result, block_c, local_i, j, b_size);
    
          }
    
        }
    
      16
        return local_result;
    
      }
    
      16
      bool TimurACannonMatrixMultiplicationALL::RunImpl() {
    
        const auto &input = GetInput();
    
      16
        const int b_size = std::get<0>(input);
    
      16
        Matrix src_a = std::get<1>(input);
    
        1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.

      16
        Matrix src_b = std::get<2>(input);
    
      16
        const int n = static_cast<int>(src_a.size());
    
      16
        const int grid_sz = n / b_size;
    
      16
        const int total_elems = n * n;
    
      16
        int rank = 0;
    
      16
        int size = 1;
    
        1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.

      16
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    
        1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.

      16
        MPI_Comm_size(MPI_COMM_WORLD, &size);
    
        1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.

      16
        std::vector<double> flat_a = FlattenMatrix(src_a);
    
        1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.

      16
        std::vector<double> flat_b = FlattenMatrix(src_b);
    
        1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.

      16
        MPI_Bcast(flat_a.data(), total_elems, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    
        1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.

      16
        MPI_Bcast(flat_b.data(), total_elems, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    
        1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.

      16
        src_a = UnflattenMatrix(flat_a, static_cast<std::size_t>(n), static_cast<std::size_t>(n));
    
        1/4✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.

      16
        src_b = UnflattenMatrix(flat_b, static_cast<std::size_t>(n), static_cast<std::size_t>(n));
    
      16
        const int base_block_rows = grid_sz / size;
    
      16
        const int extra_block_rows = grid_sz % size;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
        const int local_block_rows = base_block_rows + (rank < extra_block_rows ? 1 : 0);
    
        1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.

      16
        const int block_row_start = (rank * base_block_rows) + std::min(rank, extra_block_rows);
    
        1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.

      16
        Matrix local_result = ComputeLocalResult(src_a, src_b, b_size, grid_sz, block_row_start, local_block_rows, n);
    
        1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.

      16
        std::vector<double> local_flat = FlattenMatrix(local_result);
    
        1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.

      16
        auto [recv_counts, displs] = BuildGatherLayout(size, base_block_rows, extra_block_rows, b_size, n);
    
        2/4✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 16 times.
✗ Branch 5 not taken.

      16
        std::vector<double> global_flat(total_elems);
    
        1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.

      16
        MPI_Allgatherv(local_flat.data(), static_cast<int>(local_flat.size()), MPI_DOUBLE, global_flat.data(),
    
                       recv_counts.data(), displs.data(), MPI_DOUBLE, MPI_COMM_WORLD);
    
        2/6✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✓ Branch 5 taken 16 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.

      16
        GetOutput() = UnflattenMatrix(global_flat, static_cast<std::size_t>(n), static_cast<std::size_t>(n));
    
      16
        return true;
    
      48
      }
    
      16
      bool TimurACannonMatrixMultiplicationALL::PostProcessingImpl() {
    
      16
        return true;
    
      }
    
      }  // namespace timur_a_cannon

Line	Branch	Exec	Source
1			#include "timur_a_cannon/all/include/ops_all.hpp"
2
3			#include <mpi.h>
4			#include <omp.h>
5
6			#include <algorithm>
7			#include <cstddef>
8			#include <tuple>
9			#include <utility>
10			#include <vector>
11
12			namespace timur_a_cannon {
13
14			namespace {
15
16			using Matrix = std::vector<std::vector<double>>;
17
18		76	void CopyBlocksForStep(const Matrix &src_a, const Matrix &src_b, int b_size, int global_i, int shift, int j,
19			Matrix &block_a, Matrix &block_b) {
20	2/2 ✓ Branch 0 taken 196 times. ✓ Branch 1 taken 76 times.	272	for (int row = 0; row < b_size; ++row) {
21	2/2 ✓ Branch 0 taken 556 times. ✓ Branch 1 taken 196 times.	752	for (int col = 0; col < b_size; ++col) {
22		556	block_a[row][col] = src_a[(global_i * b_size) + row][(shift * b_size) + col];
23		556	block_b[row][col] = src_b[(shift * b_size) + row][(j * b_size) + col];
24			}
25			}
26		76	}
27
28		34	void ScatterBlockIntoResult(Matrix &local_result, const Matrix &block_c, int local_i, int j, int b_size) {
29	2/2 ✓ Branch 0 taken 86 times. ✓ Branch 1 taken 34 times.	120	for (int row = 0; row < b_size; ++row) {
30	2/2 ✓ Branch 0 taken 242 times. ✓ Branch 1 taken 86 times.	328	for (int col = 0; col < b_size; ++col) {
31		242	local_result[(local_i * b_size) + row][(j * b_size) + col] = block_c[row][col];
32			}
33			}
34		34	}
35
36	2/2 ✓ Branch 0 taken 47 times. ✓ Branch 1 taken 1 times.	48	std::vector<double> FlattenMatrix(const Matrix &matrix) {
37			const std::size_t rows = matrix.size();
38	2/2 ✓ Branch 0 taken 47 times. ✓ Branch 1 taken 1 times.	48	const std::size_t cols = rows == 0 ? 0 : matrix[0].size();
39		48	std::vector<double> flat(rows * cols);
40
41	2/2 ✓ Branch 0 taken 200 times. ✓ Branch 1 taken 48 times.	248	for (std::size_t row = 0; row < rows; ++row) {
42		200	std::copy(matrix[row].begin(), matrix[row].end(), flat.begin() + static_cast<std::ptrdiff_t>(row * cols));
43			}
44
45		48	return flat;
46			}
47
48		48	Matrix UnflattenMatrix(const std::vector<double> &flat, std::size_t rows, std::size_t cols) {
49	1/2 ✓ Branch 2 taken 48 times. ✗ Branch 3 not taken.	48	Matrix matrix(rows, std::vector<double>(cols));
50
51	2/2 ✓ Branch 0 taken 240 times. ✓ Branch 1 taken 48 times.	288	for (std::size_t row = 0; row < rows; ++row) {
52		240	const std::ptrdiff_t begin_idx = (static_cast<std::ptrdiff_t>(row) * static_cast<std::ptrdiff_t>(cols));
53		240	const std::ptrdiff_t end_idx = (static_cast<std::ptrdiff_t>(row + 1) * static_cast<std::ptrdiff_t>(cols));
54		240	std::copy(flat.begin() + begin_idx, flat.begin() + end_idx, matrix[row].begin());
55			}
56
57		48	return matrix;
58			}
59
60		16	std::pair<std::vector<int>, std::vector<int>> BuildGatherLayout(int size, int base_block_rows, int extra_block_rows,
61			int b_size, int n) {
62		16	std::vector<int> recv_counts(size);
63	1/4 ✓ Branch 1 taken 16 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	16	std::vector<int> displs(size);
64			int offset = 0;
65	2/2 ✓ Branch 0 taken 32 times. ✓ Branch 1 taken 16 times.	48	for (int proc = 0; proc < size; ++proc) {
66	2/2 ✓ Branch 0 taken 28 times. ✓ Branch 1 taken 4 times.	32	const int proc_block_rows = base_block_rows + (proc < extra_block_rows ? 1 : 0);
67		32	recv_counts[proc] = proc_block_rows * b_size * n;
68		32	displs[proc] = offset;
69		32	offset += recv_counts[proc];
70			}
71	1/2 ✓ Branch 1 taken 16 times. ✗ Branch 2 not taken.	32	return {recv_counts, displs};
72			}
73
74			} // namespace
75
76		16	TimurACannonMatrixMultiplicationALL::TimurACannonMatrixMultiplicationALL(
77	1/2 ✓ Branch 1 taken 16 times. ✗ Branch 2 not taken.	16	const std::tuple<int, std::vector<std::vector<double>>, std::vector<std::vector<double>>> &in) {
78			SetTypeOfTask(GetStaticTypeOfTask());
79			GetInput() = in;
80		16	}
81
82		16	bool TimurACannonMatrixMultiplicationALL::ValidationImpl() {
83			const auto &input = GetInput();
84		16	const int b_size = std::get<0>(input);
85			const auto &mat_a = std::get<1>(input);
86			const auto &mat_b = std::get<2>(input);
87
88	3/6 ✓ Branch 0 taken 16 times. ✗ Branch 1 not taken. ✓ Branch 2 taken 16 times. ✗ Branch 3 not taken. ✓ Branch 4 taken 16 times. ✗ Branch 5 not taken.	16	if (b_size <= 0 \|\| mat_a.empty() \|\| mat_b.empty()) {
89			return false;
90			}
91
92			const std::size_t n = mat_a.size();
93	2/4 ✓ Branch 0 taken 16 times. ✗ Branch 1 not taken. ✓ Branch 2 taken 16 times. ✗ Branch 3 not taken.	16	if (mat_b.size() != n \|\| (n % static_cast<std::size_t>(b_size) != 0)) {
94			return false;
95			}
96
97			const auto is_square_n = [n](const Matrix &matrix) {
98			return std::ranges::all_of(matrix, [n](const std::vector<double> &row) { return row.size() == n; });
99			};
100
101	1/2 ✓ Branch 0 taken 16 times. ✗ Branch 1 not taken.	16	return is_square_n(mat_a) && is_square_n(mat_b);
102			}
103
104	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 16 times.	16	bool TimurACannonMatrixMultiplicationALL::PreProcessingImpl() {
105			GetOutput().clear();
106		16	return true;
107			}
108
109		76	void TimurACannonMatrixMultiplicationALL::BlockMultiplyAccumulate(const std::vector<std::vector<double>> &a,
110			const std::vector<std::vector<double>> &b,
111			std::vector<std::vector<double>> &c, int b_size) {
112	2/2 ✓ Branch 0 taken 196 times. ✓ Branch 1 taken 76 times.	272	for (int i = 0; i < b_size; ++i) {
113	2/2 ✓ Branch 0 taken 556 times. ✓ Branch 1 taken 196 times.	752	for (int k = 0; k < b_size; ++k) {
114		556	const double temp = a[i][k];
115	2/2 ✓ Branch 0 taken 1684 times. ✓ Branch 1 taken 556 times.	2240	for (int j = 0; j < b_size; ++j) {
116		1684	c[i][j] += temp * b[k][j];
117			}
118			}
119			}
120		76	}
121
122		16	std::vector<std::vector<double>> TimurACannonMatrixMultiplicationALL::ComputeLocalResult(const Matrix &src_a,
123			const Matrix &src_b,
124			int b_size, int grid_sz,
125			int block_row_start,
126			int local_block_rows, int n) {
127		16	Matrix local_result(static_cast<std::size_t>(local_block_rows) * static_cast<std::size_t>(b_size),
128	1/2 ✓ Branch 2 taken 16 times. ✗ Branch 3 not taken.	16	std::vector<double>(static_cast<std::size_t>(n), 0.0));
129
130		16	#pragma omp parallel for default(none) \
131			shared(local_result, src_a, src_b, b_size, grid_sz, block_row_start, local_block_rows)
132			for (int local_i = 0; local_i < local_block_rows; ++local_i) {
133			for (int j = 0; j < grid_sz; ++j) {
134			Matrix block_c(b_size, std::vector<double>(b_size, 0.0));
135			const int global_i = block_row_start + local_i;
136
137			for (int step = 0; step < grid_sz; ++step) {
138			const int shift = (global_i + j + step) % grid_sz;
139			Matrix block_a(b_size, std::vector<double>(b_size));
140			Matrix block_b(b_size, std::vector<double>(b_size));
141			CopyBlocksForStep(src_a, src_b, b_size, global_i, shift, j, block_a, block_b);
142			BlockMultiplyAccumulate(block_a, block_b, block_c, b_size);
143			}
144
145			ScatterBlockIntoResult(local_result, block_c, local_i, j, b_size);
146			}
147			}
148
149		16	return local_result;
150			}
151
152		16	bool TimurACannonMatrixMultiplicationALL::RunImpl() {
153			const auto &input = GetInput();
154		16	const int b_size = std::get<0>(input);
155		16	Matrix src_a = std::get<1>(input);
156	1/2 ✓ Branch 1 taken 16 times. ✗ Branch 2 not taken.	16	Matrix src_b = std::get<2>(input);
157		16	const int n = static_cast<int>(src_a.size());
158		16	const int grid_sz = n / b_size;
159		16	const int total_elems = n * n;
160
161		16	int rank = 0;
162		16	int size = 1;
163	1/2 ✓ Branch 1 taken 16 times. ✗ Branch 2 not taken.	16	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
164	1/2 ✓ Branch 1 taken 16 times. ✗ Branch 2 not taken.	16	MPI_Comm_size(MPI_COMM_WORLD, &size);
165
166	1/2 ✓ Branch 1 taken 16 times. ✗ Branch 2 not taken.	16	std::vector<double> flat_a = FlattenMatrix(src_a);
167	1/2 ✓ Branch 1 taken 16 times. ✗ Branch 2 not taken.	16	std::vector<double> flat_b = FlattenMatrix(src_b);
168
169	1/2 ✓ Branch 1 taken 16 times. ✗ Branch 2 not taken.	16	MPI_Bcast(flat_a.data(), total_elems, MPI_DOUBLE, 0, MPI_COMM_WORLD);
170	1/2 ✓ Branch 1 taken 16 times. ✗ Branch 2 not taken.	16	MPI_Bcast(flat_b.data(), total_elems, MPI_DOUBLE, 0, MPI_COMM_WORLD);
171
172	1/2 ✓ Branch 1 taken 16 times. ✗ Branch 2 not taken.	16	src_a = UnflattenMatrix(flat_a, static_cast<std::size_t>(n), static_cast<std::size_t>(n));
173	1/4 ✓ Branch 1 taken 16 times. ✗ Branch 2 not taken. ✗ Branch 5 not taken. ✗ Branch 6 not taken.	16	src_b = UnflattenMatrix(flat_b, static_cast<std::size_t>(n), static_cast<std::size_t>(n));
174
175		16	const int base_block_rows = grid_sz / size;
176		16	const int extra_block_rows = grid_sz % size;
177	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	const int local_block_rows = base_block_rows + (rank < extra_block_rows ? 1 : 0);
178	1/2 ✓ Branch 1 taken 16 times. ✗ Branch 2 not taken.	16	const int block_row_start = (rank * base_block_rows) + std::min(rank, extra_block_rows);
179
180	1/2 ✓ Branch 1 taken 16 times. ✗ Branch 2 not taken.	16	Matrix local_result = ComputeLocalResult(src_a, src_b, b_size, grid_sz, block_row_start, local_block_rows, n);
181
182	1/2 ✓ Branch 1 taken 16 times. ✗ Branch 2 not taken.	16	std::vector<double> local_flat = FlattenMatrix(local_result);
183	1/2 ✓ Branch 1 taken 16 times. ✗ Branch 2 not taken.	16	auto [recv_counts, displs] = BuildGatherLayout(size, base_block_rows, extra_block_rows, b_size, n);
184
185	2/4 ✓ Branch 1 taken 16 times. ✗ Branch 2 not taken. ✓ Branch 4 taken 16 times. ✗ Branch 5 not taken.	16	std::vector<double> global_flat(total_elems);
186	1/2 ✓ Branch 1 taken 16 times. ✗ Branch 2 not taken.	16	MPI_Allgatherv(local_flat.data(), static_cast<int>(local_flat.size()), MPI_DOUBLE, global_flat.data(),
187			recv_counts.data(), displs.data(), MPI_DOUBLE, MPI_COMM_WORLD);
188
189	2/6 ✓ Branch 1 taken 16 times. ✗ Branch 2 not taken. ✓ Branch 5 taken 16 times. ✗ Branch 6 not taken. ✗ Branch 7 not taken. ✗ Branch 8 not taken.	16	GetOutput() = UnflattenMatrix(global_flat, static_cast<std::size_t>(n), static_cast<std::size_t>(n));
190		16	return true;
191		48	}
192
193		16	bool TimurACannonMatrixMultiplicationALL::PostProcessingImpl() {
194		16	return true;
195			}
196
197			} // namespace timur_a_cannon
198