GCC Code Coverage Report

Directory:	./
File:	tasks/safronov_m_multiplication_matrix_blockscheme_cannon/all/src/ops_all.cpp
Date:	2026-06-04 20:25:32

	Exec	Total	Coverage
Lines:	106	124	85.5%
Functions:	11	13	84.6%
Branches:	65	126	51.6%

  
      Line
      Branch
      Exec
      Source
    
      #include "safronov_m_multiplication_matrix_blockscheme_cannon/all/include/ops_all.hpp"
    
      #include <mpi.h>
    
      #include <algorithm>
    
      #include <cmath>
    
      #include <cstddef>
    
      #include <utility>
    
      #include <vector>
    
      #include "oneapi/tbb/blocked_range2d.h"
    
      #include "oneapi/tbb/parallel_for.h"
    
      #include "safronov_m_multiplication_matrix_blockscheme_cannon/common/include/common.hpp"
    
      namespace safronov_m_multiplication_matrix_blocksscheme_cannon {
    
        1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.

      16
      SafronovMMultiplicationMatrixBlockSchemeCannonALL::SafronovMMultiplicationMatrixBlockSchemeCannonALL(const InType &in) {
    
        SetTypeOfTask(GetStaticTypeOfTask());
    
        GetInput() = in;
    
      16
      }
    
      16
      bool SafronovMMultiplicationMatrixBlockSchemeCannonALL::ValidationImpl() {
    
        const auto &in = GetInput();
    
      16
        int n = std::get<0>(in);
    
        const auto &a = std::get<1>(in);
    
        const auto &b = std::get<2>(in);
    
        6/12✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 16 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 16 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 16 times.
✗ Branch 8 not taken.
✓ Branch 9 taken 16 times.
✗ Branch 10 not taken.
✓ Branch 11 taken 16 times.

      16
        return (n > 0) && (!a.empty() && !b.empty()) && (a.size() == a[0].size()) && (b.size() == b[0].size()) &&
    
      16
               (a.size() == b.size());
    
      }
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.

      16
      bool SafronovMMultiplicationMatrixBlockSchemeCannonALL::PreProcessingImpl() {
    
        GetOutput().clear();
    
      16
        return true;
    
      }
    
      ✗
      int SafronovMMultiplicationMatrixBlockSchemeCannonALL::CalcPaddedSize(int n, int q) {
    
      16
        return ((n + q - 1) / q) * q;
    
      }
    
      16
      void SafronovMMultiplicationMatrixBlockSchemeCannonALL::PadMatrix(const std::vector<std::vector<double>> &src,
    
                                                                        std::vector<std::vector<double>> &dst, int padded_n) {
    
      16
        auto sz = static_cast<size_t>(padded_n);
    
        1/2✓ Branch 2 taken 16 times.
✗ Branch 3 not taken.

      32
        dst = std::vector<std::vector<double>>(sz, std::vector<double>(sz, 0.0));
    
        size_t n = src.size();
    
        2/2✓ Branch 0 taken 80 times.
✓ Branch 1 taken 16 times.

      96
        for (size_t i = 0; i < n; ++i) {
    
      80
          std::copy(src[i].begin(), src[i].end(), dst[i].begin());
    
        }
    
      16
      }
    
      8
      void SafronovMMultiplicationMatrixBlockSchemeCannonALL::ParallelMultiplyBlocks(const std::vector<double> &a,
    
                                                                                     const std::vector<double> &b,
    
                                                                                     std::vector<double> &c, int block_size) {
    
      248
        tbb::parallel_for(tbb::blocked_range2d<int>(0, block_size, 0, block_size), [&](const tbb::blocked_range2d<int> &r) {
    
        2/2✓ Branch 0 taken 240 times.
✓ Branch 1 taken 240 times.

      480
          for (int i = r.rows().begin(); i < r.rows().end(); ++i) {
    
        2/2✓ Branch 0 taken 1666 times.
✓ Branch 1 taken 240 times.

      1906
            for (int k = 0; k < block_size; ++k) {
    
      1666
              double temp = a[(i * block_size) + k];
    
        2/2✓ Branch 0 taken 1684 times.
✓ Branch 1 taken 1666 times.

      3350
              for (int j = r.cols().begin(); j < r.cols().end(); ++j) {
    
      1684
                c[(i * block_size) + j] += temp * b[(k * block_size) + j];
    
              }
    
            }
    
          }
    
      240
        });
    
      8
      }
    
      8
      void SafronovMMultiplicationMatrixBlockSchemeCannonALL::DistributeData(
    
          MPI_Comm comm, int worker_rank, int worker_size, int q, int block_size,
    
          const std::vector<std::vector<double>> &matrix_a_full, const std::vector<std::vector<double>> &matrix_b_full,
    
          std::vector<double> &local_a, std::vector<double> &local_b) {
    
      8
        size_t b_sz = static_cast<size_t>(block_size) * block_size;
    
        1/2✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.

      8
        if (worker_rank == 0) {
    
        2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.

      16
          for (int proc = 0; proc < worker_size; ++proc) {
    
      8
            int row = proc / q;
    
      8
            int col = proc % q;
    
      8
            std::vector<double> send_a(static_cast<size_t>(block_size) * block_size);
    
        1/4✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      8
            std::vector<double> send_b(static_cast<size_t>(block_size) * block_size);
    
        2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 8 times.

      48
            for (int i = 0; i < block_size; ++i) {
    
        2/2✓ Branch 0 taken 242 times.
✓ Branch 1 taken 40 times.

      282
              for (int j = 0; j < block_size; ++j) {
    
      242
                int a_row = (row * block_size) + i;
    
      242
                int a_col = (((col + row) % q) * block_size) + j;
    
      242
                int b_row = (((row + col) % q) * block_size) + i;
    
      242
                int b_col = (col * block_size) + j;
    
      242
                send_a[(i * block_size) + j] = matrix_a_full[a_row][a_col];
    
      242
                send_b[(i * block_size) + j] = matrix_b_full[b_row][b_col];
    
              }
    
            }
    
        1/2✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.

      8
            if (proc == 0) {
    
              local_a = std::move(send_a);
    
              local_b = std::move(send_b);
    
            } else {
    
      ✗
              MPI_Send(send_a.data(), block_size * block_size, MPI_DOUBLE, proc, 0, comm);
    
      ✗
              MPI_Send(send_b.data(), block_size * block_size, MPI_DOUBLE, proc, 1, comm);
    
            }
    
          }
    
        } else {
    
      ✗
          local_a.resize(b_sz);
    
      ✗
          local_b.resize(b_sz);
    
      ✗
          MPI_Recv(local_a.data(), block_size * block_size, MPI_DOUBLE, 0, 0, comm, MPI_STATUS_IGNORE);
    
      ✗
          MPI_Recv(local_b.data(), block_size * block_size, MPI_DOUBLE, 0, 1, comm, MPI_STATUS_IGNORE);
    
        }
    
      8
      }
    
      8
      void SafronovMMultiplicationMatrixBlockSchemeCannonALL::CannonAlgorithm(MPI_Comm comm, int worker_rank, int q,
    
                                                                              int block_size, std::vector<double> &local_a,
    
                                                                              std::vector<double> &local_b,
    
                                                                              std::vector<double> &local_c) {
    
      8
        int row = worker_rank / q;
    
      8
        int col = worker_rank % q;
    
      8
        int left = (row * q) + ((col - 1 + q) % q);
    
      8
        int right = (row * q) + ((col + 1) % q);
    
      8
        int up = (((row - 1 + q) % q) * q) + col;
    
      8
        int down = (((row + 1) % q) * q) + col;
    
        2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.

      16
        for (int step = 0; step < q; ++step) {
    
      8
          ParallelMultiplyBlocks(local_a, local_b, local_c, block_size);
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.

      8
          if (step < q - 1) {
    
      ✗
            std::vector<double> next_a(static_cast<size_t>(block_size) * block_size);
    
      ✗
            std::vector<double> next_b(static_cast<size_t>(block_size) * block_size);
    
      ✗
            MPI_Sendrecv(local_a.data(), block_size * block_size, MPI_DOUBLE, left, 10, next_a.data(),
    
                         block_size * block_size, MPI_DOUBLE, right, 10, comm, MPI_STATUS_IGNORE);
    
      ✗
            MPI_Sendrecv(local_b.data(), block_size * block_size, MPI_DOUBLE, up, 11, next_b.data(), block_size * block_size,
    
                         MPI_DOUBLE, down, 11, comm, MPI_STATUS_IGNORE);
    
            local_a = std::move(next_a);
    
            local_b = std::move(next_b);
    
          }
    
        }
    
      8
      }
    
      ✗
      void SafronovMMultiplicationMatrixBlockSchemeCannonALL::FillResultFromBuffer(std::vector<double> &flat_result,
    
                                                                                   const std::vector<double> &buffer, int row,
    
                                                                                   int col, int block_size, int padded_n) {
    
        2/6✓ Branch 0 taken 40 times.
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.

      48
        for (int i = 0; i < block_size; ++i) {
    
        2/6✓ Branch 0 taken 242 times.
✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.

      282
          for (int j = 0; j < block_size; ++j) {
    
      ✗
            int global_row = (row * block_size) + i;
    
      ✗
            int global_col = (col * block_size) + j;
    
      242
            flat_result[(global_row * padded_n) + global_col] = buffer[(i * block_size) + j];
    
          }
    
        }
    
      ✗
      }
    
      8
      void SafronovMMultiplicationMatrixBlockSchemeCannonALL::CollectResult(MPI_Comm comm, int worker_rank, int worker_size,
    
                                                                            int q, int block_size,
    
                                                                            std::vector<double> &flat_result,
    
                                                                            const std::vector<double> &local_c) {
    
      8
        int padded_n = q * block_size;
    
        1/2✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.

      8
        if (worker_rank == 0) {
    
          FillResultFromBuffer(flat_result, local_c, 0, 0, block_size, padded_n);
    
      8
          std::vector<double> recv_buf(static_cast<std::size_t>(block_size) * block_size);
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.

      8
          for (int proc = 1; proc < worker_size; ++proc) {
    
      ✗
            MPI_Recv(recv_buf.data(), block_size * block_size, MPI_DOUBLE, proc, 20, comm, MPI_STATUS_IGNORE);
    
      ✗
            FillResultFromBuffer(flat_result, recv_buf, proc / q, proc % q, block_size, padded_n);
    
          }
    
        } else {
    
      ✗
          MPI_Send(local_c.data(), block_size * block_size, MPI_DOUBLE, 0, 20, comm);
    
        }
    
      8
      }
    
      16
      bool SafronovMMultiplicationMatrixBlockSchemeCannonALL::RunImpl() {
    
      16
        int rank = 0;
    
      16
        int size = 1;
    
      16
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    
      16
        MPI_Comm_size(MPI_COMM_WORLD, &size);
    
        2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.

      16
        int q = static_cast<int>(std::sqrt(size));
    
      16
        int active = q * q;
    
      16
        int n = 0;
    
        2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.

      16
        if (rank == 0) {
    
      8
          n = static_cast<int>(std::get<1>(GetInput()).size());
    
        }
    
      16
        MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
    
      16
        int padded_n = CalcPaddedSize(n, std::max(1, q));
    
        int block_size = padded_n / std::max(1, q);
    
      16
        std::vector<std::vector<double>> padded_a;
    
      16
        std::vector<std::vector<double>> padded_b;
    
        2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.

      16
        if (rank == 0) {
    
        1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.

      8
          PadMatrix(std::get<1>(GetInput()), padded_a, padded_n);
    
        1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.

      8
          PadMatrix(std::get<2>(GetInput()), padded_b, padded_n);
    
        }
    
        1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.

      16
        std::vector<double> flat_result(static_cast<size_t>(padded_n) * padded_n, 0.0);
    
      16
        MPI_Comm comm = MPI_COMM_NULL;
    
        2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.

      16
        int color = (rank < active) ? 0 : MPI_UNDEFINED;
    
        1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.

      16
        MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm);
    
        2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.

      16
        if (rank < active) {
    
      8
          int wrank = 0;
    
      8
          int wsize = 0;
    
        1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.

      8
          MPI_Comm_rank(comm, &wrank);
    
        1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.

      8
          MPI_Comm_size(comm, &wsize);
    
        1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.

      8
          std::vector<double> local_a(static_cast<size_t>(block_size) * block_size);
    
        1/4✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      8
          std::vector<double> local_b(static_cast<size_t>(block_size) * block_size);
    
        1/4✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      8
          std::vector<double> local_c(static_cast<size_t>(block_size) * block_size, 0.0);
    
        1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.

      8
          DistributeData(comm, wrank, wsize, q, block_size, padded_a, padded_b, local_a, local_b);
    
        1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.

      8
          CannonAlgorithm(comm, wrank, q, block_size, local_a, local_b, local_c);
    
        1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.

      8
          CollectResult(comm, wrank, wsize, q, block_size, flat_result, local_c);
    
        1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.

      8
          MPI_Comm_free(&comm);
    
        }
    
        1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.

      16
        MPI_Bcast(flat_result.data(), padded_n * padded_n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    
        2/6✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 16 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.

      16
        std::vector<std::vector<double>> result(static_cast<size_t>(n), std::vector<double>(static_cast<size_t>(n)));
    
      16
        auto un = static_cast<size_t>(n);
    
        auto upadded_n = static_cast<size_t>(padded_n);
    
        2/2✓ Branch 0 taken 80 times.
✓ Branch 1 taken 16 times.

      96
        for (size_t i = 0; i < un; ++i) {
    
        2/2✓ Branch 0 taken 484 times.
✓ Branch 1 taken 80 times.

      564
          for (size_t j = 0; j < un; ++j) {
    
      484
            result[i][j] = flat_result[(i * upadded_n) + j];
    
          }
    
        }
    
      16
        GetOutput() = std::move(result);
    
      16
        return true;
    
      32
      }
    
      16
      bool SafronovMMultiplicationMatrixBlockSchemeCannonALL::PostProcessingImpl() {
    
      16
        return true;
    
      }
    
      }  // namespace safronov_m_multiplication_matrix_blocksscheme_cannon

Line	Branch	Exec	Source
1			#include "safronov_m_multiplication_matrix_blockscheme_cannon/all/include/ops_all.hpp"
2
3			#include <mpi.h>
4
5			#include <algorithm>
6			#include <cmath>
7			#include <cstddef>
8			#include <utility>
9			#include <vector>
10
11			#include "oneapi/tbb/blocked_range2d.h"
12			#include "oneapi/tbb/parallel_for.h"
13			#include "safronov_m_multiplication_matrix_blockscheme_cannon/common/include/common.hpp"
14
15			namespace safronov_m_multiplication_matrix_blocksscheme_cannon {
16
17	1/2 ✓ Branch 1 taken 16 times. ✗ Branch 2 not taken.	16	SafronovMMultiplicationMatrixBlockSchemeCannonALL::SafronovMMultiplicationMatrixBlockSchemeCannonALL(const InType &in) {
18			SetTypeOfTask(GetStaticTypeOfTask());
19			GetInput() = in;
20		16	}
21
22		16	bool SafronovMMultiplicationMatrixBlockSchemeCannonALL::ValidationImpl() {
23			const auto &in = GetInput();
24		16	int n = std::get<0>(in);
25			const auto &a = std::get<1>(in);
26			const auto &b = std::get<2>(in);
27
28	6/12 ✓ Branch 0 taken 16 times. ✗ Branch 1 not taken. ✗ Branch 2 not taken. ✓ Branch 3 taken 16 times. ✗ Branch 4 not taken. ✓ Branch 5 taken 16 times. ✗ Branch 6 not taken. ✓ Branch 7 taken 16 times. ✗ Branch 8 not taken. ✓ Branch 9 taken 16 times. ✗ Branch 10 not taken. ✓ Branch 11 taken 16 times.	16	return (n > 0) && (!a.empty() && !b.empty()) && (a.size() == a[0].size()) && (b.size() == b[0].size()) &&
29		16	(a.size() == b.size());
30			}
31
32	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 16 times.	16	bool SafronovMMultiplicationMatrixBlockSchemeCannonALL::PreProcessingImpl() {
33			GetOutput().clear();
34		16	return true;
35			}
36
37		✗	int SafronovMMultiplicationMatrixBlockSchemeCannonALL::CalcPaddedSize(int n, int q) {
38		16	return ((n + q - 1) / q) * q;
39			}
40
41		16	void SafronovMMultiplicationMatrixBlockSchemeCannonALL::PadMatrix(const std::vector<std::vector<double>> &src,
42			std::vector<std::vector<double>> &dst, int padded_n) {
43		16	auto sz = static_cast<size_t>(padded_n);
44
45	1/2 ✓ Branch 2 taken 16 times. ✗ Branch 3 not taken.	32	dst = std::vector<std::vector<double>>(sz, std::vector<double>(sz, 0.0));
46
47			size_t n = src.size();
48	2/2 ✓ Branch 0 taken 80 times. ✓ Branch 1 taken 16 times.	96	for (size_t i = 0; i < n; ++i) {
49		80	std::copy(src[i].begin(), src[i].end(), dst[i].begin());
50			}
51		16	}
52
53		8	void SafronovMMultiplicationMatrixBlockSchemeCannonALL::ParallelMultiplyBlocks(const std::vector<double> &a,
54			const std::vector<double> &b,
55			std::vector<double> &c, int block_size) {
56		248	tbb::parallel_for(tbb::blocked_range2d<int>(0, block_size, 0, block_size), [&](const tbb::blocked_range2d<int> &r) {
57	2/2 ✓ Branch 0 taken 240 times. ✓ Branch 1 taken 240 times.	480	for (int i = r.rows().begin(); i < r.rows().end(); ++i) {
58	2/2 ✓ Branch 0 taken 1666 times. ✓ Branch 1 taken 240 times.	1906	for (int k = 0; k < block_size; ++k) {
59		1666	double temp = a[(i * block_size) + k];
60
61	2/2 ✓ Branch 0 taken 1684 times. ✓ Branch 1 taken 1666 times.	3350	for (int j = r.cols().begin(); j < r.cols().end(); ++j) {
62		1684	c[(i * block_size) + j] += temp * b[(k * block_size) + j];
63			}
64			}
65			}
66		240	});
67		8	}
68
69		8	void SafronovMMultiplicationMatrixBlockSchemeCannonALL::DistributeData(
70			MPI_Comm comm, int worker_rank, int worker_size, int q, int block_size,
71			const std::vector<std::vector<double>> &matrix_a_full, const std::vector<std::vector<double>> &matrix_b_full,
72			std::vector<double> &local_a, std::vector<double> &local_b) {
73		8	size_t b_sz = static_cast<size_t>(block_size) * block_size;
74	1/2 ✓ Branch 0 taken 8 times. ✗ Branch 1 not taken.	8	if (worker_rank == 0) {
75	2/2 ✓ Branch 0 taken 8 times. ✓ Branch 1 taken 8 times.	16	for (int proc = 0; proc < worker_size; ++proc) {
76		8	int row = proc / q;
77		8	int col = proc % q;
78
79		8	std::vector<double> send_a(static_cast<size_t>(block_size) * block_size);
80	1/4 ✓ Branch 1 taken 8 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	8	std::vector<double> send_b(static_cast<size_t>(block_size) * block_size);
81
82	2/2 ✓ Branch 0 taken 40 times. ✓ Branch 1 taken 8 times.	48	for (int i = 0; i < block_size; ++i) {
83	2/2 ✓ Branch 0 taken 242 times. ✓ Branch 1 taken 40 times.	282	for (int j = 0; j < block_size; ++j) {
84		242	int a_row = (row * block_size) + i;
85		242	int a_col = (((col + row) % q) * block_size) + j;
86		242	int b_row = (((row + col) % q) * block_size) + i;
87		242	int b_col = (col * block_size) + j;
88
89		242	send_a[(i * block_size) + j] = matrix_a_full[a_row][a_col];
90		242	send_b[(i * block_size) + j] = matrix_b_full[b_row][b_col];
91			}
92			}
93
94	1/2 ✓ Branch 0 taken 8 times. ✗ Branch 1 not taken.	8	if (proc == 0) {
95			local_a = std::move(send_a);
96			local_b = std::move(send_b);
97			} else {
98		✗	MPI_Send(send_a.data(), block_size * block_size, MPI_DOUBLE, proc, 0, comm);
99		✗	MPI_Send(send_b.data(), block_size * block_size, MPI_DOUBLE, proc, 1, comm);
100			}
101			}
102			} else {
103		✗	local_a.resize(b_sz);
104		✗	local_b.resize(b_sz);
105		✗	MPI_Recv(local_a.data(), block_size * block_size, MPI_DOUBLE, 0, 0, comm, MPI_STATUS_IGNORE);
106		✗	MPI_Recv(local_b.data(), block_size * block_size, MPI_DOUBLE, 0, 1, comm, MPI_STATUS_IGNORE);
107			}
108		8	}
109
110		8	void SafronovMMultiplicationMatrixBlockSchemeCannonALL::CannonAlgorithm(MPI_Comm comm, int worker_rank, int q,
111			int block_size, std::vector<double> &local_a,
112			std::vector<double> &local_b,
113			std::vector<double> &local_c) {
114		8	int row = worker_rank / q;
115		8	int col = worker_rank % q;
116
117		8	int left = (row * q) + ((col - 1 + q) % q);
118		8	int right = (row * q) + ((col + 1) % q);
119		8	int up = (((row - 1 + q) % q) * q) + col;
120		8	int down = (((row + 1) % q) * q) + col;
121
122	2/2 ✓ Branch 0 taken 8 times. ✓ Branch 1 taken 8 times.	16	for (int step = 0; step < q; ++step) {
123		8	ParallelMultiplyBlocks(local_a, local_b, local_c, block_size);
124
125	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 8 times.	8	if (step < q - 1) {
126		✗	std::vector<double> next_a(static_cast<size_t>(block_size) * block_size);
127		✗	std::vector<double> next_b(static_cast<size_t>(block_size) * block_size);
128
129		✗	MPI_Sendrecv(local_a.data(), block_size * block_size, MPI_DOUBLE, left, 10, next_a.data(),
130			block_size * block_size, MPI_DOUBLE, right, 10, comm, MPI_STATUS_IGNORE);
131
132		✗	MPI_Sendrecv(local_b.data(), block_size * block_size, MPI_DOUBLE, up, 11, next_b.data(), block_size * block_size,
133			MPI_DOUBLE, down, 11, comm, MPI_STATUS_IGNORE);
134
135			local_a = std::move(next_a);
136			local_b = std::move(next_b);
137			}
138			}
139		8	}
140
141		✗	void SafronovMMultiplicationMatrixBlockSchemeCannonALL::FillResultFromBuffer(std::vector<double> &flat_result,
142			const std::vector<double> &buffer, int row,
143			int col, int block_size, int padded_n) {
144	2/6 ✓ Branch 0 taken 40 times. ✓ Branch 1 taken 8 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken. ✗ Branch 5 not taken.	48	for (int i = 0; i < block_size; ++i) {
145	2/6 ✓ Branch 0 taken 242 times. ✓ Branch 1 taken 40 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken. ✗ Branch 5 not taken.	282	for (int j = 0; j < block_size; ++j) {
146		✗	int global_row = (row * block_size) + i;
147		✗	int global_col = (col * block_size) + j;
148		242	flat_result[(global_row * padded_n) + global_col] = buffer[(i * block_size) + j];
149			}
150			}
151		✗	}
152
153		8	void SafronovMMultiplicationMatrixBlockSchemeCannonALL::CollectResult(MPI_Comm comm, int worker_rank, int worker_size,
154			int q, int block_size,
155			std::vector<double> &flat_result,
156			const std::vector<double> &local_c) {
157		8	int padded_n = q * block_size;
158
159	1/2 ✓ Branch 0 taken 8 times. ✗ Branch 1 not taken.	8	if (worker_rank == 0) {
160			FillResultFromBuffer(flat_result, local_c, 0, 0, block_size, padded_n);
161
162		8	std::vector<double> recv_buf(static_cast<std::size_t>(block_size) * block_size);
163	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 8 times.	8	for (int proc = 1; proc < worker_size; ++proc) {
164		✗	MPI_Recv(recv_buf.data(), block_size * block_size, MPI_DOUBLE, proc, 20, comm, MPI_STATUS_IGNORE);
165		✗	FillResultFromBuffer(flat_result, recv_buf, proc / q, proc % q, block_size, padded_n);
166			}
167			} else {
168		✗	MPI_Send(local_c.data(), block_size * block_size, MPI_DOUBLE, 0, 20, comm);
169			}
170		8	}
171
172		16	bool SafronovMMultiplicationMatrixBlockSchemeCannonALL::RunImpl() {
173		16	int rank = 0;
174		16	int size = 1;
175
176		16	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
177		16	MPI_Comm_size(MPI_COMM_WORLD, &size);
178
179	2/2 ✓ Branch 0 taken 8 times. ✓ Branch 1 taken 8 times.	16	int q = static_cast<int>(std::sqrt(size));
180		16	int active = q * q;
181
182		16	int n = 0;
183	2/2 ✓ Branch 0 taken 8 times. ✓ Branch 1 taken 8 times.	16	if (rank == 0) {
184		8	n = static_cast<int>(std::get<1>(GetInput()).size());
185			}
186
187		16	MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
188
189		16	int padded_n = CalcPaddedSize(n, std::max(1, q));
190			int block_size = padded_n / std::max(1, q);
191
192		16	std::vector<std::vector<double>> padded_a;
193		16	std::vector<std::vector<double>> padded_b;
194
195	2/2 ✓ Branch 0 taken 8 times. ✓ Branch 1 taken 8 times.	16	if (rank == 0) {
196	1/2 ✓ Branch 1 taken 8 times. ✗ Branch 2 not taken.	8	PadMatrix(std::get<1>(GetInput()), padded_a, padded_n);
197	1/2 ✓ Branch 1 taken 8 times. ✗ Branch 2 not taken.	8	PadMatrix(std::get<2>(GetInput()), padded_b, padded_n);
198			}
199
200	1/2 ✓ Branch 1 taken 16 times. ✗ Branch 2 not taken.	16	std::vector<double> flat_result(static_cast<size_t>(padded_n) * padded_n, 0.0);
201
202		16	MPI_Comm comm = MPI_COMM_NULL;
203	2/2 ✓ Branch 0 taken 8 times. ✓ Branch 1 taken 8 times.	16	int color = (rank < active) ? 0 : MPI_UNDEFINED;
204
205	1/2 ✓ Branch 1 taken 16 times. ✗ Branch 2 not taken.	16	MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm);
206
207	2/2 ✓ Branch 0 taken 8 times. ✓ Branch 1 taken 8 times.	16	if (rank < active) {
208		8	int wrank = 0;
209		8	int wsize = 0;
210
211	1/2 ✓ Branch 1 taken 8 times. ✗ Branch 2 not taken.	8	MPI_Comm_rank(comm, &wrank);
212	1/2 ✓ Branch 1 taken 8 times. ✗ Branch 2 not taken.	8	MPI_Comm_size(comm, &wsize);
213
214	1/2 ✓ Branch 1 taken 8 times. ✗ Branch 2 not taken.	8	std::vector<double> local_a(static_cast<size_t>(block_size) * block_size);
215	1/4 ✓ Branch 1 taken 8 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	8	std::vector<double> local_b(static_cast<size_t>(block_size) * block_size);
216	1/4 ✓ Branch 1 taken 8 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	8	std::vector<double> local_c(static_cast<size_t>(block_size) * block_size, 0.0);
217
218	1/2 ✓ Branch 1 taken 8 times. ✗ Branch 2 not taken.	8	DistributeData(comm, wrank, wsize, q, block_size, padded_a, padded_b, local_a, local_b);
219
220	1/2 ✓ Branch 1 taken 8 times. ✗ Branch 2 not taken.	8	CannonAlgorithm(comm, wrank, q, block_size, local_a, local_b, local_c);
221
222	1/2 ✓ Branch 1 taken 8 times. ✗ Branch 2 not taken.	8	CollectResult(comm, wrank, wsize, q, block_size, flat_result, local_c);
223
224	1/2 ✓ Branch 1 taken 8 times. ✗ Branch 2 not taken.	8	MPI_Comm_free(&comm);
225			}
226
227	1/2 ✓ Branch 1 taken 16 times. ✗ Branch 2 not taken.	16	MPI_Bcast(flat_result.data(), padded_n * padded_n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
228
229	2/6 ✓ Branch 1 taken 16 times. ✗ Branch 2 not taken. ✓ Branch 4 taken 16 times. ✗ Branch 5 not taken. ✗ Branch 6 not taken. ✗ Branch 7 not taken.	16	std::vector<std::vector<double>> result(static_cast<size_t>(n), std::vector<double>(static_cast<size_t>(n)));
230
231		16	auto un = static_cast<size_t>(n);
232			auto upadded_n = static_cast<size_t>(padded_n);
233
234	2/2 ✓ Branch 0 taken 80 times. ✓ Branch 1 taken 16 times.	96	for (size_t i = 0; i < un; ++i) {
235	2/2 ✓ Branch 0 taken 484 times. ✓ Branch 1 taken 80 times.	564	for (size_t j = 0; j < un; ++j) {
236		484	result[i][j] = flat_result[(i * upadded_n) + j];
237			}
238			}
239
240		16	GetOutput() = std::move(result);
241		16	return true;
242		32	}
243
244		16	bool SafronovMMultiplicationMatrixBlockSchemeCannonALL::PostProcessingImpl() {
245		16	return true;
246			}
247
248			} // namespace safronov_m_multiplication_matrix_blocksscheme_cannon
249