GCC Code Coverage Report

Directory:	./
File:	tasks/chyokotov_a_dense_matrix_mul_foxs_algorithm/all/src/ops_all.cpp
Date:	2026-06-04 20:25:32

	Exec	Total	Coverage
Lines:	111	125	88.8%
Functions:	12	13	92.3%
Branches:	61	110	55.5%

  
      Line
      Branch
      Exec
      Source
    
      #include "chyokotov_a_dense_matrix_mul_foxs_algorithm/all/include/ops_all.hpp"
    
      #include <mpi.h>
    
      #include <tbb/blocked_range2d.h>
    
      #include <tbb/parallel_for.h>
    
      #include <algorithm>
    
      #include <cmath>
    
      #include <cstddef>
    
      #include <utility>
    
      #include <vector>
    
      #include "chyokotov_a_dense_matrix_mul_foxs_algorithm/common/include/common.hpp"
    
      namespace chyokotov_a_dense_matrix_mul_foxs_algorithm {
    
        1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.

      8
      ChyokotovADenseMatMulFoxAlgorithmALL::ChyokotovADenseMatMulFoxAlgorithmALL(const InType &in) {
    
        SetTypeOfTask(GetStaticTypeOfTask());
    
        GetInput() = in;
    
        GetOutput().clear();
    
      8
      }
    
      8
      bool ChyokotovADenseMatMulFoxAlgorithmALL::ValidationImpl() {
    
      8
        return (GetInput().first.size() == GetInput().second.size());
    
      }
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.

      8
      bool ChyokotovADenseMatMulFoxAlgorithmALL::PreProcessingImpl() {
    
        GetOutput().clear();
    
      8
        GetOutput().resize(GetInput().first.size(), 0.0);
    
      8
        return true;
    
      }
    
      ✗
      int ChyokotovADenseMatMulFoxAlgorithmALL::CalcPaddedSize(int n, int q) {
    
      ✗
        if (q <= 0) {
    
          return n;
    
        }
    
      ✗
        return ((n + q - 1) / q) * q;
    
      }
    
      6
      void ChyokotovADenseMatMulFoxAlgorithmALL::PadMatrix(const std::vector<double> &src, std::vector<double> &dst,
    
                                                           int original_n, int padded_n) {
    
      6
        dst.assign(static_cast<size_t>(padded_n) * padded_n, 0.0);
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 6 times.

      20
        for (int i = 0; i < original_n; ++i) {
    
        2/2✓ Branch 0 taken 42 times.
✓ Branch 1 taken 14 times.

      56
          for (int j = 0; j < original_n; ++j) {
    
      42
            dst[(i * padded_n) + j] = src[(i * original_n) + j];
    
          }
    
        }
    
      6
      }
    
      3
      void ChyokotovADenseMatMulFoxAlgorithmALL::Multiply(const std::vector<double> &a_block,
    
                                                          const std::vector<double> &b_block, std::vector<double> &c_block,
    
                                                          int block_size) {
    
      6
        tbb::parallel_for(tbb::blocked_range2d<int>(0, block_size, 0, block_size),
    
      24
                          [&](const tbb::blocked_range2d<int> &range) {
    
        2/2✓ Branch 0 taken 21 times.
✓ Branch 1 taken 21 times.

      42
          for (int i = range.rows().begin(); i < range.rows().end(); ++i) {
    
        2/2✓ Branch 0 taken 73 times.
✓ Branch 1 taken 21 times.

      94
            for (int k = 0; k < block_size; ++k) {
    
      73
              double temp = a_block[(i * block_size) + k];
    
        2/2✓ Branch 0 taken 73 times.
✓ Branch 1 taken 73 times.

      146
              for (int j = range.cols().begin(); j < range.cols().end(); ++j) {
    
      73
                c_block[(i * block_size) + j] += temp * b_block[(k * block_size) + j];
    
              }
    
            }
    
          }
    
      21
        });
    
      3
      }
    
      3
      void ChyokotovADenseMatMulFoxAlgorithmALL::DistributeData(MPI_Comm comm, int worker_rank, int worker_size, int q,
    
                                                                int block_size, const std::vector<double> &matrix_a_full,
    
                                                                const std::vector<double> &matrix_b_full,
    
                                                                std::vector<double> &local_a, std::vector<double> &local_b) {
    
      3
        size_t block_sz = static_cast<size_t>(block_size) * block_size;
    
        1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.

      3
        if (worker_rank == 0) {
    
        2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.

      6
          for (int proc = 0; proc < worker_size; ++proc) {
    
      3
            int row = proc / q;
    
      3
            int col = proc % q;
    
      3
            std::vector<double> send_a(block_sz);
    
        1/4✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      3
            std::vector<double> send_b(block_sz);
    
        2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 3 times.

      10
            for (int i = 0; i < block_size; ++i) {
    
        2/2✓ Branch 0 taken 21 times.
✓ Branch 1 taken 7 times.

      28
              for (int j = 0; j < block_size; ++j) {
    
      21
                int a_row = (row * block_size) + i;
    
      21
                int a_col = (((col + row) % q) * block_size) + j;
    
      21
                int b_row = (((row + col) % q) * block_size) + i;
    
      21
                int b_col = (col * block_size) + j;
    
      21
                send_a[(i * block_size) + j] = matrix_a_full[(a_row * block_size * q) + a_col];
    
      21
                send_b[(i * block_size) + j] = matrix_b_full[(b_row * block_size * q) + b_col];
    
              }
    
            }
    
        1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.

      3
            if (proc == 0) {
    
              local_a = std::move(send_a);
    
              local_b = std::move(send_b);
    
            } else {
    
      ✗
              MPI_Send(send_a.data(), static_cast<int>(block_sz), MPI_DOUBLE, proc, 0, comm);
    
      ✗
              MPI_Send(send_b.data(), static_cast<int>(block_sz), MPI_DOUBLE, proc, 1, comm);
    
            }
    
          }
    
        } else {
    
      ✗
          local_a.resize(block_sz);
    
      ✗
          local_b.resize(block_sz);
    
      ✗
          MPI_Recv(local_a.data(), static_cast<int>(block_sz), MPI_DOUBLE, 0, 0, comm, MPI_STATUS_IGNORE);
    
      ✗
          MPI_Recv(local_b.data(), static_cast<int>(block_sz), MPI_DOUBLE, 0, 1, comm, MPI_STATUS_IGNORE);
    
        }
    
      3
      }
    
      3
      void ChyokotovADenseMatMulFoxAlgorithmALL::FoxAlgorithm(MPI_Comm comm, int worker_rank, int q, int block_size,
    
                                                              std::vector<double> &local_a, std::vector<double> &local_b,
    
                                                              std::vector<double> &local_c) {
    
      3
        int row = worker_rank / q;
    
      3
        int col = worker_rank % q;
    
      3
        int left = (row * q) + ((col - 1 + q) % q);
    
      3
        int right = (row * q) + ((col + 1) % q);
    
      3
        int up = (((row - 1 + q) % q) * q) + col;
    
      3
        int down = (((row + 1) % q) * q) + col;
    
      3
        size_t block_sz = static_cast<size_t>(block_size) * block_size;
    
      3
        std::vector<double> next_a(block_sz);
    
        1/4✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      3
        std::vector<double> next_b(block_sz);
    
        2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.

      6
        for (int step = 0; step < q; ++step) {
    
        1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.

      3
          Multiply(local_a, local_b, local_c, block_size);
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.

      3
          if (step < q - 1) {
    
      ✗
            MPI_Sendrecv(local_a.data(), static_cast<int>(block_sz), MPI_DOUBLE, left, 10, next_a.data(),
    
                         static_cast<int>(block_sz), MPI_DOUBLE, right, 10, comm, MPI_STATUS_IGNORE);
    
      ✗
            MPI_Sendrecv(local_b.data(), static_cast<int>(block_sz), MPI_DOUBLE, up, 11, next_b.data(),
    
                         static_cast<int>(block_sz), MPI_DOUBLE, down, 11, comm, MPI_STATUS_IGNORE);
    
            local_a.swap(next_a);
    
            local_b.swap(next_b);
    
          }
    
        }
    
      3
      }
    
      3
      void ChyokotovADenseMatMulFoxAlgorithmALL::CollectResult(MPI_Comm comm, int worker_rank, int worker_size, int q,
    
                                                               int block_size, std::vector<double> &flat_result,
    
                                                               const std::vector<double> &local_c) {
    
      3
        int padded_n = q * block_size;
    
      3
        auto fillres = [&](const std::vector<double> &buffer, int row, int col) {
    
        2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 3 times.

      10
          for (int i = 0; i < block_size; ++i) {
    
        2/2✓ Branch 0 taken 21 times.
✓ Branch 1 taken 7 times.

      28
            for (int j = 0; j < block_size; ++j) {
    
      21
              int global_row = (row * block_size) + i;
    
      21
              int global_col = (col * block_size) + j;
    
      21
              flat_result[(global_row * padded_n) + global_col] = buffer[(i * block_size) + j];
    
            }
    
          }
    
      6
        };
    
        1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.

      3
        if (worker_rank == 0) {
    
      3
          fillres(local_c, 0, 0);
    
      3
          std::vector<double> recv_buf(static_cast<size_t>(block_size) * block_size);
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.

      3
          for (int proc = 1; proc < worker_size; ++proc) {
    
      ✗
            MPI_Recv(recv_buf.data(), static_cast<int>(recv_buf.size()), MPI_DOUBLE, proc, 20, comm, MPI_STATUS_IGNORE);
    
      ✗
            fillres(recv_buf, proc / q, proc % q);
    
          }
    
        } else {
    
      ✗
          MPI_Send(local_c.data(), static_cast<int>(local_c.size()), MPI_DOUBLE, 0, 20, comm);
    
        }
    
      3
      }
    
      8
      bool ChyokotovADenseMatMulFoxAlgorithmALL::RunImpl() {
    
      8
        int rank = 0;
    
      8
        int size = 1;
    
      8
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    
      8
        MPI_Comm_size(MPI_COMM_WORLD, &size);
    
      8
        int q = static_cast<int>(std::sqrt(static_cast<double>(size)));
    
      8
        int active = q * q;
    
      8
        int n = 0;
    
        2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.

      8
        if (rank == 0) {
    
      4
          n = static_cast<int>(std::sqrt(static_cast<double>(GetInput().first.size())));
    
        }
    
      8
        MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
        if (n == 0) {
    
          return true;
    
        }
    
        int padded_n = CalcPaddedSize(n, std::max(1, q));
    
      6
        int block_size = padded_n / std::max(1, q);
    
      6
        std::vector<double> padded_a;
    
      6
        std::vector<double> padded_b;
    
        2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.

      6
        if (rank == 0) {
    
        1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.

      3
          PadMatrix(GetInput().first, padded_a, n, padded_n);
    
        1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.

      3
          PadMatrix(GetInput().second, padded_b, n, padded_n);
    
        }
    
      6
        MPI_Comm comm = MPI_COMM_NULL;
    
        2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.

      6
        int color = (rank < active) ? 0 : MPI_UNDEFINED;
    
        1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.

      6
        MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm);
    
        1/4✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      6
        std::vector<double> flat_result(static_cast<size_t>(padded_n) * padded_n, 0.0);
    
        2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.

      6
        if (rank < active) {
    
      3
          int wrank = 0;
    
      3
          int wsize = 0;
    
        1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.

      3
          MPI_Comm_rank(comm, &wrank);
    
        1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.

      3
          MPI_Comm_size(comm, &wsize);
    
      3
          size_t block_sz = static_cast<size_t>(block_size) * block_size;
    
        1/4✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      3
          std::vector<double> local_a(block_sz);
    
        1/4✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      3
          std::vector<double> local_b(block_sz);
    
        1/4✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      3
          std::vector<double> local_c(block_sz, 0.0);
    
        1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.

      3
          DistributeData(comm, wrank, wsize, q, block_size, padded_a, padded_b, local_a, local_b);
    
        1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.

      3
          FoxAlgorithm(comm, wrank, q, block_size, local_a, local_b, local_c);
    
        1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.

      3
          CollectResult(comm, wrank, wsize, q, block_size, flat_result, local_c);
    
        1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.

      3
          MPI_Comm_free(&comm);
    
        }
    
        1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.

      6
        MPI_Bcast(flat_result.data(), padded_n * padded_n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    
        1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.

      6
        GetOutput().resize(static_cast<size_t>(n) * n);
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 6 times.

      20
        for (int i = 0; i < n; ++i) {
    
        2/2✓ Branch 0 taken 42 times.
✓ Branch 1 taken 14 times.

      56
          for (int j = 0; j < n; ++j) {
    
      42
            GetOutput()[(i * n) + j] = flat_result[(i * padded_n) + j];
    
          }
    
        }
    
        return true;
    
      }
    
      8
      bool ChyokotovADenseMatMulFoxAlgorithmALL::PostProcessingImpl() {
    
      8
        return true;
    
      }
    
      }  // namespace chyokotov_a_dense_matrix_mul_foxs_algorithm

Line	Branch	Exec	Source
1			#include "chyokotov_a_dense_matrix_mul_foxs_algorithm/all/include/ops_all.hpp"
2
3			#include <mpi.h>
4			#include <tbb/blocked_range2d.h>
5			#include <tbb/parallel_for.h>
6
7			#include <algorithm>
8			#include <cmath>
9			#include <cstddef>
10			#include <utility>
11			#include <vector>
12
13			#include "chyokotov_a_dense_matrix_mul_foxs_algorithm/common/include/common.hpp"
14
15			namespace chyokotov_a_dense_matrix_mul_foxs_algorithm {
16
17	1/2 ✓ Branch 1 taken 8 times. ✗ Branch 2 not taken.	8	ChyokotovADenseMatMulFoxAlgorithmALL::ChyokotovADenseMatMulFoxAlgorithmALL(const InType &in) {
18			SetTypeOfTask(GetStaticTypeOfTask());
19			GetInput() = in;
20			GetOutput().clear();
21		8	}
22
23		8	bool ChyokotovADenseMatMulFoxAlgorithmALL::ValidationImpl() {
24		8	return (GetInput().first.size() == GetInput().second.size());
25			}
26
27	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 8 times.	8	bool ChyokotovADenseMatMulFoxAlgorithmALL::PreProcessingImpl() {
28			GetOutput().clear();
29		8	GetOutput().resize(GetInput().first.size(), 0.0);
30		8	return true;
31			}
32
33		✗	int ChyokotovADenseMatMulFoxAlgorithmALL::CalcPaddedSize(int n, int q) {
34		✗	if (q <= 0) {
35			return n;
36			}
37		✗	return ((n + q - 1) / q) * q;
38			}
39
40		6	void ChyokotovADenseMatMulFoxAlgorithmALL::PadMatrix(const std::vector<double> &src, std::vector<double> &dst,
41			int original_n, int padded_n) {
42		6	dst.assign(static_cast<size_t>(padded_n) * padded_n, 0.0);
43
44	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 6 times.	20	for (int i = 0; i < original_n; ++i) {
45	2/2 ✓ Branch 0 taken 42 times. ✓ Branch 1 taken 14 times.	56	for (int j = 0; j < original_n; ++j) {
46		42	dst[(i * padded_n) + j] = src[(i * original_n) + j];
47			}
48			}
49		6	}
50
51		3	void ChyokotovADenseMatMulFoxAlgorithmALL::Multiply(const std::vector<double> &a_block,
52			const std::vector<double> &b_block, std::vector<double> &c_block,
53			int block_size) {
54		6	tbb::parallel_for(tbb::blocked_range2d<int>(0, block_size, 0, block_size),
55		24	[&](const tbb::blocked_range2d<int> &range) {
56	2/2 ✓ Branch 0 taken 21 times. ✓ Branch 1 taken 21 times.	42	for (int i = range.rows().begin(); i < range.rows().end(); ++i) {
57	2/2 ✓ Branch 0 taken 73 times. ✓ Branch 1 taken 21 times.	94	for (int k = 0; k < block_size; ++k) {
58		73	double temp = a_block[(i * block_size) + k];
59	2/2 ✓ Branch 0 taken 73 times. ✓ Branch 1 taken 73 times.	146	for (int j = range.cols().begin(); j < range.cols().end(); ++j) {
60		73	c_block[(i * block_size) + j] += temp * b_block[(k * block_size) + j];
61			}
62			}
63			}
64		21	});
65		3	}
66
67		3	void ChyokotovADenseMatMulFoxAlgorithmALL::DistributeData(MPI_Comm comm, int worker_rank, int worker_size, int q,
68			int block_size, const std::vector<double> &matrix_a_full,
69			const std::vector<double> &matrix_b_full,
70			std::vector<double> &local_a, std::vector<double> &local_b) {
71		3	size_t block_sz = static_cast<size_t>(block_size) * block_size;
72
73	1/2 ✓ Branch 0 taken 3 times. ✗ Branch 1 not taken.	3	if (worker_rank == 0) {
74	2/2 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 3 times.	6	for (int proc = 0; proc < worker_size; ++proc) {
75		3	int row = proc / q;
76		3	int col = proc % q;
77
78		3	std::vector<double> send_a(block_sz);
79	1/4 ✓ Branch 1 taken 3 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	3	std::vector<double> send_b(block_sz);
80
81	2/2 ✓ Branch 0 taken 7 times. ✓ Branch 1 taken 3 times.	10	for (int i = 0; i < block_size; ++i) {
82	2/2 ✓ Branch 0 taken 21 times. ✓ Branch 1 taken 7 times.	28	for (int j = 0; j < block_size; ++j) {
83		21	int a_row = (row * block_size) + i;
84		21	int a_col = (((col + row) % q) * block_size) + j;
85		21	int b_row = (((row + col) % q) * block_size) + i;
86		21	int b_col = (col * block_size) + j;
87
88		21	send_a[(i * block_size) + j] = matrix_a_full[(a_row * block_size * q) + a_col];
89		21	send_b[(i * block_size) + j] = matrix_b_full[(b_row * block_size * q) + b_col];
90			}
91			}
92
93	1/2 ✓ Branch 0 taken 3 times. ✗ Branch 1 not taken.	3	if (proc == 0) {
94			local_a = std::move(send_a);
95			local_b = std::move(send_b);
96			} else {
97		✗	MPI_Send(send_a.data(), static_cast<int>(block_sz), MPI_DOUBLE, proc, 0, comm);
98		✗	MPI_Send(send_b.data(), static_cast<int>(block_sz), MPI_DOUBLE, proc, 1, comm);
99			}
100			}
101			} else {
102		✗	local_a.resize(block_sz);
103		✗	local_b.resize(block_sz);
104		✗	MPI_Recv(local_a.data(), static_cast<int>(block_sz), MPI_DOUBLE, 0, 0, comm, MPI_STATUS_IGNORE);
105		✗	MPI_Recv(local_b.data(), static_cast<int>(block_sz), MPI_DOUBLE, 0, 1, comm, MPI_STATUS_IGNORE);
106			}
107		3	}
108
109		3	void ChyokotovADenseMatMulFoxAlgorithmALL::FoxAlgorithm(MPI_Comm comm, int worker_rank, int q, int block_size,
110			std::vector<double> &local_a, std::vector<double> &local_b,
111			std::vector<double> &local_c) {
112		3	int row = worker_rank / q;
113		3	int col = worker_rank % q;
114
115		3	int left = (row * q) + ((col - 1 + q) % q);
116		3	int right = (row * q) + ((col + 1) % q);
117		3	int up = (((row - 1 + q) % q) * q) + col;
118		3	int down = (((row + 1) % q) * q) + col;
119
120		3	size_t block_sz = static_cast<size_t>(block_size) * block_size;
121		3	std::vector<double> next_a(block_sz);
122	1/4 ✓ Branch 1 taken 3 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	3	std::vector<double> next_b(block_sz);
123
124	2/2 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 3 times.	6	for (int step = 0; step < q; ++step) {
125	1/2 ✓ Branch 1 taken 3 times. ✗ Branch 2 not taken.	3	Multiply(local_a, local_b, local_c, block_size);
126
127	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 3 times.	3	if (step < q - 1) {
128		✗	MPI_Sendrecv(local_a.data(), static_cast<int>(block_sz), MPI_DOUBLE, left, 10, next_a.data(),
129			static_cast<int>(block_sz), MPI_DOUBLE, right, 10, comm, MPI_STATUS_IGNORE);
130
131		✗	MPI_Sendrecv(local_b.data(), static_cast<int>(block_sz), MPI_DOUBLE, up, 11, next_b.data(),
132			static_cast<int>(block_sz), MPI_DOUBLE, down, 11, comm, MPI_STATUS_IGNORE);
133
134			local_a.swap(next_a);
135			local_b.swap(next_b);
136			}
137			}
138		3	}
139
140		3	void ChyokotovADenseMatMulFoxAlgorithmALL::CollectResult(MPI_Comm comm, int worker_rank, int worker_size, int q,
141			int block_size, std::vector<double> &flat_result,
142			const std::vector<double> &local_c) {
143		3	int padded_n = q * block_size;
144
145		3	auto fillres = [&](const std::vector<double> &buffer, int row, int col) {
146	2/2 ✓ Branch 0 taken 7 times. ✓ Branch 1 taken 3 times.	10	for (int i = 0; i < block_size; ++i) {
147	2/2 ✓ Branch 0 taken 21 times. ✓ Branch 1 taken 7 times.	28	for (int j = 0; j < block_size; ++j) {
148		21	int global_row = (row * block_size) + i;
149		21	int global_col = (col * block_size) + j;
150		21	flat_result[(global_row * padded_n) + global_col] = buffer[(i * block_size) + j];
151			}
152			}
153		6	};
154
155	1/2 ✓ Branch 0 taken 3 times. ✗ Branch 1 not taken.	3	if (worker_rank == 0) {
156		3	fillres(local_c, 0, 0);
157
158		3	std::vector<double> recv_buf(static_cast<size_t>(block_size) * block_size);
159	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 3 times.	3	for (int proc = 1; proc < worker_size; ++proc) {
160		✗	MPI_Recv(recv_buf.data(), static_cast<int>(recv_buf.size()), MPI_DOUBLE, proc, 20, comm, MPI_STATUS_IGNORE);
161		✗	fillres(recv_buf, proc / q, proc % q);
162			}
163			} else {
164		✗	MPI_Send(local_c.data(), static_cast<int>(local_c.size()), MPI_DOUBLE, 0, 20, comm);
165			}
166		3	}
167
168		8	bool ChyokotovADenseMatMulFoxAlgorithmALL::RunImpl() {
169		8	int rank = 0;
170		8	int size = 1;
171
172		8	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
173		8	MPI_Comm_size(MPI_COMM_WORLD, &size);
174
175		8	int q = static_cast<int>(std::sqrt(static_cast<double>(size)));
176		8	int active = q * q;
177
178		8	int n = 0;
179	2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 4 times.	8	if (rank == 0) {
180		4	n = static_cast<int>(std::sqrt(static_cast<double>(GetInput().first.size())));
181			}
182
183		8	MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
184
185	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	if (n == 0) {
186			return true;
187			}
188
189			int padded_n = CalcPaddedSize(n, std::max(1, q));
190		6	int block_size = padded_n / std::max(1, q);
191
192		6	std::vector<double> padded_a;
193		6	std::vector<double> padded_b;
194
195	2/2 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 3 times.	6	if (rank == 0) {
196	1/2 ✓ Branch 1 taken 3 times. ✗ Branch 2 not taken.	3	PadMatrix(GetInput().first, padded_a, n, padded_n);
197	1/2 ✓ Branch 1 taken 3 times. ✗ Branch 2 not taken.	3	PadMatrix(GetInput().second, padded_b, n, padded_n);
198			}
199
200		6	MPI_Comm comm = MPI_COMM_NULL;
201	2/2 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 3 times.	6	int color = (rank < active) ? 0 : MPI_UNDEFINED;
202	1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken.	6	MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm);
203
204	1/4 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	6	std::vector<double> flat_result(static_cast<size_t>(padded_n) * padded_n, 0.0);
205
206	2/2 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 3 times.	6	if (rank < active) {
207		3	int wrank = 0;
208		3	int wsize = 0;
209	1/2 ✓ Branch 1 taken 3 times. ✗ Branch 2 not taken.	3	MPI_Comm_rank(comm, &wrank);
210	1/2 ✓ Branch 1 taken 3 times. ✗ Branch 2 not taken.	3	MPI_Comm_size(comm, &wsize);
211
212		3	size_t block_sz = static_cast<size_t>(block_size) * block_size;
213	1/4 ✓ Branch 1 taken 3 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	3	std::vector<double> local_a(block_sz);
214	1/4 ✓ Branch 1 taken 3 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	3	std::vector<double> local_b(block_sz);
215	1/4 ✓ Branch 1 taken 3 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	3	std::vector<double> local_c(block_sz, 0.0);
216
217	1/2 ✓ Branch 1 taken 3 times. ✗ Branch 2 not taken.	3	DistributeData(comm, wrank, wsize, q, block_size, padded_a, padded_b, local_a, local_b);
218	1/2 ✓ Branch 1 taken 3 times. ✗ Branch 2 not taken.	3	FoxAlgorithm(comm, wrank, q, block_size, local_a, local_b, local_c);
219	1/2 ✓ Branch 1 taken 3 times. ✗ Branch 2 not taken.	3	CollectResult(comm, wrank, wsize, q, block_size, flat_result, local_c);
220
221	1/2 ✓ Branch 1 taken 3 times. ✗ Branch 2 not taken.	3	MPI_Comm_free(&comm);
222			}
223
224	1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken.	6	MPI_Bcast(flat_result.data(), padded_n * padded_n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
225
226	1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken.	6	GetOutput().resize(static_cast<size_t>(n) * n);
227	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 6 times.	20	for (int i = 0; i < n; ++i) {
228	2/2 ✓ Branch 0 taken 42 times. ✓ Branch 1 taken 14 times.	56	for (int j = 0; j < n; ++j) {
229		42	GetOutput()[(i * n) + j] = flat_result[(i * padded_n) + j];
230			}
231			}
232
233			return true;
234			}
235
236		8	bool ChyokotovADenseMatMulFoxAlgorithmALL::PostProcessingImpl() {
237		8	return true;
238			}
239
240			} // namespace chyokotov_a_dense_matrix_mul_foxs_algorithm
241