GCC Code Coverage Report

Directory:	./
File:	tasks/remizov_k_dense_matrix_multiplication_cannon_algorithm/stl/src/ops_stl.cpp
Date:	2026-06-04 20:25:32

	Exec	Total	Coverage
Lines:	96	109	88.1%
Functions:	20	24	83.3%
Branches:	71	102	69.6%

  
      Line
      Branch
      Exec
      Source
    
      #include "remizov_k_dense_matrix_multiplication_cannon_algorithm/stl/include/ops_stl.hpp"
    
      #include <algorithm>
    
      #include <cstddef>
    
      #include <thread>
    
      #include <utility>
    
      #include <vector>
    
      #include "remizov_k_dense_matrix_multiplication_cannon_algorithm/common/include/common.hpp"
    
      namespace remizov_k_dense_matrix_multiplication_cannon_algorithm {
    
      namespace {
    
      template <typename IndexType, typename Func>
    
      720
      void ParallelFor(IndexType begin, IndexType end, const Func &func) {
    
        const std::size_t num_threads =
    
        1/2✗ Branch 1 not taken.
✓ Branch 2 taken 360 times.

      720
            std::max(static_cast<std::size_t>(1U), static_cast<std::size_t>(std::thread::hardware_concurrency()));
    
      720
        const IndexType range_length = end - begin;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 360 times.

      720
        if (range_length <= 0) {
    
      ✗
          return;
    
        }
    
      720
        std::vector<std::thread> threads;
    
        1/2✓ Branch 1 taken 360 times.
✗ Branch 2 not taken.

      720
        threads.reserve(num_threads);
    
      720
        IndexType chunk_size = (range_length + static_cast<IndexType>(num_threads) - 1) / static_cast<IndexType>(num_threads);
    
        IndexType start = begin;
    
        2/2✓ Branch 0 taken 1280 times.
✓ Branch 1 taken 224 times.

      3008
        for (std::size_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) {
    
        2/2✓ Branch 0 taken 1144 times.
✓ Branch 1 taken 136 times.

      2560
          IndexType chunk_end = std::min(end, start + chunk_size);
    
        2/2✓ Branch 0 taken 1144 times.
✓ Branch 1 taken 136 times.

      2560
          if (start >= chunk_end) {
    
            break;
    
          }
    
        1/2✓ Branch 1 taken 1144 times.
✗ Branch 2 not taken.

      2288
          threads.emplace_back([start, chunk_end, &func]() {
    
        10/10✓ Branch 0 taken 112 times.
✓ Branch 1 taken 112 times.
✓ Branch 2 taken 112 times.
✓ Branch 3 taken 112 times.
✓ Branch 4 taken 456 times.
✓ Branch 5 taken 456 times.
✓ Branch 6 taken 232 times.
✓ Branch 7 taken 232 times.
✓ Branch 8 taken 232 times.
✓ Branch 9 taken 232 times.

      2288
            for (IndexType i = start; i < chunk_end; ++i) {
    
      1144
              func(i);
    
            }
    
          });
    
          start = chunk_end;
    
        }
    
        2/2✓ Branch 0 taken 1144 times.
✓ Branch 1 taken 360 times.

      3008
        for (auto &th : threads) {
    
        1/2✓ Branch 0 taken 1144 times.
✗ Branch 1 not taken.

      2288
          if (th.joinable()) {
    
        1/2✓ Branch 1 taken 1144 times.
✗ Branch 2 not taken.

      2288
            th.join();
    
          }
    
        }
    
      720
      }
    
      template <typename Func>
    
      496
      void ParallelFor2D(int rows_begin, int rows_end, int cols_begin, int cols_end, const Func &func) {
    
      496
        const int rows = rows_end - rows_begin;
    
      496
        const int cols = cols_end - cols_begin;
    
      496
        const int total = rows * cols;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 248 times.

      496
        if (total <= 0) {
    
      ✗
          return;
    
        }
    
      2336
        ParallelFor(0, total, [&](int linear_idx) {
    
      920
          int i = rows_begin + (linear_idx / cols);
    
      920
          int j = cols_begin + (linear_idx % cols);
    
      920
          func(i, j);
    
        });
    
      }
    
      }  // namespace
    
      64
      RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::RemizovKDenseMatrixMultiplicationCannonAlgorithmStl(
    
        1/2✓ Branch 1 taken 64 times.
✗ Branch 2 not taken.

      64
          const InType &in) {
    
        SetTypeOfTask(GetStaticTypeOfTask());
    
        GetInput() = in;
    
      64
      }
    
      64
      bool RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::ValidationImpl() {
    
        const auto &input_data = GetInput();
    
      64
        int block_dim = std::get<0>(input_data);
    
        const auto &mat_a = std::get<1>(input_data);
    
        const auto &mat_b = std::get<2>(input_data);
    
        1/2✓ Branch 0 taken 64 times.
✗ Branch 1 not taken.

      64
        if (block_dim <= 0) {
    
          return false;
    
        }
    
        2/4✓ Branch 0 taken 64 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 64 times.
✗ Branch 3 not taken.

      64
        if (mat_a.empty() || mat_b.empty()) {
    
          return false;
    
        }
    
        size_t n = mat_a.size();
    
        1/2✓ Branch 0 taken 64 times.
✗ Branch 1 not taken.

      64
        if (n != mat_a[0].size()) {
    
          return false;
    
        }
    
        2/4✓ Branch 0 taken 64 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 64 times.
✗ Branch 3 not taken.

      64
        if (n != mat_b.size() || n != mat_b[0].size()) {
    
          return false;
    
        }
    
      64
        return (n % static_cast<size_t>(block_dim) == 0);
    
      }
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 64 times.

      64
      bool RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::PreProcessingImpl() {
    
        GetOutput().clear();
    
      64
        return true;
    
      }
    
      456
      void RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::MultiplyBlock(const std::vector<std::vector<double>> &a,
    
                                                                              const std::vector<std::vector<double>> &b,
    
                                                                              std::vector<std::vector<double>> &c,
    
                                                                              int block_size) {
    
        2/2✓ Branch 0 taken 984 times.
✓ Branch 1 taken 456 times.

      1440
        for (int i = 0; i < block_size; ++i) {
    
        2/2✓ Branch 0 taken 2568 times.
✓ Branch 1 taken 984 times.

      3552
          for (int j = 0; j < block_size; ++j) {
    
            double acc = 0.0;
    
        2/2✓ Branch 0 taken 7704 times.
✓ Branch 1 taken 2568 times.

      10272
            for (int k = 0; k < block_size; ++k) {
    
      7704
              acc += a[i][k] * b[k][j];
    
            }
    
      2568
            c[i][j] += acc;
    
          }
    
        }
    
      456
      }
    
      ✗
      void RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::ShiftBlocksLeft(
    
          std::vector<std::vector<std::vector<std::vector<double>>>> &matrix_blocks, int block_count) {
    
      56
        ParallelFor(0, block_count, [&](int i) {
    
      112
          auto first = std::move(matrix_blocks[i][0]);
    
        2/2✓ Branch 0 taken 112 times.
✓ Branch 1 taken 112 times.

      224
          for (int j = 1; j < block_count; ++j) {
    
      112
            matrix_blocks[i][j - 1] = std::move(matrix_blocks[i][j]);
    
          }
    
      112
          matrix_blocks[i][block_count - 1] = std::move(first);
    
      112
        });
    
      ✗
      }
    
      ✗
      void RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::ShiftBlocksUp(
    
          std::vector<std::vector<std::vector<std::vector<double>>>> &matrix_blocks, int block_count) {
    
      ✗
        ParallelFor(0, block_count, [&](int j) {
    
      112
          auto first = std::move(matrix_blocks[0][j]);
    
        2/2✓ Branch 0 taken 112 times.
✓ Branch 1 taken 112 times.

      224
          for (int i = 1; i < block_count; ++i) {
    
      112
            matrix_blocks[i - 1][j] = std::move(matrix_blocks[i][j]);
    
          }
    
      112
          matrix_blocks[block_count - 1][j] = std::move(first);
    
      112
        });
    
      ✗
      }
    
      64
      void RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::RunCannonCycle(
    
          std::vector<std::vector<std::vector<std::vector<double>>>> &a_blocks,
    
          std::vector<std::vector<std::vector<std::vector<double>>>> &b_blocks,
    
          std::vector<std::vector<std::vector<std::vector<double>>>> &c_blocks, int block_size, int block_count) {
    
        2/2✓ Branch 0 taken 120 times.
✓ Branch 1 taken 64 times.

      184
        for (int step = 0; step < block_count; ++step) {
    
      120
          ParallelFor2D(0, block_count, 0, block_count,
    
      576
                        [&](int i, int j) { MultiplyBlock(a_blocks[i][j], b_blocks[i][j], c_blocks[i][j], block_size); });
    
        2/2✓ Branch 0 taken 56 times.
✓ Branch 1 taken 64 times.

      120
          if (step < block_count - 1) {
    
      56
            ShiftBlocksLeft(a_blocks, block_count);
    
      56
            ShiftBlocksUp(b_blocks, block_count);
    
          }
    
        }
    
      64
      }
    
      ✗
      void RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::InitializeBlocks(
    
          const std::vector<std::vector<double>> &matrix_a, const std::vector<std::vector<double>> &matrix_b,
    
          std::vector<std::vector<std::vector<std::vector<double>>>> &a_blocks,
    
          std::vector<std::vector<std::vector<std::vector<double>>>> &b_blocks, int block_size, int block_count) {
    
      ✗
        ParallelFor2D(0, block_count, 0, block_count, [&](int i, int j) {
    
      232
          int shift = (i + j) % block_count;
    
        2/2✓ Branch 0 taken 504 times.
✓ Branch 1 taken 232 times.

      736
          for (int bi = 0; bi < block_size; ++bi) {
    
        2/2✓ Branch 0 taken 1320 times.
✓ Branch 1 taken 504 times.

      1824
            for (int bj = 0; bj < block_size; ++bj) {
    
      1320
              a_blocks[i][j][bi][bj] = matrix_a[(i * block_size) + bi][(shift * block_size) + bj];
    
      1320
              b_blocks[i][j][bi][bj] = matrix_b[(shift * block_size) + bi][(j * block_size) + bj];
    
            }
    
          }
    
      232
        });
    
      ✗
      }
    
      ✗
      void RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::AssembleOutput(
    
          std::vector<std::vector<std::vector<std::vector<double>>>> &c_blocks, std::vector<std::vector<double>> &output,
    
          int block_size, int block_count) {
    
      ✗
        ParallelFor2D(0, block_count, 0, block_count, [&](int i, int j) {
    
        2/2✓ Branch 0 taken 504 times.
✓ Branch 1 taken 232 times.

      736
          for (int bi = 0; bi < block_size; ++bi) {
    
        2/2✓ Branch 0 taken 1320 times.
✓ Branch 1 taken 504 times.

      1824
            for (int bj = 0; bj < block_size; ++bj) {
    
      1320
              output[(i * block_size) + bi][(j * block_size) + bj] = c_blocks[i][j][bi][bj];
    
            }
    
          }
    
      232
        });
    
      ✗
      }
    
      64
      bool RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::RunImpl() {
    
        const auto &params = GetInput();
    
      64
        int block_dim = std::get<0>(params);
    
        const auto &source_a = std::get<1>(params);
    
        const auto &source_b = std::get<2>(params);
    
      64
        int matrix_size = static_cast<int>(source_a.size());
    
      64
        int blocks_per_dim = matrix_size / block_dim;
    
        using Block4D = std::vector<std::vector<std::vector<std::vector<double>>>>;
    
      64
        Block4D blocks_a(blocks_per_dim, std::vector<std::vector<std::vector<double>>>(
    
      64
                                             blocks_per_dim, std::vector<std::vector<double>>(
    
        3/6✓ Branch 2 taken 64 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 64 times.
✗ Branch 6 not taken.
✓ Branch 8 taken 64 times.
✗ Branch 9 not taken.

      64
                                                                 block_dim, std::vector<double>(block_dim, 0.0))));
    
      64
        Block4D blocks_b(blocks_per_dim, std::vector<std::vector<std::vector<double>>>(
    
      64
                                             blocks_per_dim, std::vector<std::vector<double>>(
    
        4/8✓ Branch 1 taken 64 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 64 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 64 times.
✗ Branch 8 not taken.
✓ Branch 10 taken 64 times.
✗ Branch 11 not taken.

      64
                                                                 block_dim, std::vector<double>(block_dim, 0.0))));
    
      64
        Block4D blocks_c(blocks_per_dim, std::vector<std::vector<std::vector<double>>>(
    
      64
                                             blocks_per_dim, std::vector<std::vector<double>>(
    
        5/10✓ Branch 1 taken 64 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 64 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 64 times.
✗ Branch 8 not taken.
✓ Branch 10 taken 64 times.
✗ Branch 11 not taken.
✓ Branch 13 taken 64 times.
✗ Branch 14 not taken.

      128
                                                                 block_dim, std::vector<double>(block_dim, 0.0))));
    
      64
        InitializeBlocks(source_a, source_b, blocks_a, blocks_b, block_dim, blocks_per_dim);
    
        1/2✓ Branch 1 taken 64 times.
✗ Branch 2 not taken.

      64
        RunCannonCycle(blocks_a, blocks_b, blocks_c, block_dim, blocks_per_dim);
    
        3/6✓ Branch 1 taken 64 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 64 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 64 times.
✗ Branch 8 not taken.

      128
        std::vector<std::vector<double>> result(matrix_size, std::vector<double>(matrix_size, 0.0));
    
      64
        AssembleOutput(blocks_c, result, block_dim, blocks_per_dim);
    
      64
        GetOutput() = std::move(result);
    
      64
        return true;
    
      64
      }
    
      64
      bool RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::PostProcessingImpl() {
    
      64
        return true;
    
      }
    
      }  // namespace remizov_k_dense_matrix_multiplication_cannon_algorithm

Line	Branch	Exec	Source
1			#include "remizov_k_dense_matrix_multiplication_cannon_algorithm/stl/include/ops_stl.hpp"
2
3			#include <algorithm>
4			#include <cstddef>
5			#include <thread>
6			#include <utility>
7			#include <vector>
8
9			#include "remizov_k_dense_matrix_multiplication_cannon_algorithm/common/include/common.hpp"
10
11			namespace remizov_k_dense_matrix_multiplication_cannon_algorithm {
12
13			namespace {
14
15			template <typename IndexType, typename Func>
16		720	void ParallelFor(IndexType begin, IndexType end, const Func &func) {
17			const std::size_t num_threads =
18	1/2 ✗ Branch 1 not taken. ✓ Branch 2 taken 360 times.	720	std::max(static_cast<std::size_t>(1U), static_cast<std::size_t>(std::thread::hardware_concurrency()));
19		720	const IndexType range_length = end - begin;
20	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 360 times.	720	if (range_length <= 0) {
21		✗	return;
22			}
23
24		720	std::vector<std::thread> threads;
25	1/2 ✓ Branch 1 taken 360 times. ✗ Branch 2 not taken.	720	threads.reserve(num_threads);
26
27		720	IndexType chunk_size = (range_length + static_cast<IndexType>(num_threads) - 1) / static_cast<IndexType>(num_threads);
28			IndexType start = begin;
29
30	2/2 ✓ Branch 0 taken 1280 times. ✓ Branch 1 taken 224 times.	3008	for (std::size_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) {
31	2/2 ✓ Branch 0 taken 1144 times. ✓ Branch 1 taken 136 times.	2560	IndexType chunk_end = std::min(end, start + chunk_size);
32	2/2 ✓ Branch 0 taken 1144 times. ✓ Branch 1 taken 136 times.	2560	if (start >= chunk_end) {
33			break;
34			}
35
36	1/2 ✓ Branch 1 taken 1144 times. ✗ Branch 2 not taken.	2288	threads.emplace_back([start, chunk_end, &func]() {
37	10/10 ✓ Branch 0 taken 112 times. ✓ Branch 1 taken 112 times. ✓ Branch 2 taken 112 times. ✓ Branch 3 taken 112 times. ✓ Branch 4 taken 456 times. ✓ Branch 5 taken 456 times. ✓ Branch 6 taken 232 times. ✓ Branch 7 taken 232 times. ✓ Branch 8 taken 232 times. ✓ Branch 9 taken 232 times.	2288	for (IndexType i = start; i < chunk_end; ++i) {
38		1144	func(i);
39			}
40			});
41			start = chunk_end;
42			}
43
44	2/2 ✓ Branch 0 taken 1144 times. ✓ Branch 1 taken 360 times.	3008	for (auto &th : threads) {
45	1/2 ✓ Branch 0 taken 1144 times. ✗ Branch 1 not taken.	2288	if (th.joinable()) {
46	1/2 ✓ Branch 1 taken 1144 times. ✗ Branch 2 not taken.	2288	th.join();
47			}
48			}
49		720	}
50
51			template <typename Func>
52		496	void ParallelFor2D(int rows_begin, int rows_end, int cols_begin, int cols_end, const Func &func) {
53		496	const int rows = rows_end - rows_begin;
54		496	const int cols = cols_end - cols_begin;
55		496	const int total = rows * cols;
56	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 248 times.	496	if (total <= 0) {
57		✗	return;
58			}
59
60		2336	ParallelFor(0, total, [&](int linear_idx) {
61		920	int i = rows_begin + (linear_idx / cols);
62		920	int j = cols_begin + (linear_idx % cols);
63		920	func(i, j);
64			});
65			}
66
67			} // namespace
68
69		64	RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::RemizovKDenseMatrixMultiplicationCannonAlgorithmStl(
70	1/2 ✓ Branch 1 taken 64 times. ✗ Branch 2 not taken.	64	const InType &in) {
71			SetTypeOfTask(GetStaticTypeOfTask());
72			GetInput() = in;
73		64	}
74
75		64	bool RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::ValidationImpl() {
76			const auto &input_data = GetInput();
77		64	int block_dim = std::get<0>(input_data);
78			const auto &mat_a = std::get<1>(input_data);
79			const auto &mat_b = std::get<2>(input_data);
80
81	1/2 ✓ Branch 0 taken 64 times. ✗ Branch 1 not taken.	64	if (block_dim <= 0) {
82			return false;
83			}
84	2/4 ✓ Branch 0 taken 64 times. ✗ Branch 1 not taken. ✓ Branch 2 taken 64 times. ✗ Branch 3 not taken.	64	if (mat_a.empty() || mat_b.empty()) {
85			return false;
86			}
87
88			size_t n = mat_a.size();
89	1/2 ✓ Branch 0 taken 64 times. ✗ Branch 1 not taken.	64	if (n != mat_a[0].size()) {
90			return false;
91			}
92	2/4 ✓ Branch 0 taken 64 times. ✗ Branch 1 not taken. ✓ Branch 2 taken 64 times. ✗ Branch 3 not taken.	64	if (n != mat_b.size() || n != mat_b[0].size()) {
93			return false;
94			}
95
96		64	return (n % static_cast<size_t>(block_dim) == 0);
97			}
98
99	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 64 times.	64	bool RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::PreProcessingImpl() {
100			GetOutput().clear();
101		64	return true;
102			}
103
104		456	void RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::MultiplyBlock(const std::vector<std::vector<double>> &a,
105			const std::vector<std::vector<double>> &b,
106			std::vector<std::vector<double>> &c,
107			int block_size) {
108	2/2 ✓ Branch 0 taken 984 times. ✓ Branch 1 taken 456 times.	1440	for (int i = 0; i < block_size; ++i) {
109	2/2 ✓ Branch 0 taken 2568 times. ✓ Branch 1 taken 984 times.	3552	for (int j = 0; j < block_size; ++j) {
110			double acc = 0.0;
111	2/2 ✓ Branch 0 taken 7704 times. ✓ Branch 1 taken 2568 times.	10272	for (int k = 0; k < block_size; ++k) {
112		7704	acc += a[i][k] * b[k][j];
113			}
114		2568	c[i][j] += acc;
115			}
116			}
117		456	}
118
119		✗	void RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::ShiftBlocksLeft(
120			std::vector<std::vector<std::vector<std::vector<double>>>> &matrix_blocks, int block_count) {
121		56	ParallelFor(0, block_count, [&](int i) {
122		112	auto first = std::move(matrix_blocks[i][0]);
123	2/2 ✓ Branch 0 taken 112 times. ✓ Branch 1 taken 112 times.	224	for (int j = 1; j < block_count; ++j) {
124		112	matrix_blocks[i][j - 1] = std::move(matrix_blocks[i][j]);
125			}
126		112	matrix_blocks[i][block_count - 1] = std::move(first);
127		112	});
128		✗	}
129
130		✗	void RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::ShiftBlocksUp(
131			std::vector<std::vector<std::vector<std::vector<double>>>> &matrix_blocks, int block_count) {
132		✗	ParallelFor(0, block_count, [&](int j) {
133		112	auto first = std::move(matrix_blocks[0][j]);
134	2/2 ✓ Branch 0 taken 112 times. ✓ Branch 1 taken 112 times.	224	for (int i = 1; i < block_count; ++i) {
135		112	matrix_blocks[i - 1][j] = std::move(matrix_blocks[i][j]);
136			}
137		112	matrix_blocks[block_count - 1][j] = std::move(first);
138		112	});
139		✗	}
140
141		64	void RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::RunCannonCycle(
142			std::vector<std::vector<std::vector<std::vector<double>>>> &a_blocks,
143			std::vector<std::vector<std::vector<std::vector<double>>>> &b_blocks,
144			std::vector<std::vector<std::vector<std::vector<double>>>> &c_blocks, int block_size, int block_count) {
145	2/2 ✓ Branch 0 taken 120 times. ✓ Branch 1 taken 64 times.	184	for (int step = 0; step < block_count; ++step) {
146		120	ParallelFor2D(0, block_count, 0, block_count,
147		576	[&](int i, int j) { MultiplyBlock(a_blocks[i][j], b_blocks[i][j], c_blocks[i][j], block_size); });
148
149	2/2 ✓ Branch 0 taken 56 times. ✓ Branch 1 taken 64 times.	120	if (step < block_count - 1) {
150		56	ShiftBlocksLeft(a_blocks, block_count);
151		56	ShiftBlocksUp(b_blocks, block_count);
152			}
153			}
154		64	}
155
156		✗	void RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::InitializeBlocks(
157			const std::vector<std::vector<double>> &matrix_a, const std::vector<std::vector<double>> &matrix_b,
158			std::vector<std::vector<std::vector<std::vector<double>>>> &a_blocks,
159			std::vector<std::vector<std::vector<std::vector<double>>>> &b_blocks, int block_size, int block_count) {
160		✗	ParallelFor2D(0, block_count, 0, block_count, [&](int i, int j) {
161		232	int shift = (i + j) % block_count;
162	2/2 ✓ Branch 0 taken 504 times. ✓ Branch 1 taken 232 times.	736	for (int bi = 0; bi < block_size; ++bi) {
163	2/2 ✓ Branch 0 taken 1320 times. ✓ Branch 1 taken 504 times.	1824	for (int bj = 0; bj < block_size; ++bj) {
164		1320	a_blocks[i][j][bi][bj] = matrix_a[(i * block_size) + bi][(shift * block_size) + bj];
165		1320	b_blocks[i][j][bi][bj] = matrix_b[(shift * block_size) + bi][(j * block_size) + bj];
166			}
167			}
168		232	});
169		✗	}
170
171		✗	void RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::AssembleOutput(
172			std::vector<std::vector<std::vector<std::vector<double>>>> &c_blocks, std::vector<std::vector<double>> &output,
173			int block_size, int block_count) {
174		✗	ParallelFor2D(0, block_count, 0, block_count, [&](int i, int j) {
175	2/2 ✓ Branch 0 taken 504 times. ✓ Branch 1 taken 232 times.	736	for (int bi = 0; bi < block_size; ++bi) {
176	2/2 ✓ Branch 0 taken 1320 times. ✓ Branch 1 taken 504 times.	1824	for (int bj = 0; bj < block_size; ++bj) {
177		1320	output[(i * block_size) + bi][(j * block_size) + bj] = c_blocks[i][j][bi][bj];
178			}
179			}
180		232	});
181		✗	}
182
183		64	bool RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::RunImpl() {
184			const auto &params = GetInput();
185		64	int block_dim = std::get<0>(params);
186			const auto &source_a = std::get<1>(params);
187			const auto &source_b = std::get<2>(params);
188
189		64	int matrix_size = static_cast<int>(source_a.size());
190		64	int blocks_per_dim = matrix_size / block_dim;
191
192			using Block4D = std::vector<std::vector<std::vector<std::vector<double>>>>;
193		64	Block4D blocks_a(blocks_per_dim, std::vector<std::vector<std::vector<double>>>(
194		64	blocks_per_dim, std::vector<std::vector<double>>(
195	3/6 ✓ Branch 2 taken 64 times. ✗ Branch 3 not taken. ✓ Branch 5 taken 64 times. ✗ Branch 6 not taken. ✓ Branch 8 taken 64 times. ✗ Branch 9 not taken.	64	block_dim, std::vector<double>(block_dim, 0.0))));
196		64	Block4D blocks_b(blocks_per_dim, std::vector<std::vector<std::vector<double>>>(
197		64	blocks_per_dim, std::vector<std::vector<double>>(
198	4/8 ✓ Branch 1 taken 64 times. ✗ Branch 2 not taken. ✓ Branch 4 taken 64 times. ✗ Branch 5 not taken. ✓ Branch 7 taken 64 times. ✗ Branch 8 not taken. ✓ Branch 10 taken 64 times. ✗ Branch 11 not taken.	64	block_dim, std::vector<double>(block_dim, 0.0))));
199		64	Block4D blocks_c(blocks_per_dim, std::vector<std::vector<std::vector<double>>>(
200		64	blocks_per_dim, std::vector<std::vector<double>>(
201	5/10 ✓ Branch 1 taken 64 times. ✗ Branch 2 not taken. ✓ Branch 4 taken 64 times. ✗ Branch 5 not taken. ✓ Branch 7 taken 64 times. ✗ Branch 8 not taken. ✓ Branch 10 taken 64 times. ✗ Branch 11 not taken. ✓ Branch 13 taken 64 times. ✗ Branch 14 not taken.	128	block_dim, std::vector<double>(block_dim, 0.0))));
202
203		64	InitializeBlocks(source_a, source_b, blocks_a, blocks_b, block_dim, blocks_per_dim);
204	1/2 ✓ Branch 1 taken 64 times. ✗ Branch 2 not taken.	64	RunCannonCycle(blocks_a, blocks_b, blocks_c, block_dim, blocks_per_dim);
205
206	3/6 ✓ Branch 1 taken 64 times. ✗ Branch 2 not taken. ✓ Branch 4 taken 64 times. ✗ Branch 5 not taken. ✓ Branch 7 taken 64 times. ✗ Branch 8 not taken.	128	std::vector<std::vector<double>> result(matrix_size, std::vector<double>(matrix_size, 0.0));
207		64	AssembleOutput(blocks_c, result, block_dim, blocks_per_dim);
208
209		64	GetOutput() = std::move(result);
210		64	return true;
211		64	}
212
213		64	bool RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::PostProcessingImpl() {
214		64	return true;
215			}
216
217			} // namespace remizov_k_dense_matrix_multiplication_cannon_algorithm
218