GCC Code Coverage Report


Directory: ./
File: tasks/remizov_k_dense_matrix_multiplication_cannon_algorithm/stl/src/ops_stl.cpp
Date: 2026-05-11 08:26:31
Exec Total Coverage
Lines: 96 109 88.1%
Functions: 20 24 83.3%
Branches: 71 102 69.6%

Line Branch Exec Source
1 #include "remizov_k_dense_matrix_multiplication_cannon_algorithm/stl/include/ops_stl.hpp"
2
3 #include <algorithm>
4 #include <cstddef>
5 #include <thread>
6 #include <utility>
7 #include <vector>
8
9 #include "remizov_k_dense_matrix_multiplication_cannon_algorithm/common/include/common.hpp"
10
11 namespace remizov_k_dense_matrix_multiplication_cannon_algorithm {
12
13 namespace {
14
15 template <typename IndexType, typename Func>
16 720 void ParallelFor(IndexType begin, IndexType end, const Func &func) {
17 const std::size_t num_threads =
18
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 360 times.
720 std::max(static_cast<std::size_t>(1U), static_cast<std::size_t>(std::thread::hardware_concurrency()));
19 720 const IndexType range_length = end - begin;
20
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 360 times.
720 if (range_length <= 0) {
21 return;
22 }
23
24 720 std::vector<std::thread> threads;
25
1/2
✓ Branch 1 taken 360 times.
✗ Branch 2 not taken.
720 threads.reserve(num_threads);
26
27 720 IndexType chunk_size = (range_length + static_cast<IndexType>(num_threads) - 1) / static_cast<IndexType>(num_threads);
28 IndexType start = begin;
29
30
2/2
✓ Branch 0 taken 1280 times.
✓ Branch 1 taken 224 times.
3008 for (std::size_t thread_idx = 0; thread_idx < num_threads; ++thread_idx) {
31
2/2
✓ Branch 0 taken 1144 times.
✓ Branch 1 taken 136 times.
2560 IndexType chunk_end = std::min(end, start + chunk_size);
32
2/2
✓ Branch 0 taken 1144 times.
✓ Branch 1 taken 136 times.
2560 if (start >= chunk_end) {
33 break;
34 }
35
36
1/2
✓ Branch 1 taken 1144 times.
✗ Branch 2 not taken.
2288 threads.emplace_back([start, chunk_end, &func]() {
37
10/10
✓ Branch 0 taken 112 times.
✓ Branch 1 taken 112 times.
✓ Branch 2 taken 112 times.
✓ Branch 3 taken 112 times.
✓ Branch 4 taken 456 times.
✓ Branch 5 taken 456 times.
✓ Branch 6 taken 232 times.
✓ Branch 7 taken 232 times.
✓ Branch 8 taken 232 times.
✓ Branch 9 taken 232 times.
2288 for (IndexType i = start; i < chunk_end; ++i) {
38 1144 func(i);
39 }
40 });
41 start = chunk_end;
42 }
43
44
2/2
✓ Branch 0 taken 1144 times.
✓ Branch 1 taken 360 times.
3008 for (auto &th : threads) {
45
1/2
✓ Branch 0 taken 1144 times.
✗ Branch 1 not taken.
2288 if (th.joinable()) {
46
1/2
✓ Branch 1 taken 1144 times.
✗ Branch 2 not taken.
2288 th.join();
47 }
48 }
49 720 }
50
51 template <typename Func>
52 496 void ParallelFor2D(int rows_begin, int rows_end, int cols_begin, int cols_end, const Func &func) {
53 496 const int rows = rows_end - rows_begin;
54 496 const int cols = cols_end - cols_begin;
55 496 const int total = rows * cols;
56
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 248 times.
496 if (total <= 0) {
57 return;
58 }
59
60 2336 ParallelFor(0, total, [&](int linear_idx) {
61 920 int i = rows_begin + (linear_idx / cols);
62 920 int j = cols_begin + (linear_idx % cols);
63 920 func(i, j);
64 });
65 }
66
67 } // namespace
68
69 64 RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::RemizovKDenseMatrixMultiplicationCannonAlgorithmStl(
70
1/2
✓ Branch 1 taken 64 times.
✗ Branch 2 not taken.
64 const InType &in) {
71 SetTypeOfTask(GetStaticTypeOfTask());
72 GetInput() = in;
73 64 }
74
75 64 bool RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::ValidationImpl() {
76 const auto &input_data = GetInput();
77 64 int block_dim = std::get<0>(input_data);
78 const auto &mat_a = std::get<1>(input_data);
79 const auto &mat_b = std::get<2>(input_data);
80
81
1/2
✓ Branch 0 taken 64 times.
✗ Branch 1 not taken.
64 if (block_dim <= 0) {
82 return false;
83 }
84
2/4
✓ Branch 0 taken 64 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 64 times.
✗ Branch 3 not taken.
64 if (mat_a.empty() || mat_b.empty()) {
85 return false;
86 }
87
88 size_t n = mat_a.size();
89
1/2
✓ Branch 0 taken 64 times.
✗ Branch 1 not taken.
64 if (n != mat_a[0].size()) {
90 return false;
91 }
92
2/4
✓ Branch 0 taken 64 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 64 times.
✗ Branch 3 not taken.
64 if (n != mat_b.size() || n != mat_b[0].size()) {
93 return false;
94 }
95
96 64 return (n % static_cast<size_t>(block_dim) == 0);
97 }
98
99
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 64 times.
64 bool RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::PreProcessingImpl() {
100 GetOutput().clear();
101 64 return true;
102 }
103
104 456 void RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::MultiplyBlock(const std::vector<std::vector<double>> &a,
105 const std::vector<std::vector<double>> &b,
106 std::vector<std::vector<double>> &c,
107 int block_size) {
108
2/2
✓ Branch 0 taken 984 times.
✓ Branch 1 taken 456 times.
1440 for (int i = 0; i < block_size; ++i) {
109
2/2
✓ Branch 0 taken 2568 times.
✓ Branch 1 taken 984 times.
3552 for (int j = 0; j < block_size; ++j) {
110 double acc = 0.0;
111
2/2
✓ Branch 0 taken 7704 times.
✓ Branch 1 taken 2568 times.
10272 for (int k = 0; k < block_size; ++k) {
112 7704 acc += a[i][k] * b[k][j];
113 }
114 2568 c[i][j] += acc;
115 }
116 }
117 456 }
118
119 void RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::ShiftBlocksLeft(
120 std::vector<std::vector<std::vector<std::vector<double>>>> &matrix_blocks, int block_count) {
121 56 ParallelFor(0, block_count, [&](int i) {
122 112 auto first = std::move(matrix_blocks[i][0]);
123
2/2
✓ Branch 0 taken 112 times.
✓ Branch 1 taken 112 times.
224 for (int j = 1; j < block_count; ++j) {
124 112 matrix_blocks[i][j - 1] = std::move(matrix_blocks[i][j]);
125 }
126 112 matrix_blocks[i][block_count - 1] = std::move(first);
127 112 });
128 }
129
130 void RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::ShiftBlocksUp(
131 std::vector<std::vector<std::vector<std::vector<double>>>> &matrix_blocks, int block_count) {
132 ParallelFor(0, block_count, [&](int j) {
133 112 auto first = std::move(matrix_blocks[0][j]);
134
2/2
✓ Branch 0 taken 112 times.
✓ Branch 1 taken 112 times.
224 for (int i = 1; i < block_count; ++i) {
135 112 matrix_blocks[i - 1][j] = std::move(matrix_blocks[i][j]);
136 }
137 112 matrix_blocks[block_count - 1][j] = std::move(first);
138 112 });
139 }
140
141 64 void RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::RunCannonCycle(
142 std::vector<std::vector<std::vector<std::vector<double>>>> &a_blocks,
143 std::vector<std::vector<std::vector<std::vector<double>>>> &b_blocks,
144 std::vector<std::vector<std::vector<std::vector<double>>>> &c_blocks, int block_size, int block_count) {
145
2/2
✓ Branch 0 taken 120 times.
✓ Branch 1 taken 64 times.
184 for (int step = 0; step < block_count; ++step) {
146 120 ParallelFor2D(0, block_count, 0, block_count,
147 576 [&](int i, int j) { MultiplyBlock(a_blocks[i][j], b_blocks[i][j], c_blocks[i][j], block_size); });
148
149
2/2
✓ Branch 0 taken 56 times.
✓ Branch 1 taken 64 times.
120 if (step < block_count - 1) {
150 56 ShiftBlocksLeft(a_blocks, block_count);
151 56 ShiftBlocksUp(b_blocks, block_count);
152 }
153 }
154 64 }
155
156 void RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::InitializeBlocks(
157 const std::vector<std::vector<double>> &matrix_a, const std::vector<std::vector<double>> &matrix_b,
158 std::vector<std::vector<std::vector<std::vector<double>>>> &a_blocks,
159 std::vector<std::vector<std::vector<std::vector<double>>>> &b_blocks, int block_size, int block_count) {
160 ParallelFor2D(0, block_count, 0, block_count, [&](int i, int j) {
161 232 int shift = (i + j) % block_count;
162
2/2
✓ Branch 0 taken 504 times.
✓ Branch 1 taken 232 times.
736 for (int bi = 0; bi < block_size; ++bi) {
163
2/2
✓ Branch 0 taken 1320 times.
✓ Branch 1 taken 504 times.
1824 for (int bj = 0; bj < block_size; ++bj) {
164 1320 a_blocks[i][j][bi][bj] = matrix_a[(i * block_size) + bi][(shift * block_size) + bj];
165 1320 b_blocks[i][j][bi][bj] = matrix_b[(shift * block_size) + bi][(j * block_size) + bj];
166 }
167 }
168 232 });
169 }
170
171 void RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::AssembleOutput(
172 std::vector<std::vector<std::vector<std::vector<double>>>> &c_blocks, std::vector<std::vector<double>> &output,
173 int block_size, int block_count) {
174 ParallelFor2D(0, block_count, 0, block_count, [&](int i, int j) {
175
2/2
✓ Branch 0 taken 504 times.
✓ Branch 1 taken 232 times.
736 for (int bi = 0; bi < block_size; ++bi) {
176
2/2
✓ Branch 0 taken 1320 times.
✓ Branch 1 taken 504 times.
1824 for (int bj = 0; bj < block_size; ++bj) {
177 1320 output[(i * block_size) + bi][(j * block_size) + bj] = c_blocks[i][j][bi][bj];
178 }
179 }
180 232 });
181 }
182
183 64 bool RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::RunImpl() {
184 const auto &params = GetInput();
185 64 int block_dim = std::get<0>(params);
186 const auto &source_a = std::get<1>(params);
187 const auto &source_b = std::get<2>(params);
188
189 64 int matrix_size = static_cast<int>(source_a.size());
190 64 int blocks_per_dim = matrix_size / block_dim;
191
192 using Block4D = std::vector<std::vector<std::vector<std::vector<double>>>>;
193 64 Block4D blocks_a(blocks_per_dim, std::vector<std::vector<std::vector<double>>>(
194 64 blocks_per_dim, std::vector<std::vector<double>>(
195
3/6
✓ Branch 2 taken 64 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 64 times.
✗ Branch 6 not taken.
✓ Branch 8 taken 64 times.
✗ Branch 9 not taken.
64 block_dim, std::vector<double>(block_dim, 0.0))));
196 64 Block4D blocks_b(blocks_per_dim, std::vector<std::vector<std::vector<double>>>(
197 64 blocks_per_dim, std::vector<std::vector<double>>(
198
4/8
✓ Branch 1 taken 64 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 64 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 64 times.
✗ Branch 8 not taken.
✓ Branch 10 taken 64 times.
✗ Branch 11 not taken.
64 block_dim, std::vector<double>(block_dim, 0.0))));
199 64 Block4D blocks_c(blocks_per_dim, std::vector<std::vector<std::vector<double>>>(
200 64 blocks_per_dim, std::vector<std::vector<double>>(
201
5/10
✓ Branch 1 taken 64 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 64 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 64 times.
✗ Branch 8 not taken.
✓ Branch 10 taken 64 times.
✗ Branch 11 not taken.
✓ Branch 13 taken 64 times.
✗ Branch 14 not taken.
128 block_dim, std::vector<double>(block_dim, 0.0))));
202
203 64 InitializeBlocks(source_a, source_b, blocks_a, blocks_b, block_dim, blocks_per_dim);
204
1/2
✓ Branch 1 taken 64 times.
✗ Branch 2 not taken.
64 RunCannonCycle(blocks_a, blocks_b, blocks_c, block_dim, blocks_per_dim);
205
206
3/6
✓ Branch 1 taken 64 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 64 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 64 times.
✗ Branch 8 not taken.
128 std::vector<std::vector<double>> result(matrix_size, std::vector<double>(matrix_size, 0.0));
207 64 AssembleOutput(blocks_c, result, block_dim, blocks_per_dim);
208
209 64 GetOutput() = std::move(result);
210 64 return true;
211 64 }
212
213 64 bool RemizovKDenseMatrixMultiplicationCannonAlgorithmStl::PostProcessingImpl() {
214 64 return true;
215 }
216
217 } // namespace remizov_k_dense_matrix_multiplication_cannon_algorithm
218