GCC Code Coverage Report


Directory: ./
File: tasks/remizov_k_dense_matrix_multiplication_cannon_algorithm/all/src/ops_all.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 105 107 98.1%
Functions: 15 16 93.8%
Branches: 72 100 72.0%

Line Branch Exec Source
1 #include "remizov_k_dense_matrix_multiplication_cannon_algorithm/all/include/ops_all.hpp"
2
3 #include <tbb/blocked_range2d.h>
4 #include <tbb/parallel_for.h>
5
6 #ifdef _OPENMP
7 # include <omp.h>
8 #endif
9
10 #include <algorithm>
11 #include <cstddef>
12 #include <thread>
13 #include <utility>
14 #include <vector>
15
16 #include "remizov_k_dense_matrix_multiplication_cannon_algorithm/common/include/common.hpp"
17
18 namespace remizov_k_dense_matrix_multiplication_cannon_algorithm {
19
20 16 RemizovKDenseMatrixMultiplicationCannonAlgorithmAll::RemizovKDenseMatrixMultiplicationCannonAlgorithmAll(
21
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 const InType &in) {
22 SetTypeOfTask(GetStaticTypeOfTask());
23 GetInput() = in;
24 16 }
25
26 16 bool RemizovKDenseMatrixMultiplicationCannonAlgorithmAll::ValidationImpl() {
27 const auto &input_data = GetInput();
28
29 16 int block_dim = std::get<0>(input_data);
30 const auto &mat_a = std::get<1>(input_data);
31 const auto &mat_b = std::get<2>(input_data);
32
33
1/2
✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
16 if (block_dim <= 0) {
34 return false;
35 }
36
2/4
✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 16 times.
✗ Branch 3 not taken.
16 if (mat_a.empty() || mat_b.empty()) {
37 return false;
38 }
39
40 size_t n = mat_a.size();
41
1/2
✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
16 if (n != mat_a[0].size()) {
42 return false;
43 }
44
2/4
✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 16 times.
✗ Branch 3 not taken.
16 if (n != mat_b.size() || n != mat_b[0].size()) {
45 return false;
46 }
47
48 16 return (n % static_cast<size_t>(block_dim) == 0);
49 }
50
51
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
16 bool RemizovKDenseMatrixMultiplicationCannonAlgorithmAll::PreProcessingImpl() {
52 GetOutput().clear();
53 16 return true;
54 }
55
56 void RemizovKDenseMatrixMultiplicationCannonAlgorithmAll::MultiplyBlock(const std::vector<std::vector<double>> &a,
57 const std::vector<std::vector<double>> &b,
58 std::vector<std::vector<double>> &c,
59 int block_size) {
60 #ifdef _OPENMP
61 114 # pragma omp parallel for collapse(2) schedule(static) default(none) shared(a, b, c, block_size)
62 #endif
63 for (int i = 0; i < block_size; ++i) {
64 for (int j = 0; j < block_size; ++j) {
65 double acc = 0.0;
66 for (int k = 0; k < block_size; ++k) {
67 acc += a[i][k] * b[k][j];
68 }
69 c[i][j] += acc;
70 }
71 }
72 }
73
74 14 void RemizovKDenseMatrixMultiplicationCannonAlgorithmAll::ShiftBlocksLeft(
75 std::vector<std::vector<std::vector<std::vector<double>>>> &matrix_blocks, int block_count) {
76 14 const unsigned int num_threads = std::max(1U, std::thread::hardware_concurrency());
77 14 std::vector<std::thread> threads;
78
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 threads.reserve(num_threads);
79
80 14 const int rows_per_thread = (block_count + static_cast<int>(num_threads) - 1) / static_cast<int>(num_threads);
81
1/2
✓ Branch 0 taken 42 times.
✗ Branch 1 not taken.
42 for (unsigned int thread_idx = 0; thread_idx < num_threads; ++thread_idx) {
82 42 const int start = static_cast<int>(thread_idx) * rows_per_thread;
83
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 14 times.
42 const int end = std::min(start + rows_per_thread, block_count);
84
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 14 times.
42 if (start >= end) {
85 break;
86 }
87
1/2
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
28 threads.emplace_back([&matrix_blocks, block_count, start, end]() {
88
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 28 times.
56 for (int i = start; i < end; ++i) {
89 28 auto first = std::move(matrix_blocks[i][0]);
90
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 28 times.
56 for (int j = 1; j < block_count; ++j) {
91 28 matrix_blocks[i][j - 1] = std::move(matrix_blocks[i][j]);
92 }
93 28 matrix_blocks[i][block_count - 1] = std::move(first);
94 28 }
95 28 });
96 }
97
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 14 times.
42 for (auto &th : threads) {
98
1/2
✓ Branch 0 taken 28 times.
✗ Branch 1 not taken.
28 if (th.joinable()) {
99
1/2
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
28 th.join();
100 }
101 }
102 14 }
103
104 14 void RemizovKDenseMatrixMultiplicationCannonAlgorithmAll::ShiftBlocksUp(
105 std::vector<std::vector<std::vector<std::vector<double>>>> &matrix_blocks, int block_count) {
106 14 const unsigned int num_threads = std::max(1U, std::thread::hardware_concurrency());
107 14 std::vector<std::thread> threads;
108
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 threads.reserve(num_threads);
109
110 14 const int cols_per_thread = (block_count + static_cast<int>(num_threads) - 1) / static_cast<int>(num_threads);
111
1/2
✓ Branch 0 taken 42 times.
✗ Branch 1 not taken.
42 for (unsigned int thread_idx = 0; thread_idx < num_threads; ++thread_idx) {
112 42 const int start = static_cast<int>(thread_idx) * cols_per_thread;
113
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 14 times.
42 const int end = std::min(start + cols_per_thread, block_count);
114
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 14 times.
42 if (start >= end) {
115 break;
116 }
117
1/2
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
28 threads.emplace_back([&matrix_blocks, block_count, start, end]() {
118
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 28 times.
56 for (int j = start; j < end; ++j) {
119 28 auto first = std::move(matrix_blocks[0][j]);
120
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 28 times.
56 for (int i = 1; i < block_count; ++i) {
121 28 matrix_blocks[i - 1][j] = std::move(matrix_blocks[i][j]);
122 }
123 28 matrix_blocks[block_count - 1][j] = std::move(first);
124 28 }
125 28 });
126 }
127
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 14 times.
42 for (auto &th : threads) {
128
1/2
✓ Branch 0 taken 28 times.
✗ Branch 1 not taken.
28 if (th.joinable()) {
129
1/2
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
28 th.join();
130 }
131 }
132 14 }
133
134 16 void RemizovKDenseMatrixMultiplicationCannonAlgorithmAll::RunCannonCycle(
135 std::vector<std::vector<std::vector<std::vector<double>>>> &a_blocks,
136 std::vector<std::vector<std::vector<std::vector<double>>>> &b_blocks,
137 std::vector<std::vector<std::vector<std::vector<double>>>> &c_blocks, int block_size, int block_count) {
138
2/2
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 16 times.
46 for (int step = 0; step < block_count; ++step) {
139 30 tbb::parallel_for(tbb::blocked_range2d<int>(0, block_count, 0, block_count),
140 144 [&](const tbb::blocked_range2d<int> &r) {
141
2/2
✓ Branch 0 taken 114 times.
✓ Branch 1 taken 114 times.
228 for (int i = r.rows().begin(); i != r.rows().end(); ++i) {
142
2/2
✓ Branch 0 taken 114 times.
✓ Branch 1 taken 114 times.
228 for (int j = r.cols().begin(); j != r.cols().end(); ++j) {
143 114 MultiplyBlock(a_blocks[i][j], b_blocks[i][j], c_blocks[i][j], block_size);
144 }
145 }
146 114 });
147
148
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 16 times.
30 if (step < block_count - 1) {
149 14 ShiftBlocksLeft(a_blocks, block_count);
150 14 ShiftBlocksUp(b_blocks, block_count);
151 }
152 }
153 16 }
154
155 16 void RemizovKDenseMatrixMultiplicationCannonAlgorithmAll::InitializeBlocks(
156 const std::vector<std::vector<double>> &matrix_a, const std::vector<std::vector<double>> &matrix_b,
157 std::vector<std::vector<std::vector<std::vector<double>>>> &a_blocks,
158 std::vector<std::vector<std::vector<std::vector<double>>>> &b_blocks, int block_size, int block_count) {
159 74 tbb::parallel_for(tbb::blocked_range2d<int>(0, block_count, 0, block_count), [&](const tbb::blocked_range2d<int> &r) {
160
2/2
✓ Branch 0 taken 58 times.
✓ Branch 1 taken 58 times.
116 for (int i = r.rows().begin(); i != r.rows().end(); ++i) {
161
2/2
✓ Branch 0 taken 58 times.
✓ Branch 1 taken 58 times.
116 for (int j = r.cols().begin(); j != r.cols().end(); ++j) {
162 58 const int shift = (i + j) % block_count;
163
2/2
✓ Branch 0 taken 126 times.
✓ Branch 1 taken 58 times.
184 for (int bi = 0; bi < block_size; ++bi) {
164
2/2
✓ Branch 0 taken 330 times.
✓ Branch 1 taken 126 times.
456 for (int bj = 0; bj < block_size; ++bj) {
165 330 a_blocks[i][j][bi][bj] = matrix_a[(i * block_size) + bi][(shift * block_size) + bj];
166 330 b_blocks[i][j][bi][bj] = matrix_b[(shift * block_size) + bi][(j * block_size) + bj];
167 }
168 }
169 }
170 }
171 58 });
172 16 }
173
174 16 void RemizovKDenseMatrixMultiplicationCannonAlgorithmAll::AssembleOutput(
175 std::vector<std::vector<std::vector<std::vector<double>>>> &c_blocks, std::vector<std::vector<double>> &output,
176 int block_size, int block_count) {
177 74 tbb::parallel_for(tbb::blocked_range2d<int>(0, block_count, 0, block_count), [&](const tbb::blocked_range2d<int> &r) {
178
2/2
✓ Branch 0 taken 58 times.
✓ Branch 1 taken 58 times.
116 for (int i = r.rows().begin(); i != r.rows().end(); ++i) {
179
2/2
✓ Branch 0 taken 58 times.
✓ Branch 1 taken 58 times.
116 for (int j = r.cols().begin(); j != r.cols().end(); ++j) {
180
2/2
✓ Branch 0 taken 126 times.
✓ Branch 1 taken 58 times.
184 for (int bi = 0; bi < block_size; ++bi) {
181
2/2
✓ Branch 0 taken 330 times.
✓ Branch 1 taken 126 times.
456 for (int bj = 0; bj < block_size; ++bj) {
182 330 output[(i * block_size) + bi][(j * block_size) + bj] = c_blocks[i][j][bi][bj];
183 }
184 }
185 }
186 }
187 58 });
188 16 }
189
190 16 bool RemizovKDenseMatrixMultiplicationCannonAlgorithmAll::RunImpl() {
191 const auto &params = GetInput();
192
193 16 const int block_dim = std::get<0>(params);
194 const auto &source_a = std::get<1>(params);
195 const auto &source_b = std::get<2>(params);
196
197 16 const int matrix_size = static_cast<int>(source_a.size());
198 16 const int blocks_per_dim = matrix_size / block_dim;
199
200 using Block4D = std::vector<std::vector<std::vector<std::vector<double>>>>;
201 16 Block4D blocks_a(blocks_per_dim, std::vector<std::vector<std::vector<double>>>(
202 16 blocks_per_dim, std::vector<std::vector<double>>(
203
3/6
✓ Branch 2 taken 16 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 16 times.
✗ Branch 6 not taken.
✓ Branch 8 taken 16 times.
✗ Branch 9 not taken.
16 block_dim, std::vector<double>(block_dim, 0.0))));
204
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 Block4D blocks_b = blocks_a;
205
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 Block4D blocks_c = blocks_a;
206
207
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 InitializeBlocks(source_a, source_b, blocks_a, blocks_b, block_dim, blocks_per_dim);
208
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 RunCannonCycle(blocks_a, blocks_b, blocks_c, block_dim, blocks_per_dim);
209
210
2/4
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 16 times.
✗ Branch 5 not taken.
16 std::vector<std::vector<double>> result(matrix_size, std::vector<double>(matrix_size, 0.0));
211
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 AssembleOutput(blocks_c, result, block_dim, blocks_per_dim);
212
213 16 GetOutput() = std::move(result);
214 16 return true;
215 16 }
216
217 16 bool RemizovKDenseMatrixMultiplicationCannonAlgorithmAll::PostProcessingImpl() {
218 16 return true;
219 }
220
221 } // namespace remizov_k_dense_matrix_multiplication_cannon_algorithm
222