GCC Code Coverage Report


Directory: ./
File: tasks/baranov_a_mult_matrix_fox_algorithm/all/src/ops_all.cpp
Date: 2026-05-11 08:26:31
Exec Total Coverage
Lines: 85 103 82.5%
Functions: 13 16 81.2%
Branches: 50 70 71.4%

Line Branch Exec Source
1 #include "baranov_a_mult_matrix_fox_algorithm/all/include/ops_all.hpp"
2
3 #include <algorithm>
4 #include <cmath>
5 #include <cstddef>
6 #include <thread>
7 #include <vector>
8
9 #include "baranov_a_mult_matrix_fox_algorithm/common/include/common.hpp"
10
11 namespace baranov_a_mult_matrix_fox_algorithm_all {
12
13 namespace {
14
15 18 void MultiplyBlock(const std::vector<double> &matrix_a, const std::vector<double> &matrix_b,
16 std::vector<double> &output, size_t n, size_t i_start, size_t i_end, size_t j_start, size_t j_end,
17 size_t k_start, size_t k_end) {
18
2/2
✓ Branch 0 taken 1152 times.
✓ Branch 1 taken 18 times.
1170 for (size_t i = i_start; i < i_end; ++i) {
19
2/2
✓ Branch 0 taken 73728 times.
✓ Branch 1 taken 1152 times.
74880 for (size_t j = j_start; j < j_end; ++j) {
20 double sum = 0.0;
21
2/2
✓ Branch 0 taken 4718592 times.
✓ Branch 1 taken 73728 times.
4792320 for (size_t k = k_start; k < k_end; ++k) {
22 4718592 sum += matrix_a[(i * n) + k] * matrix_b[(k * n) + j];
23 }
24 73728 output[(i * n) + j] += sum;
25 }
26 }
27 18 }
28
29 void MultiplySEQ(const std::vector<double> &matrix_a, const std::vector<double> &matrix_b, std::vector<double> &output,
30 size_t n) {
31 for (size_t i = 0; i < n; ++i) {
32 for (size_t j = 0; j < n; ++j) {
33 double sum = 0.0;
34 for (size_t k = 0; k < n; ++k) {
35 sum += matrix_a[(i * n) + k] * matrix_b[(k * n) + j];
36 }
37 output[(i * n) + j] = sum;
38 }
39 }
40 }
41
42 118 void MultiplyRowRange(const std::vector<double> &matrix_a, const std::vector<double> &matrix_b,
43 std::vector<double> &output, size_t n, size_t start_i, size_t end_i) {
44
2/2
✓ Branch 0 taken 240 times.
✓ Branch 1 taken 118 times.
358 for (size_t i = start_i; i < end_i; ++i) {
45
2/2
✓ Branch 0 taken 3352 times.
✓ Branch 1 taken 240 times.
3592 for (size_t j = 0; j < n; ++j) {
46 double sum = 0.0;
47
2/2
✓ Branch 0 taken 78660 times.
✓ Branch 1 taken 3352 times.
82012 for (size_t k = 0; k < n; ++k) {
48 78660 sum += matrix_a[(i * n) + k] * matrix_b[(k * n) + j];
49 }
50 3352 output[(i * n) + j] = sum;
51 }
52 }
53 118 }
54
55 36 void MultiplySTL(const std::vector<double> &matrix_a, const std::vector<double> &matrix_b, std::vector<double> &output,
56 size_t n) {
57 36 unsigned int num_threads = std::thread::hardware_concurrency();
58
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 36 times.
36 if (num_threads == 0) {
59 num_threads = 4;
60 }
61
62 36 std::vector<std::thread> threads;
63 36 size_t chunk_size = (n + num_threads - 1) / num_threads;
64
65
2/2
✓ Branch 0 taken 136 times.
✓ Branch 1 taken 18 times.
154 for (unsigned int tid = 0; tid < num_threads; ++tid) {
66 136 size_t start_i = tid * chunk_size;
67
2/2
✓ Branch 0 taken 118 times.
✓ Branch 1 taken 18 times.
136 size_t end_i = std::min(start_i + chunk_size, n);
68
2/2
✓ Branch 0 taken 118 times.
✓ Branch 1 taken 18 times.
136 if (start_i >= n) {
69 break;
70 }
71
72
1/2
✓ Branch 1 taken 118 times.
✗ Branch 2 not taken.
236 threads.emplace_back([&, start_i, end_i]() { MultiplyRowRange(matrix_a, matrix_b, output, n, start_i, end_i); });
73 }
74
75
2/2
✓ Branch 0 taken 118 times.
✓ Branch 1 taken 36 times.
154 for (auto &thread : threads) {
76
1/2
✓ Branch 1 taken 118 times.
✗ Branch 2 not taken.
118 thread.join();
77 }
78 36 }
79
80 void FoxBlockSEQ(const std::vector<double> &matrix_a, const std::vector<double> &matrix_b, std::vector<double> &output,
81 size_t n, size_t block_size) {
82 size_t num_blocks = (n + block_size - 1) / block_size;
83
84 std::ranges::fill(output, 0.0);
85
86 for (size_t bk = 0; bk < num_blocks; ++bk) {
87 for (size_t bi = 0; bi < num_blocks; ++bi) {
88 for (size_t bj = 0; bj < num_blocks; ++bj) {
89 size_t broadcast_block = (bi + bk) % num_blocks;
90 size_t i_start = bi * block_size;
91 size_t i_end = std::min(i_start + block_size, n);
92 size_t j_start = bj * block_size;
93 size_t j_end = std::min(j_start + block_size, n);
94 size_t k_start = broadcast_block * block_size;
95 size_t k_end = std::min(k_start + block_size, n);
96
97 MultiplyBlock(matrix_a, matrix_b, output, n, i_start, i_end, j_start, j_end, k_start, k_end);
98 }
99 }
100 }
101 }
102
103 18 void ProcessBlockRange(const std::vector<double> &matrix_a, const std::vector<double> &matrix_b,
104 std::vector<double> &output, size_t n, size_t bk, size_t num_blocks, size_t block_size,
105 const std::vector<size_t> &block_indices, size_t start_idx, size_t end_idx) {
106
2/2
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 18 times.
36 for (size_t idx = start_idx; idx < end_idx; ++idx) {
107 18 size_t linear_idx = block_indices[idx];
108 18 size_t bi = linear_idx / num_blocks;
109 18 size_t bj = linear_idx % num_blocks;
110
111 18 size_t broadcast_block = (bi + bk) % num_blocks;
112
113 18 size_t i_start = bi * block_size;
114 18 size_t i_end = std::min(i_start + block_size, n);
115 18 size_t j_start = bj * block_size;
116 18 size_t j_end = std::min(j_start + block_size, n);
117 18 size_t k_start = broadcast_block * block_size;
118 18 size_t k_end = std::min(k_start + block_size, n);
119
120 18 MultiplyBlock(matrix_a, matrix_b, output, n, i_start, i_end, j_start, j_end, k_start, k_end);
121 }
122 18 }
123
124 4 void FoxBlockSTL(const std::vector<double> &matrix_a, const std::vector<double> &matrix_b, std::vector<double> &output,
125 size_t n, size_t block_size) {
126
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 size_t num_blocks = (n + block_size - 1) / block_size;
127
128 std::ranges::fill(output, 0.0);
129
130 4 unsigned int num_threads = std::thread::hardware_concurrency();
131
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 if (num_threads == 0) {
132 num_threads = 4;
133 }
134
135 4 std::vector<size_t> block_indices(num_blocks * num_blocks);
136
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 4 times.
14 for (size_t idx = 0; idx < num_blocks * num_blocks; ++idx) {
137 10 block_indices[idx] = idx;
138 }
139
140
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 4 times.
10 for (size_t bk = 0; bk < num_blocks; ++bk) {
141 6 std::vector<std::thread> threads;
142 6 size_t chunk_size = (block_indices.size() + num_threads - 1) / num_threads;
143
144
2/2
✓ Branch 0 taken 20 times.
✓ Branch 1 taken 4 times.
24 for (unsigned int tid = 0; tid < num_threads; ++tid) {
145
2/2
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 2 times.
20 size_t start_idx = tid * chunk_size;
146
2/2
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 2 times.
20 size_t end_idx = std::min(start_idx + chunk_size, block_indices.size());
147
2/2
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 2 times.
20 if (start_idx >= block_indices.size()) {
148 break;
149 }
150
151
1/2
✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
18 threads.emplace_back([&, start_idx, end_idx, bk]() {
152 18 ProcessBlockRange(matrix_a, matrix_b, output, n, bk, num_blocks, block_size, block_indices, start_idx, end_idx);
153 18 });
154 }
155
156
2/2
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 6 times.
24 for (auto &thread : threads) {
157
1/2
✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
18 thread.join();
158 }
159 6 }
160 4 }
161
162 void MultiplyDispatch(bool use_parallel, const std::vector<double> &matrix_a, const std::vector<double> &matrix_b,
163 std::vector<double> &output, size_t n) {
164 if (!use_parallel) {
165 MultiplySEQ(matrix_a, matrix_b, output, n);
166 return;
167 }
168 36 MultiplySTL(matrix_a, matrix_b, output, n);
169 }
170
171 4 void FoxBlockDispatch(bool use_parallel, const std::vector<double> &matrix_a, const std::vector<double> &matrix_b,
172 std::vector<double> &output, size_t n, size_t block_size) {
173
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 if (!use_parallel) {
174 FoxBlockSEQ(matrix_a, matrix_b, output, n, block_size);
175 return;
176 }
177 4 FoxBlockSTL(matrix_a, matrix_b, output, n, block_size);
178 }
179
180 } // namespace
181
182 40 BaranovAMultMatrixFoxAlgorithmALL::BaranovAMultMatrixFoxAlgorithmALL(
183
1/2
✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
40 const baranov_a_mult_matrix_fox_algorithm::InType &in) {
184 SetTypeOfTask(GetStaticTypeOfTask());
185 GetInput() = in;
186 40 GetOutput() = std::vector<double>();
187 40 }
188
189 40 bool BaranovAMultMatrixFoxAlgorithmALL::ValidationImpl() {
190 const auto &[matrix_size, matrix_a, matrix_b] = GetInput();
191
3/6
✓ Branch 0 taken 40 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 40 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 40 times.
40 return matrix_size > 0 && matrix_a.size() == matrix_size * matrix_size &&
192 40 matrix_b.size() == matrix_size * matrix_size;
193 }
194
195 40 bool BaranovAMultMatrixFoxAlgorithmALL::PreProcessingImpl() {
196 const auto &[matrix_size, matrix_a, matrix_b] = GetInput();
197 40 GetOutput() = std::vector<double>(matrix_size * matrix_size, 0.0);
198 40 return true;
199 }
200
201 void BaranovAMultMatrixFoxAlgorithmALL::StandardMultiplication(size_t n) {
202 const auto &[matrix_size, matrix_a, matrix_b] = GetInput();
203 auto &output = GetOutput();
204 MultiplyDispatch(true, matrix_a, matrix_b, output, n);
205 36 }
206
207 void BaranovAMultMatrixFoxAlgorithmALL::FoxBlockMultiplication(size_t n, size_t block_size) {
208 const auto &[matrix_size, matrix_a, matrix_b] = GetInput();
209 auto &output = GetOutput();
210 4 FoxBlockDispatch(true, matrix_a, matrix_b, output, n, block_size);
211 4 }
212
213 40 bool BaranovAMultMatrixFoxAlgorithmALL::RunImpl() {
214 const auto &[matrix_size, matrix_a, matrix_b] = GetInput();
215 40 size_t n = matrix_size;
216 size_t block_size = 64;
217
2/2
✓ Branch 0 taken 36 times.
✓ Branch 1 taken 4 times.
40 if (n < block_size) {
218 StandardMultiplication(n);
219 } else {
220 FoxBlockMultiplication(n, block_size);
221 }
222 40 return true;
223 }
224
225 40 bool BaranovAMultMatrixFoxAlgorithmALL::PostProcessingImpl() {
226 40 return true;
227 }
228
229 } // namespace baranov_a_mult_matrix_fox_algorithm_all
230