GCC Code Coverage Report


Directory: ./
File: tasks/sinev_a_mult_matrix_fox_algorithm/stl/src/ops_stl.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 27 119 22.7%
Functions: 6 14 42.9%
Branches: 18 106 17.0%

Line Branch Exec Source
1 #include "sinev_a_mult_matrix_fox_algorithm/stl/include/ops_stl.hpp"
2
3 #include <atomic>
4 #include <cmath>
5 #include <cstddef>
6 #include <thread>
7 #include <vector>
8
9 #include "sinev_a_mult_matrix_fox_algorithm/common/include/common.hpp"
10
11 namespace sinev_a_mult_matrix_fox_algorithm {
12
13
1/2
✓ Branch 1 taken 104 times.
✗ Branch 2 not taken.
104 SinevAMultMatrixFoxAlgorithmSTL::SinevAMultMatrixFoxAlgorithmSTL(const InType &in) {
14 SetTypeOfTask(GetStaticTypeOfTask());
15 GetInput() = in;
16 GetOutput() = {};
17 104 }
18
19 104 bool SinevAMultMatrixFoxAlgorithmSTL::ValidationImpl() {
20 const auto &[matrix_size, matrix_a, matrix_b] = GetInput();
21
3/6
✓ Branch 0 taken 104 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 104 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 104 times.
104 return matrix_size > 0 && matrix_a.size() == matrix_size * matrix_size &&
22 104 matrix_b.size() == matrix_size * matrix_size;
23 }
24
25 104 bool SinevAMultMatrixFoxAlgorithmSTL::PreProcessingImpl() {
26 const auto &[matrix_size, matrix_a, matrix_b] = GetInput();
27 104 GetOutput() = std::vector<double>(matrix_size * matrix_size, 0.0);
28 104 return true;
29 }
30
31 104 void SinevAMultMatrixFoxAlgorithmSTL::SimpleMultiply(size_t n, const std::vector<double> &a,
32 const std::vector<double> &b, std::vector<double> &c) {
33
2/2
✓ Branch 0 taken 2592 times.
✓ Branch 1 taken 104 times.
2696 for (size_t i = 0; i < n; ++i) {
34
2/2
✓ Branch 0 taken 157056 times.
✓ Branch 1 taken 2592 times.
159648 for (size_t k = 0; k < n; ++k) {
35 157056 double tmp = a[(i * n) + k];
36
2/2
✓ Branch 0 taken 12599664 times.
✓ Branch 1 taken 157056 times.
12756720 for (size_t j = 0; j < n; ++j) {
37 12599664 c[(i * n) + j] += tmp * b[(k * n) + j];
38 }
39 }
40 }
41 104 }
42
43 void SinevAMultMatrixFoxAlgorithmSTL::DecomposeToBlocks(const std::vector<double> &src, std::vector<double> &dst,
44 size_t n, size_t bs, int q) {
45 unsigned int num_threads = std::thread::hardware_concurrency();
46 if (num_threads == 0) {
47 num_threads = 2;
48 }
49
50 std::vector<std::thread> threads;
51 threads.reserve(num_threads);
52 std::atomic<size_t> next_block(0);
53 size_t total_blocks = static_cast<size_t>(q) * static_cast<size_t>(q);
54
55 for (unsigned int thread_idx = 0; thread_idx < num_threads; ++thread_idx) {
56 threads.emplace_back([&]() {
57 size_t block_idx = 0;
58 while ((block_idx = next_block.fetch_add(1)) < total_blocks) {
59 int bi = static_cast<int>(block_idx / q);
60 int bj = static_cast<int>(block_idx % q);
61
62 const size_t block_off = block_idx * (bs * bs);
63 for (size_t i = 0; i < bs; ++i) {
64 for (size_t j = 0; j < bs; ++j) {
65 const size_t src_idx = ((static_cast<size_t>(bi) * bs + i) * n) + (static_cast<size_t>(bj) * bs + j);
66 const size_t dst_idx = block_off + (i * bs) + j;
67 dst[dst_idx] = src[src_idx];
68 }
69 }
70 }
71 });
72 }
73
74 for (auto &thread : threads) {
75 thread.join();
76 }
77 }
78
79 void SinevAMultMatrixFoxAlgorithmSTL::AssembleFromBlocks(const std::vector<double> &src, std::vector<double> &dst,
80 size_t n, size_t bs, int q) {
81 unsigned int num_threads = std::thread::hardware_concurrency();
82 if (num_threads == 0) {
83 num_threads = 2;
84 }
85
86 std::vector<std::thread> threads;
87 threads.reserve(num_threads);
88 std::atomic<size_t> next_block(0);
89 size_t total_blocks = static_cast<size_t>(q) * static_cast<size_t>(q);
90
91 for (unsigned int thread_idx = 0; thread_idx < num_threads; ++thread_idx) {
92 threads.emplace_back([&]() {
93 size_t block_idx = 0;
94 while ((block_idx = next_block.fetch_add(1)) < total_blocks) {
95 int bi = static_cast<int>(block_idx / q);
96 int bj = static_cast<int>(block_idx % q);
97
98 const size_t block_off = block_idx * (bs * bs);
99 for (size_t i = 0; i < bs; ++i) {
100 for (size_t j = 0; j < bs; ++j) {
101 const size_t src_idx = block_off + (i * bs) + j;
102 const size_t dst_idx = ((static_cast<size_t>(bi) * bs + i) * n) + (static_cast<size_t>(bj) * bs + j);
103 dst[dst_idx] = src[src_idx];
104 }
105 }
106 }
107 });
108 }
109
110 for (auto &thread : threads) {
111 thread.join();
112 }
113 }
114
115 void SinevAMultMatrixFoxAlgorithmSTL::MultiplyBlocks(const std::vector<double> &blocks_a,
116 const std::vector<double> &blocks_b, std::vector<double> &blocks_c,
117 size_t bs, size_t a_off, size_t b_off, size_t c_off) {
118 for (size_t ii = 0; ii < bs; ++ii) {
119 for (size_t kk = 0; kk < bs; ++kk) {
120 const double val = blocks_a[a_off + (ii * bs) + kk];
121 const size_t b_base = b_off + (kk * bs);
122 const size_t c_base = c_off + (ii * bs);
123 for (size_t jj = 0; jj < bs; ++jj) {
124 blocks_c[c_base + jj] += val * blocks_b[b_base + jj];
125 }
126 }
127 }
128 }
129
130 void SinevAMultMatrixFoxAlgorithmSTL::FoxStep(const std::vector<double> &blocks_a, const std::vector<double> &blocks_b,
131 std::vector<double> &blocks_c, size_t bs, int q, int step) {
132 const size_t block_size = bs * bs;
133 unsigned int num_threads = std::thread::hardware_concurrency();
134 if (num_threads == 0) {
135 num_threads = 2;
136 }
137
138 std::vector<std::thread> threads;
139 threads.reserve(num_threads);
140 std::atomic<size_t> next_cell(0);
141 size_t total_cells = static_cast<size_t>(q) * static_cast<size_t>(q);
142
143 for (unsigned int thread_idx = 0; thread_idx < num_threads; ++thread_idx) {
144 threads.emplace_back([&]() {
145 size_t cell_idx = 0;
146 while ((cell_idx = next_cell.fetch_add(1)) < total_cells) {
147 int i = static_cast<int>(cell_idx / q);
148 int j = static_cast<int>(cell_idx % q);
149 const int k = (i + step) % q;
150
151 const size_t a_off = (static_cast<size_t>((i * q) + k)) * block_size;
152 const size_t b_off = (static_cast<size_t>((k * q) + j)) * block_size;
153 const size_t c_off = (static_cast<size_t>((i * q) + j)) * block_size;
154
155 MultiplyBlocks(blocks_a, blocks_b, blocks_c, bs, a_off, b_off, c_off);
156 }
157 });
158 }
159
160 for (auto &thread : threads) {
161 thread.join();
162 }
163 }
164
165 104 bool SinevAMultMatrixFoxAlgorithmSTL::RunImpl() {
166 const auto &input = GetInput();
167
2/2
✓ Branch 0 taken 88 times.
✓ Branch 1 taken 16 times.
104 const size_t n = std::get<0>(input);
168 const auto &a = std::get<1>(input);
169 const auto &b = std::get<2>(input);
170 auto &c = GetOutput();
171
172
2/2
✓ Branch 0 taken 88 times.
✓ Branch 1 taken 16 times.
104 if (n <= 64) {
173 88 SimpleMultiply(n, a, b, c);
174 88 return true;
175 }
176
177 size_t bs = 64;
178
3/4
✓ Branch 0 taken 48 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 32 times.
✓ Branch 3 taken 16 times.
48 while (n % bs != 0 && bs > 16) {
179 32 bs /= 2;
180 }
181
182
1/2
✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
16 if (n % bs != 0) {
183 16 SimpleMultiply(n, a, b, c);
184 16 return true;
185 }
186
187 const int actual_q = static_cast<int>(n / bs);
188
189 const auto total_blocks = static_cast<size_t>(actual_q) * static_cast<size_t>(actual_q);
190 const auto block_elements = bs * bs;
191
192 std::vector<double> blocks_a(total_blocks * block_elements);
193 std::vector<double> blocks_b(total_blocks * block_elements);
194 std::vector<double> blocks_c(total_blocks * block_elements, 0.0);
195
196 DecomposeToBlocks(a, blocks_a, n, bs, actual_q);
197 DecomposeToBlocks(b, blocks_b, n, bs, actual_q);
198
199 for (int step = 0; step < actual_q; ++step) {
200 FoxStep(blocks_a, blocks_b, blocks_c, bs, actual_q, step);
201 }
202
203 AssembleFromBlocks(blocks_c, c, n, bs, actual_q);
204
205 return true;
206 }
207
208 size_t SinevAMultMatrixFoxAlgorithmSTL::ChooseBlockSize(size_t n) {
209 if (n % 128 == 0) {
210 return 128;
211 }
212 if (n % 64 == 0) {
213 return 64;
214 }
215 if (n % 32 == 0) {
216 return 32;
217 }
218 if (n % 16 == 0) {
219 return 16;
220 }
221 return 1;
222 }
223
224 104 bool SinevAMultMatrixFoxAlgorithmSTL::PostProcessingImpl() {
225 104 return true;
226 }
227
228 } // namespace sinev_a_mult_matrix_fox_algorithm
229