GCC Code Coverage Report


Directory: ./
File: tasks/baranov_a_mult_matrix_fox_algorithm/stl/src/ops_stl.cpp
Date: 2026-05-11 08:26:31
Exec Total Coverage
Lines: 84 85 98.8%
Functions: 13 14 92.9%
Branches: 51 64 79.7%

Line Branch Exec Source
1 #include "baranov_a_mult_matrix_fox_algorithm/stl/include/ops_stl.hpp"
2
3 #include <algorithm>
4 #include <cmath>
5 #include <cstddef>
6 #include <thread>
7 #include <vector>
8
9 #include "baranov_a_mult_matrix_fox_algorithm/common/include/common.hpp"
10
11 namespace baranov_a_mult_matrix_fox_algorithm_stl {
12
13 namespace {
14
15 72 void MultiplyBlock(const std::vector<double> &matrix_a, const std::vector<double> &matrix_b,
16 std::vector<double> &output, size_t n, size_t i_start, size_t i_end, size_t j_start, size_t j_end,
17 size_t k_start, size_t k_end) {
18
2/2
✓ Branch 0 taken 4608 times.
✓ Branch 1 taken 72 times.
4680 for (size_t i = i_start; i < i_end; ++i) {
19
2/2
✓ Branch 0 taken 294912 times.
✓ Branch 1 taken 4608 times.
299520 for (size_t j = j_start; j < j_end; ++j) {
20 double sum = 0.0;
21
2/2
✓ Branch 0 taken 18874368 times.
✓ Branch 1 taken 294912 times.
19169280 for (size_t k = k_start; k < k_end; ++k) {
22 18874368 sum += matrix_a[(i * n) + k] * matrix_b[(k * n) + j];
23 }
24 294912 output[(i * n) + j] += sum;
25 }
26 }
27 72 }
28
29 472 void MultiplyRowRange(const std::vector<double> &matrix_a, const std::vector<double> &matrix_b,
30 std::vector<double> &output, size_t n, size_t start_i, size_t end_i) {
31
2/2
✓ Branch 0 taken 960 times.
✓ Branch 1 taken 472 times.
1432 for (size_t i = start_i; i < end_i; ++i) {
32
2/2
✓ Branch 0 taken 13408 times.
✓ Branch 1 taken 960 times.
14368 for (size_t j = 0; j < n; ++j) {
33 double sum = 0.0;
34
2/2
✓ Branch 0 taken 314640 times.
✓ Branch 1 taken 13408 times.
328048 for (size_t k = 0; k < n; ++k) {
35 314640 sum += matrix_a[(i * n) + k] * matrix_b[(k * n) + j];
36 }
37 13408 output[(i * n) + j] = sum;
38 }
39 }
40 472 }
41
42 144 void ParallelRowMultiplication(const std::vector<double> &matrix_a, const std::vector<double> &matrix_b,
43 std::vector<double> &output, size_t n, size_t start_i, size_t end_i) {
44 144 unsigned int num_threads = std::thread::hardware_concurrency();
45
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 144 times.
144 if (num_threads == 0) {
46 num_threads = 4;
47 }
48
49 144 std::vector<std::thread> threads;
50 144 size_t chunk_size = (end_i - start_i + num_threads - 1) / num_threads;
51
52
2/2
✓ Branch 0 taken 544 times.
✓ Branch 1 taken 72 times.
616 for (unsigned int tid = 0; tid < num_threads; ++tid) {
53 544 size_t i_start = start_i + (tid * chunk_size);
54
2/2
✓ Branch 0 taken 472 times.
✓ Branch 1 taken 72 times.
544 size_t i_end_local = std::min(i_start + chunk_size, end_i);
55
2/2
✓ Branch 0 taken 472 times.
✓ Branch 1 taken 72 times.
544 if (i_start >= end_i) {
56 break;
57 }
58
59 472 threads.emplace_back(
60
1/2
✓ Branch 1 taken 472 times.
✗ Branch 2 not taken.
944 [&, i_start, i_end_local]() { MultiplyRowRange(matrix_a, matrix_b, output, n, i_start, i_end_local); });
61 }
62
63
2/2
✓ Branch 0 taken 472 times.
✓ Branch 1 taken 144 times.
616 for (auto &thread : threads) {
64
1/2
✓ Branch 1 taken 472 times.
✗ Branch 2 not taken.
472 thread.join();
65 }
66 144 }
67
68 72 void ProcessBlockRange(const std::vector<double> &matrix_a, const std::vector<double> &matrix_b,
69 std::vector<double> &output, size_t n, size_t block_k, size_t num_blocks, size_t block_size,
70 const std::vector<size_t> &block_indices, size_t start_idx, size_t end_idx) {
71
2/2
✓ Branch 0 taken 72 times.
✓ Branch 1 taken 72 times.
144 for (size_t idx = start_idx; idx < end_idx; ++idx) {
72 72 size_t linear_idx = block_indices[idx];
73 72 size_t block_i = linear_idx / num_blocks;
74 72 size_t block_j = linear_idx % num_blocks;
75
76 72 size_t broadcast_block = (block_i + block_k) % num_blocks;
77
78 72 size_t i_start = block_i * block_size;
79 72 size_t i_end = std::min(i_start + block_size, n);
80 72 size_t j_start = block_j * block_size;
81 72 size_t j_end = std::min(j_start + block_size, n);
82 72 size_t k_start = broadcast_block * block_size;
83 72 size_t k_end = std::min(k_start + block_size, n);
84
85 72 MultiplyBlock(matrix_a, matrix_b, output, n, i_start, i_end, j_start, j_end, k_start, k_end);
86 }
87 72 }
88
89 24 void ParallelBlockProcessing(const std::vector<double> &matrix_a, const std::vector<double> &matrix_b,
90 std::vector<double> &output, size_t n, size_t block_k, size_t num_blocks,
91 size_t block_size, const std::vector<size_t> &block_indices) {
92 24 unsigned int num_threads = std::thread::hardware_concurrency();
93
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
24 if (num_threads == 0) {
94 num_threads = 4;
95 }
96
97 24 std::vector<std::thread> threads;
98 24 size_t chunk_size = (block_indices.size() + num_threads - 1) / num_threads;
99
100
2/2
✓ Branch 0 taken 80 times.
✓ Branch 1 taken 16 times.
96 for (unsigned int thread_id = 0; thread_id < num_threads; ++thread_id) {
101
2/2
✓ Branch 0 taken 72 times.
✓ Branch 1 taken 8 times.
80 size_t start_idx = thread_id * chunk_size;
102
2/2
✓ Branch 0 taken 72 times.
✓ Branch 1 taken 8 times.
80 size_t end_idx = std::min(start_idx + chunk_size, block_indices.size());
103
2/2
✓ Branch 0 taken 72 times.
✓ Branch 1 taken 8 times.
80 if (start_idx >= block_indices.size()) {
104 break;
105 }
106
107
1/2
✓ Branch 1 taken 72 times.
✗ Branch 2 not taken.
72 threads.emplace_back([&, start_idx, end_idx]() {
108 72 ProcessBlockRange(matrix_a, matrix_b, output, n, block_k, num_blocks, block_size, block_indices, start_idx,
109 end_idx);
110 72 });
111 }
112
113
2/2
✓ Branch 0 taken 72 times.
✓ Branch 1 taken 24 times.
96 for (auto &thread : threads) {
114
1/2
✓ Branch 1 taken 72 times.
✗ Branch 2 not taken.
72 thread.join();
115 }
116 24 }
117
118 } // namespace
119
120 160 BaranovAMultMatrixFoxAlgorithmSTL::BaranovAMultMatrixFoxAlgorithmSTL(
121
1/2
✓ Branch 1 taken 160 times.
✗ Branch 2 not taken.
160 const baranov_a_mult_matrix_fox_algorithm::InType &in) {
122 SetTypeOfTask(GetStaticTypeOfTask());
123 GetInput() = in;
124 160 GetOutput() = std::vector<double>();
125 160 }
126
127 160 bool BaranovAMultMatrixFoxAlgorithmSTL::ValidationImpl() {
128 const auto &[matrix_size, matrix_a, matrix_b] = GetInput();
129
3/6
✓ Branch 0 taken 160 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 160 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 160 times.
160 return matrix_size > 0 && matrix_a.size() == matrix_size * matrix_size &&
130 160 matrix_b.size() == matrix_size * matrix_size;
131 }
132
133 160 bool BaranovAMultMatrixFoxAlgorithmSTL::PreProcessingImpl() {
134 const auto &[matrix_size, matrix_a, matrix_b] = GetInput();
135 160 GetOutput() = std::vector<double>(matrix_size * matrix_size, 0.0);
136 160 return true;
137 }
138
139 void BaranovAMultMatrixFoxAlgorithmSTL::StandardMultiplication(size_t n) {
140 const auto &[matrix_size, matrix_a, matrix_b] = GetInput();
141 auto &output = GetOutput();
142 144 ParallelRowMultiplication(matrix_a, matrix_b, output, n, 0, n);
143 144 }
144
145
1/2
✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
16 void BaranovAMultMatrixFoxAlgorithmSTL::FoxBlockMultiplication(size_t n, size_t block_size) {
146 const auto &[matrix_size, matrix_a, matrix_b] = GetInput();
147 auto &output = GetOutput();
148
149
1/2
✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
16 size_t num_blocks = (n + block_size - 1) / block_size;
150
151 std::ranges::fill(output, 0.0);
152
153 16 std::vector<size_t> block_indices(num_blocks * num_blocks);
154
2/2
✓ Branch 0 taken 40 times.
✓ Branch 1 taken 16 times.
56 for (size_t idx = 0; idx < num_blocks * num_blocks; ++idx) {
155 40 block_indices[idx] = idx;
156 }
157
158
2/2
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 16 times.
40 for (size_t block_k = 0; block_k < num_blocks; ++block_k) {
159
1/2
✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
24 ParallelBlockProcessing(matrix_a, matrix_b, output, n, block_k, num_blocks, block_size, block_indices);
160 }
161 16 }
162
163 160 bool BaranovAMultMatrixFoxAlgorithmSTL::RunImpl() {
164 const auto &[matrix_size, matrix_a, matrix_b] = GetInput();
165 160 size_t n = matrix_size;
166
167 size_t block_size = 64;
168
2/2
✓ Branch 0 taken 144 times.
✓ Branch 1 taken 16 times.
160 if (n < block_size) {
169 StandardMultiplication(n);
170 } else {
171 16 FoxBlockMultiplication(n, block_size);
172 }
173
174 160 return true;
175 }
176
177 160 bool BaranovAMultMatrixFoxAlgorithmSTL::PostProcessingImpl() {
178 160 return true;
179 }
180
181 } // namespace baranov_a_mult_matrix_fox_algorithm_stl
182