GCC Code Coverage Report


Directory: ./
File: tasks/yakimov_i_mult_of_dense_matrices_fox_algorithm/all/src/ops_all.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 90 142 63.4%
Functions: 13 20 65.0%
Branches: 53 110 48.2%

Line Branch Exec Source
1 #include "yakimov_i_mult_of_dense_matrices_fox_algorithm/all/include/ops_all.hpp"
2
3 #include <algorithm>
4 #include <cstddef>
5 #include <fstream>
6 #include <string>
7 #include <thread>
8 #include <vector>
9
10 #ifdef _OPENMP
11 # include <omp.h>
12 #endif
13
14 #include <tbb/parallel_for.h>
15 #include <tbb/task_group.h>
16
17 #include "util/include/util.hpp"
18 #include "yakimov_i_mult_of_dense_matrices_fox_algorithm/common/include/common.hpp"
19
20 namespace yakimov_i_mult_of_dense_matrices_fox_algorithm {
21
22 namespace {
23
24 constexpr int kSmallMatrixThreshold = 64;
25 constexpr int kMediumMatrixThreshold = 256;
26 constexpr int kBlockSizeSmall = 32;
27 constexpr int kBlockSizeMedium = 64;
28 constexpr int kBlockSizeLarge = 128;
29 constexpr int kUnrollFactor = 4;
30
31 32 bool ReadDimensions(std::ifstream &file, DenseMatrix &matrix) {
32 32 file >> matrix.rows;
33 32 file >> matrix.cols;
34
2/4
✓ Branch 0 taken 32 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 32 times.
32 return matrix.rows > 0 && matrix.cols > 0;
35 }
36
37 32 bool ReadMatrixData(std::ifstream &file, DenseMatrix &matrix) {
38 32 auto total_elements = static_cast<std::size_t>(matrix.rows) * static_cast<std::size_t>(matrix.cols);
39 32 matrix.data.resize(total_elements, 0.0);
40
2/2
✓ Branch 0 taken 128 times.
✓ Branch 1 taken 32 times.
160 for (int i = 0; i < matrix.rows; ++i) {
41
2/2
✓ Branch 0 taken 1084 times.
✓ Branch 1 taken 128 times.
1212 for (int j = 0; j < matrix.cols; ++j) {
42
1/2
✓ Branch 1 taken 1084 times.
✗ Branch 2 not taken.
1084 if (!(file >> matrix(i, j))) {
43 return false;
44 }
45 }
46 }
47 return true;
48 }
49
50 32 bool ReadMatrixFromFileImpl(const std::string &filename, DenseMatrix &matrix) {
51 32 std::ifstream file(filename);
52
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 32 times.
32 if (!file.is_open()) {
53 return false;
54 }
55
56
2/4
✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 32 times.
32 if (!ReadDimensions(file, matrix)) {
57 return false;
58 }
59
2/4
✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 32 times.
32 if (!ReadMatrixData(file, matrix)) {
60 return false;
61 }
62
63
1/2
✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
32 file.close();
64 return true;
65 32 }
66
67 4 void SimpleMultiplySeq(const DenseMatrix &a, const DenseMatrix &b, DenseMatrix &result) {
68 4 result.rows = a.rows;
69 4 result.cols = b.cols;
70 4 result.data.assign(static_cast<std::size_t>(result.rows) * result.cols, 0.0);
71
72
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 4 times.
12 for (int i = 0; i < a.rows; ++i) {
73
2/2
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 8 times.
24 for (int j = 0; j < b.cols; ++j) {
74 double sum = 0.0;
75
2/2
✓ Branch 0 taken 48 times.
✓ Branch 1 taken 16 times.
64 for (int k = 0; k < a.cols; ++k) {
76 48 sum += a(i, k) * b(k, j);
77 }
78 16 result(i, j) = sum;
79 }
80 }
81 4 }
82
83 void SimpleMultiplyOMP(const DenseMatrix &a, const DenseMatrix &b, DenseMatrix &result) {
84 result.rows = a.rows;
85 result.cols = b.cols;
86 result.data.assign(static_cast<std::size_t>(result.rows) * result.cols, 0.0);
87
88 #ifdef _OPENMP
89 # pragma omp parallel for schedule(static) default(none) shared(a, b, result)
90 #endif
91 for (int i = 0; i < a.rows; ++i) {
92 for (int j = 0; j < b.cols; ++j) {
93 double sum = 0.0;
94 for (int k = 0; k < a.cols; ++k) {
95 sum += a(i, k) * b(k, j);
96 }
97 result(i, j) = sum;
98 }
99 }
100 }
101
102 void SimpleMultiplyTBB(const DenseMatrix &a, const DenseMatrix &b, DenseMatrix &result) {
103 result.rows = a.rows;
104 result.cols = b.cols;
105 result.data.assign(static_cast<std::size_t>(result.rows) * result.cols, 0.0);
106
107 tbb::parallel_for(0, a.rows, [&](int i) {
108 for (int j = 0; j < b.cols; ++j) {
109 double sum = 0.0;
110 for (int k = 0; k < a.cols; ++k) {
111 sum += a(i, k) * b(k, j);
112 }
113 result(i, j) = sum;
114 }
115 });
116 }
117
118 12 void MultiplyBlockUnrolled(const DenseMatrix &a, const DenseMatrix &b, DenseMatrix &result, int row_start,
119 int col_start, int block_size, int a_row_offset, int b_col_offset) {
120 12 int block_size_aligned = (block_size / kUnrollFactor) * kUnrollFactor;
121
122
2/2
✓ Branch 0 taken 54 times.
✓ Branch 1 taken 12 times.
66 for (int i = 0; i < block_size; ++i) {
123
2/2
✓ Branch 0 taken 518 times.
✓ Branch 1 taken 54 times.
572 for (int j = 0; j < block_size; ++j) {
124 double sum = 0.0;
125 int k = 0;
126
127
2/2
✓ Branch 0 taken 1382 times.
✓ Branch 1 taken 518 times.
1900 for (; k < block_size_aligned; k += kUnrollFactor) {
128 1382 sum += a(row_start + i, a_row_offset + k) * b(b_col_offset + k, col_start + j);
129 1382 sum += a(row_start + i, a_row_offset + k + 1) * b(b_col_offset + k + 1, col_start + j);
130 1382 sum += a(row_start + i, a_row_offset + k + 2) * b(b_col_offset + k + 2, col_start + j);
131 1382 sum += a(row_start + i, a_row_offset + k + 3) * b(b_col_offset + k + 3, col_start + j);
132 }
133
134
2/2
✓ Branch 0 taken 1438 times.
✓ Branch 1 taken 518 times.
1956 for (; k < block_size; ++k) {
135 1438 sum += a(row_start + i, a_row_offset + k) * b(b_col_offset + k, col_start + j);
136 }
137
138 518 result(row_start + i, col_start + j) += sum;
139 }
140 }
141 12 }
142
143 void ProcessStageOMP(const DenseMatrix &a, const DenseMatrix &b, DenseMatrix &result, int stage, int num_blocks,
144 int block_size) {
145 #ifdef _OPENMP
146 12 # pragma omp parallel for schedule(dynamic) default(none) shared(a, b, result, stage, num_blocks, block_size)
147 #endif
148 for (int idx = 0; idx < num_blocks * num_blocks; ++idx) {
149 int i = idx / num_blocks;
150 int j = idx % num_blocks;
151 int broadcast_block = (i + stage) % num_blocks;
152 MultiplyBlockUnrolled(a, b, result, i * block_size, j * block_size, block_size, broadcast_block * block_size,
153 j * block_size);
154 }
155 }
156
157 void ProcessStageTBB(const DenseMatrix &a, const DenseMatrix &b, DenseMatrix &result, int stage, int num_blocks,
158 int block_size) {
159 for (int i = 0; i < num_blocks; ++i) {
160 int broadcast_block = (i + stage) % num_blocks;
161 for (int j = 0; j < num_blocks; ++j) {
162 MultiplyBlockUnrolled(a, b, result, i * block_size, j * block_size, block_size, broadcast_block * block_size,
163 j * block_size);
164 }
165 }
166 }
167
168 4 void HandleNonSquareMatrices(const DenseMatrix &a, const DenseMatrix &b, DenseMatrix &result) {
169 4 int n = a.rows;
170
171
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (n <= kSmallMatrixThreshold) {
172 4 SimpleMultiplySeq(a, b, result);
173 } else if (n <= kMediumMatrixThreshold) {
174 SimpleMultiplyOMP(a, b, result);
175 } else {
176 SimpleMultiplyTBB(a, b, result);
177 }
178 4 }
179
180 12 void HandleSmallNumBlocks(const DenseMatrix &a, const DenseMatrix &b, DenseMatrix &result, int num_blocks,
181 int block_size) {
182
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
24 for (int stage = 0; stage < num_blocks; ++stage) {
183 ProcessStageOMP(a, b, result, stage, num_blocks, block_size);
184 }
185 12 }
186
187 void HandleMediumNumBlocks(const DenseMatrix &a, const DenseMatrix &b, DenseMatrix &result, int num_blocks,
188 int block_size) {
189 tbb::parallel_for(0, num_blocks, [&](int stage) { ProcessStageOMP(a, b, result, stage, num_blocks, block_size); });
190 }
191
192 void HandleLargeNumBlocks(const DenseMatrix &a, const DenseMatrix &b, DenseMatrix &result, int num_blocks,
193 int block_size) {
194 unsigned int hardware_threads = std::thread::hardware_concurrency();
195 int num_tasks = static_cast<int>(hardware_threads);
196 num_tasks = std::max(2, std::min(num_tasks, num_blocks));
197 int stages_per_task = num_blocks / num_tasks;
198 int remaining_stages = num_blocks % num_tasks;
199
200 tbb::task_group task_group;
201
202 int stage_start = 0;
203 for (int task_index = 0; task_index < num_tasks; ++task_index) {
204 int stages_for_this_task = stages_per_task;
205 if (task_index < remaining_stages) {
206 stages_for_this_task = stages_for_this_task + 1;
207 }
208 if (stages_for_this_task == 0) {
209 continue;
210 }
211
212 int stage_end = stage_start + stages_for_this_task;
213
214 task_group.run([&a, &b, &result, stage_start, stage_end, num_blocks, block_size]() {
215 for (int stage = stage_start; stage < stage_end; ++stage) {
216 ProcessStageTBB(a, b, result, stage, num_blocks, block_size);
217 }
218 });
219
220 stage_start = stage_end;
221 }
222
223 task_group.wait();
224 }
225
226 16 void FoxAlgorithmAdaptive(const DenseMatrix &a, const DenseMatrix &b, DenseMatrix &result, int block_size) {
227
4/6
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 12 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 12 times.
16 bool is_not_square = (a.rows != a.cols) || (b.rows != b.cols) || (a.rows != b.rows);
228 16 bool is_not_divisible = (a.rows % block_size != 0);
229
230
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 12 times.
16 if (is_not_square || is_not_divisible) {
231 4 HandleNonSquareMatrices(a, b, result);
232 4 return;
233 }
234
235 int n = a.rows;
236 12 int num_blocks = n / block_size;
237 12 result.rows = n;
238 12 result.cols = n;
239 12 result.data.assign(static_cast<std::size_t>(n) * n, 0.0);
240
241
1/2
✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
12 if (num_blocks <= 8) {
242 12 HandleSmallNumBlocks(a, b, result, num_blocks, block_size);
243 } else if (num_blocks <= 32) {
244 HandleMediumNumBlocks(a, b, result, num_blocks, block_size);
245 } else {
246 HandleLargeNumBlocks(a, b, result, num_blocks, block_size);
247 }
248 }
249
250 } // namespace
251
252
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 YakimovIMultOfDenseMatricesFoxAlgorithmAll::YakimovIMultOfDenseMatricesFoxAlgorithmAll(const InType &in) {
253 this->SetTypeOfTask(YakimovIMultOfDenseMatricesFoxAlgorithmAll::GetStaticTypeOfTask());
254 16 this->GetInput() = in;
255 this->GetOutput() = 0.0;
256
257
1/2
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
16 std::string task_name = "yakimov_i_mult_of_dense_matrices_fox_algorithm";
258
1/2
✓ Branch 2 taken 16 times.
✗ Branch 3 not taken.
32 this->matrix_a_filename_ = ppc::util::GetAbsoluteTaskPath(task_name, "A_" + std::to_string(in) + ".txt");
259
1/2
✓ Branch 2 taken 16 times.
✗ Branch 3 not taken.
48 this->matrix_b_filename_ = ppc::util::GetAbsoluteTaskPath(task_name, "B_" + std::to_string(in) + ".txt");
260 16 }
261
262 16 bool YakimovIMultOfDenseMatricesFoxAlgorithmAll::ValidationImpl() {
263
2/4
✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 16 times.
16 return (this->GetInput() > 0) && (this->GetOutput() == 0.0);
264 }
265
266 16 bool YakimovIMultOfDenseMatricesFoxAlgorithmAll::PreProcessingImpl() {
267
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 16 times.
16 if (!ReadMatrixFromFileImpl(this->matrix_a_filename_, this->matrix_a_)) {
268 return false;
269 }
270
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 16 times.
16 if (!ReadMatrixFromFileImpl(this->matrix_b_filename_, this->matrix_b_)) {
271 return false;
272 }
273
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
16 if (this->matrix_a_.cols != this->matrix_b_.rows) {
274 return false;
275 }
276
277
3/4
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 12 times.
16 if (this->matrix_a_.rows != this->matrix_a_.cols || this->matrix_b_.rows != this->matrix_b_.cols ||
278 this->matrix_a_.rows != this->matrix_b_.rows) {
279 4 this->block_size_ = 0;
280 4 return true;
281 }
282
283 12 int n = this->matrix_a_.rows;
284
285
1/2
✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
12 if (n <= kSmallMatrixThreshold) {
286 12 this->block_size_ = kBlockSizeSmall;
287 } else if (n <= kMediumMatrixThreshold) {
288 this->block_size_ = kBlockSizeMedium;
289 } else {
290 this->block_size_ = kBlockSizeLarge;
291 }
292
293
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
12 while (this->block_size_ * 2 <= n && this->block_size_ < kBlockSizeLarge) {
294 this->block_size_ *= 2;
295 }
296
297 12 this->block_size_ = std::min(this->block_size_, n);
298
299 12 return this->block_size_ > 0;
300 }
301
302 16 bool YakimovIMultOfDenseMatricesFoxAlgorithmAll::RunImpl() {
303 16 FoxAlgorithmAdaptive(this->matrix_a_, this->matrix_b_, this->result_matrix_, this->block_size_);
304 16 return true;
305 }
306
307 16 bool YakimovIMultOfDenseMatricesFoxAlgorithmAll::PostProcessingImpl() {
308 double sum = 0.0;
309
2/2
✓ Branch 0 taken 534 times.
✓ Branch 1 taken 16 times.
550 for (double value : this->result_matrix_.data) {
310 534 sum += value;
311 }
312 16 this->GetOutput() = sum;
313 16 return true;
314 }
315
316 } // namespace yakimov_i_mult_of_dense_matrices_fox_algorithm
317