GCC Code Coverage Report


Directory: ./
File: tasks/akhmetov_daniil_strassen_dense_double/omp/src/ops_omp.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 142 144 98.6%
Functions: 13 13 100.0%
Branches: 42 78 53.8%

Line Branch Exec Source
1 #include "akhmetov_daniil_strassen_dense_double/omp/include/ops_omp.hpp"
2
3 #include <cstddef>
4 #include <utility>
5 #include <vector>
6
7 #include "akhmetov_daniil_strassen_dense_double/common/include/common.hpp"
8
9 namespace akhmetov_daniil_strassen_dense_double {
10
11
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 AkhmetovDStrassenDenseDoubleOMP::AkhmetovDStrassenDenseDoubleOMP(const InType &in) {
12 SetTypeOfTask(GetStaticTypeOfTask());
13
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 GetInput() = in;
14 12 }
15
16
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.
12 bool AkhmetovDStrassenDenseDoubleOMP::ValidationImpl() {
17 const auto &input = GetInput();
18
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.
12 if (input.empty()) {
19 return false;
20 }
21 12 const size_t n = format::GetN(input);
22
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.
12 if (n == 0) {
23 return false;
24 }
25 12 const size_t expected_size = 1 + (2 * n * n);
26 12 return input.size() == expected_size;
27 }
28
29 12 bool AkhmetovDStrassenDenseDoubleOMP::PreProcessingImpl() {
30 const auto &input = GetInput();
31 12 const size_t n = format::GetN(input);
32 12 GetOutput().assign(n * n, 0.0);
33 12 return true;
34 }
35
36 namespace {
37
38 constexpr size_t kThreshold = 64;
39 [[maybe_unused]] constexpr size_t kParallelThreshold = 256;
40
41 inline size_t NextPow2(size_t n) {
42 size_t p = 1;
43
2/2
✓ Branch 0 taken 60 times.
✓ Branch 1 taken 8 times.
68 while (p < n) {
44 60 p <<= 1;
45 }
46 return p;
47 }
48
49 228 Matrix StandardMultiply(const Matrix &a, const Matrix &b, size_t size) {
50 228 Matrix c(size * size, 0.0);
51 228 #pragma omp parallel for default(none) shared(a, b, c, size) schedule(static) if (size >= kParallelThreshold)
52 for (size_t i = 0; i < size; ++i) {
53 for (size_t k = 0; k < size; ++k) {
54 const double aik = a.at((i * size) + k);
55 const size_t bk = k * size;
56 const size_t ci = i * size;
57 for (size_t j = 0; j < size; ++j) {
58 c.at(ci + j) += aik * b.at(bk + j);
59 }
60 }
61 }
62 228 return c;
63 }
64
65 72 void Split(const Matrix &src, Matrix &a11, Matrix &a12, Matrix &a21, Matrix &a22, size_t size) {
66 72 const size_t half = size / 2;
67 72 const size_t block = half * half;
68 72 a11.assign(block, 0.0);
69 72 a12.assign(block, 0.0);
70 72 a21.assign(block, 0.0);
71 72 a22.assign(block, 0.0);
72
73 72 #pragma omp parallel for default(none) shared(src, a11, a12, a21, a22, size, half) \
74 schedule(static) if (size >= kParallelThreshold)
75 for (size_t i = 0; i < half; ++i) {
76 const size_t is = i * size;
77 const size_t ih = i * half;
78 const size_t is2 = (i + half) * size;
79 for (size_t j = 0; j < half; ++j) {
80 a11.at(ih + j) = src.at(is + j);
81 a12.at(ih + j) = src.at(is + j + half);
82 a21.at(ih + j) = src.at(is2 + j);
83 a22.at(ih + j) = src.at(is2 + j + half);
84 }
85 }
86 72 }
87
88 void Merge(Matrix &dst, const Matrix &c11, const Matrix &c12, const Matrix &c21, const Matrix &c22, size_t size) {
89 36 const size_t half = size / 2;
90 36 #pragma omp parallel for default(none) shared(dst, c11, c12, c21, c22, size, half) \
91 schedule(static) if (size >= kParallelThreshold)
92 for (size_t i = 0; i < half; ++i) {
93 const size_t is = i * size;
94 const size_t ih = i * half;
95 const size_t is2 = (i + half) * size;
96 for (size_t j = 0; j < half; ++j) {
97 dst.at(is + j) = c11.at(ih + j);
98 dst.at(is + j + half) = c12.at(ih + j);
99 dst.at(is2 + j) = c21.at(ih + j);
100 dst.at(is2 + j + half) = c22.at(ih + j);
101 }
102 }
103 }
104
105 216 inline void AddInto(const Matrix &a, const Matrix &b, Matrix &c) {
106 const size_t n = a.size();
107 216 #pragma omp parallel for default(none) shared(a, b, c, n) \
108 schedule(static) if (n >= (kParallelThreshold * kParallelThreshold))
109 for (size_t i = 0; i < n; ++i) {
110 c.at(i) = a.at(i) + b.at(i);
111 }
112 216 }
113
114 144 inline void SubInto(const Matrix &a, const Matrix &b, Matrix &c) {
115 const size_t n = a.size();
116 144 #pragma omp parallel for default(none) shared(a, b, c, n) \
117 schedule(static) if (n >= (kParallelThreshold * kParallelThreshold))
118 for (size_t i = 0; i < n; ++i) {
119 c.at(i) = a.at(i) - b.at(i);
120 }
121 144 }
122
123 struct Frame {
124 Matrix a;
125 Matrix b;
126 Matrix result;
127 size_t size;
128 size_t stage{0};
129
130 Matrix a11;
131 Matrix a12;
132 Matrix a21;
133 Matrix a22;
134 Matrix b11;
135 Matrix b12;
136 Matrix b21;
137 Matrix b22;
138
139 Matrix m1;
140 Matrix m2;
141 Matrix m3;
142 Matrix m4;
143 Matrix m5;
144 Matrix m6;
145 Matrix m7;
146
147 Matrix temp_a;
148 Matrix temp_b;
149
150 260 Frame(Matrix aa, Matrix bb, size_t s) : a(std::move(aa)), b(std::move(bb)), size(s) {}
151 };
152
153
1/2
✓ Branch 0 taken 548 times.
✗ Branch 1 not taken.
548 void ProcessTopFrame(std::vector<Frame> &stack, Matrix &final_result) {
154
1/2
✓ Branch 0 taken 548 times.
✗ Branch 1 not taken.
548 if (stack.empty()) {
155 return;
156 }
157
158
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 548 times.
548 const size_t current_index = stack.size() - 1;
159 Frame &frame = stack.at(current_index);
160
161
2/2
✓ Branch 0 taken 224 times.
✓ Branch 1 taken 324 times.
548 if (frame.size <= kThreshold) {
162 224 Matrix base = StandardMultiply(frame.a, frame.b, frame.size);
163 stack.pop_back();
164
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 224 times.
224 if (stack.empty()) {
165 final_result = std::move(base);
166 } else {
167 224 stack.back().temp_a = std::move(base);
168 }
169 return;
170 }
171
172 324 const size_t half = frame.size / 2;
173 324 const size_t block_size = half * half;
174
175
9/10
✓ Branch 0 taken 36 times.
✓ Branch 1 taken 36 times.
✓ Branch 2 taken 36 times.
✓ Branch 3 taken 36 times.
✓ Branch 4 taken 36 times.
✓ Branch 5 taken 36 times.
✓ Branch 6 taken 36 times.
✓ Branch 7 taken 36 times.
✓ Branch 8 taken 36 times.
✗ Branch 9 not taken.
324 switch (frame.stage) {
176 36 case 0: {
177 36 Split(frame.a, frame.a11, frame.a12, frame.a21, frame.a22, frame.size);
178 36 Split(frame.b, frame.b11, frame.b12, frame.b21, frame.b22, frame.size);
179
180 36 frame.temp_a.assign(block_size, 0.0);
181 36 frame.temp_b.assign(block_size, 0.0);
182 36 frame.m1.assign(block_size, 0.0);
183 36 frame.m2.assign(block_size, 0.0);
184 36 frame.m3.assign(block_size, 0.0);
185 36 frame.m4.assign(block_size, 0.0);
186 36 frame.m5.assign(block_size, 0.0);
187 36 frame.m6.assign(block_size, 0.0);
188 36 frame.m7.assign(block_size, 0.0);
189
190 36 frame.stage = 1;
191 36 return;
192 }
193
194 36 case 1: {
195 36 AddInto(frame.a11, frame.a22, frame.temp_a);
196 36 AddInto(frame.b11, frame.b22, frame.temp_b);
197
198 36 stack.emplace_back(frame.temp_a, frame.temp_b, half);
199 36 stack.at(current_index).stage = 2;
200 36 return;
201 }
202
203 36 case 2: {
204 36 frame.m1 = frame.temp_a;
205 36 AddInto(frame.a21, frame.a22, frame.temp_a);
206
207 36 stack.emplace_back(frame.temp_a, frame.b11, half);
208 36 stack.at(current_index).stage = 3;
209 36 return;
210 }
211
212 36 case 3: {
213 36 frame.m2 = frame.temp_a;
214 36 SubInto(frame.b12, frame.b22, frame.temp_b);
215
216 36 stack.emplace_back(frame.a11, frame.temp_b, half);
217 36 stack.at(current_index).stage = 4;
218 36 return;
219 }
220
221 36 case 4: {
222 36 frame.m3 = frame.temp_a;
223 36 SubInto(frame.b21, frame.b11, frame.temp_b);
224
225 36 stack.emplace_back(frame.a22, frame.temp_b, half);
226 36 stack.at(current_index).stage = 5;
227 36 return;
228 }
229
230 36 case 5: {
231 36 frame.m4 = frame.temp_a;
232 36 AddInto(frame.a11, frame.a12, frame.temp_a);
233
234 36 stack.emplace_back(frame.temp_a, frame.b22, half);
235 36 stack.at(current_index).stage = 6;
236 36 return;
237 }
238
239 36 case 6: {
240 36 frame.m5 = frame.temp_a;
241 36 SubInto(frame.a21, frame.a11, frame.temp_a);
242 36 AddInto(frame.b11, frame.b12, frame.temp_b);
243
244 36 stack.emplace_back(frame.temp_a, frame.temp_b, half);
245 36 stack.at(current_index).stage = 7;
246 36 return;
247 }
248
249 36 case 7: {
250 36 frame.m6 = frame.temp_a;
251 36 SubInto(frame.a12, frame.a22, frame.temp_a);
252 36 AddInto(frame.b21, frame.b22, frame.temp_b);
253
254 36 stack.emplace_back(frame.temp_a, frame.temp_b, half);
255 36 stack.at(current_index).stage = 8;
256 36 return;
257 }
258
259 36 case 8: {
260 36 frame.m7 = frame.temp_a;
261
262 36 Matrix c11(block_size);
263
1/4
✓ Branch 1 taken 36 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
36 Matrix c12(block_size);
264
1/4
✓ Branch 1 taken 36 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
36 Matrix c21(block_size);
265
1/4
✓ Branch 1 taken 36 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
36 Matrix c22(block_size);
266
267 36 #pragma omp parallel for default(none) shared(c11, c12, c21, c22, frame, block_size) \
268 schedule(static) if (block_size >= (kParallelThreshold * kParallelThreshold))
269 for (size_t i = 0; i < block_size; ++i) {
270 c11.at(i) = frame.m1.at(i) + frame.m4.at(i) - frame.m5.at(i) + frame.m7.at(i);
271 c12.at(i) = frame.m3.at(i) + frame.m5.at(i);
272 c21.at(i) = frame.m2.at(i) + frame.m4.at(i);
273 c22.at(i) = frame.m1.at(i) - frame.m2.at(i) + frame.m3.at(i) + frame.m6.at(i);
274 }
275
276
1/4
✓ Branch 1 taken 36 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
36 Matrix merged(frame.size * frame.size, 0.0);
277 36 Merge(merged, c11, c12, c21, c22, frame.size);
278
279 stack.pop_back();
280
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 28 times.
36 if (stack.empty()) {
281 final_result = std::move(merged);
282 } else {
283 28 stack.back().temp_a = std::move(merged);
284 }
285 return;
286 }
287
288 default:
289 return;
290 }
291 }
292
293 8 Matrix StrassenMultiply(const Matrix &a_init, const Matrix &b_init, size_t size_init) {
294 8 std::vector<Frame> stack;
295
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 stack.emplace_back(a_init, b_init, size_init);
296
297 8 Matrix result;
298
2/2
✓ Branch 0 taken 548 times.
✓ Branch 1 taken 8 times.
556 while (!stack.empty()) {
299
1/2
✓ Branch 1 taken 548 times.
✗ Branch 2 not taken.
548 ProcessTopFrame(stack, result);
300 }
301 8 return result;
302 8 }
303
304 16 Matrix PadTo(const Matrix &src, size_t n, size_t new_n) {
305
1/2
✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
16 if (new_n == n) {
306 16 return src;
307 }
308 Matrix padded(new_n * new_n, 0.0);
309 #pragma omp parallel for default(none) shared(padded, src, n, new_n) schedule(static) if (new_n >= kParallelThreshold)
310 for (size_t i = 0; i < n; ++i) {
311 const size_t is = i * n;
312 const size_t id = i * new_n;
313 for (size_t j = 0; j < n; ++j) {
314 padded.at(id + j) = src.at(is + j);
315 }
316 }
317 return padded;
318 }
319
320 } // namespace
321
322 12 bool AkhmetovDStrassenDenseDoubleOMP::RunImpl() {
323 const auto &input = GetInput();
324 auto &output = GetOutput();
325
326 12 const size_t n = format::GetN(input);
327 12 const Matrix a = format::GetA(input);
328
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 const Matrix b = format::GetB(input);
329
330
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 8 times.
12 if (n <= kThreshold) {
331
1/4
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
4 output = StandardMultiply(a, b, n);
332 4 return true;
333 }
334
335 const size_t new_n = NextPow2(n);
336
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 const Matrix a_padded = PadTo(a, n, new_n);
337
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 const Matrix b_padded = PadTo(b, n, new_n);
338
339
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 const Matrix result_padded = StrassenMultiply(a_padded, b_padded, new_n);
340
341
1/4
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
8 output.assign(n * n, 0.0);
342
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
8 #pragma omp parallel for default(none) shared(output, result_padded, n, new_n) \
343 schedule(static) if (n >= kParallelThreshold)
344 for (size_t i = 0; i < n; ++i) {
345 const size_t is = i * new_n;
346 const size_t id = i * n;
347 for (size_t j = 0; j < n; ++j) {
348 output.at(id + j) = result_padded.at(is + j);
349 }
350 }
351
352 return true;
353 }
354
355 12 bool AkhmetovDStrassenDenseDoubleOMP::PostProcessingImpl() {
356 const auto &input = GetInput();
357 12 const size_t n = format::GetN(input);
358 12 return GetOutput().size() == n * n;
359 }
360
361 } // namespace akhmetov_daniil_strassen_dense_double
362