GCC Code Coverage Report


Directory: ./
File: tasks/tabalaev_a_matrix_mul_strassen/omp/src/ops_omp.cpp
Date: 2026-04-02 17:12:27
Exec Total Coverage
Lines: 99 99 100.0%
Functions: 11 11 100.0%
Branches: 53 106 50.0%

Line Branch Exec Source
1 #include "tabalaev_a_matrix_mul_strassen/omp/include/ops_omp.hpp"
2
3 #include <algorithm>
4 #include <cmath>
5 #include <cstddef>
6 #include <stack>
7 #include <utility>
8 #include <vector>
9
10 #include "tabalaev_a_matrix_mul_strassen/common/include/common.hpp"
11
12 namespace tabalaev_a_matrix_mul_strassen {
13
14 static constexpr size_t kBaseCaseSize = 128;
15
16
1/2
✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
24 TabalaevAMatrixMulStrassenOMP::TabalaevAMatrixMulStrassenOMP(const InType &in) {
17 SetTypeOfTask(GetStaticTypeOfTask());
18
1/2
✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
24 GetInput() = in;
19 GetOutput() = {};
20 24 }
21
22 24 bool TabalaevAMatrixMulStrassenOMP::ValidationImpl() {
23 const auto &in = GetInput();
24
2/4
✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 24 times.
24 return in.a_rows > 0 && in.a_cols_b_rows > 0 && in.b_cols > 0 &&
25
2/4
✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 24 times.
48 in.a.size() == static_cast<size_t>(in.a_rows * in.a_cols_b_rows) &&
26
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
24 in.b.size() == static_cast<size_t>(in.a_cols_b_rows * in.b_cols);
27 }
28
29 24 bool TabalaevAMatrixMulStrassenOMP::PreProcessingImpl() {
30 GetOutput() = {};
31 const auto &in = GetInput();
32
33 24 a_rows_ = in.a_rows;
34 24 a_cols_b_rows_ = in.a_cols_b_rows;
35 24 b_cols_ = in.b_cols;
36
37 24 size_t max_dim = std::max({a_rows_, a_cols_b_rows_, b_cols_});
38 24 padded_n_ = 1;
39
2/2
✓ Branch 0 taken 112 times.
✓ Branch 1 taken 24 times.
136 while (padded_n_ < max_dim) {
40 112 padded_n_ *= 2;
41 }
42
43 24 padded_a_.assign(padded_n_ * padded_n_, 0.0);
44 24 padded_b_.assign(padded_n_ * padded_n_, 0.0);
45
46 auto &padded_a = padded_a_;
47 auto &padded_b = padded_b_;
48 24 size_t a_rows = a_rows_;
49 24 size_t a_cols_b_rows = a_cols_b_rows_;
50 24 size_t b_cols = b_cols_;
51 24 size_t padded_n = padded_n_;
52
53 24 #pragma omp parallel default(none) shared(in, padded_a, padded_b, a_rows, a_cols_b_rows, b_cols, padded_n)
54 {
55 #pragma omp for nowait
56 for (size_t i = 0; i < a_rows; ++i) {
57 for (size_t j = 0; j < a_cols_b_rows; ++j) {
58 padded_a[(i * padded_n) + j] = in.a[(i * a_cols_b_rows) + j];
59 }
60 }
61 #pragma omp for
62 for (size_t i = 0; i < a_cols_b_rows; ++i) {
63 for (size_t j = 0; j < b_cols; ++j) {
64 padded_b[(i * padded_n) + j] = in.b[(i * b_cols) + j];
65 }
66 }
67 }
68
69 24 return true;
70 }
71
72 24 bool TabalaevAMatrixMulStrassenOMP::RunImpl() {
73 48 result_c_ = StrassenMultiply(padded_a_, padded_b_, padded_n_);
74
75 auto &out = GetOutput();
76 24 out.assign(a_rows_ * b_cols_, 0.0);
77
78 const auto &result_c = result_c_;
79 24 size_t a_rows = a_rows_;
80 24 size_t b_cols = b_cols_;
81 24 size_t padded_n = padded_n_;
82
83 24 #pragma omp parallel for default(none) shared(out, result_c, a_rows, b_cols, padded_n)
84 for (size_t i = 0; i < a_rows; ++i) {
85 for (size_t j = 0; j < b_cols; ++j) {
86 out[(i * b_cols) + j] = result_c[(i * padded_n) + j];
87 }
88 }
89
90 24 return true;
91 }
92
93 24 bool TabalaevAMatrixMulStrassenOMP::PostProcessingImpl() {
94 24 return true;
95 }
96
97 48 std::vector<double> TabalaevAMatrixMulStrassenOMP::Add(const std::vector<double> &mat_a,
98 const std::vector<double> &mat_b) {
99 const size_t n = mat_a.size();
100 48 std::vector<double> res(n);
101
102 48 #pragma omp parallel for default(none) shared(mat_a, mat_b, res, n)
103 for (size_t i = 0; i < n; ++i) {
104 res[i] = mat_a[i] + mat_b[i];
105 }
106
107 48 return res;
108 }
109
110 32 std::vector<double> TabalaevAMatrixMulStrassenOMP::Subtract(const std::vector<double> &mat_a,
111 const std::vector<double> &mat_b) {
112 const size_t n = mat_a.size();
113 32 std::vector<double> res(mat_a.size());
114
115 32 #pragma omp parallel for default(none) shared(mat_a, mat_b, res, n)
116 for (size_t i = 0; i < n; ++i) {
117 res[i] = mat_a[i] - mat_b[i];
118 }
119
120 32 return res;
121 }
122
123 72 std::vector<double> TabalaevAMatrixMulStrassenOMP::BaseMultiply(const std::vector<double> &mat_a,
124 const std::vector<double> &mat_b, size_t n) {
125 72 std::vector<double> res(n * n, 0.0);
126
127 72 #pragma omp parallel for default(none) shared(mat_a, mat_b, res, n)
128 for (size_t i = 0; i < n; ++i) {
129 for (size_t k = 0; k < n; ++k) {
130 double temp = mat_a[(i * n) + k];
131 if (temp == 0.0) {
132 continue;
133 }
134 for (size_t j = 0; j < n; ++j) {
135 res[(i * n) + j] += temp * mat_b[(k * n) + j];
136 }
137 }
138 }
139
140 72 return res;
141 }
142
143 16 void TabalaevAMatrixMulStrassenOMP::SplitMatrix(const std::vector<double> &src, size_t n, std::vector<double> &c11,
144 std::vector<double> &c12, std::vector<double> &c21,
145 std::vector<double> &c22) {
146 16 size_t h = n / 2;
147 16 size_t sz = h * h;
148 16 c11.resize(sz);
149 16 c12.resize(sz);
150 16 c21.resize(sz);
151 16 c22.resize(sz);
152
153 16 #pragma omp parallel for collapse(2) default(none) shared(src, c11, c12, c21, c22, h, n)
154 for (size_t i = 0; i < h; ++i) {
155 for (size_t j = 0; j < h; ++j) {
156 size_t src_idx = (i * n) + j;
157 size_t dst_idx = (i * h) + j;
158 c11[dst_idx] = src[src_idx];
159 c12[dst_idx] = src[src_idx + h];
160 c21[dst_idx] = src[src_idx + (h * n)];
161 c22[dst_idx] = src[src_idx + (h * n) + h];
162 }
163 }
164 16 }
165
166 8 std::vector<double> TabalaevAMatrixMulStrassenOMP::CombineMatrix(const std::vector<double> &c11,
167 const std::vector<double> &c12,
168 const std::vector<double> &c21,
169 const std::vector<double> &c22, size_t n) {
170 8 size_t h = n / 2;
171 8 std::vector<double> res(n * n);
172
173 8 #pragma omp parallel for collapse(2) default(none) shared(res, c11, c12, c21, c22, h, n)
174 for (size_t i = 0; i < h; ++i) {
175 for (size_t j = 0; j < h; ++j) {
176 size_t src_idx = (i * h) + j;
177 res[(i * n) + j] = c11[src_idx];
178 res[(i * n) + j + h] = c12[src_idx];
179 res[((i + h) * n) + j] = c21[src_idx];
180 res[((i + h) * n) + j + h] = c22[src_idx];
181 }
182 }
183 8 return res;
184 }
185
186 24 std::vector<double> TabalaevAMatrixMulStrassenOMP::StrassenMultiply(const std::vector<double> &mat_a,
187 const std::vector<double> &mat_b, size_t n) {
188 std::stack<StrassenFrameOMP> frames;
189 std::stack<std::vector<double>> results;
190
191 24 frames.push({mat_a, mat_b, n, 0});
192
193
2/2
✓ Branch 0 taken 88 times.
✓ Branch 1 taken 24 times.
112 while (!frames.empty()) {
194 StrassenFrameOMP current = std::move(frames.top());
195 frames.pop();
196
197
2/2
✓ Branch 0 taken 72 times.
✓ Branch 1 taken 16 times.
88 if (current.n <= kBaseCaseSize) {
198
1/2
✓ Branch 1 taken 72 times.
✗ Branch 2 not taken.
72 results.push(BaseMultiply(current.mat_a, current.mat_b, current.n));
199 continue;
200 }
201
202
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
16 if (current.stage == 8) {
203
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 std::vector<std::vector<double>> p(7);
204
205
2/2
✓ Branch 0 taken 56 times.
✓ Branch 1 taken 8 times.
64 for (int i = 6; i >= 0; --i) {
206 56 p[i] = std::move(results.top());
207 results.pop();
208 }
209
210 8 size_t h = current.n / 2;
211 8 size_t sz = h * h;
212
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 std::vector<double> c11(sz);
213
1/4
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
8 std::vector<double> c12(sz);
214
1/4
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
8 std::vector<double> c21(sz);
215
1/4
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
8 std::vector<double> c22(sz);
216
217 8 #pragma omp parallel for default(none) shared(p, c11, c12, c21, c22, sz)
218 for (size_t i = 0; i < sz; ++i) {
219 c11[i] = p[0][i] + p[3][i] - p[4][i] + p[6][i];
220 c12[i] = p[2][i] + p[4][i];
221 c21[i] = p[1][i] + p[3][i];
222 c22[i] = p[0][i] - p[1][i] + p[2][i] + p[5][i];
223 }
224
225
2/6
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 8 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
16 results.push(CombineMatrix(c11, c12, c21, c22, current.n));
226 8 } else {
227 8 size_t h = current.n / 2;
228 8 std::vector<double> a11;
229 8 std::vector<double> a12;
230 8 std::vector<double> a21;
231 8 std::vector<double> a22;
232 8 std::vector<double> b11;
233 8 std::vector<double> b12;
234 8 std::vector<double> b21;
235 8 std::vector<double> b22;
236
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 SplitMatrix(current.mat_a, current.n, a11, a12, a21, a22);
237
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 SplitMatrix(current.mat_b, current.n, b11, b12, b21, b22);
238
239
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
16 frames.push({{}, {}, current.n, 8});
240
241 8 frames.push({Subtract(a12, a22), Add(b21, b22), h, 0});
242 8 frames.push({Subtract(a21, a11), Add(b11, b12), h, 0});
243 8 frames.push({Add(a11, a12), b22, h, 0});
244 8 frames.push({a22, Subtract(b21, b11), h, 0});
245 8 frames.push({a11, Subtract(b12, b22), h, 0});
246 8 frames.push({Add(a21, a22), b11, h, 0});
247
1/4
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
8 frames.push({Add(a11, a22), Add(b11, b22), h, 0});
248 }
249 88 }
250
251 24 return std::move(results.top());
252
24/48
✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 24 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 24 times.
✗ Branch 8 not taken.
✓ Branch 10 taken 8 times.
✗ Branch 11 not taken.
✓ Branch 13 taken 8 times.
✗ Branch 14 not taken.
✓ Branch 16 taken 8 times.
✗ Branch 17 not taken.
✓ Branch 19 taken 8 times.
✗ Branch 20 not taken.
✓ Branch 22 taken 8 times.
✗ Branch 23 not taken.
✓ Branch 25 taken 8 times.
✗ Branch 26 not taken.
✓ Branch 28 taken 8 times.
✗ Branch 29 not taken.
✓ Branch 31 taken 8 times.
✗ Branch 32 not taken.
✓ Branch 34 taken 8 times.
✗ Branch 35 not taken.
✓ Branch 37 taken 8 times.
✗ Branch 38 not taken.
✓ Branch 40 taken 8 times.
✗ Branch 41 not taken.
✓ Branch 43 taken 8 times.
✗ Branch 44 not taken.
✓ Branch 46 taken 8 times.
✗ Branch 47 not taken.
✓ Branch 49 taken 8 times.
✗ Branch 50 not taken.
✓ Branch 52 taken 8 times.
✗ Branch 53 not taken.
✓ Branch 55 taken 8 times.
✗ Branch 56 not taken.
✓ Branch 58 taken 8 times.
✗ Branch 59 not taken.
✓ Branch 61 taken 8 times.
✗ Branch 62 not taken.
✓ Branch 64 taken 8 times.
✗ Branch 65 not taken.
✓ Branch 67 taken 8 times.
✗ Branch 68 not taken.
✓ Branch 70 taken 8 times.
✗ Branch 71 not taken.
80 }
253
254 } // namespace tabalaev_a_matrix_mul_strassen
255