GCC Code Coverage Report


Directory: ./
File: tasks/tabalaev_a_matrix_mul_strassen/all/src/ops_all.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 177 209 84.7%
Functions: 15 15 100.0%
Branches: 94 260 36.2%

Line Branch Exec Source
1 #include "tabalaev_a_matrix_mul_strassen/all/include/ops_all.hpp"
2
3 #include <mpi.h>
4 #include <omp.h>
5
6 #include <algorithm>
7 #include <array>
8 #include <cmath>
9 #include <cstddef>
10 #include <cstdint>
11 #include <stack>
12 #include <utility>
13 #include <vector>
14
15 #include "tabalaev_a_matrix_mul_strassen/common/include/common.hpp"
16 #include "util/include/util.hpp"
17
18 namespace tabalaev_a_matrix_mul_strassen {
19
20 static constexpr size_t kBaseCaseSize = 128;
21
22
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 TabalaevAMatrixMulStrassenALL::TabalaevAMatrixMulStrassenALL(const InType &in) {
23 SetTypeOfTask(GetStaticTypeOfTask());
24
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 GetInput() = in;
25 GetOutput() = {};
26 12 }
27
28 12 bool TabalaevAMatrixMulStrassenALL::ValidationImpl() {
29 12 int rank = 0;
30 12 int size = 1;
31
32 12 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
33 12 MPI_Comm_size(MPI_COMM_WORLD, &size);
34
35 12 omp_set_num_threads(ppc::util::GetNumThreads());
36
37 12 int is_valid = 0;
38
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 if (rank == 0) {
39 const auto &in = GetInput();
40
2/4
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 6 times.
6 bool valid = in.a_rows > 0 && in.a_cols_b_rows > 0 && in.b_cols > 0 &&
41
2/4
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 6 times.
12 in.a.size() == static_cast<size_t>(in.a_rows * in.a_cols_b_rows) &&
42
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 in.b.size() == static_cast<size_t>(in.a_cols_b_rows * in.b_cols);
43 6 is_valid = valid ? 1 : 0;
44 }
45 12 MPI_Bcast(&is_valid, 1, MPI_INT, 0, MPI_COMM_WORLD);
46 12 return is_valid == 1;
47 }
48
49 12 bool TabalaevAMatrixMulStrassenALL::PreProcessingImpl() {
50 GetOutput() = {};
51
52 12 int rank = 0;
53 12 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
54
55
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 if (rank == 0) {
56 const auto &in = GetInput();
57
58 6 a_rows_ = in.a_rows;
59 6 a_cols_b_rows_ = in.a_cols_b_rows;
60 6 b_cols_ = in.b_cols;
61
62 6 size_t max_dim = std::max({a_rows_, a_cols_b_rows_, b_cols_});
63 6 padded_n_ = 1;
64
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 6 times.
34 while (padded_n_ < max_dim) {
65 28 padded_n_ *= 2;
66 }
67
68 6 padded_a_.assign(padded_n_ * padded_n_, 0.0);
69 6 padded_b_.assign(padded_n_ * padded_n_, 0.0);
70
71 auto &padded_a = padded_a_;
72 auto &padded_b = padded_b_;
73 6 size_t a_rows = a_rows_;
74 6 size_t a_cols_b_rows = a_cols_b_rows_;
75 6 size_t b_cols = b_cols_;
76 6 size_t padded_n = padded_n_;
77
78 6 #pragma omp parallel default(none) shared(in, padded_a, padded_b, a_rows, a_cols_b_rows, b_cols, padded_n)
79 {
80 #pragma omp for nowait
81 for (size_t i = 0; i < a_rows; ++i) {
82 for (size_t j = 0; j < a_cols_b_rows; ++j) {
83 padded_a[(i * padded_n) + j] = in.a[(i * a_cols_b_rows) + j];
84 }
85 }
86 #pragma omp for
87 for (size_t i = 0; i < a_cols_b_rows; ++i) {
88 for (size_t j = 0; j < b_cols; ++j) {
89 padded_b[(i * padded_n) + j] = in.b[(i * b_cols) + j];
90 }
91 }
92 }
93 }
94
95 12 return true;
96 }
97
98 12 bool TabalaevAMatrixMulStrassenALL::RunImpl() {
99 12 int rank = 0;
100 12 int size = 0;
101 12 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
102 12 MPI_Comm_size(MPI_COMM_WORLD, &size);
103
104 12 int procs = omp_get_num_procs();
105 12 int threads_per_process = std::max(1, procs / size);
106 12 omp_set_num_threads(threads_per_process);
107
108 12 std::array<uint64_t, 3> dims = {0, 0, 0};
109
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 if (rank == 0) {
110 6 dims[0] = static_cast<uint64_t>(a_rows_);
111 6 dims[1] = static_cast<uint64_t>(b_cols_);
112 6 dims[2] = static_cast<uint64_t>(padded_n_);
113 }
114 12 MPI_Bcast(dims.data(), 3, MPI_UINT64_T, 0, MPI_COMM_WORLD);
115
116
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 if (rank != 0) {
117 6 a_rows_ = static_cast<size_t>(dims[0]);
118 6 b_cols_ = static_cast<size_t>(dims[1]);
119 6 padded_n_ = static_cast<size_t>(dims[2]);
120 }
121
122
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 if (rank == 0) {
123 6 RunMaster(size);
124 } else {
125 6 RunWorker();
126 }
127
128 auto &out = GetOutput();
129
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 if (rank != 0) {
130 6 out.assign(a_rows_ * b_cols_, 0.0);
131 }
132
133 12 MPI_Bcast(out.data(), static_cast<int>(out.size()), MPI_DOUBLE, 0, MPI_COMM_WORLD);
134
135 12 return true;
136 }
137
138 6 void TabalaevAMatrixMulStrassenALL::RunMaster(int size) {
139
3/4
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 2 times.
6 if (size == 1 || padded_n_ <= kBaseCaseSize) {
140 4 MasterBase(size);
141 } else {
142 2 MasterAll(size);
143 }
144
145 auto &out = GetOutput();
146 6 out.assign(a_rows_ * b_cols_, 0.0);
147
148 6 const auto &result_c = result_c_;
149 6 size_t a_rows = a_rows_;
150 6 size_t b_cols = b_cols_;
151 6 size_t padded_n = padded_n_;
152
153 6 #pragma omp parallel for default(none) shared(out, result_c, a_rows, b_cols, padded_n)
154 for (size_t i = 0; i < a_rows; ++i) {
155 for (size_t j = 0; j < b_cols; ++j) {
156 out[(i * b_cols) + j] = result_c[(i * padded_n) + j];
157 }
158 }
159 6 }
160
161 4 void TabalaevAMatrixMulStrassenALL::MasterBase(int size) {
162 4 result_c_ = StrassenMultiply(padded_a_, padded_b_, padded_n_);
163
164 4 uint64_t terminate_signal = 0;
165
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 for (int dest = 1; dest < size; ++dest) {
166 4 MPI_Send(&terminate_signal, 1, MPI_UINT64_T, dest, 0, MPI_COMM_WORLD);
167 }
168 4 }
169
170 2 void TabalaevAMatrixMulStrassenALL::MasterAll(int size) {
171 2 size_t h = padded_n_ / 2;
172
173 2 std::vector<double> a11;
174 2 std::vector<double> a12;
175 2 std::vector<double> a21;
176 2 std::vector<double> a22;
177
178 2 std::vector<double> b11;
179 2 std::vector<double> b12;
180 2 std::vector<double> b21;
181 2 std::vector<double> b22;
182
183
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 SplitMatrix(padded_a_, padded_n_, a11, a12, a21, a22);
184
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 SplitMatrix(padded_b_, padded_n_, b11, b12, b21, b22);
185
186 std::vector<std::vector<double>> task_a = {Add(a11, a22), Add(a21, a22), a11, a22, Add(a11, a12),
187
4/12
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 14 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 14 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
16 Subtract(a21, a11), Subtract(a12, a22)};
188 std::vector<std::vector<double>> task_b = {Add(b11, b22), b11, Subtract(b12, b22), Subtract(b21, b11),
189
4/10
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 14 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 14 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
16 b22, Add(b11, b12), Add(b21, b22)};
190
191
2/4
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
2 std::vector<std::vector<double>> p(7, std::vector<double>(h * h));
192 int num_tasks = 7;
193 2 int tasks_sent = 0;
194 int tasks_completed = 0;
195
196 2 int h_squared = static_cast<int>(h * h);
197
198
3/4
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
4 for (int dest = 1; dest < size && tasks_sent < num_tasks; ++dest) {
199 2 auto h_msg = static_cast<uint64_t>(h);
200
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 MPI_Send(&h_msg, 1, MPI_UINT64_T, dest, 0, MPI_COMM_WORLD);
201
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 MPI_Send(&tasks_sent, 1, MPI_INT, dest, 1, MPI_COMM_WORLD);
202
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 MPI_Send(task_a[tasks_sent].data(), h_squared, MPI_DOUBLE, dest, 2, MPI_COMM_WORLD);
203
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 MPI_Send(task_b[tasks_sent].data(), h_squared, MPI_DOUBLE, dest, 3, MPI_COMM_WORLD);
204 2 tasks_sent++;
205 }
206
207
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 while (tasks_completed < num_tasks) {
208 MPI_Status status;
209 14 int task_id = 0;
210
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 MPI_Recv(&task_id, 1, MPI_INT, MPI_ANY_SOURCE, 4, MPI_COMM_WORLD, &status);
211
212 14 int worker = status.MPI_SOURCE;
213
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 MPI_Recv(p[task_id].data(), h_squared, MPI_DOUBLE, worker, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
214 14 tasks_completed++;
215
216
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 2 times.
14 if (tasks_sent < num_tasks) {
217 12 auto h_msg = static_cast<uint64_t>(h);
218
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 MPI_Send(&h_msg, 1, MPI_UINT64_T, worker, 0, MPI_COMM_WORLD);
219
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 MPI_Send(&tasks_sent, 1, MPI_INT, worker, 1, MPI_COMM_WORLD);
220
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 MPI_Send(task_a[tasks_sent].data(), h_squared, MPI_DOUBLE, worker, 2, MPI_COMM_WORLD);
221
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 MPI_Send(task_b[tasks_sent].data(), h_squared, MPI_DOUBLE, worker, 3, MPI_COMM_WORLD);
222 12 tasks_sent++;
223 }
224 }
225
226 2 uint64_t terminate_signal = 0;
227
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 for (int dest = 1; dest < size; ++dest) {
228
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 MPI_Send(&terminate_signal, 1, MPI_UINT64_T, dest, 0, MPI_COMM_WORLD);
229 }
230
231
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 std::vector<double> c11(h * h);
232
1/4
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
2 std::vector<double> c12(h * h);
233
1/4
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
2 std::vector<double> c21(h * h);
234
1/4
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
2 std::vector<double> c22(h * h);
235
236 2 #pragma omp parallel for default(none) shared(p, c11, c12, c21, c22, h)
237 for (size_t i = 0; i < h * h; ++i) {
238 c11[i] = p[0][i] + p[3][i] - p[4][i] + p[6][i];
239 c12[i] = p[2][i] + p[4][i];
240 c21[i] = p[1][i] + p[3][i];
241 c22[i] = p[0][i] - p[1][i] + p[2][i] + p[5][i];
242 }
243
244
2/6
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
4 result_c_ = CombineMatrix(c11, c12, c21, c22, padded_n_);
245
14/36
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 2 times.
✗ Branch 8 not taken.
✓ Branch 10 taken 2 times.
✗ Branch 11 not taken.
✓ Branch 13 taken 2 times.
✗ Branch 14 not taken.
✓ Branch 16 taken 2 times.
✗ Branch 17 not taken.
✓ Branch 19 taken 2 times.
✗ Branch 20 not taken.
✓ Branch 22 taken 2 times.
✗ Branch 23 not taken.
✓ Branch 25 taken 2 times.
✗ Branch 26 not taken.
✓ Branch 28 taken 2 times.
✗ Branch 29 not taken.
✓ Branch 31 taken 2 times.
✗ Branch 32 not taken.
✓ Branch 34 taken 2 times.
✗ Branch 35 not taken.
✓ Branch 37 taken 2 times.
✗ Branch 38 not taken.
✓ Branch 40 taken 2 times.
✗ Branch 41 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.
8 }
246
247 6 void TabalaevAMatrixMulStrassenALL::RunWorker() {
248 while (true) {
249 20 uint64_t h_msg = 0;
250 MPI_Status status;
251 20 MPI_Recv(&h_msg, 1, MPI_UINT64_T, 0, 0, MPI_COMM_WORLD, &status);
252
253
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 6 times.
20 if (h_msg == 0) {
254 break;
255 }
256 auto h = static_cast<size_t>(h_msg);
257 14 int h_squared = static_cast<int>(h * h);
258
259 14 int task_id = 0;
260 14 MPI_Recv(&task_id, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
261
262 14 std::vector<double> a_sub(h * h);
263
2/6
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 14 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
14 std::vector<double> b_sub(h * h);
264
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 MPI_Recv(a_sub.data(), h_squared, MPI_DOUBLE, 0, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
265
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 MPI_Recv(b_sub.data(), h_squared, MPI_DOUBLE, 0, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
266
267
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 std::vector<double> res = StrassenMultiply(a_sub, b_sub, h);
268
269
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 MPI_Send(&task_id, 1, MPI_INT, 0, 4, MPI_COMM_WORLD);
270
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 MPI_Send(res.data(), h_squared, MPI_DOUBLE, 0, 5, MPI_COMM_WORLD);
271 14 }
272 6 }
273
274 12 bool TabalaevAMatrixMulStrassenALL::PostProcessingImpl() {
275 12 return true;
276 }
277
278 12 std::vector<double> TabalaevAMatrixMulStrassenALL::Add(const std::vector<double> &mat_a,
279 const std::vector<double> &mat_b) {
280 const size_t n = mat_a.size();
281 12 std::vector<double> res(n);
282
283 12 #pragma omp parallel for default(none) shared(mat_a, mat_b, res, n)
284 for (size_t i = 0; i < n; ++i) {
285 res[i] = mat_a[i] + mat_b[i];
286 }
287
288 12 return res;
289 }
290
291 8 std::vector<double> TabalaevAMatrixMulStrassenALL::Subtract(const std::vector<double> &mat_a,
292 const std::vector<double> &mat_b) {
293 const size_t n = mat_a.size();
294 8 std::vector<double> res(n);
295
296 8 #pragma omp parallel for default(none) shared(mat_a, mat_b, res, n)
297 for (size_t i = 0; i < n; ++i) {
298 res[i] = mat_a[i] - mat_b[i];
299 }
300
301 8 return res;
302 }
303
304 18 std::vector<double> TabalaevAMatrixMulStrassenALL::BaseMultiply(const std::vector<double> &mat_a,
305 const std::vector<double> &mat_b, size_t n) {
306 18 std::vector<double> res(n * n, 0.0);
307
308 18 #pragma omp parallel for default(none) shared(mat_a, mat_b, res, n)
309 for (size_t i = 0; i < n; ++i) {
310 for (size_t k = 0; k < n; ++k) {
311 double temp = mat_a[(i * n) + k];
312 if (temp == 0.0) {
313 continue;
314 }
315 for (size_t j = 0; j < n; ++j) {
316 res[(i * n) + j] += temp * mat_b[(k * n) + j];
317 }
318 }
319 }
320
321 18 return res;
322 }
323
324 4 void TabalaevAMatrixMulStrassenALL::SplitMatrix(const std::vector<double> &src, size_t n, std::vector<double> &c11,
325 std::vector<double> &c12, std::vector<double> &c21,
326 std::vector<double> &c22) {
327 4 size_t h = n / 2;
328 4 size_t sz = h * h;
329 4 c11.resize(sz);
330 4 c12.resize(sz);
331 4 c21.resize(sz);
332 4 c22.resize(sz);
333
334 4 #pragma omp parallel for collapse(2) default(none) shared(src, c11, c12, c21, c22, h, n)
335 for (size_t i = 0; i < h; ++i) {
336 for (size_t j = 0; j < h; ++j) {
337 size_t src_idx = (i * n) + j;
338 size_t dst_idx = (i * h) + j;
339 c11[dst_idx] = src[src_idx];
340 c12[dst_idx] = src[src_idx + h];
341 c21[dst_idx] = src[src_idx + (h * n)];
342 c22[dst_idx] = src[src_idx + (h * n) + h];
343 }
344 }
345 4 }
346
347 2 std::vector<double> TabalaevAMatrixMulStrassenALL::CombineMatrix(const std::vector<double> &c11,
348 const std::vector<double> &c12,
349 const std::vector<double> &c21,
350 const std::vector<double> &c22, size_t n) {
351 2 size_t h = n / 2;
352 2 std::vector<double> res(n * n);
353
354 2 #pragma omp parallel for collapse(2) default(none) shared(res, c11, c12, c21, c22, h, n)
355 for (size_t i = 0; i < h; ++i) {
356 for (size_t j = 0; j < h; ++j) {
357 size_t src_idx = (i * h) + j;
358 res[(i * n) + j] = c11[src_idx];
359 res[(i * n) + j + h] = c12[src_idx];
360 res[((i + h) * n) + j] = c21[src_idx];
361 res[((i + h) * n) + j + h] = c22[src_idx];
362 }
363 }
364 2 return res;
365 }
366
367 18 std::vector<double> TabalaevAMatrixMulStrassenALL::StrassenMultiply(const std::vector<double> &mat_a,
368 const std::vector<double> &mat_b, size_t n) {
369 std::stack<StrassenFrameALL> frames;
370 std::stack<std::vector<double>> results;
371
372 18 frames.push({mat_a, mat_b, n, 0});
373
374
2/2
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 18 times.
36 while (!frames.empty()) {
375 StrassenFrameALL current = std::move(frames.top());
376 frames.pop();
377
378
1/2
✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
18 if (current.n <= kBaseCaseSize) {
379
1/2
✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
18 results.push(BaseMultiply(current.mat_a, current.mat_b, current.n));
380 continue;
381 }
382
383 if (current.stage == 8) {
384 std::vector<std::vector<double>> p(7);
385
386 for (int i = 6; i >= 0; --i) {
387 p[i] = std::move(results.top());
388 results.pop();
389 }
390
391 size_t h = current.n / 2;
392 size_t sz = h * h;
393 std::vector<double> c11(sz);
394 std::vector<double> c12(sz);
395 std::vector<double> c21(sz);
396 std::vector<double> c22(sz);
397
398 #pragma omp parallel for default(none) shared(p, c11, c12, c21, c22, sz)
399 for (size_t i = 0; i < sz; ++i) {
400 c11[i] = p[0][i] + p[3][i] - p[4][i] + p[6][i];
401 c12[i] = p[2][i] + p[4][i];
402 c21[i] = p[1][i] + p[3][i];
403 c22[i] = p[0][i] - p[1][i] + p[2][i] + p[5][i];
404 }
405
406 results.push(CombineMatrix(c11, c12, c21, c22, current.n));
407 } else {
408 size_t h = current.n / 2;
409 std::vector<double> a11;
410 std::vector<double> a12;
411 std::vector<double> a21;
412 std::vector<double> a22;
413 std::vector<double> b11;
414 std::vector<double> b12;
415 std::vector<double> b21;
416 std::vector<double> b22;
417
418 SplitMatrix(current.mat_a, current.n, a11, a12, a21, a22);
419 SplitMatrix(current.mat_b, current.n, b11, b12, b21, b22);
420
421 frames.push({{}, {}, current.n, 8});
422
423 frames.push({Subtract(a12, a22), Add(b21, b22), h, 0});
424 frames.push({Subtract(a21, a11), Add(b11, b12), h, 0});
425 frames.push({Add(a11, a12), b22, h, 0});
426 frames.push({a22, Subtract(b21, b11), h, 0});
427 frames.push({a11, Subtract(b12, b22), h, 0});
428 frames.push({Add(a21, a22), b11, h, 0});
429 frames.push({Add(a11, a22), Add(b11, b22), h, 0});
430 }
431 18 }
432
433 18 return std::move(results.top());
434
3/48
✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 18 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 18 times.
✗ Branch 8 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.
18 }
435
436 } // namespace tabalaev_a_matrix_mul_strassen
437