GCC Code Coverage Report


Directory: ./
File: tasks/muhammadkhon_i_stressen_alg/all/src/ops_all.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 83 191 43.5%
Functions: 9 12 75.0%
Branches: 41 196 20.9%

Line Branch Exec Source
1 #include "muhammadkhon_i_stressen_alg/all/include/ops_all.hpp"
2
3 #include <mpi.h>
4 #include <omp.h>
5
6 #include <algorithm>
7 #include <array>
8 #include <cstddef>
9 #include <cstdint>
10 #include <functional>
11 #include <vector>
12
13 #include "muhammadkhon_i_stressen_alg/common/include/common.hpp"
14 #include "util/include/util.hpp"
15
16 namespace muhammadkhon_i_stressen_alg {
17
18 namespace {
19
20 constexpr std::size_t kCutoff = 64;
21 constexpr std::size_t kBlockSize = 64;
22
23 std::size_t NextPow2(std::size_t x) {
24 6 if (x <= 1) {
25 return 1;
26 }
27 std::size_t p = 1;
28
2/2
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 6 times.
30 while (p < x) {
29 24 p <<= 1;
30 }
31 return p;
32 }
33
34 void ZeroMatrix(double *dst, std::size_t stride, std::size_t n) {
35
2/2
✓ Branch 0 taken 168 times.
✓ Branch 1 taken 6 times.
174 for (std::size_t i = 0; i < n; ++i) {
36 168 std::fill_n(dst + (i * stride), n, 0.0);
37 }
38 }
39
40 void AddToBuffer(const double *a, std::size_t a_stride, const double *b, std::size_t b_stride, double *dst,
41 std::size_t n, double b_coeff) {
42 for (std::size_t i = 0; i < n; ++i) {
43 const double *a_row = a + (i * a_stride);
44 const double *b_row = b + (i * b_stride);
45 double *dst_row = dst + (i * n);
46 for (std::size_t j = 0; j < n; ++j) {
47 dst_row[j] = a_row[j] + (b_coeff * b_row[j]);
48 }
49 }
50 }
51
52 void MulMicroBlock(const double *a, std::size_t a_stride, const double *b, std::size_t b_stride, double *c,
53 std::size_t c_stride, std::size_t i_begin, std::size_t i_end, std::size_t k_begin, std::size_t k_end,
54 std::size_t j_begin, std::size_t j_end) {
55 for (std::size_t i = i_begin; i < i_end; ++i) {
56 double *c_row = c + (i * c_stride);
57 const double *a_row = a + (i * a_stride);
58 for (std::size_t k = k_begin; k < k_end; ++k) {
59 const double aik = a_row[k];
60 const double *b_row = b + (k * b_stride);
61 for (std::size_t j = j_begin; j < j_end; ++j) {
62 c_row[j] += aik * b_row[j];
63 }
64 }
65 }
66 }
67
68 6 void NaiveMulBlocked(const double *a, std::size_t a_stride, const double *b, std::size_t b_stride, double *c,
69 std::size_t c_stride, std::size_t n) {
70 6 ZeroMatrix(c, c_stride, n);
71
72 6 const auto n_signed = static_cast<std::ptrdiff_t>(n);
73 const auto block_signed = static_cast<std::ptrdiff_t>(kBlockSize);
74
75 6 #pragma omp parallel for schedule(static) default(none) \
76 shared(a, a_stride, b, b_stride, c, c_stride, n, n_signed, block_signed)
77 for (std::ptrdiff_t ii = 0; ii < n_signed; ii += block_signed) {
78 const auto ii_usize = static_cast<std::size_t>(ii);
79 const std::size_t i_end = std::min(ii_usize + kBlockSize, n);
80 for (std::size_t kk = 0; kk < n; kk += kBlockSize) {
81 const std::size_t k_end = std::min(kk + kBlockSize, n);
82 for (std::size_t jj = 0; jj < n; jj += kBlockSize) {
83 const std::size_t j_end = std::min(jj + kBlockSize, n);
84 MulMicroBlock(a, a_stride, b, b_stride, c, c_stride, ii_usize, i_end, kk, k_end, jj, j_end);
85 }
86 }
87 }
88 6 }
89
90 void CombineQuadrants(const std::vector<double> &m1, const std::vector<double> &m2, const std::vector<double> &m3,
91 const std::vector<double> &m4, const std::vector<double> &m5, const std::vector<double> &m6,
92 const std::vector<double> &m7, double *c, std::size_t c_stride, std::size_t half) {
93 for (std::size_t i = 0; i < half; ++i) {
94 double *c11 = c + (i * c_stride);
95 double *c12 = c11 + half;
96 double *c21 = c + ((i + half) * c_stride);
97 double *c22 = c21 + half;
98 const double *m1r = m1.data() + (i * half);
99 const double *m2r = m2.data() + (i * half);
100 const double *m3r = m3.data() + (i * half);
101 const double *m4r = m4.data() + (i * half);
102 const double *m5r = m5.data() + (i * half);
103 const double *m6r = m6.data() + (i * half);
104 const double *m7r = m7.data() + (i * half);
105 for (std::size_t j = 0; j < half; ++j) {
106 c11[j] = m1r[j] + m4r[j] - m5r[j] + m7r[j];
107 c12[j] = m3r[j] + m5r[j];
108 c21[j] = m2r[j] + m4r[j];
109 c22[j] = m1r[j] - m2r[j] + m3r[j] + m6r[j];
110 }
111 }
112 }
113
114 // Последовательный Штрассен через std::function (без рекурсивных свободных функций)
115
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 void StrassenSeq(const double *a_in, std::size_t a_stride_in, const double *b_in, std::size_t b_stride_in, double *c_in,
116 std::size_t c_stride_in, std::size_t n_in) {
117 std::function<void(const double *, std::size_t, const double *, std::size_t, double *, std::size_t, std::size_t)>
118 6 impl = [&](const double *a, std::size_t a_stride, const double *b, std::size_t b_stride, double *c,
119 std::size_t c_stride, std::size_t n) {
120
1/2
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
6 if (n <= kCutoff) {
121 6 NaiveMulBlocked(a, a_stride, b, b_stride, c, c_stride, n);
122 6 return;
123 }
124 const std::size_t half = n / 2;
125
126 const double *a11 = a;
127 const double *a12 = a + half;
128 const double *a21 = a + (half * a_stride);
129 const double *a22 = a21 + half;
130 const double *b11 = b;
131 const double *b12 = b + half;
132 const double *b21 = b + (half * b_stride);
133 const double *b22 = b21 + half;
134
135 std::vector<double> lhs(half * half);
136 std::vector<double> rhs(half * half);
137 std::vector<double> m1(half * half);
138 std::vector<double> m2(half * half);
139 std::vector<double> m3(half * half);
140 std::vector<double> m4(half * half);
141 std::vector<double> m5(half * half);
142 std::vector<double> m6(half * half);
143 std::vector<double> m7(half * half);
144
145 AddToBuffer(a11, a_stride, a22, a_stride, lhs.data(), half, 1.0);
146 AddToBuffer(b11, b_stride, b22, b_stride, rhs.data(), half, 1.0);
147 impl(lhs.data(), half, rhs.data(), half, m1.data(), half, half);
148
149 AddToBuffer(a21, a_stride, a22, a_stride, lhs.data(), half, 1.0);
150 impl(lhs.data(), half, b11, b_stride, m2.data(), half, half);
151
152 AddToBuffer(b12, b_stride, b22, b_stride, rhs.data(), half, -1.0);
153 impl(a11, a_stride, rhs.data(), half, m3.data(), half, half);
154
155 AddToBuffer(b21, b_stride, b11, b_stride, rhs.data(), half, -1.0);
156 impl(a22, a_stride, rhs.data(), half, m4.data(), half, half);
157
158 AddToBuffer(a11, a_stride, a12, a_stride, lhs.data(), half, 1.0);
159 impl(lhs.data(), half, b22, b_stride, m5.data(), half, half);
160
161 AddToBuffer(a21, a_stride, a11, a_stride, lhs.data(), half, -1.0);
162 AddToBuffer(b11, b_stride, b12, b_stride, rhs.data(), half, 1.0);
163 impl(lhs.data(), half, rhs.data(), half, m6.data(), half, half);
164
165 AddToBuffer(a12, a_stride, a22, a_stride, lhs.data(), half, -1.0);
166 AddToBuffer(b21, b_stride, b22, b_stride, rhs.data(), half, 1.0);
167 impl(lhs.data(), half, rhs.data(), half, m7.data(), half, half);
168
169 CombineQuadrants(m1, m2, m3, m4, m5, m6, m7, c, c_stride, half);
170 };
171
172
1/2
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
6 impl(a_in, a_stride_in, b_in, b_stride_in, c_in, c_stride_in, n_in);
173 6 }
174
175 // OMP-параллельный Штрассен (верхний уровень через tasks, базовый — OMP parallel for)
176 6 void StrassenOmpLocal(const double *a, std::size_t a_stride, const double *b, std::size_t b_stride, double *c,
177 std::size_t c_stride, std::size_t n) {
178
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
6 if (n <= kCutoff || ppc::util::GetNumThreads() <= 1) {
179 6 StrassenSeq(a, a_stride, b, b_stride, c, c_stride, n);
180 6 return;
181 }
182
183 const std::size_t half = n / 2;
184
185 const double *a11 = a;
186 const double *a12 = a + half;
187 const double *a21 = a + (half * a_stride);
188 const double *a22 = a21 + half;
189 const double *b11 = b;
190 const double *b12 = b + half;
191 const double *b21 = b + (half * b_stride);
192 const double *b22 = b21 + half;
193
194 std::vector<double> m1;
195 std::vector<double> m2;
196 std::vector<double> m3;
197 std::vector<double> m4;
198 std::vector<double> m5;
199 std::vector<double> m6;
200 std::vector<double> m7;
201
202 #pragma omp parallel default(none) \
203 shared(m1, m2, m3, m4, m5, m6, m7, a11, a12, a21, a22, b11, b12, b21, b22, a_stride, b_stride, half)
204 {
205 #pragma omp single nowait
206 {
207 #pragma omp task default(none) shared(m1, a11, a22, b11, b22, a_stride, b_stride, half)
208 {
209 std::vector<double> lhs(half * half);
210 std::vector<double> rhs(half * half);
211 AddToBuffer(a11, a_stride, a22, a_stride, lhs.data(), half, 1.0);
212 AddToBuffer(b11, b_stride, b22, b_stride, rhs.data(), half, 1.0);
213 m1.assign(half * half, 0.0);
214 StrassenSeq(lhs.data(), half, rhs.data(), half, m1.data(), half, half);
215 }
216 #pragma omp task default(none) shared(m2, a21, a22, b11, a_stride, b_stride, half)
217 {
218 std::vector<double> lhs(half * half);
219 AddToBuffer(a21, a_stride, a22, a_stride, lhs.data(), half, 1.0);
220 m2.assign(half * half, 0.0);
221 StrassenSeq(lhs.data(), half, b11, b_stride, m2.data(), half, half);
222 }
223 #pragma omp task default(none) shared(m3, a11, b12, b22, a_stride, b_stride, half)
224 {
225 std::vector<double> rhs(half * half);
226 AddToBuffer(b12, b_stride, b22, b_stride, rhs.data(), half, -1.0);
227 m3.assign(half * half, 0.0);
228 StrassenSeq(a11, a_stride, rhs.data(), half, m3.data(), half, half);
229 }
230 #pragma omp task default(none) shared(m4, a22, b21, b11, a_stride, b_stride, half)
231 {
232 std::vector<double> rhs(half * half);
233 AddToBuffer(b21, b_stride, b11, b_stride, rhs.data(), half, -1.0);
234 m4.assign(half * half, 0.0);
235 StrassenSeq(a22, a_stride, rhs.data(), half, m4.data(), half, half);
236 }
237 #pragma omp task default(none) shared(m5, a11, a12, b22, a_stride, b_stride, half)
238 {
239 std::vector<double> lhs(half * half);
240 AddToBuffer(a11, a_stride, a12, a_stride, lhs.data(), half, 1.0);
241 m5.assign(half * half, 0.0);
242 StrassenSeq(lhs.data(), half, b22, b_stride, m5.data(), half, half);
243 }
244 #pragma omp task default(none) shared(m6, a21, a11, b11, b12, a_stride, b_stride, half)
245 {
246 std::vector<double> lhs(half * half);
247 std::vector<double> rhs(half * half);
248 AddToBuffer(a21, a_stride, a11, a_stride, lhs.data(), half, -1.0);
249 AddToBuffer(b11, b_stride, b12, b_stride, rhs.data(), half, 1.0);
250 m6.assign(half * half, 0.0);
251 StrassenSeq(lhs.data(), half, rhs.data(), half, m6.data(), half, half);
252 }
253 #pragma omp task default(none) shared(m7, a12, a22, b21, b22, a_stride, b_stride, half)
254 {
255 std::vector<double> lhs(half * half);
256 std::vector<double> rhs(half * half);
257 AddToBuffer(a12, a_stride, a22, a_stride, lhs.data(), half, -1.0);
258 AddToBuffer(b21, b_stride, b22, b_stride, rhs.data(), half, 1.0);
259 m7.assign(half * half, 0.0);
260 StrassenSeq(lhs.data(), half, rhs.data(), half, m7.data(), half, half);
261 }
262 #pragma omp taskwait
263 }
264 }
265
266 CombineQuadrants(m1, m2, m3, m4, m5, m6, m7, c, c_stride, half);
267 }
268
269 void AddContribution(double *accum, std::size_t stride, const std::vector<double> &block, std::size_t row_offset,
270 std::size_t col_offset, std::size_t half, double coeff) {
271 for (std::size_t i = 0; i < half; ++i) {
272 double *dst_row = accum + ((row_offset + i) * stride) + col_offset;
273 const double *src_row = block.data() + (i * half);
274 for (std::size_t j = 0; j < half; ++j) {
275 dst_row[j] += coeff * src_row[j];
276 }
277 }
278 }
279
280 void ComputeAssignedProduct(int task_id, const double *a, std::size_t a_stride, const double *b, std::size_t b_stride,
281 std::size_t half, std::vector<double> &local_accum) {
282 const double *a11 = a;
283 const double *a12 = a + half;
284 const double *a21 = a + (half * a_stride);
285 const double *a22 = a21 + half;
286 const double *b11 = b;
287 const double *b12 = b + half;
288 const double *b21 = b + (half * b_stride);
289 const double *b22 = b21 + half;
290
291 std::vector<double> m(half * half, 0.0);
292 std::vector<double> lhs(half * half);
293 std::vector<double> rhs(half * half);
294
295 switch (task_id) {
296 case 0: // M1 = (A11+A22)(B11+B22) -> C11, C22
297 AddToBuffer(a11, a_stride, a22, a_stride, lhs.data(), half, 1.0);
298 AddToBuffer(b11, b_stride, b22, b_stride, rhs.data(), half, 1.0);
299 StrassenOmpLocal(lhs.data(), half, rhs.data(), half, m.data(), half, half);
300 AddContribution(local_accum.data(), half * 2, m, 0, 0, half, 1.0);
301 AddContribution(local_accum.data(), half * 2, m, half, half, half, 1.0);
302 break;
303 case 1: // M2 = (A21+A22)B11 -> C21, C22
304 AddToBuffer(a21, a_stride, a22, a_stride, lhs.data(), half, 1.0);
305 StrassenOmpLocal(lhs.data(), half, b11, b_stride, m.data(), half, half);
306 AddContribution(local_accum.data(), half * 2, m, half, 0, half, 1.0);
307 AddContribution(local_accum.data(), half * 2, m, half, half, half, -1.0);
308 break;
309 case 2: // M3 = A11(B12-B22) -> C12, C22
310 AddToBuffer(b12, b_stride, b22, b_stride, rhs.data(), half, -1.0);
311 StrassenOmpLocal(a11, a_stride, rhs.data(), half, m.data(), half, half);
312 AddContribution(local_accum.data(), half * 2, m, 0, half, half, 1.0);
313 AddContribution(local_accum.data(), half * 2, m, half, half, half, 1.0);
314 break;
315 case 3: // M4 = A22(B21-B11) -> C11, C21
316 AddToBuffer(b21, b_stride, b11, b_stride, rhs.data(), half, -1.0);
317 StrassenOmpLocal(a22, a_stride, rhs.data(), half, m.data(), half, half);
318 AddContribution(local_accum.data(), half * 2, m, 0, 0, half, 1.0);
319 AddContribution(local_accum.data(), half * 2, m, half, 0, half, 1.0);
320 break;
321 case 4: // M5 = (A11+A12)B22 -> C11, C12
322 AddToBuffer(a11, a_stride, a12, a_stride, lhs.data(), half, 1.0);
323 StrassenOmpLocal(lhs.data(), half, b22, b_stride, m.data(), half, half);
324 AddContribution(local_accum.data(), half * 2, m, 0, 0, half, -1.0);
325 AddContribution(local_accum.data(), half * 2, m, 0, half, half, 1.0);
326 break;
327 case 5: // M6 = (A21-A11)(B11+B12) -> C22
328 AddToBuffer(a21, a_stride, a11, a_stride, lhs.data(), half, -1.0);
329 AddToBuffer(b11, b_stride, b12, b_stride, rhs.data(), half, 1.0);
330 StrassenOmpLocal(lhs.data(), half, rhs.data(), half, m.data(), half, half);
331 AddContribution(local_accum.data(), half * 2, m, half, half, half, 1.0);
332 break;
333 case 6: // M7 = (A12-A22)(B21+B22) -> C11
334 AddToBuffer(a12, a_stride, a22, a_stride, lhs.data(), half, -1.0);
335 AddToBuffer(b21, b_stride, b22, b_stride, rhs.data(), half, 1.0);
336 StrassenOmpLocal(lhs.data(), half, rhs.data(), half, m.data(), half, half);
337 AddContribution(local_accum.data(), half * 2, m, 0, 0, half, 1.0);
338 break;
339 default:
340 break;
341 }
342 }
343
344 } // namespace
345
346
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 MuhammadkhonIStressenAlgALL::MuhammadkhonIStressenAlgALL(const InType &in) {
347 SetTypeOfTask(GetStaticTypeOfTask());
348
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 GetInput() = in;
349 GetOutput() = {};
350 12 }
351
352 12 bool MuhammadkhonIStressenAlgALL::ValidationImpl() {
353 12 int rank = 0;
354 12 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
355
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 if (rank != 0) {
356 return true;
357 }
358 const auto &in = GetInput();
359
2/4
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 6 times.
✗ Branch 3 not taken.
6 return in.a_rows > 0 && in.a_cols_b_rows > 0 && in.b_cols > 0 &&
360
2/4
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 6 times.
✗ Branch 3 not taken.
12 in.a.size() == static_cast<size_t>(in.a_rows * in.a_cols_b_rows) &&
361
1/2
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
6 in.b.size() == static_cast<size_t>(in.a_cols_b_rows * in.b_cols);
362 }
363
364 12 bool MuhammadkhonIStressenAlgALL::PreProcessingImpl() {
365 12 int rank = 0;
366 12 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
367
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 if (rank == 0) {
368 GetOutput() = {};
369 const auto &in = GetInput();
370 6 a_rows_ = in.a_rows;
371 6 a_cols_b_rows_ = in.a_cols_b_rows;
372 6 b_cols_ = in.b_cols;
373
374
1/2
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
12 const size_t max_dim = std::max({a_rows_, a_cols_b_rows_, b_cols_});
375 6 padded_n_ = NextPow2(max_dim);
376
377 6 padded_a_.assign(padded_n_ * padded_n_, 0.0);
378 6 padded_b_.assign(padded_n_ * padded_n_, 0.0);
379
380
2/2
✓ Branch 0 taken 165 times.
✓ Branch 1 taken 6 times.
171 for (size_t i = 0; i < a_rows_; ++i) {
381
2/2
✓ Branch 0 taken 8421 times.
✓ Branch 1 taken 165 times.
8586 for (size_t j = 0; j < a_cols_b_rows_; ++j) {
382 8421 padded_a_[(i * padded_n_) + j] = in.a[(i * a_cols_b_rows_) + j];
383 }
384 }
385
2/2
✓ Branch 0 taken 155 times.
✓ Branch 1 taken 6 times.
161 for (size_t i = 0; i < a_cols_b_rows_; ++i) {
386
2/2
✓ Branch 0 taken 8421 times.
✓ Branch 1 taken 155 times.
8576 for (size_t j = 0; j < b_cols_; ++j) {
387 8421 padded_b_[(i * padded_n_) + j] = in.b[(i * b_cols_) + j];
388 }
389 }
390 } else {
391 GetOutput().clear();
392 }
393 12 return true;
394 }
395
396 12 bool MuhammadkhonIStressenAlgALL::RunImpl() {
397 12 int rank = 0;
398 12 int world_size = 1;
399 12 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
400 12 MPI_Comm_size(MPI_COMM_WORLD, &world_size);
401
402 // Broadcast размеры
403 12 std::array<std::uint64_t, 4> dims = {0, 0, 0, 0};
404
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 if (rank == 0) {
405 6 dims[0] = static_cast<std::uint64_t>(a_rows_);
406 6 dims[1] = static_cast<std::uint64_t>(a_cols_b_rows_);
407 6 dims[2] = static_cast<std::uint64_t>(b_cols_);
408 6 dims[3] = static_cast<std::uint64_t>(padded_n_);
409 }
410 12 MPI_Bcast(dims.data(), 4, MPI_UINT64_T, 0, MPI_COMM_WORLD);
411
412
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 if (rank != 0) {
413 6 a_rows_ = static_cast<size_t>(dims[0]);
414 6 a_cols_b_rows_ = static_cast<size_t>(dims[1]);
415 6 b_cols_ = static_cast<size_t>(dims[2]);
416 6 padded_n_ = static_cast<size_t>(dims[3]);
417 6 padded_a_.assign(padded_n_ * padded_n_, 0.0);
418 6 padded_b_.assign(padded_n_ * padded_n_, 0.0);
419 }
420
421 12 MPI_Bcast(padded_a_.data(), static_cast<int>(padded_a_.size()), MPI_DOUBLE, 0, MPI_COMM_WORLD);
422 12 MPI_Bcast(padded_b_.data(), static_cast<int>(padded_b_.size()), MPI_DOUBLE, 0, MPI_COMM_WORLD);
423
424 12 result_c_.assign(padded_n_ * padded_n_, 0.0);
425
426
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
12 if (padded_n_ <= kCutoff || world_size == 1) {
427
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 if (rank == 0) {
428 6 StrassenOmpLocal(padded_a_.data(), padded_n_, padded_b_.data(), padded_n_, result_c_.data(), padded_n_,
429 padded_n_);
430 }
431 } else {
432 const std::size_t half = padded_n_ / 2;
433 std::vector<double> local_c(padded_n_ * padded_n_, 0.0);
434
435 for (int task_id = rank; task_id < 7; task_id += world_size) {
436 ComputeAssignedProduct(task_id, padded_a_.data(), padded_n_, padded_b_.data(), padded_n_, half, local_c);
437 }
438
439 MPI_Reduce(local_c.data(), result_c_.data(), static_cast<int>(result_c_.size()), MPI_DOUBLE, MPI_SUM, 0,
440 MPI_COMM_WORLD);
441 }
442
443 auto &out = GetOutput();
444 12 out.assign(a_rows_ * b_cols_, 0.0);
445
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 if (rank == 0) {
446
2/2
✓ Branch 0 taken 165 times.
✓ Branch 1 taken 6 times.
171 for (size_t i = 0; i < a_rows_; ++i) {
447
2/2
✓ Branch 0 taken 8571 times.
✓ Branch 1 taken 165 times.
8736 for (size_t j = 0; j < b_cols_; ++j) {
448 8571 out[(i * b_cols_) + j] = result_c_[(i * padded_n_) + j];
449 }
450 }
451 }
452 12 MPI_Bcast(out.data(), static_cast<int>(out.size()), MPI_DOUBLE, 0, MPI_COMM_WORLD);
453
454 12 return true;
455 }
456
457 12 bool MuhammadkhonIStressenAlgALL::PostProcessingImpl() {
458 12 return true;
459 }
460
461 } // namespace muhammadkhon_i_stressen_alg
462