GCC Code Coverage Report


Directory: ./
File: tasks/cheremkhin_a_matr_mult_cannon_alg/all/src/ops_all.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 166 166 100.0%
Functions: 12 12 100.0%
Branches: 105 166 63.3%

Line Branch Exec Source
1 #include "cheremkhin_a_matr_mult_cannon_alg/all/include/ops_all.hpp"
2
3 #include <mpi.h>
4 #include <omp.h>
5
6 #include <cmath>
7 #include <cstddef>
8 #include <cstdint>
9 #include <utility>
10 #include <vector>
11
12 #include "cheremkhin_a_matr_mult_cannon_alg/common/include/common.hpp"
13 #include "util/include/util.hpp"
14
15 namespace cheremkhin_a_matr_mult_cannon_alg {
16
17 namespace {
18
19 inline std::size_t Idx(std::size_t n, std::size_t r, std::size_t c) {
20 return (r * n) + c;
21 }
22
23 std::size_t CeilDiv(std::size_t a, std::size_t b) {
24 14 return (a + b - 1) / b;
25 }
26
27 14 int ChooseVirtualGridSize(int world_size) {
28
1/2
✓ Branch 0 taken 14 times.
✗ Branch 1 not taken.
14 if (world_size <= 1) {
29 return 1;
30 }
31
32 14 int grid_dim = static_cast<int>(std::sqrt(static_cast<double>(world_size)));
33
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 14 times.
28 while ((grid_dim * grid_dim) < world_size) {
34 14 ++grid_dim;
35 }
36 return grid_dim;
37 }
38
39 int MakeVirtualRank(int row, int col, int grid_dim) {
40 252 return (row * grid_dim) + col;
41 }
42
43 int GetOwnerRank(int virtual_rank, int world_size) {
44 112 return virtual_rank % world_size;
45 }
46
47 14 std::vector<int> GetOwnedVirtualRanks(int world_rank, int world_size, int grid_dim) {
48 14 std::vector<int> owned_ranks;
49 14 const int virtual_size = grid_dim * grid_dim;
50
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 14 times.
42 for (int virtual_rank = world_rank; virtual_rank < virtual_size; virtual_rank += world_size) {
51 owned_ranks.push_back(virtual_rank);
52 }
53 14 return owned_ranks;
54 }
55
56 void CopyGlobalToPadded(const std::vector<double> &src, std::vector<double> &dst, std::size_t src_n,
57 std::size_t dst_n) {
58 7 const auto src_n64 = static_cast<std::int64_t>(src_n);
59 7 #pragma omp parallel for default(none) schedule(static) shared(src, dst, src_n, dst_n, src_n64)
60 for (std::int64_t i = 0; i < src_n64; ++i) {
61 for (std::size_t j = 0; j < src_n; ++j) {
62 dst[Idx(dst_n, static_cast<std::size_t>(i), j)] = src[Idx(src_n, static_cast<std::size_t>(i), j)];
63 }
64 }
65 7 }
66
67 void CopyPaddedToGlobal(const std::vector<double> &src, std::vector<double> &dst, std::size_t src_n,
68 std::size_t dst_n) {
69 14 const auto dst_n64 = static_cast<std::int64_t>(dst_n);
70 14 #pragma omp parallel for default(none) schedule(static) shared(src, dst, src_n, dst_n, dst_n64)
71 for (std::int64_t i = 0; i < dst_n64; ++i) {
72 for (std::size_t j = 0; j < dst_n; ++j) {
73 dst[Idx(dst_n, static_cast<std::size_t>(i), j)] = src[Idx(src_n, static_cast<std::size_t>(i), j)];
74 }
75 }
76 }
77
78 void ExtractLocalBlock(const std::vector<double> &src, std::vector<double> &block, std::size_t global_n,
79 std::size_t block_n, int block_row, int block_col) {
80 28 const std::size_t row0 = static_cast<std::size_t>(block_row) * block_n;
81 28 const std::size_t col0 = static_cast<std::size_t>(block_col) * block_n;
82 28 const auto block_n64 = static_cast<std::int64_t>(block_n);
83 28 #pragma omp parallel for default(none) schedule(static) shared(src, block, global_n, block_n, row0, col0, block_n64)
84 for (std::int64_t i = 0; i < block_n64; ++i) {
85 const std::size_t src_row = (row0 + static_cast<std::size_t>(i)) * global_n;
86 const std::size_t dst_row = static_cast<std::size_t>(i) * block_n;
87 for (std::size_t j = 0; j < block_n; ++j) {
88 block[dst_row + j] = src[src_row + col0 + j];
89 }
90 }
91 }
92
93 void InsertLocalBlock(const std::vector<double> &block, std::vector<double> &dst, std::size_t global_n,
94 std::size_t block_n, int block_row, int block_col) {
95 28 const std::size_t row0 = static_cast<std::size_t>(block_row) * block_n;
96 28 const std::size_t col0 = static_cast<std::size_t>(block_col) * block_n;
97 28 const auto block_n64 = static_cast<std::int64_t>(block_n);
98
1/2
✓ Branch 0 taken 14 times.
✗ Branch 1 not taken.
14 #pragma omp parallel for default(none) schedule(static) shared(block, dst, global_n, block_n, row0, col0, block_n64)
99 for (std::int64_t i = 0; i < block_n64; ++i) {
100 const std::size_t src_row = static_cast<std::size_t>(i) * block_n;
101 const std::size_t dst_row = (row0 + static_cast<std::size_t>(i)) * global_n;
102 for (std::size_t j = 0; j < block_n; ++j) {
103 dst[dst_row + col0 + j] = block[src_row + j];
104 }
105 }
106 14 }
107
108 void MulAddLocal(const std::vector<double> &a, const std::vector<double> &b, std::vector<double> &c,
109 std::size_t block_n) {
110 56 const auto block_n64 = static_cast<std::int64_t>(block_n);
111
112 56 #pragma omp parallel for default(none) schedule(static) shared(a, b, c, block_n, block_n64)
113 for (std::int64_t ii = 0; ii < block_n64; ++ii) {
114 const auto row = static_cast<std::size_t>(ii);
115 const std::size_t a_row = row * block_n;
116 const std::size_t c_row = row * block_n;
117 double *c_block = c.data() + c_row;
118 for (std::size_t kk = 0; kk < block_n; ++kk) {
119 const double aik = a[a_row + kk];
120 const double *b_block = b.data() + (kk * block_n);
121 for (std::int64_t jj = 0; jj < block_n64; ++jj) {
122 c_block[jj] += aik * b_block[jj];
123 }
124 }
125 }
126 }
127
128 28 struct LocalCell {
129 int virtual_rank = 0;
130 std::vector<double> a;
131 std::vector<double> b;
132 std::vector<double> c;
133 };
134
135 int GetRow(int virtual_rank, int grid_dim) {
136 112 return virtual_rank / grid_dim;
137 }
138
139 int GetCol(int virtual_rank, int grid_dim) {
140 112 return virtual_rank % grid_dim;
141 }
142
143 struct ShiftTargets {
144 int source_rank = 0;
145 int dest_rank = 0;
146 int source_owner = 0;
147 int dest_owner = 0;
148 };
149
150 112 ShiftTargets ComputeShiftTargets(int virtual_rank, const std::vector<int> &owner_by_rank, int grid_dim,
151 bool horizontal_shift) {
152 const int row = GetRow(virtual_rank, grid_dim);
153 const int col = GetCol(virtual_rank, grid_dim);
154
155 ShiftTargets targets;
156
2/2
✓ Branch 0 taken 56 times.
✓ Branch 1 taken 56 times.
112 targets.source_rank = horizontal_shift ? MakeVirtualRank(row, (col + 1) % grid_dim, grid_dim)
157 56 : MakeVirtualRank((row + 1) % grid_dim, col, grid_dim);
158
2/2
✓ Branch 0 taken 56 times.
✓ Branch 1 taken 56 times.
112 targets.dest_rank = horizontal_shift ? MakeVirtualRank(row, (col + grid_dim - 1) % grid_dim, grid_dim)
159 56 : MakeVirtualRank((row + grid_dim - 1) % grid_dim, col, grid_dim);
160 112 targets.source_owner = owner_by_rank[static_cast<std::size_t>(targets.source_rank)];
161 112 targets.dest_owner = owner_by_rank[static_cast<std::size_t>(targets.dest_rank)];
162 112 return targets;
163 }
164
165 28 void ExchangePhase(const std::vector<std::vector<double>> &current_buffers,
166 std::vector<std::vector<double>> &next_buffers, const std::vector<int> &virtual_ranks,
167 const std::vector<int> &owner_by_rank, const std::vector<int> &local_index_by_rank, int grid_dim,
168 int world_rank, int tag_base, bool horizontal_shift) {
169 std::size_t recv_count = 0;
170 std::size_t send_count = 0;
171
2/2
✓ Branch 0 taken 56 times.
✓ Branch 1 taken 28 times.
84 for (int virtual_rank : virtual_ranks) {
172 56 const auto targets = ComputeShiftTargets(virtual_rank, owner_by_rank, grid_dim, horizontal_shift);
173
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 28 times.
56 recv_count += (targets.source_owner != world_rank) ? 1U : 0U;
174
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 28 times.
84 send_count += (targets.dest_owner != world_rank) ? 1U : 0U;
175 }
176
177 28 std::vector<MPI_Request> recv_requests(recv_count, MPI_REQUEST_NULL);
178
1/4
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
28 std::vector<MPI_Request> send_requests(send_count, MPI_REQUEST_NULL);
179 std::size_t recv_idx = 0;
180 std::size_t send_idx = 0;
181
182
2/2
✓ Branch 0 taken 56 times.
✓ Branch 1 taken 28 times.
84 for (std::size_t idx = 0; idx < virtual_ranks.size(); ++idx) {
183 56 const int virtual_rank = virtual_ranks[idx];
184 56 const auto targets = ComputeShiftTargets(virtual_rank, owner_by_rank, grid_dim, horizontal_shift);
185
186
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 28 times.
56 if (targets.source_owner == world_rank) {
187
1/2
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
28 const int local_source_idx = local_index_by_rank[static_cast<std::size_t>(targets.source_rank)];
188
1/2
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
28 next_buffers[idx] = current_buffers[static_cast<std::size_t>(local_source_idx)];
189 } else {
190
1/2
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
28 MPI_Irecv(next_buffers[idx].data(), static_cast<int>(next_buffers[idx].size()), MPI_DOUBLE, targets.source_owner,
191 tag_base + virtual_rank, MPI_COMM_WORLD, &recv_requests[recv_idx]);
192 28 ++recv_idx;
193 }
194
195
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 28 times.
56 if (targets.dest_owner != world_rank) {
196 28 MPI_Isend(current_buffers[idx].data(), static_cast<int>(current_buffers[idx].size()), MPI_DOUBLE,
197
1/2
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
28 targets.dest_owner, tag_base + targets.dest_rank, MPI_COMM_WORLD, &send_requests[send_idx]);
198 28 ++send_idx;
199 }
200 }
201
202
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 14 times.
28 if (!recv_requests.empty()) {
203
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 MPI_Waitall(static_cast<int>(recv_requests.size()), recv_requests.data(), MPI_STATUSES_IGNORE);
204 }
205
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 14 times.
28 if (!send_requests.empty()) {
206
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 MPI_Waitall(static_cast<int>(send_requests.size()), send_requests.data(), MPI_STATUSES_IGNORE);
207 }
208 28 }
209
210 14 void DistributeInitiallyAlignedBlocks(const std::vector<double> &a_global, const std::vector<double> &b_global,
211 std::vector<LocalCell> &local_cells, const std::vector<int> &local_index_by_rank,
212 std::size_t global_n, std::size_t block_n, int grid_dim, int world_rank,
213 int world_size) {
214 constexpr int kTagA = 1000;
215 constexpr int kTagB = 2000;
216
217
2/2
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
14 if (world_rank == 0) {
218
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 7 times.
21 for (int row = 0; row < grid_dim; ++row) {
219
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 14 times.
42 for (int col = 0; col < grid_dim; ++col) {
220 const int virtual_rank = MakeVirtualRank(row, col, grid_dim);
221 const int owner_rank = GetOwnerRank(virtual_rank, world_size);
222 28 const int a_col = (row + col) % grid_dim;
223 const int b_row = (row + col) % grid_dim;
224
225 28 std::vector<double> a_block(block_n * block_n, 0.0);
226
1/4
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
28 std::vector<double> b_block(block_n * block_n, 0.0);
227 ExtractLocalBlock(a_global, a_block, global_n, block_n, row, a_col);
228 ExtractLocalBlock(b_global, b_block, global_n, block_n, b_row, col);
229
230
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 14 times.
28 if (owner_rank == 0) {
231 14 const int local_idx = local_index_by_rank[static_cast<std::size_t>(virtual_rank)];
232 14 local_cells[static_cast<std::size_t>(local_idx)].a = std::move(a_block);
233 14 local_cells[static_cast<std::size_t>(local_idx)].b = std::move(b_block);
234 } else {
235
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 MPI_Send(a_block.data(), static_cast<int>(a_block.size()), MPI_DOUBLE, owner_rank, kTagA + virtual_rank,
236 MPI_COMM_WORLD);
237
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 MPI_Send(b_block.data(), static_cast<int>(b_block.size()), MPI_DOUBLE, owner_rank, kTagB + virtual_rank,
238 MPI_COMM_WORLD);
239 }
240 }
241 }
242 } else {
243
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 7 times.
21 for (auto &cell : local_cells) {
244 14 MPI_Recv(cell.a.data(), static_cast<int>(cell.a.size()), MPI_DOUBLE, 0, kTagA + cell.virtual_rank, MPI_COMM_WORLD,
245 MPI_STATUS_IGNORE);
246 14 MPI_Recv(cell.b.data(), static_cast<int>(cell.b.size()), MPI_DOUBLE, 0, kTagB + cell.virtual_rank, MPI_COMM_WORLD,
247 MPI_STATUS_IGNORE);
248 }
249 }
250 14 }
251
252 14 void ShiftBlocksCannon(std::vector<LocalCell> &local_cells, const std::vector<int> &owner_by_rank, std::size_t block_n,
253 int grid_dim, int world_rank) {
254 constexpr int kShiftATagBase = 3000;
255 constexpr int kShiftBTagBase = 5000;
256
257 14 std::vector<int> virtual_ranks(local_cells.size(), 0);
258
2/6
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 14 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
14 std::vector<int> local_index_by_rank(static_cast<std::size_t>(grid_dim * grid_dim), -1);
259
2/6
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 14 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
14 std::vector<std::vector<double>> current_a(local_cells.size());
260
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 std::vector<std::vector<double>> current_b(local_cells.size());
261
2/4
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 14 times.
✗ Branch 5 not taken.
14 std::vector<std::vector<double>> next_a(local_cells.size(), std::vector<double>(block_n * block_n, 0.0));
262
2/4
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 14 times.
✗ Branch 5 not taken.
14 std::vector<std::vector<double>> next_b(local_cells.size(), std::vector<double>(block_n * block_n, 0.0));
263
264
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 14 times.
42 for (std::size_t idx = 0; idx < local_cells.size(); ++idx) {
265 28 virtual_ranks[idx] = local_cells[idx].virtual_rank;
266 28 local_index_by_rank[static_cast<std::size_t>(local_cells[idx].virtual_rank)] = static_cast<int>(idx);
267
1/2
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
28 current_a[idx] = local_cells[idx].a;
268
1/2
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
28 current_b[idx] = local_cells[idx].b;
269 }
270
271
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 ExchangePhase(current_a, next_a, virtual_ranks, owner_by_rank, local_index_by_rank, grid_dim, world_rank,
272 kShiftATagBase, true);
273
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 ExchangePhase(current_b, next_b, virtual_ranks, owner_by_rank, local_index_by_rank, grid_dim, world_rank,
274 kShiftBTagBase, false);
275
276
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 14 times.
42 for (std::size_t idx = 0; idx < local_cells.size(); ++idx) {
277 28 local_cells[idx].a = std::move(next_a[idx]);
278 28 local_cells[idx].b = std::move(next_b[idx]);
279 }
280 28 }
281
282 14 void GatherResultBlocks(const std::vector<LocalCell> &local_cells, std::vector<double> &global_matrix,
283 const std::vector<int> &local_index_by_rank, std::size_t global_n, std::size_t block_n,
284 int grid_dim, int world_rank, int world_size) {
285 constexpr int kTagC = 7000;
286
287
2/2
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
14 if (world_rank == 0) {
288
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 7 times.
35 for (int virtual_rank = 0; virtual_rank < grid_dim * grid_dim; ++virtual_rank) {
289 28 const int row = virtual_rank / grid_dim;
290 28 const int col = virtual_rank % grid_dim;
291 const int owner_rank = GetOwnerRank(virtual_rank, world_size);
292
293
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 14 times.
28 if (owner_rank == 0) {
294 14 const int local_idx = local_index_by_rank[static_cast<std::size_t>(virtual_rank)];
295 14 InsertLocalBlock(local_cells[static_cast<std::size_t>(local_idx)].c, global_matrix, global_n, block_n, row,
296 col);
297 } else {
298 14 std::vector<double> block(block_n * block_n, 0.0);
299
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 MPI_Recv(block.data(), static_cast<int>(block.size()), MPI_DOUBLE, owner_rank, kTagC + virtual_rank,
300 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
301 InsertLocalBlock(block, global_matrix, global_n, block_n, row, col);
302 }
303 }
304 } else {
305
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 7 times.
21 for (const auto &cell : local_cells) {
306 14 MPI_Send(cell.c.data(), static_cast<int>(cell.c.size()), MPI_DOUBLE, 0, kTagC + cell.virtual_rank,
307 MPI_COMM_WORLD);
308 }
309 }
310 14 }
311
312 } // namespace
313
314
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 CheremkhinAMatrMultCannonAlgALL::CheremkhinAMatrMultCannonAlgALL(const InType &in) {
315 SetTypeOfTask(GetStaticTypeOfTask());
316 GetInput() = in;
317 GetOutput() = {};
318 14 }
319
320 14 bool CheremkhinAMatrMultCannonAlgALL::ValidationImpl() {
321 14 const std::size_t n = std::get<0>(GetInput());
322 const auto &a = std::get<1>(GetInput());
323 const auto &b = std::get<2>(GetInput());
324
3/6
✓ Branch 0 taken 14 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 14 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 14 times.
14 return n > 0 && a.size() == n * n && b.size() == n * n;
325 }
326
327 14 bool CheremkhinAMatrMultCannonAlgALL::PreProcessingImpl() {
328 GetOutput() = {};
329 14 return true;
330 }
331
332 14 bool CheremkhinAMatrMultCannonAlgALL::RunImpl() {
333 14 const std::size_t n = std::get<0>(GetInput());
334 const auto &a_in = std::get<1>(GetInput());
335 const auto &b_in = std::get<2>(GetInput());
336 14 const int requested_threads = ppc::util::GetNumThreads();
337 14 int world_rank = 0;
338 14 int world_size = 0;
339 14 MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
340 14 MPI_Comm_size(MPI_COMM_WORLD, &world_size);
341
342 14 omp_set_num_threads(requested_threads);
343
344 14 const int q = ChooseVirtualGridSize(world_size);
345 14 const int virtual_size = q * q;
346 14 const std::size_t block_n = CeilDiv(n, static_cast<std::size_t>(q));
347 14 const std::size_t padded_n = block_n * static_cast<std::size_t>(q);
348
349 14 std::vector<int> owner_by_rank(static_cast<std::size_t>(virtual_size), 0);
350
2/2
✓ Branch 0 taken 56 times.
✓ Branch 1 taken 14 times.
70 for (int virtual_rank = 0; virtual_rank < virtual_size; ++virtual_rank) {
351 56 owner_by_rank[static_cast<std::size_t>(virtual_rank)] = GetOwnerRank(virtual_rank, world_size);
352 }
353
354
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 const std::vector<int> owned_virtual_ranks = GetOwnedVirtualRanks(world_rank, world_size, q);
355
1/4
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
14 std::vector<int> local_index_by_rank(static_cast<std::size_t>(virtual_size), -1);
356
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 std::vector<LocalCell> local_cells;
357
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 local_cells.reserve(owned_virtual_ranks.size());
358
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 14 times.
42 for (std::size_t idx = 0; idx < owned_virtual_ranks.size(); ++idx) {
359 28 const int virtual_rank = owned_virtual_ranks[idx];
360
1/2
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
28 local_index_by_rank[static_cast<std::size_t>(virtual_rank)] = static_cast<int>(idx);
361 28 LocalCell cell;
362 28 cell.virtual_rank = virtual_rank;
363
1/2
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
28 cell.a.assign(block_n * block_n, 0.0);
364
1/2
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
28 cell.b.assign(block_n * block_n, 0.0);
365
2/4
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 28 times.
✗ Branch 5 not taken.
28 cell.c.assign(block_n * block_n, 0.0);
366 local_cells.push_back(std::move(cell));
367 28 }
368
369 14 std::vector<double> a_padded;
370 14 std::vector<double> b_padded;
371
2/2
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
14 if (world_rank == 0) {
372
1/2
✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
7 a_padded.assign(padded_n * padded_n, 0.0);
373
1/2
✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
7 b_padded.assign(padded_n * padded_n, 0.0);
374 CopyGlobalToPadded(a_in, a_padded, n, padded_n);
375 CopyGlobalToPadded(b_in, b_padded, n, padded_n);
376 }
377
378
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 DistributeInitiallyAlignedBlocks(a_padded, b_padded, local_cells, local_index_by_rank, padded_n, block_n, q,
379 world_rank, world_size);
380
381
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 14 times.
42 for (int step = 0; step < q; ++step) {
382
2/2
✓ Branch 0 taken 56 times.
✓ Branch 1 taken 28 times.
84 for (auto &cell : local_cells) {
383 56 MulAddLocal(cell.a, cell.b, cell.c, block_n);
384 }
385
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 14 times.
28 if (step + 1 < q) {
386
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 ShiftBlocksCannon(local_cells, owner_by_rank, block_n, q, world_rank);
387 }
388 }
389
390
1/4
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
14 std::vector<double> c_padded(padded_n * padded_n, 0.0);
391
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 GatherResultBlocks(local_cells, c_padded, local_index_by_rank, padded_n, block_n, q, world_rank, world_size);
392
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 MPI_Bcast(c_padded.data(), static_cast<int>(c_padded.size()), MPI_DOUBLE, 0, MPI_COMM_WORLD);
393
394
1/4
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
14 std::vector<double> out(n * n, 0.0);
395 CopyPaddedToGlobal(c_padded, out, padded_n, n);
396
397 GetOutput() = std::move(out);
398 14 return true;
399 14 }
400
401 14 bool CheremkhinAMatrMultCannonAlgALL::PostProcessingImpl() {
402 14 return true;
403 }
404
405 } // namespace cheremkhin_a_matr_mult_cannon_alg
406