GCC Code Coverage Report


Directory: ./
File: tasks/zyazeva_s_matrix_mult_cannon_alg/all/src/ops_all.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 38 145 26.2%
Functions: 5 14 35.7%
Branches: 24 144 16.7%

Line Branch Exec Source
1 #include "zyazeva_s_matrix_mult_cannon_alg/all/include/ops_all.hpp"
2
3 #include <mpi.h>
4 #include <omp.h>
5
6 #include <cmath>
7 #include <cstddef>
8 #include <utility>
9 #include <vector>
10
11 #include "zyazeva_s_matrix_mult_cannon_alg/common/include/common.hpp"
12
13 namespace zyazeva_s_matrix_mult_cannon_alg {
14
15 bool ZyazevaSMatrixMultCannonAlgALL::IsPerfectSquare(int x) {
16 int root = static_cast<int>(std::sqrt(x));
17 return root * root == x;
18 }
19
20 void ZyazevaSMatrixMultCannonAlgALL::MultiplyBlocks(const std::vector<double> &a, const std::vector<double> &b,
21 std::vector<double> &c, int block_size) {
22 for (int i = 0; i < block_size; ++i) {
23 for (int k = 0; k < block_size; ++k) {
24 const size_t i_idx = static_cast<size_t>(i) * static_cast<size_t>(block_size);
25 const size_t k_idx = static_cast<size_t>(k) * static_cast<size_t>(block_size);
26 double a_ik = a[i_idx + static_cast<size_t>(k)];
27 for (int j = 0; j < block_size; ++j) {
28 c[i_idx + static_cast<size_t>(j)] += a_ik * b[k_idx + static_cast<size_t>(j)];
29 }
30 }
31 }
32 }
33
34
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 ZyazevaSMatrixMultCannonAlgALL::ZyazevaSMatrixMultCannonAlgALL(const InType &in) {
35 SetTypeOfTask(GetStaticTypeOfTask());
36 GetInput() = in;
37 GetOutput() = {};
38
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 MPI_Comm_rank(MPI_COMM_WORLD, &rank_);
39
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 MPI_Comm_size(MPI_COMM_WORLD, &mpi_size_);
40 14 }
41
42 14 bool ZyazevaSMatrixMultCannonAlgALL::ValidationImpl() {
43 14 int valid = 0;
44
2/2
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
14 if (rank_ == 0) {
45 7 const size_t sz = std::get<0>(GetInput());
46 const auto &m1 = std::get<1>(GetInput());
47 const auto &m2 = std::get<2>(GetInput());
48
3/6
✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 7 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 7 times.
7 valid = (sz > 0 && m1.size() == sz * sz && m2.size() == sz * sz) ? 1 : 0;
49 }
50 14 MPI_Bcast(&valid, 1, MPI_INT, 0, MPI_COMM_WORLD);
51 14 return valid != 0;
52 }
53
54 14 bool ZyazevaSMatrixMultCannonAlgALL::PreProcessingImpl() {
55 GetOutput() = {};
56 14 return true;
57 }
58
59 void ZyazevaSMatrixMultCannonAlgALL::RegularMultiplication(const std::vector<double> &m1, const std::vector<double> &m2,
60 std::vector<double> &res, int sz) {
61 7 #pragma omp parallel for default(none) shared(m1, m2, res, sz)
62 for (int i = 0; i < sz; ++i) {
63 const size_t i_offset = static_cast<size_t>(i) * static_cast<size_t>(sz);
64 for (int j = 0; j < sz; ++j) {
65 double sum = 0.0;
66 for (int k = 0; k < sz; ++k) {
67 const size_t k_offset = static_cast<size_t>(k) * static_cast<size_t>(sz);
68 sum += m1[i_offset + static_cast<size_t>(k)] * m2[k_offset + static_cast<size_t>(j)];
69 }
70 res[i_offset + static_cast<size_t>(j)] = sum;
71 }
72 }
73 7 }
74
75 void ZyazevaSMatrixMultCannonAlgALL::InitializeBlocks(const std::vector<double> &m1, const std::vector<double> &m2,
76 std::vector<std::vector<double>> &blocks_a,
77 std::vector<std::vector<double>> &blocks_b, int grid_size,
78 int block_size, size_t grid_size_t, size_t block_size_t,
79 size_t sz_t) {
80 for (int i = 0; i < grid_size; ++i) {
81 for (int j = 0; j < grid_size; ++j) {
82 const size_t block_idx = (static_cast<size_t>(i) * grid_size_t) + static_cast<size_t>(j);
83 blocks_a[block_idx].resize(block_size_t * block_size_t);
84 blocks_b[block_idx].resize(block_size_t * block_size_t);
85
86 for (int bi = 0; bi < block_size; ++bi) {
87 for (int bj = 0; bj < block_size; ++bj) {
88 const size_t global_i = (static_cast<size_t>(i) * block_size_t) + static_cast<size_t>(bi);
89 const size_t global_j = (static_cast<size_t>(j) * block_size_t) + static_cast<size_t>(bj);
90 const size_t local_idx = (static_cast<size_t>(bi) * block_size_t) + static_cast<size_t>(bj);
91
92 blocks_a[block_idx][local_idx] = m1[(global_i * sz_t) + global_j];
93 blocks_b[block_idx][local_idx] = m2[(global_i * sz_t) + global_j];
94 }
95 }
96 }
97 }
98 }
99
100 void ZyazevaSMatrixMultCannonAlgALL::AlignBlocks(const std::vector<std::vector<double>> &blocks_a,
101 const std::vector<std::vector<double>> &blocks_b,
102 std::vector<std::vector<double>> &aligned_a,
103 std::vector<std::vector<double>> &aligned_b, int grid_size,
104 size_t grid_size_t) {
105 #pragma omp parallel for default(none) shared(blocks_a, blocks_b, aligned_a, aligned_b, grid_size, grid_size_t) \
106 collapse(2)
107 for (int i = 0; i < grid_size; ++i) {
108 for (int j = 0; j < grid_size; ++j) {
109 const size_t block_idx = (static_cast<size_t>(i) * grid_size_t) + static_cast<size_t>(j);
110
111 const size_t a_src_idx = (static_cast<size_t>(i) * grid_size_t) + static_cast<size_t>((j + i) % grid_size);
112 aligned_a[block_idx] = blocks_a[a_src_idx];
113
114 const size_t b_src_idx = (static_cast<size_t>((i + j) % grid_size) * grid_size_t) + static_cast<size_t>(j);
115 aligned_b[block_idx] = blocks_b[b_src_idx];
116 }
117 }
118 }
119
120 void ZyazevaSMatrixMultCannonAlgALL::CannonStep(std::vector<std::vector<double>> &aligned_a,
121 std::vector<std::vector<double>> &aligned_b,
122 std::vector<std::vector<double>> &blocks_c, int grid_size,
123 int block_size, size_t grid_size_t, int step) {
124 #pragma omp parallel for default(none) shared(aligned_a, aligned_b, blocks_c, grid_size, block_size, grid_size_t) \
125 collapse(2)
126 for (int i = 0; i < grid_size; ++i) {
127 for (int j = 0; j < grid_size; ++j) {
128 const size_t block_idx = (static_cast<size_t>(i) * grid_size_t) + static_cast<size_t>(j);
129 MultiplyBlocks(aligned_a[block_idx], aligned_b[block_idx], blocks_c[block_idx], block_size);
130 }
131 }
132
133 if (step < grid_size - 1) {
134 std::vector<std::vector<double>> new_aligned_a(grid_size_t * grid_size_t);
135 std::vector<std::vector<double>> new_aligned_b(grid_size_t * grid_size_t);
136
137 #pragma omp parallel for default(none) \
138 shared(aligned_a, aligned_b, new_aligned_a, new_aligned_b, grid_size, grid_size_t) collapse(2)
139 for (int i = 0; i < grid_size; ++i) {
140 for (int j = 0; j < grid_size; ++j) {
141 const size_t block_idx = (static_cast<size_t>(i) * grid_size_t) + static_cast<size_t>(j);
142
143 const size_t a_src_idx = (static_cast<size_t>(i) * grid_size_t) + static_cast<size_t>((j + 1) % grid_size);
144 new_aligned_a[block_idx] = aligned_a[a_src_idx];
145
146 const size_t b_src_idx = (static_cast<size_t>((i + 1) % grid_size) * grid_size_t) + static_cast<size_t>(j);
147 new_aligned_b[block_idx] = aligned_b[b_src_idx];
148 }
149 }
150
151 aligned_a = std::move(new_aligned_a);
152 aligned_b = std::move(new_aligned_b);
153 }
154 }
155
156 void ZyazevaSMatrixMultCannonAlgALL::AssembleResult(const std::vector<std::vector<double>> &blocks_c,
157 std::vector<double> &res_m, int grid_size, int block_size,
158 size_t sz_t, size_t grid_size_t, size_t block_size_t) {
159 #pragma omp parallel for default(none) shared(blocks_c, res_m, grid_size, block_size, sz_t, grid_size_t, block_size_t) \
160 collapse(2)
161 for (int i = 0; i < grid_size; ++i) {
162 for (int j = 0; j < grid_size; ++j) {
163 const size_t block_idx = (static_cast<size_t>(i) * grid_size_t) + static_cast<size_t>(j);
164 const auto &block = blocks_c[block_idx];
165
166 for (int bi = 0; bi < block_size; ++bi) {
167 for (int bj = 0; bj < block_size; ++bj) {
168 const size_t global_i = (static_cast<size_t>(i) * block_size_t) + static_cast<size_t>(bi);
169 const size_t global_j = (static_cast<size_t>(j) * block_size_t) + static_cast<size_t>(bj);
170 const size_t local_idx = (static_cast<size_t>(bi) * block_size_t) + static_cast<size_t>(bj);
171
172 res_m[(global_i * sz_t) + global_j] = block[local_idx];
173 }
174 }
175 }
176 }
177 }
178
179 // Extracted from RunImpl to reduce cognitive complexity
180 void ZyazevaSMatrixMultCannonAlgALL::DistributeBlocks(const std::vector<double> &m1, const std::vector<double> &m2,
181 int grid, int block_size, int block_elems, int sz,
182 std::vector<double> &local_a,
183 std::vector<double> &local_b) const {
184 if (rank_ == 0) {
185 for (int proc = 0; proc < mpi_size_; ++proc) {
186 const int proc_row = proc / grid;
187 const int proc_col = proc % grid;
188
189 std::vector<double> tmp_a(block_elems);
190 std::vector<double> tmp_b(block_elems);
191
192 for (int i = 0; i < block_size; ++i) {
193 for (int j = 0; j < block_size; ++j) {
194 const int gi = (proc_row * block_size) + i;
195 const int gj = (proc_col * block_size) + j;
196 tmp_a[(i * block_size) + j] = m1[(gi * sz) + gj];
197 tmp_b[(i * block_size) + j] = m2[(gi * sz) + gj];
198 }
199 }
200
201 if (proc == 0) {
202 local_a = tmp_a;
203 local_b = tmp_b;
204 } else {
205 MPI_Send(tmp_a.data(), block_elems, MPI_DOUBLE, proc, 0, MPI_COMM_WORLD);
206 MPI_Send(tmp_b.data(), block_elems, MPI_DOUBLE, proc, 1, MPI_COMM_WORLD);
207 }
208 }
209 } else {
210 MPI_Recv(local_a.data(), block_elems, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
211 MPI_Recv(local_b.data(), block_elems, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
212 }
213 }
214
215 void ZyazevaSMatrixMultCannonAlgALL::CollectResult(const std::vector<double> &local_c, std::vector<double> &result,
216 int grid, int block_size, int block_elems, int sz) const {
217 if (rank_ == 0) {
218 for (int proc = 0; proc < mpi_size_; ++proc) {
219 const int proc_row_p = proc / grid;
220 const int proc_col_p = proc % grid;
221
222 std::vector<double> block(block_elems);
223 if (proc == 0) {
224 block = local_c;
225 } else {
226 MPI_Recv(block.data(), block_elems, MPI_DOUBLE, proc, 30, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
227 }
228
229 for (int i = 0; i < block_size; ++i) {
230 for (int j = 0; j < block_size; ++j) {
231 const int gi = (proc_row_p * block_size) + i;
232 const int gj = (proc_col_p * block_size) + j;
233 result[(gi * sz) + gj] = block[(i * block_size) + j];
234 }
235 }
236 }
237 } else {
238 MPI_Send(local_c.data(), block_elems, MPI_DOUBLE, 0, 30, MPI_COMM_WORLD);
239 }
240 }
241
242 14 bool ZyazevaSMatrixMultCannonAlgALL::RunImpl() {
243 14 int sz = 0;
244
245
2/2
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
14 if (rank_ == 0) {
246 7 sz = static_cast<int>(std::get<0>(GetInput()));
247 }
248
249 14 MPI_Bcast(&sz, 1, MPI_INT, 0, MPI_COMM_WORLD);
250
251 14 const auto sz_t = static_cast<size_t>(sz);
252 14 const size_t mat_size = sz_t * sz_t;
253
254 14 std::vector<double> m1(mat_size);
255
1/4
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
14 std::vector<double> m2(mat_size);
256
257
2/2
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
14 if (rank_ == 0) {
258
1/2
✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
7 m1 = std::get<1>(GetInput());
259
1/2
✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
7 m2 = std::get<2>(GetInput());
260 }
261
262
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 MPI_Bcast(m1.data(), static_cast<int>(mat_size), MPI_DOUBLE, 0, MPI_COMM_WORLD);
263
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 MPI_Bcast(m2.data(), static_cast<int>(mat_size), MPI_DOUBLE, 0, MPI_COMM_WORLD);
264
265
1/4
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
14 std::vector<double> result(mat_size, 0.0);
266
267
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14 times.
14 const int grid = static_cast<int>(std::sqrt(mpi_size_));
268
1/6
✗ Branch 0 not taken.
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
14 const bool use_cannon = (grid * grid == mpi_size_) && (grid > 0) && (sz % grid == 0);
269
270 if (!use_cannon) {
271
2/2
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
14 if (rank_ == 0) {
272 7 RegularMultiplication(m1, m2, result, sz);
273 }
274
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 MPI_Bcast(result.data(), static_cast<int>(mat_size), MPI_DOUBLE, 0, MPI_COMM_WORLD);
275
1/2
✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
14 GetOutput() = result;
276 return true;
277 }
278
279 const int block_size = sz / grid;
280 const int block_elems = block_size * block_size;
281
282 std::vector<double> local_a(block_elems);
283 std::vector<double> local_b(block_elems);
284 std::vector<double> local_c(block_elems, 0.0);
285
286 DistributeBlocks(m1, m2, grid, block_size, block_elems, sz, local_a, local_b);
287
288 const int proc_row = rank_ / grid;
289 const int proc_col = rank_ % grid;
290
291 for (int sh = 0; sh < proc_row; ++sh) {
292 const int send_to = (proc_row * grid) + ((proc_col - 1 + grid) % grid);
293 const int recv_from = (proc_row * grid) + ((proc_col + 1) % grid);
294 MPI_Sendrecv_replace(local_a.data(), block_elems, MPI_DOUBLE, send_to, 10, recv_from, 10, MPI_COMM_WORLD,
295 MPI_STATUS_IGNORE);
296 }
297
298 for (int sh = 0; sh < proc_col; ++sh) {
299 const int send_to = (((proc_row - 1 + grid) % grid) * grid) + proc_col;
300 const int recv_from = (((proc_row + 1) % grid) * grid) + proc_col;
301 MPI_Sendrecv_replace(local_b.data(), block_elems, MPI_DOUBLE, send_to, 11, recv_from, 11, MPI_COMM_WORLD,
302 MPI_STATUS_IGNORE);
303 }
304
305 for (int step = 0; step < grid; ++step) {
306 MultiplyBlocks(local_a, local_b, local_c, block_size);
307
308 const int left = (proc_row * grid) + ((proc_col - 1 + grid) % grid);
309 const int right = (proc_row * grid) + ((proc_col + 1) % grid);
310 MPI_Sendrecv_replace(local_a.data(), block_elems, MPI_DOUBLE, left, 20, right, 20, MPI_COMM_WORLD,
311 MPI_STATUS_IGNORE);
312
313 const int up = (((proc_row - 1 + grid) % grid) * grid) + proc_col;
314 const int down = (((proc_row + 1) % grid) * grid) + proc_col;
315 MPI_Sendrecv_replace(local_b.data(), block_elems, MPI_DOUBLE, up, 21, down, 21, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
316 }
317
318 CollectResult(local_c, result, grid, block_size, block_elems, sz);
319
320 MPI_Bcast(result.data(), static_cast<int>(mat_size), MPI_DOUBLE, 0, MPI_COMM_WORLD);
321 GetOutput() = result;
322 return true;
323 }
324
325 14 bool ZyazevaSMatrixMultCannonAlgALL::PostProcessingImpl() {
326 14 return true;
327 }
328
329 } // namespace zyazeva_s_matrix_mult_cannon_alg
330