GCC Code Coverage Report


Directory: ./
File: tasks/chyokotov_a_dense_matrix_mul_foxs_algorithm/all/src/ops_all.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 111 125 88.8%
Functions: 12 13 92.3%
Branches: 61 110 55.5%

Line Branch Exec Source
1 #include "chyokotov_a_dense_matrix_mul_foxs_algorithm/all/include/ops_all.hpp"
2
3 #include <mpi.h>
4 #include <tbb/blocked_range2d.h>
5 #include <tbb/parallel_for.h>
6
7 #include <algorithm>
8 #include <cmath>
9 #include <cstddef>
10 #include <utility>
11 #include <vector>
12
13 #include "chyokotov_a_dense_matrix_mul_foxs_algorithm/common/include/common.hpp"
14
15 namespace chyokotov_a_dense_matrix_mul_foxs_algorithm {
16
17
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 ChyokotovADenseMatMulFoxAlgorithmALL::ChyokotovADenseMatMulFoxAlgorithmALL(const InType &in) {
18 SetTypeOfTask(GetStaticTypeOfTask());
19 GetInput() = in;
20 GetOutput().clear();
21 8 }
22
23 8 bool ChyokotovADenseMatMulFoxAlgorithmALL::ValidationImpl() {
24 8 return (GetInput().first.size() == GetInput().second.size());
25 }
26
27
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
8 bool ChyokotovADenseMatMulFoxAlgorithmALL::PreProcessingImpl() {
28 GetOutput().clear();
29 8 GetOutput().resize(GetInput().first.size(), 0.0);
30 8 return true;
31 }
32
33 int ChyokotovADenseMatMulFoxAlgorithmALL::CalcPaddedSize(int n, int q) {
34 if (q <= 0) {
35 return n;
36 }
37 return ((n + q - 1) / q) * q;
38 }
39
40 6 void ChyokotovADenseMatMulFoxAlgorithmALL::PadMatrix(const std::vector<double> &src, std::vector<double> &dst,
41 int original_n, int padded_n) {
42 6 dst.assign(static_cast<size_t>(padded_n) * padded_n, 0.0);
43
44
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 6 times.
20 for (int i = 0; i < original_n; ++i) {
45
2/2
✓ Branch 0 taken 42 times.
✓ Branch 1 taken 14 times.
56 for (int j = 0; j < original_n; ++j) {
46 42 dst[(i * padded_n) + j] = src[(i * original_n) + j];
47 }
48 }
49 6 }
50
51 3 void ChyokotovADenseMatMulFoxAlgorithmALL::Multiply(const std::vector<double> &a_block,
52 const std::vector<double> &b_block, std::vector<double> &c_block,
53 int block_size) {
54 6 tbb::parallel_for(tbb::blocked_range2d<int>(0, block_size, 0, block_size),
55 24 [&](const tbb::blocked_range2d<int> &range) {
56
2/2
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 21 times.
42 for (int i = range.rows().begin(); i < range.rows().end(); ++i) {
57
2/2
✓ Branch 0 taken 73 times.
✓ Branch 1 taken 21 times.
94 for (int k = 0; k < block_size; ++k) {
58 73 double temp = a_block[(i * block_size) + k];
59
2/2
✓ Branch 0 taken 73 times.
✓ Branch 1 taken 73 times.
146 for (int j = range.cols().begin(); j < range.cols().end(); ++j) {
60 73 c_block[(i * block_size) + j] += temp * b_block[(k * block_size) + j];
61 }
62 }
63 }
64 21 });
65 3 }
66
67 3 void ChyokotovADenseMatMulFoxAlgorithmALL::DistributeData(MPI_Comm comm, int worker_rank, int worker_size, int q,
68 int block_size, const std::vector<double> &matrix_a_full,
69 const std::vector<double> &matrix_b_full,
70 std::vector<double> &local_a, std::vector<double> &local_b) {
71 3 size_t block_sz = static_cast<size_t>(block_size) * block_size;
72
73
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 if (worker_rank == 0) {
74
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 for (int proc = 0; proc < worker_size; ++proc) {
75 3 int row = proc / q;
76 3 int col = proc % q;
77
78 3 std::vector<double> send_a(block_sz);
79
1/4
✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
3 std::vector<double> send_b(block_sz);
80
81
2/2
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 3 times.
10 for (int i = 0; i < block_size; ++i) {
82
2/2
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 7 times.
28 for (int j = 0; j < block_size; ++j) {
83 21 int a_row = (row * block_size) + i;
84 21 int a_col = (((col + row) % q) * block_size) + j;
85 21 int b_row = (((row + col) % q) * block_size) + i;
86 21 int b_col = (col * block_size) + j;
87
88 21 send_a[(i * block_size) + j] = matrix_a_full[(a_row * block_size * q) + a_col];
89 21 send_b[(i * block_size) + j] = matrix_b_full[(b_row * block_size * q) + b_col];
90 }
91 }
92
93
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 if (proc == 0) {
94 local_a = std::move(send_a);
95 local_b = std::move(send_b);
96 } else {
97 MPI_Send(send_a.data(), static_cast<int>(block_sz), MPI_DOUBLE, proc, 0, comm);
98 MPI_Send(send_b.data(), static_cast<int>(block_sz), MPI_DOUBLE, proc, 1, comm);
99 }
100 }
101 } else {
102 local_a.resize(block_sz);
103 local_b.resize(block_sz);
104 MPI_Recv(local_a.data(), static_cast<int>(block_sz), MPI_DOUBLE, 0, 0, comm, MPI_STATUS_IGNORE);
105 MPI_Recv(local_b.data(), static_cast<int>(block_sz), MPI_DOUBLE, 0, 1, comm, MPI_STATUS_IGNORE);
106 }
107 3 }
108
109 3 void ChyokotovADenseMatMulFoxAlgorithmALL::FoxAlgorithm(MPI_Comm comm, int worker_rank, int q, int block_size,
110 std::vector<double> &local_a, std::vector<double> &local_b,
111 std::vector<double> &local_c) {
112 3 int row = worker_rank / q;
113 3 int col = worker_rank % q;
114
115 3 int left = (row * q) + ((col - 1 + q) % q);
116 3 int right = (row * q) + ((col + 1) % q);
117 3 int up = (((row - 1 + q) % q) * q) + col;
118 3 int down = (((row + 1) % q) * q) + col;
119
120 3 size_t block_sz = static_cast<size_t>(block_size) * block_size;
121 3 std::vector<double> next_a(block_sz);
122
1/4
✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
3 std::vector<double> next_b(block_sz);
123
124
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 for (int step = 0; step < q; ++step) {
125
1/2
✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
3 Multiply(local_a, local_b, local_c, block_size);
126
127
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 if (step < q - 1) {
128 MPI_Sendrecv(local_a.data(), static_cast<int>(block_sz), MPI_DOUBLE, left, 10, next_a.data(),
129 static_cast<int>(block_sz), MPI_DOUBLE, right, 10, comm, MPI_STATUS_IGNORE);
130
131 MPI_Sendrecv(local_b.data(), static_cast<int>(block_sz), MPI_DOUBLE, up, 11, next_b.data(),
132 static_cast<int>(block_sz), MPI_DOUBLE, down, 11, comm, MPI_STATUS_IGNORE);
133
134 local_a.swap(next_a);
135 local_b.swap(next_b);
136 }
137 }
138 3 }
139
140 3 void ChyokotovADenseMatMulFoxAlgorithmALL::CollectResult(MPI_Comm comm, int worker_rank, int worker_size, int q,
141 int block_size, std::vector<double> &flat_result,
142 const std::vector<double> &local_c) {
143 3 int padded_n = q * block_size;
144
145 3 auto fillres = [&](const std::vector<double> &buffer, int row, int col) {
146
2/2
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 3 times.
10 for (int i = 0; i < block_size; ++i) {
147
2/2
✓ Branch 0 taken 21 times.
✓ Branch 1 taken 7 times.
28 for (int j = 0; j < block_size; ++j) {
148 21 int global_row = (row * block_size) + i;
149 21 int global_col = (col * block_size) + j;
150 21 flat_result[(global_row * padded_n) + global_col] = buffer[(i * block_size) + j];
151 }
152 }
153 6 };
154
155
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 if (worker_rank == 0) {
156 3 fillres(local_c, 0, 0);
157
158 3 std::vector<double> recv_buf(static_cast<size_t>(block_size) * block_size);
159
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 for (int proc = 1; proc < worker_size; ++proc) {
160 MPI_Recv(recv_buf.data(), static_cast<int>(recv_buf.size()), MPI_DOUBLE, proc, 20, comm, MPI_STATUS_IGNORE);
161 fillres(recv_buf, proc / q, proc % q);
162 }
163 } else {
164 MPI_Send(local_c.data(), static_cast<int>(local_c.size()), MPI_DOUBLE, 0, 20, comm);
165 }
166 3 }
167
168 8 bool ChyokotovADenseMatMulFoxAlgorithmALL::RunImpl() {
169 8 int rank = 0;
170 8 int size = 1;
171
172 8 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
173 8 MPI_Comm_size(MPI_COMM_WORLD, &size);
174
175 8 int q = static_cast<int>(std::sqrt(static_cast<double>(size)));
176 8 int active = q * q;
177
178 8 int n = 0;
179
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (rank == 0) {
180 4 n = static_cast<int>(std::sqrt(static_cast<double>(GetInput().first.size())));
181 }
182
183 8 MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
184
185
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 if (n == 0) {
186 return true;
187 }
188
189 int padded_n = CalcPaddedSize(n, std::max(1, q));
190 6 int block_size = padded_n / std::max(1, q);
191
192 6 std::vector<double> padded_a;
193 6 std::vector<double> padded_b;
194
195
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 if (rank == 0) {
196
1/2
✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
3 PadMatrix(GetInput().first, padded_a, n, padded_n);
197
1/2
✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
3 PadMatrix(GetInput().second, padded_b, n, padded_n);
198 }
199
200 6 MPI_Comm comm = MPI_COMM_NULL;
201
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 int color = (rank < active) ? 0 : MPI_UNDEFINED;
202
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 MPI_Comm_split(MPI_COMM_WORLD, color, rank, &comm);
203
204
1/4
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
6 std::vector<double> flat_result(static_cast<size_t>(padded_n) * padded_n, 0.0);
205
206
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 if (rank < active) {
207 3 int wrank = 0;
208 3 int wsize = 0;
209
1/2
✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
3 MPI_Comm_rank(comm, &wrank);
210
1/2
✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
3 MPI_Comm_size(comm, &wsize);
211
212 3 size_t block_sz = static_cast<size_t>(block_size) * block_size;
213
1/4
✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
3 std::vector<double> local_a(block_sz);
214
1/4
✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
3 std::vector<double> local_b(block_sz);
215
1/4
✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
3 std::vector<double> local_c(block_sz, 0.0);
216
217
1/2
✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
3 DistributeData(comm, wrank, wsize, q, block_size, padded_a, padded_b, local_a, local_b);
218
1/2
✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
3 FoxAlgorithm(comm, wrank, q, block_size, local_a, local_b, local_c);
219
1/2
✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
3 CollectResult(comm, wrank, wsize, q, block_size, flat_result, local_c);
220
221
1/2
✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
3 MPI_Comm_free(&comm);
222 }
223
224
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 MPI_Bcast(flat_result.data(), padded_n * padded_n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
225
226
1/2
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
6 GetOutput().resize(static_cast<size_t>(n) * n);
227
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 6 times.
20 for (int i = 0; i < n; ++i) {
228
2/2
✓ Branch 0 taken 42 times.
✓ Branch 1 taken 14 times.
56 for (int j = 0; j < n; ++j) {
229 42 GetOutput()[(i * n) + j] = flat_result[(i * padded_n) + j];
230 }
231 }
232
233 return true;
234 }
235
236 8 bool ChyokotovADenseMatMulFoxAlgorithmALL::PostProcessingImpl() {
237 8 return true;
238 }
239
240 } // namespace chyokotov_a_dense_matrix_mul_foxs_algorithm
241