GCC Code Coverage Report


Directory: ./
File: tasks/sinev_a_mult_matrix_fox_algorithm/all/src/ops_all.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 24 81 29.6%
Functions: 6 14 42.9%
Branches: 8 70 11.4%

Line Branch Exec Source
1 #include "sinev_a_mult_matrix_fox_algorithm/all/include/ops_all.hpp"
2
3 #include <mpi.h>
4 #include <omp.h>
5
6 #include <cmath>
7 #include <cstddef>
8 #include <vector>
9
10 #include "sinev_a_mult_matrix_fox_algorithm/common/include/common.hpp"
11
12 namespace sinev_a_mult_matrix_fox_algorithm {
13
14
1/2
✓ Branch 1 taken 26 times.
✗ Branch 2 not taken.
26 SinevAMultMatrixFoxAlgorithmALL::SinevAMultMatrixFoxAlgorithmALL(const InType &in) {
15 SetTypeOfTask(GetStaticTypeOfTask());
16
17 GetInput() = in;
18 GetOutput() = {};
19 26 }
20
21 26 bool SinevAMultMatrixFoxAlgorithmALL::ValidationImpl() {
22 const auto &[n, a, b] = GetInput();
23
24
3/6
✓ Branch 0 taken 26 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 26 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 26 times.
26 return (n > 0U) && (a.size() == (n * n)) && (b.size() == (n * n));
25 }
26
27 26 bool SinevAMultMatrixFoxAlgorithmALL::PreProcessingImpl() {
28 const auto &[n, a, b] = GetInput();
29
30 26 GetOutput().resize(n * n, 0.0);
31
32 26 return true;
33 }
34
35 void SinevAMultMatrixFoxAlgorithmALL::SimpleMultiply(size_t n, const std::vector<double> &a,
36 const std::vector<double> &b, std::vector<double> &c) {
37 13 #pragma omp parallel for default(none) shared(n, a, b, c) collapse(2)
38 for (size_t i = 0; i < n; ++i) {
39 for (size_t j = 0; j < n; ++j) {
40 double sum = 0.0;
41
42 for (size_t k = 0; k < n; ++k) {
43 sum += a[(i * n) + k] * b[(k * n) + j];
44 }
45
46 c[(i * n) + j] = sum;
47 }
48 }
49 13 }
50
51 void SinevAMultMatrixFoxAlgorithmALL::DecomposeToBlocks(const std::vector<double> &src, std::vector<double> &dst,
52 size_t n, size_t bs, int q) {
53 #pragma omp parallel for default(none) shared(src, dst, n, bs, q) collapse(2)
54 for (int bi = 0; bi < q; ++bi) {
55 for (int bj = 0; bj < q; ++bj) {
56 const size_t block_offset = static_cast<size_t>((bi * q) + bj) * (bs * bs);
57
58 for (size_t i = 0; i < bs; ++i) {
59 for (size_t j = 0; j < bs; ++j) {
60 const size_t src_idx = ((static_cast<size_t>(bi) * bs + i) * n) + (static_cast<size_t>(bj) * bs + j);
61
62 const size_t dst_idx = block_offset + (i * bs) + j;
63
64 dst[dst_idx] = src[src_idx];
65 }
66 }
67 }
68 }
69 }
70
71 void SinevAMultMatrixFoxAlgorithmALL::AssembleFromBlocks(const std::vector<double> &src, std::vector<double> &dst,
72 size_t n, size_t bs, int q) {
73 #pragma omp parallel for default(none) shared(src, dst, n, bs, q) collapse(2)
74 for (int bi = 0; bi < q; ++bi) {
75 for (int bj = 0; bj < q; ++bj) {
76 const size_t block_offset = static_cast<size_t>((bi * q) + bj) * (bs * bs);
77
78 for (size_t i = 0; i < bs; ++i) {
79 for (size_t j = 0; j < bs; ++j) {
80 const size_t src_idx = block_offset + (i * bs) + j;
81
82 const size_t dst_idx = ((static_cast<size_t>(bi) * bs + i) * n) + (static_cast<size_t>(bj) * bs + j);
83
84 dst[dst_idx] = src[src_idx];
85 }
86 }
87 }
88 }
89 }
90
91 void SinevAMultMatrixFoxAlgorithmALL::LocalMatrixMultiply(const std::vector<double> &local_a,
92 const std::vector<double> &local_b,
93 std::vector<double> &local_c, size_t bs) {
94 #pragma omp parallel for default(none) shared(local_a, local_b, local_c, bs) collapse(2)
95 for (size_t i = 0; i < bs; ++i) {
96 for (size_t j = 0; j < bs; ++j) {
97 double sum = 0.0;
98
99 for (size_t k = 0; k < bs; ++k) {
100 sum += local_a[(i * bs) + k] * local_b[(k * bs) + j];
101 }
102
103 local_c[(i * bs) + j] += sum;
104 }
105 }
106 }
107
108 bool SinevAMultMatrixFoxAlgorithmALL::NeedFallback(size_t n, int q, int world_size) {
109 return ((q * q) != world_size) || ((n % static_cast<size_t>(q)) != 0U);
110 }
111
112 26 void SinevAMultMatrixFoxAlgorithmALL::ExecuteFallback(int rank, size_t n, const std::vector<double> &a,
113 const std::vector<double> &b, std::vector<double> &c) {
114
2/2
✓ Branch 0 taken 13 times.
✓ Branch 1 taken 13 times.
26 if (rank == 0) {
115 SimpleMultiply(n, a, b, c);
116 }
117
118 26 MPI_Bcast(c.data(), static_cast<int>(n * n), MPI_DOUBLE, 0, MPI_COMM_WORLD);
119 26 }
120
121 void SinevAMultMatrixFoxAlgorithmALL::ScatterBlocks(int rank, const std::vector<double> &blocks_a,
122 const std::vector<double> &blocks_b, std::vector<double> &local_a,
123 std::vector<double> &local_b, size_t block_size) {
124 const double *send_a = (rank == 0) ? blocks_a.data() : nullptr;
125
126 const double *send_b = (rank == 0) ? blocks_b.data() : nullptr;
127
128 MPI_Scatter(send_a, static_cast<int>(block_size), MPI_DOUBLE, local_a.data(), static_cast<int>(block_size),
129 MPI_DOUBLE, 0, MPI_COMM_WORLD);
130
131 MPI_Scatter(send_b, static_cast<int>(block_size), MPI_DOUBLE, local_b.data(), static_cast<int>(block_size),
132 MPI_DOUBLE, 0, MPI_COMM_WORLD);
133 }
134
135 void SinevAMultMatrixFoxAlgorithmALL::RunFoxStages(int q, int row, int col, size_t bs, size_t block_size,
136 MPI_Comm row_comm, std::vector<double> &local_a,
137 std::vector<double> &local_b, std::vector<double> &local_c) {
138 std::vector<double> temp_a(block_size);
139
140 for (int step = 0; step < q; ++step) {
141 const int root = (row + step) % q;
142
143 if (col == root) {
144 temp_a = local_a;
145 }
146
147 MPI_Bcast(temp_a.data(), static_cast<int>(block_size), MPI_DOUBLE, root, row_comm);
148
149 LocalMatrixMultiply(temp_a, local_b, local_c, bs);
150
151 const int send_to = (((row - 1 + q) % q) * q) + col;
152
153 const int recv_from = (((row + 1) % q) * q) + col;
154
155 MPI_Sendrecv_replace(local_b.data(), static_cast<int>(block_size), MPI_DOUBLE, send_to, 0, recv_from, 0,
156 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
157 }
158 }
159
160 void SinevAMultMatrixFoxAlgorithmALL::GatherResult(int rank, int world_size, size_t n, size_t bs, size_t block_size,
161 int q, const std::vector<double> &local_c, std::vector<double> &c) {
162 std::vector<double> blocks_c;
163
164 if (rank == 0) {
165 blocks_c.resize(static_cast<size_t>(world_size) * block_size);
166 }
167
168 double *recv_buffer = (rank == 0) ? blocks_c.data() : nullptr;
169
170 MPI_Gather(local_c.data(), static_cast<int>(block_size), MPI_DOUBLE, recv_buffer, static_cast<int>(block_size),
171 MPI_DOUBLE, 0, MPI_COMM_WORLD);
172
173 if (rank == 0) {
174 AssembleFromBlocks(blocks_c, c, n, bs, q);
175 }
176
177 MPI_Bcast(c.data(), static_cast<int>(n * n), MPI_DOUBLE, 0, MPI_COMM_WORLD);
178 }
179
180 26 bool SinevAMultMatrixFoxAlgorithmALL::RunImpl() {
181 26 int rank = 0;
182 26 int world_size = 1;
183
184 26 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
185 26 MPI_Comm_size(MPI_COMM_WORLD, &world_size);
186
187 const auto &[n, a, b] = GetInput();
188
189 auto &c = GetOutput();
190
191
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 26 times.
26 const int q = static_cast<int>(std::sqrt(world_size));
192
193
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 26 times.
26 if (NeedFallback(n, q, world_size)) {
194 26 ExecuteFallback(rank, n, a, b, c);
195
196 26 return true;
197 }
198
199 const size_t bs = n / static_cast<size_t>(q);
200
201 const size_t block_size = bs * bs;
202
203 const int row = rank / q;
204 const int col = rank % q;
205
206 std::vector<double> local_a(block_size);
207 std::vector<double> local_b(block_size);
208 std::vector<double> local_c(block_size, 0.0);
209
210 std::vector<double> blocks_a;
211 std::vector<double> blocks_b;
212
213 if (rank == 0) {
214 blocks_a.resize(static_cast<size_t>(world_size) * block_size);
215
216 blocks_b.resize(static_cast<size_t>(world_size) * block_size);
217
218 DecomposeToBlocks(a, blocks_a, n, bs, q);
219
220 DecomposeToBlocks(b, blocks_b, n, bs, q);
221 }
222
223 ScatterBlocks(rank, blocks_a, blocks_b, local_a, local_b, block_size);
224
225 MPI_Comm row_comm = MPI_COMM_NULL;
226
227 const int color = row;
228 const int key = col;
229
230 MPI_Comm_split(MPI_COMM_WORLD, color, key, &row_comm);
231
232 RunFoxStages(q, row, col, bs, block_size, row_comm, local_a, local_b, local_c);
233
234 GatherResult(rank, world_size, n, bs, block_size, q, local_c, c);
235
236 if (row_comm != MPI_COMM_NULL) {
237 MPI_Comm_free(&row_comm);
238 }
239
240 return true;
241 }
242
243 26 bool SinevAMultMatrixFoxAlgorithmALL::PostProcessingImpl() {
244 26 return true;
245 }
246
247 } // namespace sinev_a_mult_matrix_fox_algorithm
248