GCC Code Coverage Report


Directory: ./
File: tasks/lukin_i_cannon_algorithm/mpi/src/ops_mpi.cpp
Date: 2026-01-10 02:40:41
           Exec  Total  Coverage
Lines:       43    126     34.1%
Functions:    7     12     58.3%
Branches:    21     96     21.9%

Line Branch Exec Source
1 #include "lukin_i_cannon_algorithm/mpi/include/ops_mpi.hpp"
2
3 #include <mpi.h>
4
5 #include <cmath>
6 #include <cstddef>
7 #include <tuple>
8 #include <utility>
9 #include <vector>
10
11 #include "lukin_i_cannon_algorithm/common/include/common.hpp"
12
13 namespace lukin_i_cannon_algorithm {
14
15
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 LukinICannonAlgorithmMPI::LukinICannonAlgorithmMPI(const InType &in) {
16 SetTypeOfTask(GetStaticTypeOfTask());
17
18 8 int rank = 0;
19
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
20
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (rank == 0) {
21 GetInput() = in;
22 }
23 8 GetOutput() = OutType();
24 8 }
25
26 8 bool LukinICannonAlgorithmMPI::ValidationImpl() {
27 8 int rank = 0;
28 8 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
29
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (rank == 0) {
30 4 int proc_count = 0;
31 4 MPI_Comm_size(MPI_COMM_WORLD, &proc_count);
32
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 int grid_size = static_cast<int>(std::floor(std::sqrt(proc_count)));
33 4 int rsize_a = static_cast<int>(std::get<0>(GetInput()).size());
34 4 int rsize_b = static_cast<int>(std::get<1>(GetInput()).size());
35 4 size_ = std::get<2>(GetInput());
36
3/6
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 4 times.
✗ Branch 5 not taken.
4 return (rsize_a > 0) && (rsize_b > 0) && (rsize_a == size_ * size_) && (rsize_a == rsize_b) &&
37
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 (rsize_a % grid_size == 0);
38 }
39 return true;
40 }
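
A worked instance of the checks above (illustrative values, not taken from the report): with 4 MPI processes, grid_size = floor(sqrt(4)) = 2; for 4x4 inputs, size_ = 4 and rsize_a = rsize_b = 16, so rsize_a == size_ * size_, rsize_a == rsize_b, and rsize_a % grid_size == 0 all hold, and rank 0 reports the input as valid.
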
41
42 8 bool LukinICannonAlgorithmMPI::PreProcessingImpl() {
43 8 return true;
44 }
45
46 8 bool LukinICannonAlgorithmMPI::RunImpl() {
47 8 int global_rank = -1;
48 8 int proc_count = 0;
49 8 MPI_Comm_rank(MPI_COMM_WORLD, &global_rank);
50 8 MPI_Comm_size(MPI_COMM_WORLD, &proc_count);
51
52 8 MPI_Bcast(&size_, 1, MPI_INT, 0, MPI_COMM_WORLD);
53
54 // if the process grid would be 1x1 (fewer than 4 processes), fall back to the sequential version
55
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
8 if (proc_count < 4) {
56 8 return RunSeq(global_rank);
57 }
58
59 // a separate communicator is created for the processes that directly take part in the computation
60 return RunCannon(global_rank, proc_count);
61 }
62
63 8 bool LukinICannonAlgorithmMPI::PostProcessingImpl() {
64 8 return true;
65 }
66
67 4 void LukinICannonAlgorithmMPI::MulNSum(const double *a, const double *b, double *c, int size) {
68
2/2
✓ Branch 0 taken 26 times.
✓ Branch 1 taken 4 times.
30 for (int i = 0; i < size; i++) {
69
2/2
✓ Branch 0 taken 228 times.
✓ Branch 1 taken 26 times.
254 for (int k = 0; k < size; k++) {
70 228 double fixed = a[(i * size) + k];
71
2/2
✓ Branch 0 taken 2312 times.
✓ Branch 1 taken 228 times.
2540 for (int j = 0; j < size; j++) {
72 2312 c[(i * size) + j] += fixed * b[(k * size) + j];
73 }
74 }
75 }
76 4 }
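
A minimal standalone sketch of the same i-k-j multiply-accumulate kernel, useful for checking the expected result outside of MPI. The helper name MulNSumSketch, the driver, and the 2x2 values are illustrative only; the loop body mirrors MulNSum above, and the caller must zero-initialize c.

#include <cstdio>
#include <vector>

// Same loop order as MulNSum: fix a[i][k] and stream over row k of b,
// accumulating into row i of c.
void MulNSumSketch(const double *a, const double *b, double *c, int size) {
  for (int i = 0; i < size; i++) {
    for (int k = 0; k < size; k++) {
      double fixed = a[(i * size) + k];
      for (int j = 0; j < size; j++) {
        c[(i * size) + j] += fixed * b[(k * size) + j];
      }
    }
  }
}

int main() {
  std::vector<double> a = {1, 2, 3, 4};
  std::vector<double> b = {5, 6, 7, 8};
  std::vector<double> c(4, 0.0);
  MulNSumSketch(a.data(), b.data(), c.data(), 2);
  // Expected product of the two 2x2 matrices: 19 22 43 50
  std::printf("%g %g %g %g\n", c[0], c[1], c[2], c[3]);
  return 0;
}
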
77
78 8 bool LukinICannonAlgorithmMPI::RunSeq(int global_rank) {
79 8 std::vector<double> c(static_cast<size_t>(size_ * size_));
80
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (global_rank == 0) {
81 double *a = std::get<0>(GetInput()).data();
82 double *b = std::get<1>(GetInput()).data();
83 4 LukinICannonAlgorithmMPI::MulNSum(a, b, c.data(), size_);
84 }
85
1/2
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
8 MPI_Bcast(c.data(), size_ * size_, MPI_DOUBLE, 0, MPI_COMM_WORLD);
86 GetOutput() = std::move(c);
87 8 return true;
88 }
89
90 bool LukinICannonAlgorithmMPI::RunCannon(int global_rank, int proc_count) {
91 int grid_size = static_cast<int>(std::floor(std::sqrt(proc_count)));
92 int working_proc_count = grid_size * grid_size;
93 MPI_Comm mpi_comm_cannon = MPI_COMM_NULL;
94 int color = (global_rank < working_proc_count) ? 0 : MPI_UNDEFINED;
95 MPI_Comm_split(MPI_COMM_WORLD, color, global_rank, &mpi_comm_cannon);
96
97 if (mpi_comm_cannon != MPI_COMM_NULL) {
98 CannonWorkers(mpi_comm_cannon, grid_size, working_proc_count);
99 MPI_Comm_free(&mpi_comm_cannon);
100 } else {
101 CannonNonWorkers();
102 }
103
104 return true;
105 }
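
A minimal sketch of the communicator-split pattern used in RunCannon, assuming it is built and launched with an MPI toolchain (variable names are illustrative). Ranks outside the largest perfect-square prefix pass MPI_UNDEFINED as the color and receive MPI_COMM_NULL from MPI_Comm_split.

#include <mpi.h>
#include <cmath>
#include <cstdio>

int main(int argc, char **argv) {
  MPI_Init(&argc, &argv);
  int rank = 0;
  int procs = 0;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &procs);

  // Keep only the largest grid x grid prefix of ranks as workers.
  int grid = static_cast<int>(std::floor(std::sqrt(procs)));
  int workers = grid * grid;
  int color = (rank < workers) ? 0 : MPI_UNDEFINED;

  MPI_Comm cannon = MPI_COMM_NULL;
  MPI_Comm_split(MPI_COMM_WORLD, color, rank, &cannon);

  if (cannon != MPI_COMM_NULL) {
    std::printf("rank %d is a worker\n", rank);
    MPI_Comm_free(&cannon);
  } else {
    std::printf("rank %d is idle\n", rank);
  }
  MPI_Finalize();
  return 0;
}
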
106
107 void LukinICannonAlgorithmMPI::CannonWorkers(MPI_Comm comm, int grid_size, int working_proc_count) {
108 int cannon_rank = -1;
109 MPI_Comm_rank(comm, &cannon_rank);
110
111 int block_size = size_ / grid_size;
112 int block_elems = block_size * block_size;
113 std::vector<double> a_block(block_elems);
114 std::vector<double> b_block(block_elems);
115 std::vector<double> c_block(block_elems, 0);
116
117 std::vector<double> a_blocks;
118 std::vector<double> b_blocks;
119
120 // manual packing of the input matrices into per-process blocks
121 if (cannon_rank == 0) {
122 a_blocks.resize(static_cast<size_t>(working_proc_count) * static_cast<size_t>(block_elems));
123 b_blocks.resize(static_cast<size_t>(working_proc_count) * static_cast<size_t>(block_elems));
124
125 MatrixPack(a_blocks.data(), b_blocks.data(), working_proc_count, block_elems, block_size, grid_size);
126 }
127
128 MPI_Scatter(a_blocks.data(), block_elems, MPI_DOUBLE, a_block.data(), block_elems, MPI_DOUBLE, 0, comm);
129
130 MPI_Scatter(b_blocks.data(), block_elems, MPI_DOUBLE, b_block.data(), block_elems, MPI_DOUBLE, 0, comm);
131
132 int row = cannon_rank / grid_size;
133 int col = cannon_rank % grid_size;
134
135 // initial skew (alignment shift)
136 int left = (row * grid_size) + ((col - row + grid_size) % grid_size);
137 int right = (row * grid_size) + ((col + row) % grid_size);
138
139 MPI_Sendrecv_replace(a_block.data(), block_elems, MPI_DOUBLE, left, 0, right, 0, comm, MPI_STATUS_IGNORE);
140
141 int up = (((row - col + grid_size) % grid_size) * grid_size) + col;
142 int down = (((row + col) % grid_size) * grid_size) + col;
143
144 MPI_Sendrecv_replace(b_block.data(), block_elems, MPI_DOUBLE, up, 0, down, 0, comm, MPI_STATUS_IGNORE);
145
146 // multiply-and-shift loop
147 for (int iter = 0; iter < grid_size; iter++) {
148 LukinICannonAlgorithmMPI::MulNSum(a_block.data(), b_block.data(), c_block.data(), block_size);
149
150 if (iter < grid_size - 1) {
151 left = (row * grid_size) + ((col - 1 + grid_size) % grid_size);
152 right = (row * grid_size) + ((col + 1) % grid_size);
153
154 MPI_Sendrecv_replace(a_block.data(), block_elems, MPI_DOUBLE, left, 0, right, 0, comm, MPI_STATUS_IGNORE);
155
156 up = (((row - 1 + grid_size) % grid_size) * grid_size) + col;
157 down = (((row + 1) % grid_size) * grid_size) + col;
158
159 MPI_Sendrecv_replace(b_block.data(), block_elems, MPI_DOUBLE, up, 0, down, 0, comm, MPI_STATUS_IGNORE);
160 }
161 }
162
163 // pack the blocks into the result matrix
164 std::vector<double> c_blocks(static_cast<size_t>(size_ * size_));
165 MPI_Gather(c_block.data(), block_elems, MPI_DOUBLE, c_blocks.data(), block_elems, MPI_DOUBLE, 0, comm);
166
167 std::vector<double> c(static_cast<size_t>(size_ * size_));
168 if (cannon_rank == 0) {
169 MatrixUnpack(c.data(), c_blocks.data(), working_proc_count, block_elems, block_size, grid_size);
170 }
171
172 MPI_Bcast(c.data(), size_ * size_, MPI_DOUBLE, 0, MPI_COMM_WORLD);
173 GetOutput() = std::move(c);
174 }
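
A small sketch that tabulates the initial-skew partners computed in CannonWorkers for one grid size (grid_size = 3 here is illustrative). In the MPI_Sendrecv_replace calls above, the first rank argument is the destination and the second is the source, so each worker sends its A block to `left` and receives from `right`, and sends its B block to `up` and receives from `down`.

#include <cstdio>

int main() {
  const int grid_size = 3;  // illustrative grid size
  for (int rank = 0; rank < grid_size * grid_size; rank++) {
    int row = rank / grid_size;
    int col = rank % grid_size;
    // Same formulas as the initial skew in CannonWorkers.
    int left = (row * grid_size) + ((col - row + grid_size) % grid_size);
    int right = (row * grid_size) + ((col + row) % grid_size);
    int up = (((row - col + grid_size) % grid_size) * grid_size) + col;
    int down = (((row + col) % grid_size) * grid_size) + col;
    std::printf("rank %d: A -> %d (from %d), B -> %d (from %d)\n",
                rank, left, right, up, down);
  }
  return 0;
}
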
175
176 void LukinICannonAlgorithmMPI::CannonNonWorkers() {
177 std::vector<double> c(static_cast<size_t>(size_ * size_));
178 MPI_Bcast(c.data(), size_ * size_, MPI_DOUBLE, 0, MPI_COMM_WORLD);
179 GetOutput() = std::move(c);
180 }
181
182 void LukinICannonAlgorithmMPI::MatrixPack(double *a_blocks, double *b_blocks, int working_proc_count, int block_elems,
183 int block_size, int grid_size) {
184 double *a = std::get<0>(GetInput()).data();
185 double *b = std::get<1>(GetInput()).data();
186
187 for (int proc = 0; proc < working_proc_count; proc++) {
188 int proc_i = proc / grid_size;
189 int proc_j = proc % grid_size;
190 int buf_offset = proc * block_elems;
191
192 for (int i = 0; i < block_size; i++) {
193 for (int j = 0; j < block_size; j++) {
194 int global_i = (proc_i * block_size) + i;
195 int global_j = (proc_j * block_size) + j;
196 int global_idx = (global_i * size_) + global_j;
197 int buf_idx = buf_offset + (i * block_size) + j;
198
199 a_blocks[buf_idx] = a[global_idx];
200 b_blocks[buf_idx] = b[global_idx];
201 }
202 }
203 }
204 }
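
A worked instance of the index mapping above (illustrative values): with size_ = 4, grid_size = 2, block_size = 2 and block_elems = 4, take proc = 3 (proc_i = 1, proc_j = 1) and i = j = 0; then global_i = global_j = 2, global_idx = 10 and buf_idx = 12, i.e. element (2,2) of the 4x4 matrix (flat index 10) becomes the first element of block 3 in the packed buffer.
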
205
206 void LukinICannonAlgorithmMPI::MatrixUnpack(double *c, const double *c_blocks, int working_proc_count, int block_elems,
207 int block_size, int grid_size) const {
208 for (int proc = 0; proc < working_proc_count; proc++) {
209 int proc_i = proc / grid_size;
210 int proc_j = proc % grid_size;
211 int buf_offset = proc * block_elems;
212
213 for (int i = 0; i < block_size; i++) {
214 for (int j = 0; j < block_size; j++) {
215 int global_i = (proc_i * block_size) + i;
216 int global_j = (proc_j * block_size) + j;
217 int global_idx = (global_i * size_) + global_j;
218 int buf_idx = buf_offset + (i * block_size) + j;
219
220 c[global_idx] = c_blocks[buf_idx];
221 }
222 }
223 }
224 }
225
226 } // namespace lukin_i_cannon_algorithm
227