GCC Code Coverage Report


Directory: ./
File: tasks/zyazeva_s_matrix_mult_cannon_alg/omp/src/ops_omp.cpp
Date: 2026-04-02 17:12:27
Exec Total Coverage
Lines: 67 74 90.5%
Functions: 8 12 66.7%
Branches: 43 62 69.4%

Line Branch Exec Source
1 #include "zyazeva_s_matrix_mult_cannon_alg/omp/include/ops_omp.hpp"
2
3 #include <omp.h>
4
5 #include <cmath>
6 #include <cstddef>
7 #include <utility>
8 #include <vector>
9
10 #include "zyazeva_s_matrix_mult_cannon_alg/common/include/common.hpp"
11
12 namespace zyazeva_s_matrix_mult_cannon_alg {
13
14 bool ZyazevaSMatrixMultCannonAlgOMP::IsPerfectSquare(int x) {
15 28 int root = static_cast<int>(std::sqrt(x));
16 28 return root * root == x;
17 }
18
19 23 void ZyazevaSMatrixMultCannonAlgOMP::MultiplyBlocks(const std::vector<double> &a, const std::vector<double> &b,
20 std::vector<double> &c, int block_size) {
21
2/2
✓ Branch 0 taken 98 times.
✓ Branch 1 taken 23 times.
121 for (int i = 0; i < block_size; ++i) {
22
2/2
✓ Branch 0 taken 636 times.
✓ Branch 1 taken 98 times.
734 for (int k = 0; k < block_size; ++k) {
23 636 const size_t i_idx = static_cast<size_t>(i) * static_cast<size_t>(block_size);
24 636 const size_t k_idx = static_cast<size_t>(k) * static_cast<size_t>(block_size);
25 636 double a_ik = a[i_idx + static_cast<size_t>(k)];
26
2/2
✓ Branch 0 taken 5882 times.
✓ Branch 1 taken 636 times.
6518 for (int j = 0; j < block_size; ++j) {
27 5882 c[i_idx + static_cast<size_t>(j)] += a_ik * b[k_idx + static_cast<size_t>(j)];
28 }
29 }
30 }
31 23 }
32
33
1/2
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
28 ZyazevaSMatrixMultCannonAlgOMP::ZyazevaSMatrixMultCannonAlgOMP(const InType &in) {
34 SetTypeOfTask(GetStaticTypeOfTask());
35 GetInput() = in;
36 GetOutput() = {};
37 28 }
38
39 28 bool ZyazevaSMatrixMultCannonAlgOMP::ValidationImpl() {
40 28 const size_t sz = std::get<0>(GetInput());
41 const auto &m1 = std::get<1>(GetInput());
42 const auto &m2 = std::get<2>(GetInput());
43
44
3/6
✓ Branch 0 taken 28 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 28 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 28 times.
28 return sz > 0 && m1.size() == sz * sz && m2.size() == sz * sz;
45 }
46
47 28 bool ZyazevaSMatrixMultCannonAlgOMP::PreProcessingImpl() {
48 GetOutput() = {};
49 28 return true;
50 }
51
52 void ZyazevaSMatrixMultCannonAlgOMP::RegularMultiplication(const std::vector<double> &m1, const std::vector<double> &m2,
53 std::vector<double> &res, int sz) {
54
1/2
✓ Branch 1 taken 19 times.
✗ Branch 2 not taken.
19 #pragma omp parallel for default(none) shared(m1, m2, res, sz)
55 for (int i = 0; i < sz; ++i) {
56 const size_t i_offset = static_cast<size_t>(i) * static_cast<size_t>(sz);
57 for (int j = 0; j < sz; ++j) {
58 double sum = 0.0;
59 for (int k = 0; k < sz; ++k) {
60 const size_t k_offset = static_cast<size_t>(k) * static_cast<size_t>(sz);
61 sum += m1[i_offset + static_cast<size_t>(k)] * m2[k_offset + static_cast<size_t>(j)];
62 }
63 res[i_offset + static_cast<size_t>(j)] = sum;
64 }
65 }
66 }
67
68 9 void ZyazevaSMatrixMultCannonAlgOMP::InitializeBlocks(const std::vector<double> &m1, const std::vector<double> &m2,
69 std::vector<std::vector<double>> &blocks_a,
70 std::vector<std::vector<double>> &blocks_b, int grid_size,
71 int block_size, size_t grid_size_t, size_t block_size_t,
72 size_t sz_t) {
73
2/2
✓ Branch 0 taken 11 times.
✓ Branch 1 taken 9 times.
20 for (int i = 0; i < grid_size; ++i) {
74
2/2
✓ Branch 0 taken 15 times.
✓ Branch 1 taken 11 times.
26 for (int j = 0; j < grid_size; ++j) {
75 15 const size_t block_idx = (static_cast<size_t>(i) * grid_size_t) + static_cast<size_t>(j);
76 15 blocks_a[block_idx].resize(block_size_t * block_size_t);
77 15 blocks_b[block_idx].resize(block_size_t * block_size_t);
78
79
2/2
✓ Branch 0 taken 70 times.
✓ Branch 1 taken 15 times.
85 for (int bi = 0; bi < block_size; ++bi) {
80
2/2
✓ Branch 0 taken 520 times.
✓ Branch 1 taken 70 times.
590 for (int bj = 0; bj < block_size; ++bj) {
81 520 const size_t global_i = (static_cast<size_t>(i) * block_size_t) + static_cast<size_t>(bi);
82 520 const size_t global_j = (static_cast<size_t>(j) * block_size_t) + static_cast<size_t>(bj);
83 520 const size_t local_idx = (static_cast<size_t>(bi) * block_size_t) + static_cast<size_t>(bj);
84
85 520 blocks_a[block_idx][local_idx] = m1[(global_i * sz_t) + global_j];
86 520 blocks_b[block_idx][local_idx] = m2[(global_i * sz_t) + global_j];
87 }
88 }
89 }
90 }
91 9 }
92
93 void ZyazevaSMatrixMultCannonAlgOMP::AlignBlocks(const std::vector<std::vector<double>> &blocks_a,
94 const std::vector<std::vector<double>> &blocks_b,
95 std::vector<std::vector<double>> &aligned_a,
96 std::vector<std::vector<double>> &aligned_b, int grid_size,
97 size_t grid_size_t) {
98 9 #pragma omp parallel for default(none) shared(blocks_a, blocks_b, aligned_a, aligned_b, grid_size, grid_size_t) \
99 collapse(2)
100 for (int i = 0; i < grid_size; ++i) {
101 for (int j = 0; j < grid_size; ++j) {
102 const size_t block_idx = (static_cast<size_t>(i) * grid_size_t) + static_cast<size_t>(j);
103
104 const size_t a_src_idx = (static_cast<size_t>(i) * grid_size_t) + static_cast<size_t>((j + i) % grid_size);
105 aligned_a[block_idx] = blocks_a[a_src_idx];
106
107 const size_t b_src_idx = (static_cast<size_t>((i + j) % grid_size) * grid_size_t) + static_cast<size_t>(j);
108 aligned_b[block_idx] = blocks_b[b_src_idx];
109 }
110 }
111 }
112
113 11 void ZyazevaSMatrixMultCannonAlgOMP::CannonStep(std::vector<std::vector<double>> &aligned_a,
114 std::vector<std::vector<double>> &aligned_b,
115 std::vector<std::vector<double>> &blocks_c, int grid_size,
116 int block_size, size_t grid_size_t, int step) {
117 11 #pragma omp parallel for default(none) shared(aligned_a, aligned_b, blocks_c, grid_size, block_size, grid_size_t) \
118 collapse(2)
119 for (int i = 0; i < grid_size; ++i) {
120 for (int j = 0; j < grid_size; ++j) {
121 const size_t block_idx = (static_cast<size_t>(i) * grid_size_t) + static_cast<size_t>(j);
122 MultiplyBlocks(aligned_a[block_idx], aligned_b[block_idx], blocks_c[block_idx], block_size);
123 }
124 }
125
126
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 9 times.
11 if (step < grid_size - 1) {
127 2 std::vector<std::vector<double>> new_aligned_a(grid_size_t * grid_size_t);
128
1/2
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
2 std::vector<std::vector<double>> new_aligned_b(grid_size_t * grid_size_t);
129
130 2 #pragma omp parallel for default(none) \
131 shared(aligned_a, aligned_b, new_aligned_a, new_aligned_b, grid_size, grid_size_t) collapse(2)
132 for (int i = 0; i < grid_size; ++i) {
133 for (int j = 0; j < grid_size; ++j) {
134 const size_t block_idx = (static_cast<size_t>(i) * grid_size_t) + static_cast<size_t>(j);
135
136 const size_t a_src_idx = (static_cast<size_t>(i) * grid_size_t) + static_cast<size_t>((j + 1) % grid_size);
137 new_aligned_a[block_idx] = aligned_a[a_src_idx];
138
139 const size_t b_src_idx = (static_cast<size_t>((i + 1) % grid_size) * grid_size_t) + static_cast<size_t>(j);
140 new_aligned_b[block_idx] = aligned_b[b_src_idx];
141 }
142 }
143
144 2 aligned_a = std::move(new_aligned_a);
145 2 aligned_b = std::move(new_aligned_b);
146 2 }
147 11 }
148
149 void ZyazevaSMatrixMultCannonAlgOMP::AssembleResult(const std::vector<std::vector<double>> &blocks_c,
150 std::vector<double> &res_m, int grid_size, int block_size,
151 size_t sz_t, size_t grid_size_t, size_t block_size_t) {
152
1/2
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
9 #pragma omp parallel for default(none) shared(blocks_c, res_m, grid_size, block_size, sz_t, grid_size_t, block_size_t) \
153 collapse(2)
154 for (int i = 0; i < grid_size; ++i) {
155 for (int j = 0; j < grid_size; ++j) {
156 const size_t block_idx = (static_cast<size_t>(i) * grid_size_t) + static_cast<size_t>(j);
157 const auto &block = blocks_c[block_idx];
158
159 for (int bi = 0; bi < block_size; ++bi) {
160 for (int bj = 0; bj < block_size; ++bj) {
161 const size_t global_i = (static_cast<size_t>(i) * block_size_t) + static_cast<size_t>(bi);
162 const size_t global_j = (static_cast<size_t>(j) * block_size_t) + static_cast<size_t>(bj);
163 const size_t local_idx = (static_cast<size_t>(bi) * block_size_t) + static_cast<size_t>(bj);
164
165 res_m[(global_i * sz_t) + global_j] = block[local_idx];
166 }
167 }
168 }
169 }
170 }
171
172 28 bool ZyazevaSMatrixMultCannonAlgOMP::RunImpl() {
173 28 const auto sz = static_cast<int>(std::get<0>(GetInput()));
174 const auto &m1 = std::get<1>(GetInput());
175 const auto &m2 = std::get<2>(GetInput());
176
177 28 std::vector<double> res_m(static_cast<size_t>(sz) * static_cast<size_t>(sz), 0.0);
178
179 int num_threads = 1;
180
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 14 times.
28 #pragma omp parallel default(none) shared(num_threads)
181 {
182 #pragma omp single
183 num_threads = omp_get_num_threads();
184 }
185
186 const bool can_use_cannon =
187
6/6
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 14 times.
✓ Branch 2 taken 11 times.
✓ Branch 3 taken 3 times.
✓ Branch 4 taken 9 times.
✓ Branch 5 taken 2 times.
28 IsPerfectSquare(num_threads) && sz >= num_threads && (sz % static_cast<int>(std::sqrt(num_threads)) == 0);
188
189 if (!can_use_cannon) {
190 RegularMultiplication(m1, m2, res_m, sz);
191
1/2
✓ Branch 1 taken 19 times.
✗ Branch 2 not taken.
19 GetOutput() = res_m;
192 return true;
193 }
194
195 9 const int grid_size = static_cast<int>(std::sqrt(num_threads));
196 9 const int block_size = sz / grid_size;
197
198 9 const auto grid_size_t = static_cast<size_t>(grid_size);
199 9 const auto block_size_t = static_cast<size_t>(block_size);
200 const auto sz_t = static_cast<size_t>(sz);
201
202
1/4
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
9 std::vector<std::vector<double>> blocks_a(grid_size_t * grid_size_t);
203
1/2
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
9 std::vector<std::vector<double>> blocks_b(grid_size_t * grid_size_t);
204 std::vector<std::vector<double>> blocks_c(grid_size_t * grid_size_t,
205
2/4
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 9 times.
✗ Branch 5 not taken.
9 std::vector<double>(block_size_t * block_size_t, 0.0));
206
207
1/2
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
9 InitializeBlocks(m1, m2, blocks_a, blocks_b, grid_size, block_size, grid_size_t, block_size_t, sz_t);
208
209
1/2
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
9 std::vector<std::vector<double>> aligned_a(grid_size_t * grid_size_t);
210
1/2
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
9 std::vector<std::vector<double>> aligned_b(grid_size_t * grid_size_t);
211 AlignBlocks(blocks_a, blocks_b, aligned_a, aligned_b, grid_size, grid_size_t);
212
213
2/2
✓ Branch 0 taken 11 times.
✓ Branch 1 taken 9 times.
20 for (int step = 0; step < grid_size; ++step) {
214
1/2
✓ Branch 1 taken 11 times.
✗ Branch 2 not taken.
11 CannonStep(aligned_a, aligned_b, blocks_c, grid_size, block_size, grid_size_t, step);
215 }
216
217 AssembleResult(blocks_c, res_m, grid_size, block_size, sz_t, grid_size_t, block_size_t);
218
219
1/2
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
9 GetOutput() = res_m;
220 return true;
221 9 }
222
223 28 bool ZyazevaSMatrixMultCannonAlgOMP::PostProcessingImpl() {
224 28 return true;
225 }
226
227 } // namespace zyazeva_s_matrix_mult_cannon_alg
228