GCC Code Coverage Report


Directory: ./
File: tasks/zyazeva_s_matrix_mult_cannon_alg/tbb/src/ops_tbb.cpp
Date: 2026-05-11 08:26:31
Exec Total Coverage
Lines: 87 88 98.9%
Functions: 15 15 100.0%
Branches: 42 72 58.3%

Line Branch Exec Source
1 #include "zyazeva_s_matrix_mult_cannon_alg/tbb/include/ops_tbb.hpp"
2
3 #include <tbb/tbb.h>
4
5 #include <algorithm>
6 #include <cmath>
7 #include <cstddef>
8 #include <utility>
9 #include <vector>
10
11 #include "zyazeva_s_matrix_mult_cannon_alg/common/include/common.hpp"
12
13 namespace zyazeva_s_matrix_mult_cannon_alg {
14
15 namespace {
16
17 using AlignedVector = std::vector<double, tbb::cache_aligned_allocator<double>>;
18
19 inline size_t BlockIndex(size_t row, size_t col, size_t grid_size) {
20 112 return (row * grid_size) + col;
21 }
22
23 inline size_t BlockOffset(size_t row, size_t col, size_t grid_size, size_t block_area) {
24 64 return BlockIndex(row, col, grid_size) * block_area;
25 }
26
27 28 size_t FindGridSize(int sz) {
28 const auto max_threads = tbb::this_task_arena::max_concurrency();
29 28 const int root = static_cast<int>(std::sqrt(max_threads));
30
31
1/2
✓ Branch 0 taken 44 times.
✗ Branch 1 not taken.
44 for (int k = root; k >= 1; --k) {
32
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 16 times.
44 if (sz % k == 0) {
33 28 return static_cast<size_t>(k);
34 }
35 }
36 return 1;
37 }
38
39 void CopyBlockRow(const std::vector<double> &src_mat, AlignedVector &dst_mat, size_t gi, size_t bj, size_t sz,
40 size_t bs, size_t dst_base, size_t i) {
41 232 const size_t src = (gi * sz) + (bj * bs);
42 232 const size_t dst = dst_base + (i * bs);
43 232 std::copy_n(src_mat.data() + src, bs, dst_mat.data() + dst);
44 }
45
46 64 void InitializeBlock(size_t id, const std::vector<double> &m1, const std::vector<double> &m2, AlignedVector &a,
47 AlignedVector &b, size_t gs, size_t bs, size_t sz, size_t block_area) {
48 64 const size_t bi = id / gs;
49 64 const size_t bj = id % gs;
50 const size_t base = BlockOffset(bi, bj, gs, block_area);
51
52
2/2
✓ Branch 0 taken 232 times.
✓ Branch 1 taken 64 times.
296 for (size_t i = 0; i < bs; ++i) {
53 232 const size_t gi = (bi * bs) + i;
54 CopyBlockRow(m1, a, gi, bj, sz, bs, base, i);
55 CopyBlockRow(m2, b, gi, bj, sz, bs, base, i);
56 }
57 64 }
58
59 void InitializeBlocks(const std::vector<double> &m1, const std::vector<double> &m2, AlignedVector &a, AlignedVector &b,
60 size_t gs, size_t bs, size_t sz, size_t block_area, size_t total_blocks) {
61 tbb::parallel_for(static_cast<size_t>(0), total_blocks,
62 64 [&](size_t id) { InitializeBlock(id, m1, m2, a, b, gs, bs, sz, block_area); });
63 }
64
65 void SetMapEntry(std::vector<size_t> &map_a, std::vector<size_t> &map_b, size_t i, size_t j, size_t gs) {
66 const size_t idx = BlockIndex(i, j, gs);
67 64 map_a[idx] = BlockIndex(i, (j + i) % gs, gs);
68 64 map_b[idx] = BlockIndex((i + j) % gs, j, gs);
69 }
70
71 28 void InitializeMaps(std::vector<size_t> &map_a, std::vector<size_t> &map_b, size_t gs) {
72
2/2
✓ Branch 0 taken 40 times.
✓ Branch 1 taken 28 times.
68 for (size_t i = 0; i < gs; ++i) {
73
2/2
✓ Branch 0 taken 64 times.
✓ Branch 1 taken 40 times.
104 for (size_t j = 0; j < gs; ++j) {
74 SetMapEntry(map_a, map_b, i, j, gs);
75 }
76 }
77 28 }
78
79 void UpdateMapEntry(std::vector<size_t> &next_a, std::vector<size_t> &next_b, const std::vector<size_t> &map_a,
80 const std::vector<size_t> &map_b, size_t i, size_t j, size_t gs) {
81 const size_t idx = BlockIndex(i, j, gs);
82 48 next_a[idx] = map_a[BlockIndex(i, (j + 1) % gs, gs)];
83 48 next_b[idx] = map_b[BlockIndex((i + 1) % gs, j, gs)];
84 }
85
86 12 void UpdateMaps(std::vector<size_t> &map_a, std::vector<size_t> &map_b, size_t gs) {
87
1/2
✓ Branch 2 taken 12 times.
✗ Branch 3 not taken.
12 std::vector<size_t> next_a(map_a.size());
88
1/4
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
12 std::vector<size_t> next_b(map_b.size());
89
90
2/2
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 12 times.
36 for (size_t i = 0; i < gs; ++i) {
91
2/2
✓ Branch 0 taken 48 times.
✓ Branch 1 taken 24 times.
72 for (size_t j = 0; j < gs; ++j) {
92 UpdateMapEntry(next_a, next_b, map_a, map_b, i, j, gs);
93 }
94 }
95
96 map_a.swap(next_a);
97 map_b.swap(next_b);
98 12 }
99
100 360 void MultiplyBlockRow(const double *a, const double *b, double *c, int bs, int i) {
101 360 const double *a_row = a + (static_cast<size_t>(i) * bs);
102 double *c_row = c + (static_cast<size_t>(i) * bs);
103
104
2/2
✓ Branch 0 taken 2096 times.
✓ Branch 1 taken 360 times.
2456 for (int k = 0; k < bs; ++k) {
105 2096 const double a_val = a_row[k];
106
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2096 times.
2096 if (a_val == 0.0) {
107 continue;
108 }
109 2096 const double *b_row = b + (static_cast<size_t>(k) * bs);
110
111
2/2
✓ Branch 0 taken 19272 times.
✓ Branch 1 taken 2096 times.
21368 for (int j = 0; j < bs; ++j) {
112 19272 c_row[j] += a_val * b_row[j];
113 }
114 }
115 360 }
116
117 void MultiplyBlocks(const double *a, const double *b, double *c, int block_size) {
118 const int bs = block_size;
119
2/2
✓ Branch 0 taken 360 times.
✓ Branch 1 taken 112 times.
472 for (int i = 0; i < bs; ++i) {
120 360 MultiplyBlockRow(a, b, c, bs, i);
121 }
122 }
123
124 112 void PerformCannonStepForBlock(size_t id, const AlignedVector &a, const AlignedVector &b, AlignedVector &c,
125 const std::vector<size_t> &map_a, const std::vector<size_t> &map_b, size_t block_area,
126 int bs) {
127 112 const size_t a_idx = map_a[id];
128 112 const size_t b_idx = map_b[id];
129
130 112 MultiplyBlocks(a.data() + (a_idx * block_area), b.data() + (b_idx * block_area), c.data() + (id * block_area), bs);
131 112 }
132
133 void PerformCannonStep(const AlignedVector &a, const AlignedVector &b, AlignedVector &c,
134 const std::vector<size_t> &map_a, const std::vector<size_t> &map_b, size_t total_blocks,
135 size_t block_area, int bs) {
136 tbb::parallel_for(static_cast<size_t>(0), total_blocks,
137 112 [&](size_t id) { PerformCannonStepForBlock(id, a, b, c, map_a, map_b, block_area, bs); });
138 }
139
140 64 void AssembleResultBlock(size_t id, const AlignedVector &c, std::vector<double> &result, size_t gs, size_t bs,
141 size_t sz, size_t block_area) {
142 64 const size_t bi = id / gs;
143 64 const size_t bj = id % gs;
144 64 const size_t base = id * block_area;
145
146
2/2
✓ Branch 0 taken 232 times.
✓ Branch 1 taken 64 times.
296 for (size_t i = 0; i < bs; ++i) {
147 232 const size_t dst = ((bi * bs + i) * sz) + (bj * bs);
148 232 const size_t src = base + (i * bs);
149 232 std::copy_n(c.data() + src, bs, result.data() + dst);
150 }
151 64 }
152
153 void AssembleResult(const AlignedVector &c, std::vector<double> &result, size_t gs, size_t bs, size_t sz,
154 size_t block_area, size_t total_blocks) {
155 tbb::parallel_for(static_cast<size_t>(0), total_blocks,
156 64 [&](size_t id) { AssembleResultBlock(id, c, result, gs, bs, sz, block_area); });
157 }
158
159 } // namespace
160
161
1/2
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
28 ZyazevaSMatrixMultCannonAlgTBB::ZyazevaSMatrixMultCannonAlgTBB(const InType &in) {
162 SetTypeOfTask(GetStaticTypeOfTask());
163 GetInput() = in;
164 GetOutput() = {};
165 28 }
166
167 28 bool ZyazevaSMatrixMultCannonAlgTBB::ValidationImpl() {
168 const auto &input = GetInput();
169 28 const size_t sz = std::get<0>(input);
170 const auto &m1 = std::get<1>(input);
171 const auto &m2 = std::get<2>(input);
172
173
3/6
✓ Branch 0 taken 28 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 28 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 28 times.
28 return sz > 0 && m1.size() == sz * sz && m2.size() == sz * sz;
174 }
175
176
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 28 times.
28 bool ZyazevaSMatrixMultCannonAlgTBB::PreProcessingImpl() {
177 GetOutput().clear();
178 28 return true;
179 }
180
181 28 bool ZyazevaSMatrixMultCannonAlgTBB::PostProcessingImpl() {
182 28 return true;
183 }
184
185 28 bool ZyazevaSMatrixMultCannonAlgTBB::RunImpl() {
186 28 const int sz = static_cast<int>(std::get<0>(GetInput()));
187 const auto &m1 = std::get<1>(GetInput());
188 const auto &m2 = std::get<2>(GetInput());
189
190 28 const size_t grid_size = FindGridSize(sz);
191 28 const size_t bs = static_cast<size_t>(sz) / grid_size;
192 const size_t gs = grid_size;
193 28 const size_t block_area = bs * bs;
194 28 const size_t total_blocks = gs * gs;
195
196 28 AlignedVector a(total_blocks * block_area);
197
1/4
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
28 AlignedVector b(total_blocks * block_area);
198
2/6
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 28 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
28 AlignedVector c(total_blocks * block_area, 0.0);
199
200 28 InitializeBlocks(m1, m2, a, b, gs, bs, static_cast<size_t>(sz), block_area, total_blocks);
201
202
1/4
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
28 std::vector<size_t> map_a(total_blocks);
203
1/4
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
28 std::vector<size_t> map_b(total_blocks);
204 28 InitializeMaps(map_a, map_b, gs);
205
206
2/2
✓ Branch 0 taken 40 times.
✓ Branch 1 taken 28 times.
68 for (size_t step = 0; step < gs; ++step) {
207
1/2
✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
40 PerformCannonStep(a, b, c, map_a, map_b, total_blocks, block_area, static_cast<int>(bs));
208
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 28 times.
40 if (step + 1 < gs) {
209
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 UpdateMaps(map_a, map_b, gs);
210 }
211 }
212
213
2/6
✓ Branch 1 taken 28 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 28 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
28 std::vector<double> result(static_cast<size_t>(sz) * sz);
214 28 AssembleResult(c, result, gs, bs, static_cast<size_t>(sz), block_area, total_blocks);
215
216 GetOutput() = std::move(result);
217 28 return true;
218 }
219
220 } // namespace zyazeva_s_matrix_mult_cannon_alg
221