GCC Code Coverage Report


Directory: ./
File: tasks/smyshlaev_a_sle_cg_seq/stl/src/ops_stl.cpp
Date: 2026-05-11 08:26:31
Exec Total Coverage
Lines: 57 158 36.1%
Functions: 8 18 44.4%
Branches: 34 174 19.5%

Line Branch Exec Source
1 #include "smyshlaev_a_sle_cg_seq/stl/include/ops_stl.hpp"
2
3 #include <algorithm>
4 #include <cmath>
5 #include <cstddef>
6 #include <numeric>
7 #include <thread>
8 #include <vector>
9
10 #include "smyshlaev_a_sle_cg_seq/common/include/common.hpp"
11 #include "util/include/util.hpp"
12
13 namespace smyshlaev_a_sle_cg_seq {
14
15 namespace {
16 void GetThreadBounds(int n, int num_threads, int tid, int &start, int &end) {
17 int chunk = n / num_threads;
18 int remainder = n % num_threads;
19 start = (tid * chunk) + std::min(tid, remainder);
20 end = start + chunk + (tid < remainder ? 1 : 0);
21 }
22
23 double ComputeDotProductStl(const std::vector<double> &v1, const std::vector<double> &v2, int num_threads) {
24 int n = static_cast<int>(v1.size());
25 std::vector<double> partial_results(num_threads, 0.0);
26 std::vector<std::thread> threads;
27 threads.reserve(num_threads);
28 for (int i = 0; i < num_threads; ++i) {
29 threads.emplace_back([&, i]() {
30 int start = 0;
31 int end = 0;
32 GetThreadBounds(n, num_threads, i, start, end);
33 double local_sum = 0.0;
34 for (int j = start; j < end; ++j) {
35 local_sum += v1[j] * v2[j];
36 }
37 partial_results[i] = local_sum;
38 });
39 }
40
41 for (auto &t : threads) {
42 t.join();
43 }
44 return std::accumulate(partial_results.begin(), partial_results.end(), 0.0);
45 }
46
47 void ComputeApStl(const std::vector<double> &matrix, const std::vector<double> &p, std::vector<double> &ap, int n,
48 int num_threads) {
49 std::vector<std::thread> threads;
50 threads.reserve(num_threads);
51 for (int i = 0; i < num_threads; ++i) {
52 threads.emplace_back([&, i]() {
53 int start = 0;
54 int end = 0;
55 GetThreadBounds(n, num_threads, i, start, end);
56 for (int row = start; row < end; ++row) {
57 double sum = 0.0;
58 for (int col = 0; col < n; ++col) {
59 sum += matrix[(row * n) + col] * p[col];
60 }
61 ap[row] = sum;
62 }
63 });
64 }
65 for (auto &t : threads) {
66 t.join();
67 }
68 }
69
70 double UpdateResultAndResidualStl(std::vector<double> &result, std::vector<double> &r, const std::vector<double> &p,
71 const std::vector<double> &ap, double alpha, int num_threads) {
72 int n = static_cast<int>(result.size());
73 std::vector<double> partial_rs_new(num_threads, 0.0);
74 std::vector<std::thread> threads;
75 threads.reserve(num_threads);
76 for (int i = 0; i < num_threads; ++i) {
77 threads.emplace_back([&, i]() {
78 int start = 0;
79 int end = 0;
80 GetThreadBounds(n, num_threads, i, start, end);
81 double local_rs = 0.0;
82 for (int j = start; j < end; ++j) {
83 result[j] += alpha * p[j];
84 r[j] -= alpha * ap[j];
85 local_rs += r[j] * r[j];
86 }
87 partial_rs_new[i] = local_rs;
88 });
89 }
90
91 for (auto &t : threads) {
92 t.join();
93 }
94 return std::accumulate(partial_rs_new.begin(), partial_rs_new.end(), 0.0);
95 }
96
97 void UpdatePStl(std::vector<double> &p, const std::vector<double> &r, double beta, int num_threads) {
98 int n = static_cast<int>(p.size());
99 std::vector<std::thread> threads;
100 threads.reserve(num_threads);
101 for (int i = 0; i < num_threads; ++i) {
102 threads.emplace_back([&, i]() {
103 int start = 0;
104 int end = 0;
105 GetThreadBounds(n, num_threads, i, start, end);
106 for (int j = start; j < end; ++j) {
107 p[j] = r[j] + (beta * p[j]);
108 }
109 });
110 }
111 for (auto &t : threads) {
112 t.join();
113 }
114 }
115
116 double ComputeDotProduct(const std::vector<double> &v1, const std::vector<double> &v2) {
117 double result = 0.0;
118 128 int n = static_cast<int>(v1.size());
119
2/2
✓ Branch 0 taken 336 times.
✓ Branch 1 taken 128 times.
464 for (int i = 0; i < n; ++i) {
120 336 result += v1[i] * v2[i];
121 }
122 return result;
123 }
124
125 128 void ComputeAp(const std::vector<double> &matrix, const std::vector<double> &p, std::vector<double> &ap, int n) {
126
2/2
✓ Branch 0 taken 336 times.
✓ Branch 1 taken 128 times.
464 for (int i = 0; i < n; ++i) {
127 double sum = 0.0;
128
2/2
✓ Branch 0 taken 944 times.
✓ Branch 1 taken 336 times.
1280 for (int j = 0; j < n; ++j) {
129 944 sum += matrix[(i * n) + j] * p[j];
130 }
131 336 ap[i] = sum;
132 }
133 128 }
134
135 128 double UpdateResultAndResidual(std::vector<double> &result, std::vector<double> &r, const std::vector<double> &p,
136 const std::vector<double> &ap, double alpha) {
137 double rs_new = 0.0;
138 128 int n = static_cast<int>(result.size());
139
2/2
✓ Branch 0 taken 336 times.
✓ Branch 1 taken 128 times.
464 for (int i = 0; i < n; ++i) {
140 336 result[i] += alpha * p[i];
141 336 r[i] -= alpha * ap[i];
142 336 rs_new += r[i] * r[i];
143 }
144 128 return rs_new;
145 }
146
147 void UpdateP(std::vector<double> &p, const std::vector<double> &r, double beta) {
148 int n = static_cast<int>(p.size());
149
2/2
✓ Branch 0 taken 160 times.
✓ Branch 1 taken 64 times.
224 for (int i = 0; i < n; ++i) {
150 160 p[i] = r[i] + (beta * p[i]);
151 }
152 }
153
154 } // namespace
155
156
1/2
✓ Branch 1 taken 64 times.
✗ Branch 2 not taken.
64 SmyshlaevASleCgTaskSTL::SmyshlaevASleCgTaskSTL(const InType &in) {
157 SetTypeOfTask(GetStaticTypeOfTask());
158 GetInput() = in;
159 64 }
160
161
1/2
✓ Branch 0 taken 64 times.
✗ Branch 1 not taken.
64 bool SmyshlaevASleCgTaskSTL::ValidationImpl() {
162 const auto &a = GetInput().A;
163 const auto &b = GetInput().b;
164
2/4
✓ Branch 0 taken 64 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 64 times.
✗ Branch 3 not taken.
64 if (a.empty() || b.empty()) {
165 return false;
166 }
167
1/2
✓ Branch 0 taken 64 times.
✗ Branch 1 not taken.
64 if (a.size() != b.size()) {
168 return false;
169 }
170
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 64 times.
64 if (a.size() != a[0].size()) {
171 return false;
172 }
173 return true;
174 }
175
176 64 bool SmyshlaevASleCgTaskSTL::PreProcessingImpl() {
177 const auto &a = GetInput().A;
178 size_t n = a.size();
179 64 flat_A_.resize(n * n);
180
2/2
✓ Branch 0 taken 176 times.
✓ Branch 1 taken 64 times.
240 for (size_t i = 0; i < n; ++i) {
181
2/2
✓ Branch 0 taken 528 times.
✓ Branch 1 taken 176 times.
704 for (size_t j = 0; j < n; ++j) {
182 528 flat_A_[(i * n) + j] = a[i][j];
183 }
184 }
185 64 return true;
186 }
187
188 64 bool SmyshlaevASleCgTaskSTL::RunSequential() {
189 64 const auto &b = GetInput().b;
190 64 int n = static_cast<int>(b.size());
191 64 std::vector<double> r = b;
192
1/2
✓ Branch 1 taken 64 times.
✗ Branch 2 not taken.
64 std::vector<double> p = r;
193
1/4
✓ Branch 1 taken 64 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
64 std::vector<double> ap(n, 0.0);
194
1/4
✓ Branch 1 taken 64 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
64 std::vector<double> result(n, 0.0);
195
196 double rs_old = 0.0;
197
2/2
✓ Branch 0 taken 176 times.
✓ Branch 1 taken 64 times.
240 for (int i = 0; i < n; ++i) {
198 176 rs_old += r[i] * r[i];
199 }
200
201 const double epsilon = 1e-9;
202
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 64 times.
64 if (std::sqrt(rs_old) < epsilon) {
203 GetOutput() = result;
204 return true;
205 }
206
207 64 const int max_iterations = n * 2;
208
1/2
✓ Branch 0 taken 128 times.
✗ Branch 1 not taken.
128 for (int iter = 0; iter < max_iterations; ++iter) {
209 128 ComputeAp(flat_A_, p, ap, n);
210 double p_ap = ComputeDotProduct(p, ap);
211
1/2
✓ Branch 0 taken 128 times.
✗ Branch 1 not taken.
128 if (std::abs(p_ap) < 1e-15) {
212 break;
213 }
214
215 128 double alpha = rs_old / p_ap;
216 128 double rs_new = UpdateResultAndResidual(result, r, p, ap, alpha);
217
2/2
✓ Branch 0 taken 64 times.
✓ Branch 1 taken 64 times.
128 if (std::sqrt(rs_new) < epsilon) {
218 break;
219 }
220
221 64 double beta = rs_new / rs_old;
222 UpdateP(p, r, beta);
223 rs_old = rs_new;
224 }
225
226
1/2
✓ Branch 1 taken 64 times.
✗ Branch 2 not taken.
64 GetOutput() = result;
227 return true;
228 }
229
230 bool SmyshlaevASleCgTaskSTL::RunParallel(int num_threads) {
231 const auto &b = GetInput().b;
232 int n = static_cast<int>(b.size());
233 std::vector<double> r = b;
234 std::vector<double> p = r;
235 std::vector<double> ap(n, 0.0);
236 std::vector<double> result(n, 0.0);
237
238 std::vector<double> partial_rs(num_threads, 0.0);
239 std::vector<std::thread> threads;
240 threads.reserve(num_threads);
241 for (int i = 0; i < num_threads; ++i) {
242 threads.emplace_back([&, i]() {
243 int start = 0;
244 int end = 0;
245 GetThreadBounds(n, num_threads, i, start, end);
246 double local_rs = 0.0;
247 for (int j = start; j < end; ++j) {
248 local_rs += r[j] * r[j];
249 }
250 partial_rs[i] = local_rs;
251 });
252 }
253 for (auto &t : threads) {
254 t.join();
255 }
256 double rs_old = std::accumulate(partial_rs.begin(), partial_rs.end(), 0.0);
257
258 const double epsilon = 1e-9;
259 if (std::sqrt(rs_old) < epsilon) {
260 GetOutput() = result;
261 return true;
262 }
263
264 const int max_iterations = n * 2;
265 for (int iter = 0; iter < max_iterations; ++iter) {
266 ComputeApStl(flat_A_, p, ap, n, num_threads);
267 double p_ap = ComputeDotProductStl(p, ap, num_threads);
268 if (std::abs(p_ap) < 1e-15) {
269 break;
270 }
271
272 double alpha = rs_old / p_ap;
273 double rs_new = UpdateResultAndResidualStl(result, r, p, ap, alpha, num_threads);
274 if (std::sqrt(rs_new) < epsilon) {
275 break;
276 }
277
278 double beta = rs_new / rs_old;
279 UpdatePStl(p, r, beta, num_threads);
280 rs_old = rs_new;
281 }
282
283 GetOutput() = result;
284 return true;
285 }
286
287
1/2
✓ Branch 0 taken 64 times.
✗ Branch 1 not taken.
64 bool SmyshlaevASleCgTaskSTL::RunImpl() {
288 const auto &b = GetInput().b;
289
1/2
✓ Branch 0 taken 64 times.
✗ Branch 1 not taken.
64 if (b.empty()) {
290 return true;
291 }
292 64 int n = static_cast<int>(b.size());
293
294 64 int num_threads = ppc::util::GetNumThreads();
295
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 64 times.
64 if (n < num_threads * 10) {
296 num_threads = 1;
297 }
298 if (num_threads == 1) {
299 64 return RunSequential();
300 }
301
302 return RunParallel(num_threads);
303 }
304
305 64 bool SmyshlaevASleCgTaskSTL::PostProcessingImpl() {
306 64 return true;
307 }
308
309 } // namespace smyshlaev_a_sle_cg_seq
310