| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "smyshlaev_a_sle_cg_seq/stl/include/ops_stl.hpp" | ||
| 2 | |||
| 3 | #include <algorithm> | ||
| 4 | #include <cmath> | ||
| 5 | #include <cstddef> | ||
| 6 | #include <numeric> | ||
| 7 | #include <thread> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "smyshlaev_a_sle_cg_seq/common/include/common.hpp" | ||
| 11 | #include "util/include/util.hpp" | ||
| 12 | |||
| 13 | namespace smyshlaev_a_sle_cg_seq { | ||
| 14 | |||
| 15 | namespace { | ||
| 16 | void GetThreadBounds(int n, int num_threads, int tid, int &start, int &end) { | ||
| 17 | ✗ | int chunk = n / num_threads; | |
| 18 | ✗ | int remainder = n % num_threads; | |
| 19 | ✗ | start = (tid * chunk) + std::min(tid, remainder); | |
| 20 | ✗ | end = start + chunk + (tid < remainder ? 1 : 0); | |
| 21 | } | ||
| 22 | |||
| 23 | ✗ | double ComputeDotProductStl(const std::vector<double> &v1, const std::vector<double> &v2, int num_threads) { | |
| 24 | ✗ | int n = static_cast<int>(v1.size()); | |
| 25 | ✗ | std::vector<double> partial_results(num_threads, 0.0); | |
| 26 | ✗ | std::vector<std::thread> threads; | |
| 27 | ✗ | threads.reserve(num_threads); | |
| 28 | ✗ | for (int i = 0; i < num_threads; ++i) { | |
| 29 | ✗ | threads.emplace_back([&, i]() { | |
| 30 | int start = 0; | ||
| 31 | int end = 0; | ||
| 32 | ✗ | GetThreadBounds(n, num_threads, i, start, end); | |
| 33 | double local_sum = 0.0; | ||
| 34 | ✗ | for (int j = start; j < end; ++j) { | |
| 35 | ✗ | local_sum += v1[j] * v2[j]; | |
| 36 | } | ||
| 37 | ✗ | partial_results[i] = local_sum; | |
| 38 | ✗ | }); | |
| 39 | } | ||
| 40 | |||
| 41 | ✗ | for (auto &t : threads) { | |
| 42 | ✗ | t.join(); | |
| 43 | } | ||
| 44 | ✗ | return std::accumulate(partial_results.begin(), partial_results.end(), 0.0); | |
| 45 | ✗ | } | |
| 46 | |||
| 47 | ✗ | void ComputeApStl(const std::vector<double> &matrix, const std::vector<double> &p, std::vector<double> &ap, int n, | |
| 48 | int num_threads) { | ||
| 49 | ✗ | std::vector<std::thread> threads; | |
| 50 | ✗ | threads.reserve(num_threads); | |
| 51 | ✗ | for (int i = 0; i < num_threads; ++i) { | |
| 52 | ✗ | threads.emplace_back([&, i]() { | |
| 53 | int start = 0; | ||
| 54 | int end = 0; | ||
| 55 | ✗ | GetThreadBounds(n, num_threads, i, start, end); | |
| 56 | ✗ | for (int row = start; row < end; ++row) { | |
| 57 | double sum = 0.0; | ||
| 58 | ✗ | for (int col = 0; col < n; ++col) { | |
| 59 | ✗ | sum += matrix[(row * n) + col] * p[col]; | |
| 60 | } | ||
| 61 | ✗ | ap[row] = sum; | |
| 62 | } | ||
| 63 | ✗ | }); | |
| 64 | } | ||
| 65 | ✗ | for (auto &t : threads) { | |
| 66 | ✗ | t.join(); | |
| 67 | } | ||
| 68 | ✗ | } | |
| 69 | |||
| 70 | ✗ | double UpdateResultAndResidualStl(std::vector<double> &result, std::vector<double> &r, const std::vector<double> &p, | |
| 71 | const std::vector<double> &ap, double alpha, int num_threads) { | ||
| 72 | ✗ | int n = static_cast<int>(result.size()); | |
| 73 | ✗ | std::vector<double> partial_rs_new(num_threads, 0.0); | |
| 74 | ✗ | std::vector<std::thread> threads; | |
| 75 | ✗ | threads.reserve(num_threads); | |
| 76 | ✗ | for (int i = 0; i < num_threads; ++i) { | |
| 77 | ✗ | threads.emplace_back([&, i]() { | |
| 78 | int start = 0; | ||
| 79 | int end = 0; | ||
| 80 | ✗ | GetThreadBounds(n, num_threads, i, start, end); | |
| 81 | double local_rs = 0.0; | ||
| 82 | ✗ | for (int j = start; j < end; ++j) { | |
| 83 | ✗ | result[j] += alpha * p[j]; | |
| 84 | ✗ | r[j] -= alpha * ap[j]; | |
| 85 | ✗ | local_rs += r[j] * r[j]; | |
| 86 | } | ||
| 87 | ✗ | partial_rs_new[i] = local_rs; | |
| 88 | ✗ | }); | |
| 89 | } | ||
| 90 | |||
| 91 | ✗ | for (auto &t : threads) { | |
| 92 | ✗ | t.join(); | |
| 93 | } | ||
| 94 | ✗ | return std::accumulate(partial_rs_new.begin(), partial_rs_new.end(), 0.0); | |
| 95 | ✗ | } | |
| 96 | |||
| 97 | ✗ | void UpdatePStl(std::vector<double> &p, const std::vector<double> &r, double beta, int num_threads) { | |
| 98 | ✗ | int n = static_cast<int>(p.size()); | |
| 99 | ✗ | std::vector<std::thread> threads; | |
| 100 | ✗ | threads.reserve(num_threads); | |
| 101 | ✗ | for (int i = 0; i < num_threads; ++i) { | |
| 102 | ✗ | threads.emplace_back([&, i]() { | |
| 103 | int start = 0; | ||
| 104 | int end = 0; | ||
| 105 | ✗ | GetThreadBounds(n, num_threads, i, start, end); | |
| 106 | ✗ | for (int j = start; j < end; ++j) { | |
| 107 | ✗ | p[j] = r[j] + (beta * p[j]); | |
| 108 | } | ||
| 109 | ✗ | }); | |
| 110 | } | ||
| 111 | ✗ | for (auto &t : threads) { | |
| 112 | ✗ | t.join(); | |
| 113 | } | ||
| 114 | ✗ | } | |
| 115 | |||
| 116 | double ComputeDotProduct(const std::vector<double> &v1, const std::vector<double> &v2) { | ||
| 117 | double result = 0.0; | ||
| 118 | 128 | int n = static_cast<int>(v1.size()); | |
| 119 |
2/2✓ Branch 0 taken 336 times.
✓ Branch 1 taken 128 times.
|
464 | for (int i = 0; i < n; ++i) { |
| 120 | 336 | result += v1[i] * v2[i]; | |
| 121 | } | ||
| 122 | return result; | ||
| 123 | } | ||
| 124 | |||
| 125 | 128 | void ComputeAp(const std::vector<double> &matrix, const std::vector<double> &p, std::vector<double> &ap, int n) { | |
| 126 |
2/2✓ Branch 0 taken 336 times.
✓ Branch 1 taken 128 times.
|
464 | for (int i = 0; i < n; ++i) { |
| 127 | double sum = 0.0; | ||
| 128 |
2/2✓ Branch 0 taken 944 times.
✓ Branch 1 taken 336 times.
|
1280 | for (int j = 0; j < n; ++j) { |
| 129 | 944 | sum += matrix[(i * n) + j] * p[j]; | |
| 130 | } | ||
| 131 | 336 | ap[i] = sum; | |
| 132 | } | ||
| 133 | 128 | } | |
| 134 | |||
| 135 | 128 | double UpdateResultAndResidual(std::vector<double> &result, std::vector<double> &r, const std::vector<double> &p, | |
| 136 | const std::vector<double> &ap, double alpha) { | ||
| 137 | double rs_new = 0.0; | ||
| 138 | 128 | int n = static_cast<int>(result.size()); | |
| 139 |
2/2✓ Branch 0 taken 336 times.
✓ Branch 1 taken 128 times.
|
464 | for (int i = 0; i < n; ++i) { |
| 140 | 336 | result[i] += alpha * p[i]; | |
| 141 | 336 | r[i] -= alpha * ap[i]; | |
| 142 | 336 | rs_new += r[i] * r[i]; | |
| 143 | } | ||
| 144 | 128 | return rs_new; | |
| 145 | } | ||
| 146 | |||
| 147 | void UpdateP(std::vector<double> &p, const std::vector<double> &r, double beta) { | ||
| 148 | int n = static_cast<int>(p.size()); | ||
| 149 |
2/2✓ Branch 0 taken 160 times.
✓ Branch 1 taken 64 times.
|
224 | for (int i = 0; i < n; ++i) { |
| 150 | 160 | p[i] = r[i] + (beta * p[i]); | |
| 151 | } | ||
| 152 | } | ||
| 153 | |||
| 154 | } // namespace | ||
| 155 | |||
| 156 |
1/2✓ Branch 1 taken 64 times.
✗ Branch 2 not taken.
|
64 | SmyshlaevASleCgTaskSTL::SmyshlaevASleCgTaskSTL(const InType &in) { |
| 157 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 158 | GetInput() = in; | ||
| 159 | 64 | } | |
| 160 | |||
| 161 |
1/2✓ Branch 0 taken 64 times.
✗ Branch 1 not taken.
|
64 | bool SmyshlaevASleCgTaskSTL::ValidationImpl() { |
| 162 | const auto &a = GetInput().A; | ||
| 163 | const auto &b = GetInput().b; | ||
| 164 |
2/4✓ Branch 0 taken 64 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 64 times.
✗ Branch 3 not taken.
|
64 | if (a.empty() || b.empty()) { |
| 165 | return false; | ||
| 166 | } | ||
| 167 |
1/2✓ Branch 0 taken 64 times.
✗ Branch 1 not taken.
|
64 | if (a.size() != b.size()) { |
| 168 | return false; | ||
| 169 | } | ||
| 170 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 64 times.
|
64 | if (a.size() != a[0].size()) { |
| 171 | ✗ | return false; | |
| 172 | } | ||
| 173 | return true; | ||
| 174 | } | ||
| 175 | |||
| 176 | 64 | bool SmyshlaevASleCgTaskSTL::PreProcessingImpl() { | |
| 177 | const auto &a = GetInput().A; | ||
| 178 | size_t n = a.size(); | ||
| 179 | 64 | flat_A_.resize(n * n); | |
| 180 |
2/2✓ Branch 0 taken 176 times.
✓ Branch 1 taken 64 times.
|
240 | for (size_t i = 0; i < n; ++i) { |
| 181 |
2/2✓ Branch 0 taken 528 times.
✓ Branch 1 taken 176 times.
|
704 | for (size_t j = 0; j < n; ++j) { |
| 182 | 528 | flat_A_[(i * n) + j] = a[i][j]; | |
| 183 | } | ||
| 184 | } | ||
| 185 | 64 | return true; | |
| 186 | } | ||
| 187 | |||
| 188 | 64 | bool SmyshlaevASleCgTaskSTL::RunSequential() { | |
| 189 | 64 | const auto &b = GetInput().b; | |
| 190 | 64 | int n = static_cast<int>(b.size()); | |
| 191 | 64 | std::vector<double> r = b; | |
| 192 |
1/2✓ Branch 1 taken 64 times.
✗ Branch 2 not taken.
|
64 | std::vector<double> p = r; |
| 193 |
1/4✓ Branch 1 taken 64 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
64 | std::vector<double> ap(n, 0.0); |
| 194 |
1/4✓ Branch 1 taken 64 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
64 | std::vector<double> result(n, 0.0); |
| 195 | |||
| 196 | double rs_old = 0.0; | ||
| 197 |
2/2✓ Branch 0 taken 176 times.
✓ Branch 1 taken 64 times.
|
240 | for (int i = 0; i < n; ++i) { |
| 198 | 176 | rs_old += r[i] * r[i]; | |
| 199 | } | ||
| 200 | |||
| 201 | const double epsilon = 1e-9; | ||
| 202 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 64 times.
|
64 | if (std::sqrt(rs_old) < epsilon) { |
| 203 | GetOutput() = result; | ||
| 204 | return true; | ||
| 205 | } | ||
| 206 | |||
| 207 | 64 | const int max_iterations = n * 2; | |
| 208 |
1/2✓ Branch 0 taken 128 times.
✗ Branch 1 not taken.
|
128 | for (int iter = 0; iter < max_iterations; ++iter) { |
| 209 | 128 | ComputeAp(flat_A_, p, ap, n); | |
| 210 | double p_ap = ComputeDotProduct(p, ap); | ||
| 211 |
1/2✓ Branch 0 taken 128 times.
✗ Branch 1 not taken.
|
128 | if (std::abs(p_ap) < 1e-15) { |
| 212 | break; | ||
| 213 | } | ||
| 214 | |||
| 215 | 128 | double alpha = rs_old / p_ap; | |
| 216 | 128 | double rs_new = UpdateResultAndResidual(result, r, p, ap, alpha); | |
| 217 |
2/2✓ Branch 0 taken 64 times.
✓ Branch 1 taken 64 times.
|
128 | if (std::sqrt(rs_new) < epsilon) { |
| 218 | break; | ||
| 219 | } | ||
| 220 | |||
| 221 | 64 | double beta = rs_new / rs_old; | |
| 222 | UpdateP(p, r, beta); | ||
| 223 | rs_old = rs_new; | ||
| 224 | } | ||
| 225 | |||
| 226 |
1/2✓ Branch 1 taken 64 times.
✗ Branch 2 not taken.
|
64 | GetOutput() = result; |
| 227 | return true; | ||
| 228 | } | ||
| 229 | |||
| 230 | ✗ | bool SmyshlaevASleCgTaskSTL::RunParallel(int num_threads) { | |
| 231 | ✗ | const auto &b = GetInput().b; | |
| 232 | ✗ | int n = static_cast<int>(b.size()); | |
| 233 | ✗ | std::vector<double> r = b; | |
| 234 | ✗ | std::vector<double> p = r; | |
| 235 | ✗ | std::vector<double> ap(n, 0.0); | |
| 236 | ✗ | std::vector<double> result(n, 0.0); | |
| 237 | |||
| 238 | ✗ | std::vector<double> partial_rs(num_threads, 0.0); | |
| 239 | ✗ | std::vector<std::thread> threads; | |
| 240 | ✗ | threads.reserve(num_threads); | |
| 241 | ✗ | for (int i = 0; i < num_threads; ++i) { | |
| 242 | ✗ | threads.emplace_back([&, i]() { | |
| 243 | int start = 0; | ||
| 244 | int end = 0; | ||
| 245 | ✗ | GetThreadBounds(n, num_threads, i, start, end); | |
| 246 | double local_rs = 0.0; | ||
| 247 | ✗ | for (int j = start; j < end; ++j) { | |
| 248 | ✗ | local_rs += r[j] * r[j]; | |
| 249 | } | ||
| 250 | ✗ | partial_rs[i] = local_rs; | |
| 251 | ✗ | }); | |
| 252 | } | ||
| 253 | ✗ | for (auto &t : threads) { | |
| 254 | ✗ | t.join(); | |
| 255 | } | ||
| 256 | double rs_old = std::accumulate(partial_rs.begin(), partial_rs.end(), 0.0); | ||
| 257 | |||
| 258 | const double epsilon = 1e-9; | ||
| 259 | ✗ | if (std::sqrt(rs_old) < epsilon) { | |
| 260 | ✗ | GetOutput() = result; | |
| 261 | return true; | ||
| 262 | } | ||
| 263 | |||
| 264 | ✗ | const int max_iterations = n * 2; | |
| 265 | ✗ | for (int iter = 0; iter < max_iterations; ++iter) { | |
| 266 | ✗ | ComputeApStl(flat_A_, p, ap, n, num_threads); | |
| 267 | ✗ | double p_ap = ComputeDotProductStl(p, ap, num_threads); | |
| 268 | ✗ | if (std::abs(p_ap) < 1e-15) { | |
| 269 | break; | ||
| 270 | } | ||
| 271 | |||
| 272 | ✗ | double alpha = rs_old / p_ap; | |
| 273 | ✗ | double rs_new = UpdateResultAndResidualStl(result, r, p, ap, alpha, num_threads); | |
| 274 | ✗ | if (std::sqrt(rs_new) < epsilon) { | |
| 275 | break; | ||
| 276 | } | ||
| 277 | |||
| 278 | ✗ | double beta = rs_new / rs_old; | |
| 279 | ✗ | UpdatePStl(p, r, beta, num_threads); | |
| 280 | rs_old = rs_new; | ||
| 281 | } | ||
| 282 | |||
| 283 | ✗ | GetOutput() = result; | |
| 284 | return true; | ||
| 285 | ✗ | } | |
| 286 | |||
| 287 |
1/2✓ Branch 0 taken 64 times.
✗ Branch 1 not taken.
|
64 | bool SmyshlaevASleCgTaskSTL::RunImpl() { |
| 288 | const auto &b = GetInput().b; | ||
| 289 |
1/2✓ Branch 0 taken 64 times.
✗ Branch 1 not taken.
|
64 | if (b.empty()) { |
| 290 | return true; | ||
| 291 | } | ||
| 292 | 64 | int n = static_cast<int>(b.size()); | |
| 293 | |||
| 294 | 64 | int num_threads = ppc::util::GetNumThreads(); | |
| 295 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 64 times.
|
64 | if (n < num_threads * 10) { |
| 296 | num_threads = 1; | ||
| 297 | } | ||
| 298 | ✗ | if (num_threads == 1) { | |
| 299 | 64 | return RunSequential(); | |
| 300 | } | ||
| 301 | |||
| 302 | ✗ | return RunParallel(num_threads); | |
| 303 | } | ||
| 304 | |||
| 305 | 64 | bool SmyshlaevASleCgTaskSTL::PostProcessingImpl() { | |
| 306 | 64 | return true; | |
| 307 | } | ||
| 308 | |||
| 309 | } // namespace smyshlaev_a_sle_cg_seq | ||
| 310 |