| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "kruglova_a_conjugate_gradient_sle/tbb/include/ops_tbb.hpp" | ||
| 2 | |||
| 3 | #include <oneapi/tbb/blocked_range.h> | ||
| 4 | #include <oneapi/tbb/parallel_for.h> | ||
| 5 | #include <oneapi/tbb/parallel_reduce.h> | ||
| 6 | |||
| 7 | #include <cmath> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "kruglova_a_conjugate_gradient_sle/common/include/common.hpp" | ||
| 12 | |||
| 13 | namespace kruglova_a_conjugate_gradient_sle { | ||
| 14 | |||
| 15 | namespace { | ||
| 16 | void MatrixVectorMultiply(const std::vector<double> &a, const std::vector<double> &p, std::vector<double> &ap, int n) { | ||
| 17 |
1/2✓ Branch 1 taken 132 times.
✗ Branch 2 not taken.
|
264 | tbb::parallel_for(tbb::blocked_range<int>(0, n, 256), [&](const tbb::blocked_range<int> &range) { |
| 18 |
2/2✓ Branch 0 taken 5508 times.
✓ Branch 1 taken 132 times.
|
5640 | for (int i = range.begin(); i < range.end(); ++i) { |
| 19 | double sum = 0.0; | ||
| 20 | 5508 | const size_t row_offset = static_cast<size_t>(i) * n; | |
| 21 |
2/2✓ Branch 0 taken 420572 times.
✓ Branch 1 taken 5508 times.
|
426080 | for (int j = 0; j < n; ++j) { |
| 22 | 420572 | sum += a[row_offset + j] * p[j]; | |
| 23 | } | ||
| 24 | 5508 | ap[i] = sum; | |
| 25 | } | ||
| 26 | 132 | }); | |
| 27 | } | ||
| 28 | |||
| 29 | 288 | double DotProduct(const std::vector<double> &v1, const std::vector<double> &v2, int n) { | |
| 30 | 576 | return tbb::parallel_reduce(tbb::blocked_range<int>(0, n, 512), 0.0, | |
| 31 | 288 | [&](const tbb::blocked_range<int> &range, double init) { | |
| 32 |
2/2✓ Branch 0 taken 11696 times.
✓ Branch 1 taken 288 times.
|
11984 | for (int i = range.begin(); i < range.end(); ++i) { |
| 33 | 11696 | init += v1[i] * v2[i]; | |
| 34 | } | ||
| 35 | return init; | ||
| 36 | ✗ | }, [](double a, double b) { return a + b; }); | |
| 37 | } | ||
| 38 | } // namespace | ||
| 39 | |||
| 40 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | KruglovaAConjGradSleTBB::KruglovaAConjGradSleTBB(const InType &in) { |
| 41 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 42 | GetInput() = in; | ||
| 43 | 24 | } | |
| 44 | |||
| 45 | 24 | bool KruglovaAConjGradSleTBB::ValidationImpl() { | |
| 46 | const auto &in = GetInput(); | ||
| 47 |
1/2✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
|
24 | if (in.size <= 0) { |
| 48 | return false; | ||
| 49 | } | ||
| 50 |
1/2✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
|
24 | if (in.A.size() != static_cast<size_t>(in.size) * static_cast<size_t>(in.size)) { |
| 51 | return false; | ||
| 52 | } | ||
| 53 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
|
24 | if (in.b.size() != static_cast<size_t>(in.size)) { |
| 54 | ✗ | return false; | |
| 55 | } | ||
| 56 | return true; | ||
| 57 | } | ||
| 58 | |||
| 59 | 24 | bool KruglovaAConjGradSleTBB::PreProcessingImpl() { | |
| 60 | 24 | GetOutput().assign(GetInput().size, 0.0); | |
| 61 | 24 | return true; | |
| 62 | } | ||
| 63 | |||
| 64 | 24 | bool KruglovaAConjGradSleTBB::RunImpl() { | |
| 65 | 24 | const auto &a = GetInput().A; | |
| 66 | 24 | const auto &b = GetInput().b; | |
| 67 | 24 | int n = GetInput().size; | |
| 68 | auto &x = GetOutput(); | ||
| 69 | |||
| 70 | 24 | std::vector<double> r = b; | |
| 71 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | std::vector<double> p = r; |
| 72 |
1/4✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
24 | std::vector<double> ap(n, 0.0); |
| 73 | |||
| 74 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | double rsold = DotProduct(r, r, n); |
| 75 | |||
| 76 | const double tolerance = 1e-8; | ||
| 77 | |||
| 78 |
1/2✓ Branch 0 taken 132 times.
✗ Branch 1 not taken.
|
132 | for (int iter = 0; iter < n * 2; ++iter) { |
| 79 | 132 | MatrixVectorMultiply(a, p, ap, n); | |
| 80 | |||
| 81 |
1/2✓ Branch 1 taken 132 times.
✗ Branch 2 not taken.
|
132 | double p_ap = DotProduct(p, ap, n); |
| 82 | |||
| 83 |
1/2✓ Branch 0 taken 132 times.
✗ Branch 1 not taken.
|
132 | if (std::abs(p_ap) < 1e-15) { |
| 84 | break; | ||
| 85 | } | ||
| 86 | |||
| 87 | 132 | double alpha = rsold / p_ap; | |
| 88 | |||
| 89 |
1/2✓ Branch 1 taken 132 times.
✗ Branch 2 not taken.
|
264 | tbb::parallel_for(tbb::blocked_range<int>(0, n, 1024), [&](const tbb::blocked_range<int> &range) { |
| 90 |
2/2✓ Branch 0 taken 5508 times.
✓ Branch 1 taken 132 times.
|
5640 | for (int i = range.begin(); i < range.end(); ++i) { |
| 91 | 5508 | x[i] += alpha * p[i]; | |
| 92 | 5508 | r[i] -= alpha * ap[i]; | |
| 93 | } | ||
| 94 | 132 | }); | |
| 95 | |||
| 96 |
1/2✓ Branch 1 taken 132 times.
✗ Branch 2 not taken.
|
132 | double rsnew = DotProduct(r, r, n); |
| 97 | |||
| 98 |
2/2✓ Branch 0 taken 108 times.
✓ Branch 1 taken 24 times.
|
132 | if (std::sqrt(rsnew) < tolerance) { |
| 99 | break; | ||
| 100 | } | ||
| 101 | |||
| 102 | 108 | double beta = rsnew / rsold; | |
| 103 | |||
| 104 |
1/4✓ Branch 1 taken 108 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
108 | tbb::parallel_for(tbb::blocked_range<int>(0, n, 1024), [&](const tbb::blocked_range<int> &range) { |
| 105 |
2/4✓ Branch 0 taken 4828 times.
✓ Branch 1 taken 108 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
4936 | for (int i = range.begin(); i < range.end(); ++i) { |
| 106 | 4828 | p[i] = r[i] + (beta * p[i]); | |
| 107 | } | ||
| 108 | }); | ||
| 109 | |||
| 110 | rsold = rsnew; | ||
| 111 | } | ||
| 112 | 24 | return true; | |
| 113 | } | ||
| 114 | |||
| 115 | 24 | bool KruglovaAConjGradSleTBB::PostProcessingImpl() { | |
| 116 | 24 | return true; | |
| 117 | } | ||
| 118 | |||
| 119 | } // namespace kruglova_a_conjugate_gradient_sle | ||
| 120 |