| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "sokolov_k_matrix_double_fox/tbb/include/ops_tbb.hpp" | ||
| 2 | |||
| 3 | #include <algorithm> | ||
| 4 | #include <cmath> | ||
| 5 | #include <cstddef> | ||
| 6 | #include <vector> | ||
| 7 | |||
| 8 | #include "oneapi/tbb/parallel_for.h" | ||
| 9 | #include "sokolov_k_matrix_double_fox/common/include/common.hpp" | ||
| 10 | |||
| 11 | namespace sokolov_k_matrix_double_fox { | ||
| 12 | |||
| 13 | namespace { | ||
| 14 | |||
| 15 | void DecomposeToBlocksTbb(const std::vector<double> &flat, std::vector<double> &blocks, int n, int bs, int q) { | ||
| 16 | 584 | tbb::parallel_for(0, q, [&](int bi) { | |
| 17 |
2/2✓ Branch 0 taken 2808 times.
✓ Branch 1 taken 440 times.
|
3248 | for (int bj = 0; bj < q; bj++) { |
| 18 | 2808 | int block_off = ((bi * q) + bj) * (bs * bs); | |
| 19 |
2/2✓ Branch 0 taken 13784 times.
✓ Branch 1 taken 2808 times.
|
16592 | for (int i = 0; i < bs; i++) { |
| 20 |
2/2✓ Branch 0 taken 103080 times.
✓ Branch 1 taken 13784 times.
|
116864 | for (int j = 0; j < bs; j++) { |
| 21 | 103080 | blocks[block_off + (i * bs) + j] = flat[(((bi * bs) + i) * n) + ((bj * bs) + j)]; | |
| 22 | } | ||
| 23 | } | ||
| 24 | } | ||
| 25 | 440 | }); | |
| 26 | } | ||
| 27 | |||
| 28 | void AssembleFromBlocksTbb(const std::vector<double> &blocks, std::vector<double> &flat, int n, int bs, int q) { | ||
| 29 | 316 | tbb::parallel_for(0, q, [&](int bi) { | |
| 30 |
2/2✓ Branch 0 taken 1404 times.
✓ Branch 1 taken 220 times.
|
1624 | for (int bj = 0; bj < q; bj++) { |
| 31 | 1404 | int block_off = ((bi * q) + bj) * (bs * bs); | |
| 32 |
2/2✓ Branch 0 taken 6892 times.
✓ Branch 1 taken 1404 times.
|
8296 | for (int i = 0; i < bs; i++) { |
| 33 |
2/2✓ Branch 0 taken 51540 times.
✓ Branch 1 taken 6892 times.
|
58432 | for (int j = 0; j < bs; j++) { |
| 34 | 51540 | flat[(((bi * bs) + i) * n) + ((bj * bs) + j)] = blocks[block_off + (i * bs) + j]; | |
| 35 | } | ||
| 36 | } | ||
| 37 | } | ||
| 38 | 220 | }); | |
| 39 | } | ||
| 40 | |||
/// Accumulates the product of two bs x bs tiles into a third: C += A * B.
///
/// Each tile is a row-major run of bs * bs values inside its vector, starting
/// at the given offset. The destination tile is added to, not overwritten.
/// The loops run in i-k-j order so the innermost loop walks a row of B and a
/// row of C with unit stride.
///
/// @param a      Vector holding the left tile.
/// @param a_off  Offset of the left tile's first element in `a`.
/// @param b      Vector holding the right tile.
/// @param b_off  Offset of the right tile's first element in `b`.
/// @param c      Vector holding the accumulator tile.
/// @param c_off  Offset of the accumulator tile's first element in `c`.
/// @param bs     Tile edge length.
void MultiplyBlocksTbb(const std::vector<double> &a, int a_off, const std::vector<double> &b, int b_off,
                       std::vector<double> &c, int c_off, int bs) {
  for (int row = 0; row < bs; ++row) {
    const int a_row = a_off + (row * bs);
    const int c_row = c_off + (row * bs);
    for (int inner = 0; inner < bs; ++inner) {
      const double scale = a[a_row + inner];
      const int b_row = b_off + (inner * bs);
      for (int col = 0; col < bs; ++col) {
        c[c_row + col] += scale * b[b_row + col];
      }
    }
  }
}
| 52 | |||
| 53 | 220 | void FoxStepTbb(const std::vector<double> &a, const std::vector<double> &b, std::vector<double> &c, int bs, int q, | |
| 54 | int step) { | ||
| 55 | 220 | int bsq = bs * bs; | |
| 56 | 220 | tbb::parallel_for(0, q, [&](int i) { | |
| 57 | 1404 | int k = (i + step) % q; | |
| 58 |
2/2✓ Branch 0 taken 11020 times.
✓ Branch 1 taken 1404 times.
|
12424 | for (int j = 0; j < q; j++) { |
| 59 | 11020 | MultiplyBlocksTbb(a, ((i * q) + k) * bsq, b, ((k * q) + j) * bsq, c, ((i * q) + j) * bsq, bs); | |
| 60 | } | ||
| 61 | 1404 | }); | |
| 62 | 220 | } | |
| 63 | |||
| 64 | void FoxMultiplyTbb(const std::vector<double> &a, const std::vector<double> &b, std::vector<double> &c, int bs, int q) { | ||
| 65 |
2/2✓ Branch 0 taken 220 times.
✓ Branch 1 taken 48 times.
|
268 | for (int step = 0; step < q; step++) { |
| 66 | 220 | FoxStepTbb(a, b, c, bs, q, step); | |
| 67 | } | ||
| 68 | } | ||
| 69 | |||
/// Picks the tile edge length for an n x n matrix.
///
/// Returns the largest divisor of n that does not exceed floor(sqrt(n)), so
/// the matrix splits into whole tiles. When n has no such divisor other than
/// 1 (for example when n is prime) the result is 1.
///
/// @param n Matrix dimension.
/// @return  A divisor of n in [1, floor(sqrt(n))]; 1 for any n < 1.
int ChooseBlockSizeTbb(int n) {
  // sqrt of a negative value is NaN, and converting NaN to int is undefined
  // behaviour, so non-positive sizes are handled before touching sqrt.
  if (n < 1) {
    return 1;
  }
  for (int div = static_cast<int>(std::sqrt(static_cast<double>(n))); div >= 1; div--) {
    if (n % div == 0) {
      return div;
    }
  }
  return 1;
}
| 78 | |||
| 79 | } // namespace | ||
| 80 | |||
| 81 | 48 | SokolovKMatrixDoubleFoxTBB::SokolovKMatrixDoubleFoxTBB(const InType &in) { | |
| 82 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 83 | 48 | GetInput() = in; | |
| 84 | GetOutput() = 0; | ||
| 85 | 48 | } | |
| 86 | |||
| 87 | 48 | bool SokolovKMatrixDoubleFoxTBB::ValidationImpl() { | |
| 88 |
2/4✓ Branch 0 taken 48 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 48 times.
|
48 | return (GetInput() > 0) && (GetOutput() == 0); |
| 89 | } | ||
| 90 | |||
| 91 | 48 | bool SokolovKMatrixDoubleFoxTBB::PreProcessingImpl() { | |
| 92 | 48 | GetOutput() = 0; | |
| 93 | 48 | n_ = GetInput(); | |
| 94 | 48 | block_size_ = ChooseBlockSizeTbb(n_); | |
| 95 | 48 | q_ = n_ / block_size_; | |
| 96 | 48 | auto sz = static_cast<std::size_t>(n_) * n_; | |
| 97 | 48 | std::vector<double> a(sz, 1.5); | |
| 98 |
1/4✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
48 | std::vector<double> b(sz, 2.0); |
| 99 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | blocks_a_.resize(sz); |
| 100 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | blocks_b_.resize(sz); |
| 101 |
1/4✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
48 | blocks_c_.assign(sz, 0.0); |
| 102 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | DecomposeToBlocksTbb(a, blocks_a_, n_, block_size_, q_); |
| 103 |
2/4✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 48 times.
✗ Branch 4 not taken.
|
48 | DecomposeToBlocksTbb(b, blocks_b_, n_, block_size_, q_); |
| 104 | 48 | return true; | |
| 105 | } | ||
| 106 | |||
| 107 |
1/2✓ Branch 0 taken 48 times.
✗ Branch 1 not taken.
|
48 | bool SokolovKMatrixDoubleFoxTBB::RunImpl() { |
| 108 | std::ranges::fill(blocks_c_, 0.0); | ||
| 109 | 48 | FoxMultiplyTbb(blocks_a_, blocks_b_, blocks_c_, block_size_, q_); | |
| 110 | 48 | return true; | |
| 111 | } | ||
| 112 | |||
| 113 | 48 | bool SokolovKMatrixDoubleFoxTBB::PostProcessingImpl() { | |
| 114 | 48 | std::vector<double> result(static_cast<std::size_t>(n_) * n_); | |
| 115 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | AssembleFromBlocksTbb(blocks_c_, result, n_, block_size_, q_); |
| 116 | 48 | double expected = 3.0 * n_; | |
| 117 |
1/2✓ Branch 0 taken 51540 times.
✗ Branch 1 not taken.
|
51540 | bool ok = std::ranges::all_of(result, [expected](double v) { return std::abs(v - expected) <= 1e-9; }); |
| 118 |
2/4✓ Branch 0 taken 48 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
|
48 | GetOutput() = ok ? GetInput() : -1; |
| 119 | std::vector<double>().swap(blocks_a_); | ||
| 120 | std::vector<double>().swap(blocks_b_); | ||
| 121 | std::vector<double>().swap(blocks_c_); | ||
| 122 | 48 | return true; | |
| 123 | } | ||
| 124 | |||
| 125 | } // namespace sokolov_k_matrix_double_fox | ||
| 126 |