| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "sokolov_k_matrix_double_fox/stl/include/ops_stl.hpp" | ||
| 2 | |||
| 3 | #include <algorithm> | ||
| 4 | #include <cmath> | ||
| 5 | #include <cstddef> | ||
| 6 | #include <functional> | ||
| 7 | #include <thread> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "sokolov_k_matrix_double_fox/common/include/common.hpp" | ||
| 11 | #include "util/include/util.hpp" | ||
| 12 | |||
| 13 | namespace sokolov_k_matrix_double_fox { | ||
| 14 | |||
| 15 | namespace { | ||
| 16 | |||
// Repacks a row-major matrix with row length `n` into block-major order.
//
// The matrix is viewed as a q x q grid of bs x bs blocks. Block (bi, bj) is written to
// `blocks` starting at offset (bi * q + bj) * bs * bs, row-major inside the block.
//
// flat   - source matrix, row-major, indexed up to (q * bs - 1) * n + (q * bs - 1)
// blocks - destination buffer, must already hold at least q * q * bs * bs elements
// n      - row length (stride) of `flat`
// bs     - block edge length
// q      - number of blocks per matrix side
//
// Nothing is copied when bs or q is not positive.
void DecomposeToBlocks(const std::vector<double> &flat, std::vector<double> &blocks, int n, int bs, int q) {
  if (bs <= 0 || q <= 0) {
    return;
  }

  // Offsets are computed in std::size_t: with int arithmetic the products overflow as soon as
  // n * n exceeds INT_MAX, although the buffers themselves are sized with std::size_t.
  const auto stride = static_cast<std::size_t>(n);
  const auto block_dim = static_cast<std::size_t>(bs);
  const auto grid_dim = static_cast<std::size_t>(q);
  const std::size_t block_elems = block_dim * block_dim;

  for (std::size_t bi = 0; bi < grid_dim; ++bi) {
    for (std::size_t bj = 0; bj < grid_dim; ++bj) {
      const std::size_t block_off = ((bi * grid_dim) + bj) * block_elems;
      for (std::size_t i = 0; i < block_dim; ++i) {
        // One block row is contiguous in both layouts, so it is moved as a single run.
        const std::size_t src = (((bi * block_dim) + i) * stride) + (bj * block_dim);
        const std::size_t dst = block_off + (i * block_dim);
        std::copy_n(flat.begin() + static_cast<std::ptrdiff_t>(src), block_dim,
                    blocks.begin() + static_cast<std::ptrdiff_t>(dst));
      }
    }
  }
}
| 29 | |||
// Converts a block-major buffer back into a row-major matrix with row length `n`.
//
// `blocks` holds a q x q grid of bs x bs blocks; block (bi, bj) starts at offset
// (bi * q + bj) * bs * bs and is row-major inside the block. Each element is written to
// flat[(bi * bs + i) * n + (bj * bs + j)].
//
// blocks - source buffer with at least q * q * bs * bs elements
// flat   - destination matrix, must already be large enough for the indices above
// n      - row length (stride) of `flat`
// bs     - block edge length
// q      - number of blocks per matrix side
//
// Nothing is copied when bs or q is not positive.
void AssembleFromBlocks(const std::vector<double> &blocks, std::vector<double> &flat, int n, int bs, int q) {
  if (bs <= 0 || q <= 0) {
    return;
  }

  // Offsets are computed in std::size_t: with int arithmetic the products overflow as soon as
  // n * n exceeds INT_MAX, although the buffers themselves are sized with std::size_t.
  const auto stride = static_cast<std::size_t>(n);
  const auto block_dim = static_cast<std::size_t>(bs);
  const auto grid_dim = static_cast<std::size_t>(q);
  const std::size_t block_elems = block_dim * block_dim;

  for (std::size_t bi = 0; bi < grid_dim; ++bi) {
    for (std::size_t bj = 0; bj < grid_dim; ++bj) {
      const std::size_t block_off = ((bi * grid_dim) + bj) * block_elems;
      for (std::size_t i = 0; i < block_dim; ++i) {
        // One block row is contiguous in both layouts, so it is moved as a single run.
        const std::size_t src = block_off + (i * block_dim);
        const std::size_t dst = (((bi * block_dim) + i) * stride) + (bj * block_dim);
        std::copy_n(blocks.begin() + static_cast<std::ptrdiff_t>(src), block_dim,
                    flat.begin() + static_cast<std::ptrdiff_t>(dst));
      }
    }
  }
}
| 42 | |||
// Accumulates the product of two bs x bs blocks into a third: C += A * B.
//
// Each block is stored row-major inside its vector, starting at the given offset
// (a_off, b_off, c_off). The destination is added to, never cleared, so callers are
// responsible for zeroing `c` before the first accumulation.
void MultiplyBlocks(const std::vector<double> &a, int a_off, const std::vector<double> &b, int b_off,
                    std::vector<double> &c, int c_off, int bs) {
  for (int row = 0; row < bs; ++row) {
    const int a_row = a_off + (row * bs);
    const int c_row = c_off + (row * bs);
    for (int inner = 0; inner < bs; ++inner) {
      // i-k-j loop order: the A element is read once and swept across a whole row of B.
      const double scale = a[a_row + inner];
      const int b_row = b_off + (inner * bs);
      for (int col = 0; col < bs; ++col) {
        c[c_row + col] += scale * b[b_row + col];
      }
    }
  }
}
| 54 | |||
| 55 | 210 | void FoxWorker(const std::vector<double> &a, const std::vector<double> &b, std::vector<double> &c, int bs, int q, | |
| 56 | int row_begin, int row_end) { | ||
| 57 | 210 | int bsq = bs * bs; | |
| 58 |
2/2✓ Branch 0 taken 1046 times.
✓ Branch 1 taken 210 times.
|
1256 | for (int step = 0; step < q; step++) { |
| 59 |
2/2✓ Branch 0 taken 2808 times.
✓ Branch 1 taken 1046 times.
|
3854 | for (int i = row_begin; i < row_end; i++) { |
| 60 | 2808 | int k = (i + step) % q; | |
| 61 |
2/2✓ Branch 0 taken 22040 times.
✓ Branch 1 taken 2808 times.
|
24848 | for (int j = 0; j < q; j++) { |
| 62 | 22040 | MultiplyBlocks(a, ((i * q) + k) * bsq, b, ((k * q) + j) * bsq, c, ((i * q) + j) * bsq, bs); | |
| 63 | } | ||
| 64 | } | ||
| 65 | } | ||
| 66 | 210 | } | |
| 67 | |||
// Chooses the block edge length for a matrix of order n: the largest divisor of n that does
// not exceed floor(sqrt(n)).
//
// Returns 1 when no larger divisor exists (for example when n is prime) and for any n < 1.
int ChooseBlockSize(int n) {
  if (n < 1) {
    // sqrt of a negative value is NaN, and converting NaN to int is undefined behaviour,
    // so non-positive orders are answered directly with the trivial block size.
    return 1;
  }

  const int root = static_cast<int>(std::sqrt(static_cast<double>(n)));
  for (int div = root; div > 1; div--) {
    if (n % div == 0) {
      return div;
    }
  }
  // 1 divides every n, so it is the answer whenever the search above finds nothing.
  return 1;
}
| 76 | |||
| 77 | } // namespace | ||
| 78 | |||
| 79 | 96 | SokolovKMatrixDoubleFoxSTL::SokolovKMatrixDoubleFoxSTL(const InType &in) { | |
| 80 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 81 | 96 | GetInput() = in; | |
| 82 | GetOutput() = 0; | ||
| 83 | 96 | } | |
| 84 | |||
| 85 | 96 | bool SokolovKMatrixDoubleFoxSTL::ValidationImpl() { | |
| 86 |
2/4✓ Branch 0 taken 96 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 96 times.
|
96 | return (GetInput() > 0) && (GetOutput() == 0); |
| 87 | } | ||
| 88 | |||
| 89 | 96 | bool SokolovKMatrixDoubleFoxSTL::PreProcessingImpl() { | |
| 90 | 96 | GetOutput() = 0; | |
| 91 | 96 | n_ = GetInput(); | |
| 92 | 96 | block_size_ = ChooseBlockSize(n_); | |
| 93 | 96 | q_ = n_ / block_size_; | |
| 94 | 96 | auto sz = static_cast<std::size_t>(n_) * n_; | |
| 95 | 96 | std::vector<double> a(sz, 1.5); | |
| 96 |
1/4✓ Branch 1 taken 96 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
96 | std::vector<double> b(sz, 2.0); |
| 97 |
1/2✓ Branch 1 taken 96 times.
✗ Branch 2 not taken.
|
96 | blocks_a_.resize(sz); |
| 98 |
1/2✓ Branch 1 taken 96 times.
✗ Branch 2 not taken.
|
96 | blocks_b_.resize(sz); |
| 99 |
1/4✓ Branch 1 taken 96 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
96 | blocks_c_.assign(sz, 0.0); |
| 100 | 96 | DecomposeToBlocks(a, blocks_a_, n_, block_size_, q_); | |
| 101 | 96 | DecomposeToBlocks(b, blocks_b_, n_, block_size_, q_); | |
| 102 | 96 | return true; | |
| 103 | } | ||
| 104 | |||
| 105 |
1/2✓ Branch 0 taken 96 times.
✗ Branch 1 not taken.
|
96 | bool SokolovKMatrixDoubleFoxSTL::RunImpl() { |
| 106 | std::ranges::fill(blocks_c_, 0.0); | ||
| 107 |
4/4✓ Branch 1 taken 20 times.
✓ Branch 2 taken 76 times.
✓ Branch 3 taken 66 times.
✓ Branch 4 taken 30 times.
|
116 | int num_threads = std::max(1, std::min(ppc::util::GetNumThreads(), q_)); |
| 108 | 96 | std::vector<std::thread> threads; | |
| 109 |
1/2✓ Branch 1 taken 96 times.
✗ Branch 2 not taken.
|
96 | threads.reserve(num_threads); |
| 110 | 96 | int rows_per = q_ / num_threads; | |
| 111 | 96 | int extra = q_ % num_threads; | |
| 112 | 96 | int row = 0; | |
| 113 |
2/2✓ Branch 0 taken 210 times.
✓ Branch 1 taken 96 times.
|
306 | for (int idx = 0; idx < num_threads; idx++) { |
| 114 | 210 | int row_begin = row; | |
| 115 |
2/2✓ Branch 0 taken 164 times.
✓ Branch 1 taken 46 times.
|
210 | int add = (idx < extra) ? 1 : 0; |
| 116 | 210 | row += rows_per + add; | |
| 117 |
1/2✓ Branch 1 taken 210 times.
✗ Branch 2 not taken.
|
210 | threads.emplace_back(FoxWorker, std::cref(blocks_a_), std::cref(blocks_b_), std::ref(blocks_c_), block_size_, q_, |
| 118 | row_begin, row); | ||
| 119 | } | ||
| 120 |
2/2✓ Branch 0 taken 210 times.
✓ Branch 1 taken 96 times.
|
306 | for (auto &thr : threads) { |
| 121 |
1/2✓ Branch 1 taken 210 times.
✗ Branch 2 not taken.
|
210 | thr.join(); |
| 122 | } | ||
| 123 | 96 | return true; | |
| 124 | 96 | } | |
| 125 | |||
| 126 | 96 | bool SokolovKMatrixDoubleFoxSTL::PostProcessingImpl() { | |
| 127 | 96 | std::vector<double> result(static_cast<std::size_t>(n_) * n_); | |
| 128 | 96 | AssembleFromBlocks(blocks_c_, result, n_, block_size_, q_); | |
| 129 | 96 | double expected = 3.0 * n_; | |
| 130 |
1/2✓ Branch 0 taken 103080 times.
✗ Branch 1 not taken.
|
103080 | bool ok = std::ranges::all_of(result, [expected](double v) { return std::abs(v - expected) <= 1e-9; }); |
| 131 |
2/4✓ Branch 0 taken 96 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 96 times.
✗ Branch 3 not taken.
|
96 | GetOutput() = ok ? GetInput() : -1; |
| 132 | std::vector<double>().swap(blocks_a_); | ||
| 133 | std::vector<double>().swap(blocks_b_); | ||
| 134 | std::vector<double>().swap(blocks_c_); | ||
| 135 | 96 | return true; | |
| 136 | } | ||
| 137 | |||
| 138 | } // namespace sokolov_k_matrix_double_fox | ||
| 139 |