| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "sokolov_k_matrix_double_fox/omp/include/ops_omp.hpp" | ||
| 2 | |||
| 3 | #include <omp.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cmath> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "sokolov_k_matrix_double_fox/common/include/common.hpp" | ||
| 11 | |||
| 12 | namespace sokolov_k_matrix_double_fox { | ||
| 13 | |||
| 14 | namespace { | ||
| 15 | |||
/// Copies a row-major matrix into block-major storage.
///
/// `flat` is read as a matrix with `n` elements per row. `blocks` receives a q x q grid of
/// bs x bs tiles: tile (bi, bj) starts at offset ((bi * q) + bj) * bs * bs and is itself
/// stored row-major.
///
/// Preconditions (not checked here): the caller is expected to pass n == bs * q and to size
/// both vectors to hold at least n * n elements.
///
/// @param flat   Source matrix, row-major.
/// @param blocks Destination buffer, block-major.
/// @param n      Row length of `flat`.
/// @param bs     Side length of one tile.
/// @param q      Number of tiles per matrix side.
void DecomposeToBlocksOmp(const std::vector<double> &flat, std::vector<double> &blocks, int n, int bs, int q) {
#pragma omp parallel for default(none) shared(flat, blocks, n, bs, q) schedule(static)
  for (int bi = 0; bi < q; bi++) {
    // All offsets are computed in std::size_t: products such as bi * bs * n reach n * n,
    // which does not fit in int once n exceeds 46340.
    const auto stride = static_cast<std::size_t>(n);
    const auto side = static_cast<std::size_t>(bs);
    const auto grid = static_cast<std::size_t>(q);
    const auto block_row = static_cast<std::size_t>(bi);
    for (int bj = 0; bj < q; bj++) {
      const auto block_col = static_cast<std::size_t>(bj);
      const std::size_t block_off = ((block_row * grid) + block_col) * (side * side);
      for (int i = 0; i < bs; i++) {
        const auto row = static_cast<std::size_t>(i);
        const std::size_t dst = block_off + (row * side);
        const std::size_t src = (((block_row * side) + row) * stride) + (block_col * side);
        for (int j = 0; j < bs; j++) {
          blocks[dst + static_cast<std::size_t>(j)] = flat[src + static_cast<std::size_t>(j)];
        }
      }
    }
  }
}
| 29 | |||
/// Copies block-major storage back into a row-major matrix.
///
/// `blocks` is read as a q x q grid of bs x bs tiles, where tile (bi, bj) starts at offset
/// ((bi * q) + bj) * bs * bs and is stored row-major. `flat` is written as a matrix with
/// `n` elements per row.
///
/// Preconditions (not checked here): the caller is expected to pass n == bs * q and to size
/// both vectors to hold at least n * n elements.
///
/// @param blocks Source buffer, block-major.
/// @param flat   Destination matrix, row-major.
/// @param n      Row length of `flat`.
/// @param bs     Side length of one tile.
/// @param q      Number of tiles per matrix side.
void AssembleFromBlocksOmp(const std::vector<double> &blocks, std::vector<double> &flat, int n, int bs, int q) {
#pragma omp parallel for default(none) shared(blocks, flat, n, bs, q) schedule(static)
  for (int bi = 0; bi < q; bi++) {
    // All offsets are computed in std::size_t: products such as bi * bs * n reach n * n,
    // which does not fit in int once n exceeds 46340.
    const auto stride = static_cast<std::size_t>(n);
    const auto side = static_cast<std::size_t>(bs);
    const auto grid = static_cast<std::size_t>(q);
    const auto block_row = static_cast<std::size_t>(bi);
    for (int bj = 0; bj < q; bj++) {
      const auto block_col = static_cast<std::size_t>(bj);
      const std::size_t block_off = ((block_row * grid) + block_col) * (side * side);
      for (int i = 0; i < bs; i++) {
        const auto row = static_cast<std::size_t>(i);
        const std::size_t src = block_off + (row * side);
        const std::size_t dst = (((block_row * side) + row) * stride) + (block_col * side);
        for (int j = 0; j < bs; j++) {
          flat[dst + static_cast<std::size_t>(j)] = blocks[src + static_cast<std::size_t>(j)];
        }
      }
    }
  }
}
| 43 | |||
/// Accumulates the product of one bs x bs tile of `a` and one bs x bs tile of `b` into a
/// tile of `c` (c_tile += a_tile * b_tile). Tiles are row-major and start at the given
/// element offsets.
///
/// Offsets are std::size_t so that tile positions in buffers of n * n elements stay
/// representable when n * n exceeds the range of int.
///
/// @param a     Buffer holding the left operand tile.
/// @param a_off Element offset of the left tile inside `a`.
/// @param b     Buffer holding the right operand tile.
/// @param b_off Element offset of the right tile inside `b`.
/// @param c     Buffer holding the accumulator tile; updated in place.
/// @param c_off Element offset of the accumulator tile inside `c`.
/// @param bs    Side length of a tile; nothing is done when bs <= 0.
void MultiplyBlocksLocal(const std::vector<double> &a, std::size_t a_off, const std::vector<double> &b,
                         std::size_t b_off, std::vector<double> &c, std::size_t c_off, int bs) {
  if (bs <= 0) {
    return;
  }
  const auto side = static_cast<std::size_t>(bs);
  for (std::size_t i = 0; i < side; i++) {
    const std::size_t a_row = a_off + (i * side);
    const std::size_t c_row = c_off + (i * side);
    // i-k-j order: the innermost loop walks a row of b and a row of c contiguously.
    for (std::size_t k = 0; k < side; k++) {
      const double val = a[a_row + k];
      const std::size_t b_row = b_off + (k * side);
      for (std::size_t j = 0; j < side; j++) {
        c[c_row + j] += val * b[b_row + j];
      }
    }
  }
}

/// Performs one step of the block multiplication over block-major buffers.
///
/// For every block row i the tile A(i, k) with k = (i + step) % q is multiplied by each tile
/// B(k, j) and accumulated into C(i, j). Each loop iteration writes only to tiles of block
/// row i of `c`, so iterations of the parallel loop do not write to the same elements.
///
/// @param a    Left operand, q x q tiles of bs x bs, block-major.
/// @param b    Right operand, same layout.
/// @param c    Accumulator, same layout; updated in place.
/// @param bs   Side length of a tile.
/// @param q    Number of tiles per matrix side.
/// @param step Index of the current step; selects which tile of `a` each block row uses.
void FoxStepOmp(const std::vector<double> &a, const std::vector<double> &b, std::vector<double> &c, int bs, int q,
                int step) {
  // Tile size and tile offsets are kept in std::size_t: they scale with n * n and would
  // overflow int for large matrices.
  std::size_t bsq = static_cast<std::size_t>(bs) * static_cast<std::size_t>(bs);
#pragma omp parallel for default(none) shared(a, b, c, bs, q, step, bsq) schedule(static)
  for (int i = 0; i < q; i++) {
    const int k = (i + step) % q;
    const auto grid = static_cast<std::size_t>(q);
    const auto row = static_cast<std::size_t>(i);
    const auto pivot = static_cast<std::size_t>(k);
    for (std::size_t j = 0; j < grid; j++) {
      MultiplyBlocksLocal(a, ((row * grid) + pivot) * bsq, b, ((pivot * grid) + j) * bsq, c, ((row * grid) + j) * bsq,
                          bs);
    }
  }
}
| 67 | |||
| 68 | 48 | void FoxMultiplyOmp(const std::vector<double> &a, const std::vector<double> &b, std::vector<double> &c, int bs, int q) { | |
| 69 |
2/2✓ Branch 0 taken 220 times.
✓ Branch 1 taken 48 times.
|
268 | for (int step = 0; step < q; step++) { |
| 70 | FoxStepOmp(a, b, c, bs, q, step); | ||
| 71 | } | ||
| 72 | 48 | } | |
| 73 | |||
/// Picks the tile side length for an n x n matrix: the largest divisor of `n` that does not
/// exceed floor(sqrt(n)).
///
/// @param n Matrix side length.
/// @return The chosen divisor; 1 when `n` is prime, and 1 for any n <= 0.
int ChooseBlockSizeOmp(int n) {
  // std::sqrt of a negative value is NaN, and converting NaN to int is undefined behaviour,
  // so non-positive sizes are answered up front.
  if (n <= 0) {
    return 1;
  }
  for (int div = static_cast<int>(std::sqrt(static_cast<double>(n))); div > 1; div--) {
    if (n % div == 0) {
      return div;
    }
  }
  return 1;
}
| 82 | |||
| 83 | } // namespace | ||
| 84 | |||
| 85 | 48 | SokolovKMatrixDoubleFoxOMP::SokolovKMatrixDoubleFoxOMP(const InType &in) { | |
| 86 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 87 | 48 | GetInput() = in; | |
| 88 | GetOutput() = 0; | ||
| 89 | 48 | } | |
| 90 | |||
| 91 | 48 | bool SokolovKMatrixDoubleFoxOMP::ValidationImpl() { | |
| 92 |
2/4✓ Branch 0 taken 48 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 48 times.
|
48 | return (GetInput() > 0) && (GetOutput() == 0); |
| 93 | } | ||
| 94 | |||
| 95 | 48 | bool SokolovKMatrixDoubleFoxOMP::PreProcessingImpl() { | |
| 96 | 48 | GetOutput() = 0; | |
| 97 | 48 | n_ = GetInput(); | |
| 98 | 48 | block_size_ = ChooseBlockSizeOmp(n_); | |
| 99 | 48 | q_ = n_ / block_size_; | |
| 100 | 48 | auto sz = static_cast<std::size_t>(n_) * n_; | |
| 101 | 48 | std::vector<double> a(sz, 1.5); | |
| 102 |
1/4✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
48 | std::vector<double> b(sz, 2.0); |
| 103 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | blocks_a_.resize(sz); |
| 104 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | blocks_b_.resize(sz); |
| 105 |
1/4✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
48 | blocks_c_.assign(sz, 0.0); |
| 106 | 48 | DecomposeToBlocksOmp(a, blocks_a_, n_, block_size_, q_); | |
| 107 |
1/2✓ Branch 0 taken 48 times.
✗ Branch 1 not taken.
|
48 | DecomposeToBlocksOmp(b, blocks_b_, n_, block_size_, q_); |
| 108 | 48 | return true; | |
| 109 | } | ||
| 110 | |||
| 111 |
1/2✓ Branch 0 taken 48 times.
✗ Branch 1 not taken.
|
48 | bool SokolovKMatrixDoubleFoxOMP::RunImpl() { |
| 112 | std::ranges::fill(blocks_c_, 0.0); | ||
| 113 | 48 | FoxMultiplyOmp(blocks_a_, blocks_b_, blocks_c_, block_size_, q_); | |
| 114 | 48 | return true; | |
| 115 | } | ||
| 116 | |||
| 117 | 48 | bool SokolovKMatrixDoubleFoxOMP::PostProcessingImpl() { | |
| 118 | 48 | std::vector<double> result(static_cast<std::size_t>(n_) * n_); | |
| 119 | 48 | AssembleFromBlocksOmp(blocks_c_, result, n_, block_size_, q_); | |
| 120 | 48 | double expected = 3.0 * n_; | |
| 121 |
1/2✓ Branch 0 taken 51540 times.
✗ Branch 1 not taken.
|
51540 | bool ok = std::ranges::all_of(result, [expected](double v) { return std::abs(v - expected) <= 1e-9; }); |
| 122 |
2/4✓ Branch 0 taken 48 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
|
48 | GetOutput() = ok ? GetInput() : -1; |
| 123 | std::vector<double>().swap(blocks_a_); | ||
| 124 | std::vector<double>().swap(blocks_b_); | ||
| 125 | std::vector<double>().swap(blocks_c_); | ||
| 126 | 48 | return true; | |
| 127 | } | ||
| 128 | |||
| 129 | } // namespace sokolov_k_matrix_double_fox | ||
| 130 |