| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "kazennova_a_fox_algorithm/omp/include/ops_omp.hpp" | ||
| 2 | |||
| 3 | #include <omp.h> | ||
| 4 | |||
| 5 | #include <cmath> | ||
| 6 | #include <cstddef> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "kazennova_a_fox_algorithm/common/include/common.hpp" | ||
| 10 | |||
| 11 | namespace kazennova_a_fox_algorithm { | ||
| 12 | |||
| 13 | namespace { | ||
| 14 | |||
| 15 | 16 | int ChooseBlockSize(int n) { | |
| 16 | int best = 1; | ||
| 17 | 16 | int sqrt_n = static_cast<int>(std::sqrt(static_cast<double>(n))); | |
| 18 | |||
| 19 |
1/2✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
|
24 | for (int bs = sqrt_n; bs >= 1; --bs) { |
| 20 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 16 times.
|
24 | if (n % bs == 0) { |
| 21 | best = bs; | ||
| 22 | break; | ||
| 23 | } | ||
| 24 | } | ||
| 25 | 16 | return best; | |
| 26 | } | ||
| 27 | |||
| 28 | } // namespace | ||
| 29 | |||
| 30 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | KazennovaATestTaskOMP::KazennovaATestTaskOMP(const InType &in) { |
| 31 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 32 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | GetInput() = in; |
| 33 | 16 | } | |
| 34 | |||
| 35 |
1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
|
16 | bool KazennovaATestTaskOMP::ValidationImpl() { |
| 36 | const auto &in = GetInput(); | ||
| 37 | |||
| 38 |
2/4✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 16 times.
✗ Branch 3 not taken.
|
16 | if (in.A.data.empty() || in.B.data.empty()) { |
| 39 | return false; | ||
| 40 | } | ||
| 41 | |||
| 42 |
4/8✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 16 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 16 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 16 times.
✗ Branch 7 not taken.
|
16 | if (in.A.rows <= 0 || in.A.cols <= 0 || in.B.rows <= 0 || in.B.cols <= 0) { |
| 43 | return false; | ||
| 44 | } | ||
| 45 | |||
| 46 |
1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
|
16 | if (in.A.cols != in.B.rows) { |
| 47 | return false; | ||
| 48 | } | ||
| 49 | |||
| 50 |
2/4✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 16 times.
|
16 | if (in.A.rows != in.A.cols || in.B.rows != in.B.cols || in.A.rows != in.B.rows) { |
| 51 | ✗ | return false; | |
| 52 | } | ||
| 53 | |||
| 54 | return true; | ||
| 55 | } | ||
| 56 | |||
| 57 | 32 | void KazennovaATestTaskOMP::DecomposeMatrix(const std::vector<double> &src, std::vector<double> &dst, int n, int bs, | |
| 58 | int q) { | ||
| 59 | 32 | size_t block_elements = static_cast<size_t>(bs) * bs; | |
| 60 | |||
| 61 |
2/2✓ Branch 0 taken 120 times.
✓ Branch 1 taken 32 times.
|
152 | for (int bi = 0; bi < q; ++bi) { |
| 62 |
2/2✓ Branch 0 taken 504 times.
✓ Branch 1 taken 120 times.
|
624 | for (int bj = 0; bj < q; ++bj) { |
| 63 | 504 | size_t block_offset = ((static_cast<size_t>(bi) * q) + bj) * block_elements; | |
| 64 | |||
| 65 |
2/2✓ Branch 0 taken 704 times.
✓ Branch 1 taken 504 times.
|
1208 | for (int i = 0; i < bs; ++i) { |
| 66 |
2/2✓ Branch 0 taken 1104 times.
✓ Branch 1 taken 704 times.
|
1808 | for (int j = 0; j < bs; ++j) { |
| 67 | 1104 | size_t src_idx = (((static_cast<size_t>(bi) * bs) + i) * n) + ((static_cast<size_t>(bj) * bs) + j); | |
| 68 | 1104 | size_t dst_idx = block_offset + (static_cast<size_t>(i) * bs) + j; | |
| 69 | 1104 | dst[dst_idx] = src[src_idx]; | |
| 70 | } | ||
| 71 | } | ||
| 72 | } | ||
| 73 | } | ||
| 74 | 32 | } | |
| 75 | |||
| 76 | 16 | void KazennovaATestTaskOMP::AssembleMatrix(const std::vector<double> &src, std::vector<double> &dst, int n, int bs, | |
| 77 | int q) { | ||
| 78 | 16 | size_t block_elements = static_cast<size_t>(bs) * bs; | |
| 79 | |||
| 80 |
2/2✓ Branch 0 taken 60 times.
✓ Branch 1 taken 16 times.
|
76 | for (int bi = 0; bi < q; ++bi) { |
| 81 |
2/2✓ Branch 0 taken 252 times.
✓ Branch 1 taken 60 times.
|
312 | for (int bj = 0; bj < q; ++bj) { |
| 82 | 252 | size_t block_offset = ((static_cast<size_t>(bi) * q) + bj) * block_elements; | |
| 83 | |||
| 84 |
2/2✓ Branch 0 taken 352 times.
✓ Branch 1 taken 252 times.
|
604 | for (int i = 0; i < bs; ++i) { |
| 85 |
2/2✓ Branch 0 taken 552 times.
✓ Branch 1 taken 352 times.
|
904 | for (int j = 0; j < bs; ++j) { |
| 86 | 552 | size_t dst_idx = (((static_cast<size_t>(bi) * bs) + i) * n) + ((static_cast<size_t>(bj) * bs) + j); | |
| 87 | 552 | size_t src_idx = block_offset + (static_cast<size_t>(i) * bs) + j; | |
| 88 | 552 | dst[dst_idx] = src[src_idx]; | |
| 89 | } | ||
| 90 | } | ||
| 91 | } | ||
| 92 | } | ||
| 93 | 16 | } | |
| 94 | |||
| 95 | 16 | bool KazennovaATestTaskOMP::PreProcessingImpl() { | |
| 96 | const auto &in = GetInput(); | ||
| 97 | |||
| 98 | 16 | matrix_size_ = in.A.rows; | |
| 99 | 16 | GetOutput().rows = matrix_size_; | |
| 100 | 16 | GetOutput().cols = matrix_size_; | |
| 101 | 16 | GetOutput().data.assign(static_cast<size_t>(matrix_size_) * matrix_size_, 0.0); | |
| 102 | |||
| 103 | 16 | block_size_ = ChooseBlockSize(matrix_size_); | |
| 104 | 16 | block_count_ = matrix_size_ / block_size_; | |
| 105 | |||
| 106 | 16 | size_t total_blocks = static_cast<size_t>(block_count_) * block_count_; | |
| 107 | 16 | size_t block_elements = static_cast<size_t>(block_size_) * block_size_; | |
| 108 | 16 | a_blocks_.assign(total_blocks * block_elements, 0.0); | |
| 109 | 16 | b_blocks_.assign(total_blocks * block_elements, 0.0); | |
| 110 | 16 | c_blocks_.assign(total_blocks * block_elements, 0.0); | |
| 111 | |||
| 112 | 16 | DecomposeMatrix(in.A.data, a_blocks_, matrix_size_, block_size_, block_count_); | |
| 113 | 16 | DecomposeMatrix(in.B.data, b_blocks_, matrix_size_, block_size_, block_count_); | |
| 114 | |||
| 115 | 16 | return true; | |
| 116 | } | ||
| 117 | |||
| 118 | 1140 | void KazennovaATestTaskOMP::MultiplyBlock(size_t a_idx, size_t b_idx, size_t c_idx, int bs) { | |
| 119 |
2/2✓ Branch 0 taken 1640 times.
✓ Branch 1 taken 1140 times.
|
2780 | for (int ii = 0; ii < bs; ++ii) { |
| 120 | 1640 | size_t row_offset = a_idx + (static_cast<size_t>(ii) * bs); | |
| 121 | 1640 | size_t c_row_offset = c_idx + (static_cast<size_t>(ii) * bs); | |
| 122 |
2/2✓ Branch 0 taken 2640 times.
✓ Branch 1 taken 1640 times.
|
4280 | for (int kk = 0; kk < bs; ++kk) { |
| 123 | 2640 | double a_val = a_blocks_[row_offset + kk]; | |
| 124 | 2640 | size_t b_row_offset = b_idx + (static_cast<size_t>(kk) * bs); | |
| 125 |
2/2✓ Branch 0 taken 4640 times.
✓ Branch 1 taken 2640 times.
|
7280 | for (int jj = 0; jj < bs; ++jj) { |
| 126 | 4640 | c_blocks_[c_row_offset + jj] += a_val * b_blocks_[b_row_offset + jj]; | |
| 127 | } | ||
| 128 | } | ||
| 129 | } | ||
| 130 | 1140 | } | |
| 131 | |||
| 132 | 16 | bool KazennovaATestTaskOMP::RunImpl() { | |
| 133 | 16 | size_t block_elements = static_cast<size_t>(block_size_) * block_size_; | |
| 134 | |||
| 135 |
2/2✓ Branch 0 taken 552 times.
✓ Branch 1 taken 16 times.
|
568 | for (auto &c_block : c_blocks_) { |
| 136 | 552 | c_block = 0.0; | |
| 137 | } | ||
| 138 | |||
| 139 |
2/2✓ Branch 0 taken 60 times.
✓ Branch 1 taken 16 times.
|
76 | for (int step = 0; step < block_count_; ++step) { |
| 140 | 60 | #pragma omp parallel for default(none) shared(step, block_elements) | |
| 141 | for (int i = 0; i < block_count_; ++i) { | ||
| 142 | for (int j = 0; j < block_count_; ++j) { | ||
| 143 | int k = (i + step) % block_count_; | ||
| 144 | |||
| 145 | size_t a_idx = ((static_cast<size_t>(i) * block_count_) + k) * block_elements; | ||
| 146 | size_t b_idx = ((static_cast<size_t>(k) * block_count_) + j) * block_elements; | ||
| 147 | size_t c_idx = ((static_cast<size_t>(i) * block_count_) + j) * block_elements; | ||
| 148 | |||
| 149 | MultiplyBlock(a_idx, b_idx, c_idx, block_size_); | ||
| 150 | } | ||
| 151 | } | ||
| 152 | } | ||
| 153 | |||
| 154 | 16 | return true; | |
| 155 | } | ||
| 156 | |||
| 157 | 16 | bool KazennovaATestTaskOMP::PostProcessingImpl() { | |
| 158 | 16 | AssembleMatrix(c_blocks_, GetOutput().data, matrix_size_, block_size_, block_count_); | |
| 159 | 16 | return true; | |
| 160 | } | ||
| 161 | |||
| 162 | } // namespace kazennova_a_fox_algorithm | ||
| 163 |