| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "sinev_a_mult_matrix_fox_algorithm/tbb/include/ops_tbb.hpp" | ||
| 2 | |||
| 3 | #include <tbb/blocked_range2d.h> | ||
| 4 | #include <tbb/parallel_for.h> | ||
| 5 | |||
| 6 | #include <cmath> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "sinev_a_mult_matrix_fox_algorithm/common/include/common.hpp" | ||
| 11 | |||
| 12 | namespace sinev_a_mult_matrix_fox_algorithm { | ||
| 13 | |||
| 14 |
1/2✓ Branch 1 taken 52 times.
✗ Branch 2 not taken.
|
52 | SinevAMultMatrixFoxAlgorithmTBB::SinevAMultMatrixFoxAlgorithmTBB(const InType &in) { |
| 15 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 16 | GetInput() = in; | ||
| 17 | GetOutput() = {}; | ||
| 18 | 52 | } | |
| 19 | |||
| 20 | 52 | bool SinevAMultMatrixFoxAlgorithmTBB::ValidationImpl() { | |
| 21 | const auto &[matrix_size, matrix_a, matrix_b] = GetInput(); | ||
| 22 | |||
| 23 |
3/6✓ Branch 0 taken 52 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 52 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 52 times.
|
52 | return matrix_size > 0 && matrix_a.size() == matrix_size * matrix_size && |
| 24 | 52 | matrix_b.size() == matrix_size * matrix_size; | |
| 25 | } | ||
| 26 | |||
| 27 | 52 | bool SinevAMultMatrixFoxAlgorithmTBB::PreProcessingImpl() { | |
| 28 | const auto &[matrix_size, matrix_a, matrix_b] = GetInput(); | ||
| 29 | 52 | GetOutput() = std::vector<double>(matrix_size * matrix_size, 0.0); | |
| 30 | 52 | return true; | |
| 31 | } | ||
| 32 | |||
| 33 | // Добавляем static к определениям | ||
| 34 | 20 | void SinevAMultMatrixFoxAlgorithmTBB::SimpleMultiply(size_t n, const std::vector<double> &a, | |
| 35 | const std::vector<double> &b, std::vector<double> &c) { | ||
| 36 | 504 | tbb::parallel_for(tbb::blocked_range2d<size_t>(0, n, 0, n), [&](const tbb::blocked_range2d<size_t> &r) { | |
| 37 |
2/2✓ Branch 0 taken 484 times.
✓ Branch 1 taken 484 times.
|
968 | for (size_t i = r.rows().begin(); i < r.rows().end(); ++i) { |
| 38 |
2/2✓ Branch 0 taken 484 times.
✓ Branch 1 taken 484 times.
|
968 | for (size_t j = r.cols().begin(); j < r.cols().end(); ++j) { |
| 39 | double sum = 0.0; | ||
| 40 |
2/2✓ Branch 0 taken 3204 times.
✓ Branch 1 taken 484 times.
|
3688 | for (size_t k = 0; k < n; ++k) { |
| 41 | 3204 | sum += a[(i * n) + k] * b[(k * n) + j]; | |
| 42 | } | ||
| 43 | 484 | c[(i * n) + j] = sum; | |
| 44 | } | ||
| 45 | } | ||
| 46 | 484 | }); | |
| 47 | 20 | } | |
| 48 | |||
| 49 | 64 | void SinevAMultMatrixFoxAlgorithmTBB::DecomposeToBlocks(const std::vector<double> &src, std::vector<double> &dst, | |
| 50 | size_t n, size_t bs, int q) { | ||
| 51 | 4352 | tbb::parallel_for(tbb::blocked_range2d<int>(0, q, 0, q), [&](const tbb::blocked_range2d<int> &r) { | |
| 52 |
2/2✓ Branch 0 taken 4288 times.
✓ Branch 1 taken 4288 times.
|
8576 | for (int bi = r.rows().begin(); bi < r.rows().end(); ++bi) { |
| 53 |
2/2✓ Branch 0 taken 4288 times.
✓ Branch 1 taken 4288 times.
|
8576 | for (int bj = r.cols().begin(); bj < r.cols().end(); ++bj) { |
| 54 | 4288 | const size_t block_off = (static_cast<size_t>((bi * q) + bj)) * (bs * bs); | |
| 55 |
2/2✓ Branch 0 taken 23992 times.
✓ Branch 1 taken 4288 times.
|
28280 | for (size_t i = 0; i < bs; ++i) { |
| 56 |
2/2✓ Branch 0 taken 156088 times.
✓ Branch 1 taken 23992 times.
|
180080 | for (size_t j = 0; j < bs; ++j) { |
| 57 | 156088 | const size_t src_idx = ((static_cast<size_t>(bi) * bs + i) * n) + (static_cast<size_t>(bj) * bs + j); | |
| 58 | 156088 | const size_t dst_idx = block_off + (i * bs) + j; | |
| 59 | 156088 | dst[dst_idx] = src[src_idx]; | |
| 60 | } | ||
| 61 | } | ||
| 62 | } | ||
| 63 | } | ||
| 64 | 4288 | }); | |
| 65 | 64 | } | |
| 66 | |||
| 67 | 32 | void SinevAMultMatrixFoxAlgorithmTBB::AssembleFromBlocks(const std::vector<double> &src, std::vector<double> &dst, | |
| 68 | size_t n, size_t bs, int q) { | ||
| 69 | 2176 | tbb::parallel_for(tbb::blocked_range2d<int>(0, q, 0, q), [&](const tbb::blocked_range2d<int> &r) { | |
| 70 |
2/2✓ Branch 0 taken 2144 times.
✓ Branch 1 taken 2144 times.
|
4288 | for (int bi = r.rows().begin(); bi < r.rows().end(); ++bi) { |
| 71 |
2/2✓ Branch 0 taken 2144 times.
✓ Branch 1 taken 2144 times.
|
4288 | for (int bj = r.cols().begin(); bj < r.cols().end(); ++bj) { |
| 72 | 2144 | const size_t block_off = (static_cast<size_t>((bi * q) + bj)) * (bs * bs); | |
| 73 |
2/2✓ Branch 0 taken 11996 times.
✓ Branch 1 taken 2144 times.
|
14140 | for (size_t i = 0; i < bs; ++i) { |
| 74 |
2/2✓ Branch 0 taken 78044 times.
✓ Branch 1 taken 11996 times.
|
90040 | for (size_t j = 0; j < bs; ++j) { |
| 75 | 78044 | const size_t src_idx = block_off + (i * bs) + j; | |
| 76 | 78044 | const size_t dst_idx = ((static_cast<size_t>(bi) * bs + i) * n) + (static_cast<size_t>(bj) * bs + j); | |
| 77 | 78044 | dst[dst_idx] = src[src_idx]; | |
| 78 | } | ||
| 79 | } | ||
| 80 | } | ||
| 81 | } | ||
| 82 | 2144 | }); | |
| 83 | 32 | } | |
| 84 | |||
| 85 | 23728 | void SinevAMultMatrixFoxAlgorithmTBB::MultiplyBlocks(const std::vector<double> &blocks_a, | |
| 86 | const std::vector<double> &blocks_b, std::vector<double> &blocks_c, | ||
| 87 | size_t bs, size_t a_off, size_t b_off, size_t c_off) { | ||
| 88 |
2/2✓ Branch 0 taken 134940 times.
✓ Branch 1 taken 23728 times.
|
158668 | for (size_t ii = 0; ii < bs; ++ii) { |
| 89 |
2/2✓ Branch 0 taken 864844 times.
✓ Branch 1 taken 134940 times.
|
999784 | for (size_t kk = 0; kk < bs; ++kk) { |
| 90 | 864844 | const double val = blocks_a[a_off + (ii * bs) + kk]; | |
| 91 | 864844 | const size_t b_base = b_off + (kk * bs); | |
| 92 | 864844 | const size_t c_base = c_off + (ii * bs); | |
| 93 |
2/2✓ Branch 0 taken 6296628 times.
✓ Branch 1 taken 864844 times.
|
7161472 | for (size_t jj = 0; jj < bs; ++jj) { |
| 94 | 6296628 | blocks_c[c_base + jj] += val * blocks_b[b_base + jj]; | |
| 95 | } | ||
| 96 | } | ||
| 97 | } | ||
| 98 | 23728 | } | |
| 99 | |||
| 100 | 232 | void SinevAMultMatrixFoxAlgorithmTBB::FoxStep(const std::vector<double> &blocks_a, const std::vector<double> &blocks_b, | |
| 101 | std::vector<double> &blocks_c, size_t bs, int q, int step) { | ||
| 102 | 232 | const size_t block_size = bs * bs; | |
| 103 | |||
| 104 | 23960 | tbb::parallel_for(tbb::blocked_range2d<int>(0, q, 0, q), [&](const tbb::blocked_range2d<int> &r) { | |
| 105 |
2/2✓ Branch 0 taken 23728 times.
✓ Branch 1 taken 23728 times.
|
47456 | for (int i = r.rows().begin(); i < r.rows().end(); ++i) { |
| 106 |
2/2✓ Branch 0 taken 23728 times.
✓ Branch 1 taken 23728 times.
|
47456 | for (int j = r.cols().begin(); j < r.cols().end(); ++j) { |
| 107 | 23728 | const int k = (i + step) % q; | |
| 108 | |||
| 109 | 23728 | const size_t a_off = (static_cast<size_t>((i * q) + k)) * block_size; | |
| 110 | 23728 | const size_t b_off = (static_cast<size_t>((k * q) + j)) * block_size; | |
| 111 | 23728 | const size_t c_off = (static_cast<size_t>((i * q) + j)) * block_size; | |
| 112 | |||
| 113 | 23728 | MultiplyBlocks(blocks_a, blocks_b, blocks_c, bs, a_off, b_off, c_off); | |
| 114 | } | ||
| 115 | } | ||
| 116 | 23728 | }); | |
| 117 | 232 | } | |
| 118 | |||
| 119 | 52 | bool SinevAMultMatrixFoxAlgorithmTBB::RunImpl() { | |
| 120 | const auto &input = GetInput(); | ||
| 121 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 32 times.
|
52 | const size_t n = std::get<0>(input); |
| 122 | const auto &a = std::get<1>(input); | ||
| 123 | const auto &b = std::get<2>(input); | ||
| 124 | auto &c = GetOutput(); | ||
| 125 | |||
| 126 | // Для маленьких матриц используем простое умножение | ||
| 127 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 32 times.
|
52 | if (n <= 8) { |
| 128 | 20 | SimpleMultiply(n, a, b, c); | |
| 129 | 20 | return true; | |
| 130 | } | ||
| 131 | |||
| 132 | 32 | size_t bs = ChooseBlockSize(n); | |
| 133 | 32 | const int actual_q = static_cast<int>(n / bs); | |
| 134 | |||
| 135 | 32 | const auto total_blocks = static_cast<size_t>(actual_q) * static_cast<size_t>(actual_q); | |
| 136 | 32 | const auto block_elements = bs * bs; | |
| 137 | |||
| 138 | 32 | std::vector<double> blocks_a(total_blocks * block_elements); | |
| 139 |
1/4✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
32 | std::vector<double> blocks_b(total_blocks * block_elements); |
| 140 |
1/4✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
32 | std::vector<double> blocks_c(total_blocks * block_elements, 0.0); |
| 141 | |||
| 142 |
1/2✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
|
32 | DecomposeToBlocks(a, blocks_a, n, bs, actual_q); |
| 143 |
1/2✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
|
32 | DecomposeToBlocks(b, blocks_b, n, bs, actual_q); |
| 144 | |||
| 145 |
2/2✓ Branch 0 taken 232 times.
✓ Branch 1 taken 32 times.
|
264 | for (int step = 0; step < actual_q; ++step) { |
| 146 |
1/2✓ Branch 1 taken 232 times.
✗ Branch 2 not taken.
|
232 | FoxStep(blocks_a, blocks_b, blocks_c, bs, actual_q, step); |
| 147 | } | ||
| 148 | |||
| 149 |
1/2✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
|
32 | AssembleFromBlocks(blocks_c, c, n, bs, actual_q); |
| 150 | |||
| 151 | return true; | ||
| 152 | } | ||
| 153 | |||
| 154 | 32 | size_t SinevAMultMatrixFoxAlgorithmTBB::ChooseBlockSize(size_t n) { | |
| 155 | size_t bs = 1; | ||
| 156 | 32 | const auto sqrt_n = static_cast<size_t>(std::sqrt(static_cast<double>(n))); | |
| 157 |
1/2✓ Branch 0 taken 60 times.
✗ Branch 1 not taken.
|
60 | for (size_t div = sqrt_n; div >= 1; --div) { |
| 158 |
2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 32 times.
|
60 | if (n % div == 0) { |
| 159 | bs = div; | ||
| 160 | break; | ||
| 161 | } | ||
| 162 | } | ||
| 163 | 32 | return bs; | |
| 164 | } | ||
| 165 | |||
| 166 | 52 | bool SinevAMultMatrixFoxAlgorithmTBB::PostProcessingImpl() { | |
| 167 | 52 | return true; | |
| 168 | } | ||
| 169 | |||
| 170 | } // namespace sinev_a_mult_matrix_fox_algorithm | ||
| 171 |