| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "chyokotov_a_dense_matrix_mul_foxs_algorithm/tbb/include/ops_tbb.hpp" | ||
| 2 | |||
| 3 | #include <tbb/blocked_range2d.h> | ||
| 4 | #include <tbb/parallel_for.h> | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <cmath> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "chyokotov_a_dense_matrix_mul_foxs_algorithm/common/include/common.hpp" | ||
| 12 | |||
| 13 | namespace chyokotov_a_dense_matrix_mul_foxs_algorithm { | ||
| 14 | |||
| 15 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | ChyokotovADenseMatMulFoxAlgorithmTBB::ChyokotovADenseMatMulFoxAlgorithmTBB(const InType &in) { |
| 16 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 17 | GetInput() = in; | ||
| 18 | GetOutput().clear(); | ||
| 19 | 16 | } | |
| 20 | |||
| 21 | 16 | bool ChyokotovADenseMatMulFoxAlgorithmTBB::ValidationImpl() { | |
| 22 | 16 | return (GetInput().first.size() == GetInput().second.size()); | |
| 23 | } | ||
| 24 | |||
| 25 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
|
16 | bool ChyokotovADenseMatMulFoxAlgorithmTBB::PreProcessingImpl() { |
| 26 | GetOutput().clear(); | ||
| 27 | 16 | GetOutput().resize(GetInput().first.size(), 0.0); | |
| 28 | 16 | return true; | |
| 29 | } | ||
| 30 | |||
| 31 | ✗ | int ChyokotovADenseMatMulFoxAlgorithmTBB::CalculateBlockSize(int n) { | |
| 32 | 12 | return static_cast<int>(std::sqrt(static_cast<double>(n))); | |
| 33 | } | ||
| 34 | |||
| 35 | ✗ | int ChyokotovADenseMatMulFoxAlgorithmTBB::CountBlock(int n, int size) { | |
| 36 | 12 | return (n + size - 1) / size; | |
| 37 | } | ||
| 38 | |||
| 39 | ✗ | void ChyokotovADenseMatMulFoxAlgorithmTBB::Matmul(std::vector<double> &a, std::vector<double> &b, int n, int istart, | |
| 40 | int iend, int jstart, int jend, int kstart, int kend) { | ||
| 41 | 68 | const auto n_sz = static_cast<size_t>(n); | |
| 42 | 68 | const auto istart_sz = static_cast<size_t>(istart); | |
| 43 | 68 | const auto iend_sz = static_cast<size_t>(iend); | |
| 44 | |||
| 45 | ✗ | tbb::parallel_for(istart_sz, iend_sz, [&](size_t i) { | |
| 46 | 100 | double *output_row = GetOutput().data() + (i * n_sz); | |
| 47 | 100 | const double *a_row = a.data() + (i * n_sz); | |
| 48 | |||
| 49 |
2/2✓ Branch 0 taken 164 times.
✓ Branch 1 taken 100 times.
|
264 | for (int j = jstart; j < jend; ++j) { |
| 50 | long double sum = 0.0L; | ||
| 51 | 164 | const double *b_col = b.data() + j; | |
| 52 | |||
| 53 |
2/2✓ Branch 0 taken 292 times.
✓ Branch 1 taken 164 times.
|
456 | for (int k = kstart; k < kend; ++k) { |
| 54 | 292 | const auto k_sz = static_cast<size_t>(k); | |
| 55 | 292 | sum += static_cast<long double>(a_row[k_sz]) * static_cast<long double>(b_col[k_sz * n_sz]); | |
| 56 | } | ||
| 57 | 164 | output_row[j] += static_cast<double>(sum); | |
| 58 | } | ||
| 59 | 100 | }); | |
| 60 | ✗ | } | |
| 61 | |||
| 62 | 16 | bool ChyokotovADenseMatMulFoxAlgorithmTBB::RunImpl() { | |
| 63 | 16 | std::vector<double> a = GetInput().first; | |
| 64 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | std::vector<double> b = GetInput().second; |
| 65 | 16 | int n = static_cast<int>(std::sqrt(static_cast<double>(a.size()))); | |
| 66 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 4 times.
|
16 | if (n == 0) { |
| 67 | return true; | ||
| 68 | } | ||
| 69 | |||
| 70 | 12 | int block_size = CalculateBlockSize(n); | |
| 71 | 12 | int count_block = CountBlock(n, block_size); | |
| 72 | |||
| 73 |
1/4✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
48 | tbb::parallel_for(tbb::blocked_range2d<int>(0, count_block, 0, count_block), [&](const tbb::blocked_range2d<int> &r) { |
| 74 |
2/2✓ Branch 0 taken 36 times.
✓ Branch 1 taken 36 times.
|
72 | for (int ic = r.rows().begin(); ic < r.rows().end(); ++ic) { |
| 75 |
2/2✓ Branch 0 taken 36 times.
✓ Branch 1 taken 36 times.
|
72 | for (int jc = r.cols().begin(); jc < r.cols().end(); ++jc) { |
| 76 |
2/2✓ Branch 0 taken 68 times.
✓ Branch 1 taken 36 times.
|
104 | for (int kc = 0; kc < count_block; ++kc) { |
| 77 | 68 | int istart = ic * block_size; | |
| 78 | 68 | int jstart = jc * block_size; | |
| 79 | 68 | int kstart = kc * block_size; | |
| 80 | |||
| 81 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 68 times.
|
68 | int iend = std::min(istart + block_size, n); |
| 82 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 68 times.
|
68 | int jend = std::min(jstart + block_size, n); |
| 83 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 68 times.
|
68 | int kend = std::min(kstart + block_size, n); |
| 84 | |||
| 85 | 68 | Matmul(a, b, n, istart, iend, jstart, jend, kstart, kend); | |
| 86 | } | ||
| 87 | } | ||
| 88 | } | ||
| 89 | 36 | }); | |
| 90 | |||
| 91 | 12 | return true; | |
| 92 | } | ||
| 93 | |||
| 94 | 16 | bool ChyokotovADenseMatMulFoxAlgorithmTBB::PostProcessingImpl() { | |
| 95 | 16 | return true; | |
| 96 | } | ||
| 97 | |||
| 98 | } // namespace chyokotov_a_dense_matrix_mul_foxs_algorithm | ||
| 99 |