| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "kazennova_a_fox_algorithm/tbb/include/ops_tbb.hpp" | ||
| 2 | |||
| 3 | #include <tbb/blocked_range2d.h> | ||
| 4 | #include <tbb/parallel_for.h> | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "kazennova_a_fox_algorithm/common/include/common.hpp" | ||
| 11 | |||
| 12 | namespace kazennova_a_fox_algorithm { | ||
| 13 | |||
| 14 | namespace { | ||
| 15 | |||
// Copies one square tile of a row-major matrix into block_buf, zero-padding the
// part of the tile that lies outside the matrix.
//
// mat        - row-major matrix storage; must hold at least rows * cols values.
// rows, cols - logical dimensions of mat.
// block_row, block_col - tile coordinates (expected to be non-negative); the tile
//              starts at element (block_row * block_size, block_col * block_size).
// block_size - tile edge length; nothing is written when it is not positive.
// block_buf  - destination with room for block_size * block_size values, laid
//              out row-major with a stride of block_size.
void GetBlock(const std::vector<double> &mat, int rows, int cols, int block_row, int block_col, int block_size,
              double *block_buf) {
  if (block_size <= 0) {
    return;
  }

  const int start_row = block_row * block_size;
  const int start_col = block_col * block_size;
  const int end_row = std::min(start_row + block_size, rows);
  const int end_col = std::min(start_col + block_size, cols);

  // Element offsets are computed in std::size_t: row * cols does not fit in int
  // once the matrix holds more than INT_MAX elements.
  const auto tile_dim = static_cast<std::size_t>(block_size);
  const auto src_stride = static_cast<std::size_t>(cols);

  // Clear the whole tile so cells past the matrix edge read as zero.
  std::fill_n(block_buf, tile_dim * tile_dim, 0.0);

  if (start_row >= end_row || start_col >= end_col) {
    return;  // The tile lies completely outside the matrix.
  }

  const auto copy_width = static_cast<std::size_t>(end_col - start_col);
  for (int i = start_row; i < end_row; ++i) {
    const double *src = mat.data() + (static_cast<std::size_t>(i) * src_stride) + static_cast<std::size_t>(start_col);
    double *dst = block_buf + (static_cast<std::size_t>(i - start_row) * tile_dim);
    std::copy(src, src + copy_width, dst);
  }
}
| 34 | |||
// Multiplies two tiles and accumulates the product into the output matrix.
//
// block_a, block_b - row-major tiles with a stride of block_size.
// block_size       - tile edge length (stride of both tiles).
// max_i, max_j, max_k - number of valid rows of block_a, valid columns of
//                    block_b and valid inner-dimension entries to use.
// bi, bj           - tile coordinates of the destination inside c; the tile
//                    starts at element (bi * block_size, bj * block_size).
// n                - number of columns (row stride) of c.
// c                - row-major output matrix; the product is added to its
//                    current contents, so it must be initialised by the caller.
void MultiplyBlock(const std::vector<double> &block_a, const std::vector<double> &block_b, int block_size, int max_i,
                   int max_j, int max_k, int bi, int bj, int n, std::vector<double> &c) {
  // Offsets are computed in std::size_t: global_row * n does not fit in int
  // once the output holds more than INT_MAX elements.
  const auto tile_dim = static_cast<std::size_t>(block_size);
  const auto out_stride = static_cast<std::size_t>(n);
  const std::size_t row_base = static_cast<std::size_t>(bi) * tile_dim;
  const std::size_t col_base = static_cast<std::size_t>(bj) * tile_dim;

  for (int i = 0; i < max_i; ++i) {
    const auto local_row = static_cast<std::size_t>(i);
    const double *a_row = block_a.data() + (local_row * tile_dim);
    double *c_row = c.data() + ((row_base + local_row) * out_stride) + col_base;

    for (int j = 0; j < max_j; ++j) {
      const auto local_col = static_cast<std::size_t>(j);
      double sum = 0.0;
      for (int kk = 0; kk < max_k; ++kk) {
        const auto inner = static_cast<std::size_t>(kk);
        sum += a_row[inner] * block_b[(inner * tile_dim) + local_col];
      }
      c_row[local_col] += sum;
    }
  }
}
| 49 | |||
| 50 | } // namespace | ||
| 51 | |||
| 52 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | KazennovaATestTaskTBB::KazennovaATestTaskTBB(const InType &in) { |
| 53 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 54 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | GetInput() = in; |
| 55 | 16 | } | |
| 56 | |||
| 57 |
1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
|
16 | bool KazennovaATestTaskTBB::ValidationImpl() { |
| 58 | const auto &in = GetInput(); | ||
| 59 |
2/4✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 16 times.
✗ Branch 3 not taken.
|
16 | if (in.A.data.empty() || in.B.data.empty()) { |
| 60 | return false; | ||
| 61 | } | ||
| 62 |
4/8✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 16 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 16 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 16 times.
✗ Branch 7 not taken.
|
16 | if (in.A.rows <= 0 || in.A.cols <= 0 || in.B.rows <= 0 || in.B.cols <= 0) { |
| 63 | return false; | ||
| 64 | } | ||
| 65 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
|
16 | if (in.A.cols != in.B.rows) { |
| 66 | ✗ | return false; | |
| 67 | } | ||
| 68 | return true; | ||
| 69 | } | ||
| 70 | |||
| 71 | 16 | bool KazennovaATestTaskTBB::PreProcessingImpl() { | |
| 72 | const auto &in = GetInput(); | ||
| 73 | auto &out = GetOutput(); | ||
| 74 | 16 | out.rows = in.A.rows; | |
| 75 | 16 | out.cols = in.B.cols; | |
| 76 | 16 | out.data.assign(static_cast<size_t>(out.rows) * out.cols, 0.0); | |
| 77 | 16 | return true; | |
| 78 | } | ||
| 79 | |||
| 80 | 16 | bool KazennovaATestTaskTBB::RunImpl() { | |
| 81 | const auto &in = GetInput(); | ||
| 82 | auto &out = GetOutput(); | ||
| 83 | |||
| 84 | 16 | const int m = in.A.rows; | |
| 85 | 16 | const int k = in.A.cols; | |
| 86 | 16 | const int n = in.B.cols; | |
| 87 | 16 | const auto &a = in.A.data; | |
| 88 | 16 | const auto &b = in.B.data; | |
| 89 | 16 | auto &c = out.data; | |
| 90 | |||
| 91 | 16 | const int bs = kBlockSize; | |
| 92 | |||
| 93 | 16 | const int blocks_i = (m + bs - 1) / bs; | |
| 94 | 16 | const int blocks_j = (n + bs - 1) / bs; | |
| 95 | 16 | const int blocks_k = (k + bs - 1) / bs; | |
| 96 | |||
| 97 | 16 | tbb::parallel_for(tbb::blocked_range2d<int>(0, blocks_i, 0, blocks_j), [&](const tbb::blocked_range2d<int> &r) { | |
| 98 | 16 | std::vector<double> block_a(static_cast<size_t>(bs) * bs); | |
| 99 |
1/4✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
16 | std::vector<double> block_b(static_cast<size_t>(bs) * bs); |
| 100 | |||
| 101 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 16 times.
|
32 | for (int bi = r.rows().begin(); bi != r.rows().end(); ++bi) { |
| 102 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 16 times.
|
32 | for (int bj = r.cols().begin(); bj != r.cols().end(); ++bj) { |
| 103 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 16 times.
|
32 | for (int bk = 0; bk < blocks_k; ++bk) { |
| 104 | 16 | GetBlock(a, m, k, bi, bk, bs, block_a.data()); | |
| 105 | 16 | GetBlock(b, k, n, bk, bj, bs, block_b.data()); | |
| 106 | |||
| 107 |
1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
|
16 | const int max_i = std::min(bs, m - (bi * bs)); |
| 108 |
1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
|
16 | const int max_j = std::min(bs, n - (bj * bs)); |
| 109 |
1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
|
16 | const int max_k = std::min(bs, k - (bk * bs)); |
| 110 | |||
| 111 | 16 | MultiplyBlock(block_a, block_b, bs, max_i, max_j, max_k, bi, bj, n, c); | |
| 112 | } | ||
| 113 | } | ||
| 114 | } | ||
| 115 | 16 | }); | |
| 116 | |||
| 117 | 16 | return true; | |
| 118 | } | ||
| 119 | |||
| 120 | 16 | bool KazennovaATestTaskTBB::PostProcessingImpl() { | |
| 121 | 16 | return !GetOutput().data.empty(); | |
| 122 | } | ||
| 123 | |||
| 124 | } // namespace kazennova_a_fox_algorithm | ||
| 125 |