| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "kazennova_a_fox_algorithm/stl/include/ops_stl.hpp" | ||
| 2 | |||
| 3 | #include <algorithm> | ||
| 4 | #include <atomic> | ||
| 5 | #include <cstddef> | ||
| 6 | #include <thread> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "kazennova_a_fox_algorithm/common/include/common.hpp" | ||
| 10 | #include "util/include/util.hpp" | ||
| 11 | |||
| 12 | namespace kazennova_a_fox_algorithm { | ||
| 13 | |||
| 14 | namespace { | ||
| 15 | |||
| 16 | 64 | void GetBlock(const std::vector<double> &mat, int rows, int cols, int block_row, int block_col, int block_size, | |
| 17 | double *block_buf) { | ||
| 18 | 64 | const int start_row = block_row * block_size; | |
| 19 | 64 | const int start_col = block_col * block_size; | |
| 20 | 64 | const int end_row = std::min(start_row + block_size, rows); | |
| 21 | 64 | const int end_col = std::min(start_col + block_size, cols); | |
| 22 | |||
| 23 |
2/2✓ Branch 0 taken 4096 times.
✓ Branch 1 taken 64 times.
|
4160 | for (int i = 0; i < block_size; ++i) { |
| 24 |
2/2✓ Branch 0 taken 262144 times.
✓ Branch 1 taken 4096 times.
|
266240 | for (int j = 0; j < block_size; ++j) { |
| 25 | 262144 | block_buf[(i * block_size) + j] = 0.0; | |
| 26 | } | ||
| 27 | } | ||
| 28 |
2/2✓ Branch 0 taken 320 times.
✓ Branch 1 taken 64 times.
|
384 | for (int i = start_row; i < end_row; ++i) { |
| 29 |
2/2✓ Branch 0 taken 2208 times.
✓ Branch 1 taken 320 times.
|
2528 | for (int j = start_col; j < end_col; ++j) { |
| 30 | 2208 | block_buf[((i - start_row) * block_size) + (j - start_col)] = mat[(i * cols) + j]; | |
| 31 | } | ||
| 32 | } | ||
| 33 | 64 | } | |
| 34 | |||
| 35 | 32 | void MultiplyBlock(const std::vector<double> &block_a, const std::vector<double> &block_b, int block_size, int max_i, | |
| 36 | int max_j, int max_k, int bi, int bj, int n, std::vector<double> &c) { | ||
| 37 |
2/2✓ Branch 0 taken 160 times.
✓ Branch 1 taken 32 times.
|
192 | for (int i = 0; i < max_i; ++i) { |
| 38 | 160 | const int global_row = (bi * block_size) + i; | |
| 39 |
2/2✓ Branch 0 taken 1104 times.
✓ Branch 1 taken 160 times.
|
1264 | for (int j = 0; j < max_j; ++j) { |
| 40 | 1104 | const int global_col = (bj * block_size) + j; | |
| 41 | double sum = 0.0; | ||
| 42 |
2/2✓ Branch 0 taken 9280 times.
✓ Branch 1 taken 1104 times.
|
10384 | for (int kk = 0; kk < max_k; ++kk) { |
| 43 | 9280 | sum += block_a[(i * block_size) + kk] * block_b[(kk * block_size) + j]; | |
| 44 | } | ||
| 45 | 1104 | c[(global_row * n) + global_col] += sum; | |
| 46 | } | ||
| 47 | } | ||
| 48 | 32 | } | |
| 49 | |||
| 50 | } // namespace | ||
| 51 | |||
| 52 |
1/2✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
|
32 | KazennovaATestTaskSTL::KazennovaATestTaskSTL(const InType &in) { |
| 53 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 54 |
1/2✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
|
32 | GetInput() = in; |
| 55 | 32 | } | |
| 56 | |||
| 57 |
1/2✓ Branch 0 taken 32 times.
✗ Branch 1 not taken.
|
32 | bool KazennovaATestTaskSTL::ValidationImpl() { |
| 58 | const auto &in = GetInput(); | ||
| 59 |
2/4✓ Branch 0 taken 32 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 32 times.
✗ Branch 3 not taken.
|
32 | if (in.A.data.empty() || in.B.data.empty()) { |
| 60 | return false; | ||
| 61 | } | ||
| 62 |
4/8✓ Branch 0 taken 32 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 32 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 32 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 32 times.
✗ Branch 7 not taken.
|
32 | if (in.A.rows <= 0 || in.A.cols <= 0 || in.B.rows <= 0 || in.B.cols <= 0) { |
| 63 | return false; | ||
| 64 | } | ||
| 65 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 32 times.
|
32 | if (in.A.cols != in.B.rows) { |
| 66 | ✗ | return false; | |
| 67 | } | ||
| 68 | return true; | ||
| 69 | } | ||
| 70 | |||
| 71 | 32 | bool KazennovaATestTaskSTL::PreProcessingImpl() { | |
| 72 | const auto &in = GetInput(); | ||
| 73 | auto &out = GetOutput(); | ||
| 74 | 32 | out.rows = in.A.rows; | |
| 75 | 32 | out.cols = in.B.cols; | |
| 76 | 32 | out.data.assign(static_cast<size_t>(out.rows) * out.cols, 0.0); | |
| 77 | 32 | return true; | |
| 78 | } | ||
| 79 | |||
| 80 | 32 | bool KazennovaATestTaskSTL::RunImpl() { | |
| 81 | const auto &in = GetInput(); | ||
| 82 | auto &out = GetOutput(); | ||
| 83 | |||
| 84 | 32 | const int m = in.A.rows; | |
| 85 | 32 | const int k = in.A.cols; | |
| 86 | 32 | const int n = in.B.cols; | |
| 87 | 32 | const auto &a = in.A.data; | |
| 88 | 32 | const auto &b = in.B.data; | |
| 89 | 32 | auto &c = out.data; | |
| 90 | |||
| 91 | 32 | const int bs = kBlockSize; | |
| 92 | |||
| 93 | 32 | const int blocks_i = (m + bs - 1) / bs; | |
| 94 | 32 | const int blocks_j = (n + bs - 1) / bs; | |
| 95 | 32 | const int blocks_k = (k + bs - 1) / bs; | |
| 96 | |||
| 97 | 32 | int num_threads = ppc::util::GetNumThreads(); | |
| 98 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 32 times.
|
32 | if (num_threads <= 0) { |
| 99 | ✗ | num_threads = static_cast<int>(std::thread::hardware_concurrency()); | |
| 100 | } | ||
| 101 | ✗ | if (num_threads <= 0) { | |
| 102 | num_threads = 2; | ||
| 103 | } | ||
| 104 | |||
| 105 | 32 | std::vector<std::thread> threads; | |
| 106 |
1/2✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
|
32 | threads.reserve(static_cast<size_t>(num_threads)); |
| 107 | |||
| 108 | 32 | std::atomic<size_t> next_block_idx(0); | |
| 109 | 32 | const size_t total_blocks = static_cast<size_t>(blocks_i) * blocks_j; | |
| 110 | |||
| 111 | 80 | auto worker = [&]() { | |
| 112 | 80 | std::vector<double> block_a(static_cast<size_t>(bs) * bs); | |
| 113 |
1/4✓ Branch 1 taken 80 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
80 | std::vector<double> block_b(static_cast<size_t>(bs) * bs); |
| 114 | |||
| 115 | while (true) { | ||
| 116 |
2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 80 times.
|
112 | const size_t idx = next_block_idx.fetch_add(1); |
| 117 |
2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 80 times.
|
112 | if (idx >= total_blocks) { |
| 118 | break; | ||
| 119 | } | ||
| 120 | |||
| 121 | 32 | const int bi = static_cast<int>(idx / blocks_j); | |
| 122 | 32 | const int bj = static_cast<int>(idx % blocks_j); | |
| 123 | |||
| 124 |
2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 32 times.
|
64 | for (int bk = 0; bk < blocks_k; ++bk) { |
| 125 | 32 | GetBlock(a, m, k, bi, bk, bs, block_a.data()); | |
| 126 | 32 | GetBlock(b, k, n, bk, bj, bs, block_b.data()); | |
| 127 | |||
| 128 |
1/2✓ Branch 0 taken 32 times.
✗ Branch 1 not taken.
|
32 | const int max_i = std::min(bs, m - (bi * bs)); |
| 129 |
1/2✓ Branch 0 taken 32 times.
✗ Branch 1 not taken.
|
32 | const int max_j = std::min(bs, n - (bj * bs)); |
| 130 |
1/2✓ Branch 0 taken 32 times.
✗ Branch 1 not taken.
|
32 | const int max_k = std::min(bs, k - (bk * bs)); |
| 131 | |||
| 132 | 32 | MultiplyBlock(block_a, block_b, bs, max_i, max_j, max_k, bi, bj, n, c); | |
| 133 | } | ||
| 134 | } | ||
| 135 | 80 | }; | |
| 136 | |||
| 137 |
2/2✓ Branch 0 taken 80 times.
✓ Branch 1 taken 32 times.
|
112 | for (int thread_idx = 0; thread_idx < num_threads; ++thread_idx) { |
| 138 |
1/2✓ Branch 1 taken 80 times.
✗ Branch 2 not taken.
|
80 | threads.emplace_back(worker); |
| 139 | } | ||
| 140 |
2/2✓ Branch 0 taken 80 times.
✓ Branch 1 taken 32 times.
|
112 | for (auto &thr : threads) { |
| 141 |
1/2✓ Branch 1 taken 80 times.
✗ Branch 2 not taken.
|
80 | thr.join(); |
| 142 | } | ||
| 143 | |||
| 144 | 32 | return true; | |
| 145 | 32 | } | |
| 146 | |||
| 147 | 32 | bool KazennovaATestTaskSTL::PostProcessingImpl() { | |
| 148 | 32 | return !GetOutput().data.empty(); | |
| 149 | } | ||
| 150 | |||
| 151 | } // namespace kazennova_a_fox_algorithm | ||
| 152 |