| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "makoveeva_matmul_double/tbb/include/ops_tbb.hpp" | ||
| 2 | |||
| 3 | #include <tbb/blocked_range.h> | ||
| 4 | #include <tbb/mutex.h> | ||
| 5 | #include <tbb/parallel_for.h> | ||
| 6 | |||
| 7 | #include <cmath> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "makoveeva_matmul_double/tbb/include/common.hpp" | ||
| 12 | |||
| 13 | namespace makoveeva_matmul_double_tbb { | ||
| 14 | |||
| 15 | namespace { | ||
| 16 | |||
// Picks the tile edge length used by the blocked multiplication of an n x n matrix.
// Matrices with n <= 64 are treated as a single tile (the result is n itself);
// larger ones get a fixed tile of 64, 128 or 256 depending on the size band of n.
[[nodiscard]] size_t SelectBlockSize(size_t n) {
  constexpr size_t kSmallLimit = 64;
  constexpr size_t kMediumLimit = 256;
  constexpr size_t kLargeLimit = 1024;

  // Check the bands from the largest downwards so every test is a simple lower bound.
  if (n > kLargeLimit) {
    return 256;
  }
  if (n > kMediumLimit) {
    return 128;
  }
  if (n > kSmallLimit) {
    return 64;
  }
  return n;
}
| 30 | |||
// Multiplies tile (i, root) of matrix_a by tile (root, j) of matrix_b and adds the
// product to local_block.
//
// matrix_a and matrix_b are indexed as row-major n x n arrays; local_block is indexed
// as a row-major block_size x block_size array. Each element of local_block is
// incremented (not overwritten) by the corresponding dot product.
void ComputeBlock(const std::vector<double> &matrix_a, const std::vector<double> &matrix_b,
                  std::vector<double> &local_block, size_t i, size_t j, size_t root, size_t block_size, size_t n) {
  // Top-left coordinates of the tiles inside the full matrices.
  const size_t a_first_row = i * block_size;
  const size_t shared_first = root * block_size;
  const size_t b_first_col = j * block_size;

  for (size_t row = 0; row < block_size; ++row) {
    // Flat index of the first A element used by this output row.
    const size_t a_offset = ((a_first_row + row) * n) + shared_first;

    for (size_t col = 0; col < block_size; ++col) {
      // Flat index into B; it advances by one full matrix row per inner step.
      size_t b_index = (shared_first * n) + b_first_col + col;

      double dot = 0.0;
      for (size_t k = 0; k < block_size; ++k, b_index += n) {
        dot += matrix_a[a_offset + k] * matrix_b[b_index];
      }
      local_block[(row * block_size) + col] += dot;
    }
  }
}
| 46 | |||
| 47 | // Безопасно добавляет результат из local_block в матрицу C | ||
| 48 | 48 | void AccumulateResult(std::vector<double> &matrix_c, const std::vector<double> &local_block, size_t i, size_t j, | |
| 49 | size_t block_size, size_t n, tbb::mutex &write_mutex) { | ||
| 50 | tbb::mutex::scoped_lock lock(write_mutex); | ||
| 51 |
2/2✓ Branch 0 taken 412 times.
✓ Branch 1 taken 48 times.
|
460 | for (size_t bi = 0; bi < block_size; ++bi) { |
| 52 |
2/2✓ Branch 0 taken 6660 times.
✓ Branch 1 taken 412 times.
|
7072 | for (size_t bj = 0; bj < block_size; ++bj) { |
| 53 | 6660 | const size_t idx_c = ((i * block_size + bi) * n) + (j * block_size + bj); | |
| 54 | 6660 | matrix_c[idx_c] += local_block[(bi * block_size) + bj]; | |
| 55 | } | ||
| 56 | } | ||
| 57 | 48 | } | |
| 58 | |||
| 59 | } // namespace | ||
| 60 | |||
| 61 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | MatmulDoubleTBBTask::MatmulDoubleTBBTask(const InType &in) { |
| 62 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 63 | GetInput() = in; | ||
| 64 | 48 | GetOutput() = std::vector<double>(); | |
| 65 | 48 | } | |
| 66 | |||
| 67 | 48 | bool MatmulDoubleTBBTask::ValidationImpl() { | |
| 68 | const auto &input = GetInput(); | ||
| 69 | 48 | const size_t n = std::get<0>(input); | |
| 70 | const auto &a = std::get<1>(input); | ||
| 71 | const auto &b = std::get<2>(input); | ||
| 72 | |||
| 73 |
3/6✓ Branch 0 taken 48 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 48 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 48 times.
|
48 | return n > 0 && a.size() == n * n && b.size() == n * n; |
| 74 | } | ||
| 75 | |||
| 76 | 48 | bool MatmulDoubleTBBTask::PreProcessingImpl() { | |
| 77 | const auto &input = GetInput(); | ||
| 78 | 48 | n_ = std::get<0>(input); | |
| 79 | 48 | A_ = std::get<1>(input); | |
| 80 | 48 | B_ = std::get<2>(input); | |
| 81 | 48 | C_.assign(n_ * n_, 0.0); | |
| 82 | |||
| 83 | 48 | return true; | |
| 84 | } | ||
| 85 | |||
| 86 | 48 | bool MatmulDoubleTBBTask::RunImpl() { | |
| 87 |
1/2✓ Branch 0 taken 48 times.
✗ Branch 1 not taken.
|
48 | if (n_ <= 0) { |
| 88 | return false; | ||
| 89 | } | ||
| 90 | |||
| 91 | 48 | const size_t n = n_; | |
| 92 | 48 | const auto &a = A_; | |
| 93 | 48 | const auto &b = B_; | |
| 94 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 48 times.
|
48 | auto &c = C_; |
| 95 | |||
| 96 | 48 | const size_t block_size = SelectBlockSize(n); | |
| 97 | |||
| 98 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 48 times.
|
48 | if (n % block_size != 0) { |
| 99 | ✗ | return RunSimpleMultiply(); | |
| 100 | } | ||
| 101 | |||
| 102 | 48 | const size_t grid_size = n / block_size; | |
| 103 | |||
| 104 | tbb::mutex write_mutex; | ||
| 105 | |||
| 106 | 96 | tbb::parallel_for(tbb::blocked_range<size_t>(0, grid_size * grid_size * grid_size), | |
| 107 | 96 | [&](const tbb::blocked_range<size_t> &range) { | |
| 108 |
2/2✓ Branch 0 taken 48 times.
✓ Branch 1 taken 48 times.
|
96 | for (size_t step_i_j = range.begin(); step_i_j != range.end(); ++step_i_j) { |
| 109 | 48 | const size_t step = step_i_j / (grid_size * grid_size); | |
| 110 | 48 | const size_t i = (step_i_j % (grid_size * grid_size)) / grid_size; | |
| 111 | 48 | const size_t j = step_i_j % grid_size; | |
| 112 | |||
| 113 | 48 | const size_t root = (i + step) % grid_size; | |
| 114 | |||
| 115 | 48 | std::vector<double> local_block(block_size * block_size, 0.0); | |
| 116 | |||
| 117 | 48 | ComputeBlock(a, b, local_block, i, j, root, block_size, n); | |
| 118 | |||
| 119 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | AccumulateResult(c, local_block, i, j, block_size, n, write_mutex); |
| 120 | } | ||
| 121 | 48 | }); | |
| 122 | |||
| 123 | 48 | GetOutput() = C_; | |
| 124 | return true; | ||
| 125 | } | ||
| 126 | |||
| 127 | ✗ | bool MatmulDoubleTBBTask::RunSimpleMultiply() { | |
| 128 | ✗ | const size_t n = n_; | |
| 129 | ✗ | const auto &a = A_; | |
| 130 | ✗ | const auto &b = B_; | |
| 131 | ✗ | auto &c = C_; | |
| 132 | |||
| 133 | ✗ | tbb::parallel_for(tbb::blocked_range<size_t>(0, n), [&](const tbb::blocked_range<size_t> &range) { | |
| 134 | ✗ | for (size_t i = range.begin(); i != range.end(); ++i) { | |
| 135 | ✗ | for (size_t j = 0; j < n; ++j) { | |
| 136 | double sum = 0.0; | ||
| 137 | ✗ | for (size_t k = 0; k < n; ++k) { | |
| 138 | ✗ | sum += a[(i * n) + k] * b[(k * n) + j]; | |
| 139 | } | ||
| 140 | ✗ | c[(i * n) + j] = sum; | |
| 141 | } | ||
| 142 | } | ||
| 143 | ✗ | }); | |
| 144 | |||
| 145 | ✗ | return true; | |
| 146 | } | ||
| 147 | |||
| 148 | 48 | bool MatmulDoubleTBBTask::PostProcessingImpl() { | |
| 149 | 48 | return true; | |
| 150 | } | ||
| 151 | |||
| 152 | } // namespace makoveeva_matmul_double_tbb | ||
| 153 |