| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "lazareva_a_matrix_mult_strassen/tbb/include/ops_tbb.hpp" | ||
| 2 | |||
| 3 | #include <oneapi/tbb/blocked_range.h> | ||
| 4 | #include <oneapi/tbb/parallel_for.h> | ||
| 5 | |||
| 6 | #include <array> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <utility> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "lazareva_a_matrix_mult_strassen/common/include/common.hpp" | ||
| 12 | |||
| 13 | namespace lazareva_a_matrix_mult_strassen { | ||
| 14 | |||
| 15 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | LazarevaATestTaskTBB::LazarevaATestTaskTBB(const InType &in) { |
| 16 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 17 | GetInput() = in; | ||
| 18 | GetOutput() = {}; | ||
| 19 | 24 | } | |
| 20 | |||
| 21 | 24 | bool LazarevaATestTaskTBB::ValidationImpl() { | |
| 22 | 24 | const int n = GetInput().n; | |
| 23 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
|
24 | if (n <= 0) { |
| 24 | return false; | ||
| 25 | } | ||
| 26 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
|
24 | const auto expected = static_cast<size_t>(n) * static_cast<size_t>(n); |
| 27 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 24 times.
|
24 | return std::cmp_equal(GetInput().a.size(), expected) && std::cmp_equal(GetInput().b.size(), expected); |
| 28 | } | ||
| 29 | |||
| 30 | 24 | bool LazarevaATestTaskTBB::PreProcessingImpl() { | |
| 31 | 24 | n_ = GetInput().n; | |
| 32 | 24 | padded_n_ = NextPowerOfTwo(n_); | |
| 33 | 24 | a_ = PadMatrix(GetInput().a, n_, padded_n_); | |
| 34 | 24 | b_ = PadMatrix(GetInput().b, n_, padded_n_); | |
| 35 | 24 | const auto padded_size = static_cast<size_t>(padded_n_) * static_cast<size_t>(padded_n_); | |
| 36 | 24 | result_.assign(padded_size, 0.0); | |
| 37 | 24 | return true; | |
| 38 | } | ||
| 39 | |||
| 40 | 24 | bool LazarevaATestTaskTBB::RunImpl() { | |
| 41 | 24 | result_ = Strassen(a_, b_, padded_n_); | |
| 42 | 24 | return true; | |
| 43 | } | ||
| 44 | |||
| 45 | 24 | bool LazarevaATestTaskTBB::PostProcessingImpl() { | |
| 46 | 24 | GetOutput() = UnpadMatrix(result_, padded_n_, n_); | |
| 47 | 24 | return true; | |
| 48 | } | ||
| 49 | |||
| 50 | ✗ | int LazarevaATestTaskTBB::NextPowerOfTwo(int n) { | |
| 51 |
1/4✗ Branch 0 not taken.
✗ Branch 1 not taken.
✓ Branch 2 taken 24 times.
✗ Branch 3 not taken.
|
24 | if (n <= 0) { |
| 52 | return 1; | ||
| 53 | } | ||
| 54 | int p = 1; | ||
| 55 |
2/4✗ Branch 0 not taken.
✗ Branch 1 not taken.
✓ Branch 2 taken 80 times.
✓ Branch 3 taken 24 times.
|
104 | while (p < n) { |
| 56 | 80 | p <<= 1; | |
| 57 | } | ||
| 58 | return p; | ||
| 59 | } | ||
| 60 | |||
| 61 | 48 | std::vector<double> LazarevaATestTaskTBB::PadMatrix(const std::vector<double> &m, int old_n, int new_n) { | |
| 62 | 48 | const auto new_size = static_cast<size_t>(new_n) * static_cast<size_t>(new_n); | |
| 63 | 48 | std::vector<double> result(new_size, 0.0); | |
| 64 |
2/2✓ Branch 0 taken 1288 times.
✓ Branch 1 taken 48 times.
|
1336 | for (int i = 0; i < old_n; ++i) { |
| 65 |
2/2✓ Branch 0 taken 133816 times.
✓ Branch 1 taken 1288 times.
|
135104 | for (int j = 0; j < old_n; ++j) { |
| 66 | 133816 | const auto dst = (static_cast<ptrdiff_t>(i) * new_n) + j; | |
| 67 | 133816 | const auto src = (static_cast<ptrdiff_t>(i) * old_n) + j; | |
| 68 | 133816 | result[static_cast<size_t>(dst)] = m[static_cast<size_t>(src)]; | |
| 69 | } | ||
| 70 | } | ||
| 71 | 48 | return result; | |
| 72 | } | ||
| 73 | |||
| 74 | 24 | std::vector<double> LazarevaATestTaskTBB::UnpadMatrix(const std::vector<double> &m, int old_n, int new_n) { | |
| 75 | 24 | const auto new_size = static_cast<size_t>(new_n) * static_cast<size_t>(new_n); | |
| 76 | 24 | std::vector<double> result(new_size); | |
| 77 |
2/2✓ Branch 0 taken 644 times.
✓ Branch 1 taken 24 times.
|
668 | for (int i = 0; i < new_n; ++i) { |
| 78 |
2/2✓ Branch 0 taken 66908 times.
✓ Branch 1 taken 644 times.
|
67552 | for (int j = 0; j < new_n; ++j) { |
| 79 | 66908 | const auto dst = (static_cast<ptrdiff_t>(i) * new_n) + j; | |
| 80 | 66908 | const auto src = (static_cast<ptrdiff_t>(i) * old_n) + j; | |
| 81 | 66908 | result[static_cast<size_t>(dst)] = m[static_cast<size_t>(src)]; | |
| 82 | } | ||
| 83 | } | ||
| 84 | 24 | return result; | |
| 85 | } | ||
| 86 | |||
| 87 | 48 | std::vector<double> LazarevaATestTaskTBB::Add(const std::vector<double> &a, const std::vector<double> &b, int n) { | |
| 88 | 48 | const auto size = static_cast<size_t>(n) * static_cast<size_t>(n); | |
| 89 | 48 | std::vector<double> result(size); | |
| 90 |
2/2✓ Branch 0 taken 196608 times.
✓ Branch 1 taken 48 times.
|
196656 | for (size_t i = 0; i < size; ++i) { |
| 91 | 196608 | result[i] = a[i] + b[i]; | |
| 92 | } | ||
| 93 | 48 | return result; | |
| 94 | } | ||
| 95 | |||
| 96 | 24 | std::vector<double> LazarevaATestTaskTBB::Sub(const std::vector<double> &a, const std::vector<double> &b, int n) { | |
| 97 | 24 | const auto size = static_cast<size_t>(n) * static_cast<size_t>(n); | |
| 98 | 24 | std::vector<double> result(size); | |
| 99 |
2/2✓ Branch 0 taken 98304 times.
✓ Branch 1 taken 24 times.
|
98328 | for (size_t i = 0; i < size; ++i) { |
| 100 | 98304 | result[i] = a[i] - b[i]; | |
| 101 | } | ||
| 102 | 24 | return result; | |
| 103 | } | ||
| 104 | |||
| 105 | 8 | void LazarevaATestTaskTBB::Split(const std::vector<double> &parent, int n, std::vector<double> &a11, | |
| 106 | std::vector<double> &a12, std::vector<double> &a21, std::vector<double> &a22) { | ||
| 107 | 8 | const int half = n / 2; | |
| 108 | 8 | const auto half_size = static_cast<size_t>(half) * static_cast<size_t>(half); | |
| 109 | 8 | a11.resize(half_size); | |
| 110 | 8 | a12.resize(half_size); | |
| 111 | 8 | a21.resize(half_size); | |
| 112 | 8 | a22.resize(half_size); | |
| 113 | |||
| 114 |
2/2✓ Branch 0 taken 512 times.
✓ Branch 1 taken 8 times.
|
520 | for (int i = 0; i < half; ++i) { |
| 115 |
2/2✓ Branch 0 taken 32768 times.
✓ Branch 1 taken 512 times.
|
33280 | for (int j = 0; j < half; ++j) { |
| 116 | 32768 | const auto idx = static_cast<size_t>((static_cast<ptrdiff_t>(i) * half) + j); | |
| 117 | 32768 | a11[idx] = parent[static_cast<size_t>((static_cast<ptrdiff_t>(i) * n) + j)]; | |
| 118 | 32768 | a12[idx] = parent[static_cast<size_t>((static_cast<ptrdiff_t>(i) * n) + j + half)]; | |
| 119 | 32768 | a21[idx] = parent[static_cast<size_t>((static_cast<ptrdiff_t>(i + half) * n) + j)]; | |
| 120 | 32768 | a22[idx] = parent[static_cast<size_t>((static_cast<ptrdiff_t>(i + half) * n) + j + half)]; | |
| 121 | } | ||
| 122 | } | ||
| 123 | 8 | } | |
| 124 | |||
| 125 | 4 | std::vector<double> LazarevaATestTaskTBB::Merge(const std::vector<double> &c11, const std::vector<double> &c12, | |
| 126 | const std::vector<double> &c21, const std::vector<double> &c22, int n) { | ||
| 127 | 4 | const int full = n * 2; | |
| 128 | 4 | const auto full_size = static_cast<size_t>(full) * static_cast<size_t>(full); | |
| 129 | 4 | std::vector<double> result(full_size); | |
| 130 | |||
| 131 |
2/2✓ Branch 0 taken 256 times.
✓ Branch 1 taken 4 times.
|
260 | for (int i = 0; i < n; ++i) { |
| 132 |
2/2✓ Branch 0 taken 16384 times.
✓ Branch 1 taken 256 times.
|
16640 | for (int j = 0; j < n; ++j) { |
| 133 | 16384 | const auto src = static_cast<size_t>((static_cast<ptrdiff_t>(i) * n) + j); | |
| 134 | 16384 | result[static_cast<size_t>((static_cast<ptrdiff_t>(i) * full) + j)] = c11[src]; | |
| 135 | 16384 | result[static_cast<size_t>((static_cast<ptrdiff_t>(i) * full) + j + n)] = c12[src]; | |
| 136 | 16384 | result[static_cast<size_t>((static_cast<ptrdiff_t>(i + n) * full) + j)] = c21[src]; | |
| 137 | 16384 | result[static_cast<size_t>((static_cast<ptrdiff_t>(i + n) * full) + j + n)] = c22[src]; | |
| 138 | } | ||
| 139 | } | ||
| 140 | 4 | return result; | |
| 141 | } | ||
| 142 | |||
| 143 | 20 | std::vector<double> LazarevaATestTaskTBB::NaiveMult(const std::vector<double> &a, const std::vector<double> &b, int n) { | |
| 144 | 20 | const auto size = static_cast<size_t>(n) * static_cast<size_t>(n); | |
| 145 | 20 | std::vector<double> c(size, 0.0); | |
| 146 | |||
| 147 |
1/4✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
172 | oneapi::tbb::parallel_for(oneapi::tbb::blocked_range<int>(0, n), [&](const oneapi::tbb::blocked_range<int> &range) { |
| 148 |
2/2✓ Branch 0 taken 152 times.
✓ Branch 1 taken 152 times.
|
304 | for (int i = range.begin(); i < range.end(); ++i) { |
| 149 |
2/2✓ Branch 0 taken 1616 times.
✓ Branch 1 taken 152 times.
|
1768 | for (int k = 0; k < n; ++k) { |
| 150 | 1616 | const double aik = a[static_cast<size_t>((static_cast<ptrdiff_t>(i) * n) + k)]; | |
| 151 |
2/2✓ Branch 0 taken 20768 times.
✓ Branch 1 taken 1616 times.
|
22384 | for (int j = 0; j < n; ++j) { |
| 152 | 20768 | c[static_cast<size_t>((static_cast<ptrdiff_t>(i) * n) + j)] += | |
| 153 | 20768 | aik * b[static_cast<size_t>((static_cast<ptrdiff_t>(k) * n) + j)]; | |
| 154 | } | ||
| 155 | } | ||
| 156 | } | ||
| 157 | 152 | }); | |
| 158 | |||
| 159 | 20 | return c; | |
| 160 | } | ||
| 161 | |||
| 162 | 24 | std::vector<double> LazarevaATestTaskTBB::Strassen(const std::vector<double> &root_a, const std::vector<double> &root_b, | |
| 163 | int root_n) { | ||
| 164 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 4 times.
|
24 | if (root_n <= 64) { |
| 165 | 20 | return NaiveMult(root_a, root_b, root_n); | |
| 166 | } | ||
| 167 | |||
| 168 | 4 | const int half = root_n / 2; | |
| 169 | |||
| 170 | 4 | std::vector<double> a11; | |
| 171 | 4 | std::vector<double> a12; | |
| 172 | 4 | std::vector<double> a21; | |
| 173 | 4 | std::vector<double> a22; | |
| 174 | 4 | std::vector<double> b11; | |
| 175 | 4 | std::vector<double> b12; | |
| 176 | 4 | std::vector<double> b21; | |
| 177 | 4 | std::vector<double> b22; | |
| 178 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | Split(root_a, root_n, a11, a12, a21, a22); |
| 179 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | Split(root_b, root_n, b11, b12, b21, b22); |
| 180 | |||
| 181 | 4 | std::array<std::vector<double>, 7> lhs; | |
| 182 | 4 | std::array<std::vector<double>, 7> rhs; | |
| 183 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | lhs.at(0) = Add(a11, a22, half); |
| 184 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | rhs.at(0) = Add(b11, b22, half); |
| 185 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | lhs.at(1) = Add(a21, a22, half); |
| 186 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | rhs.at(1) = b11; |
| 187 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | lhs.at(2) = a11; |
| 188 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | rhs.at(2) = Sub(b12, b22, half); |
| 189 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | lhs.at(3) = a22; |
| 190 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | rhs.at(3) = Sub(b21, b11, half); |
| 191 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | lhs.at(4) = Add(a11, a12, half); |
| 192 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | rhs.at(4) = b22; |
| 193 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | lhs.at(5) = Sub(a21, a11, half); |
| 194 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | rhs.at(5) = Add(b11, b12, half); |
| 195 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | lhs.at(6) = Sub(a12, a22, half); |
| 196 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | rhs.at(6) = Add(b21, b22, half); |
| 197 | |||
| 198 | 4 | std::array<std::vector<double>, 7> m; | |
| 199 | |||
| 200 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
32 | oneapi::tbb::parallel_for(oneapi::tbb::blocked_range<int>(0, 7), [&](const oneapi::tbb::blocked_range<int> &range) { |
| 201 |
2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 28 times.
|
56 | for (int k = range.begin(); k < range.end(); ++k) { |
| 202 | 28 | const auto uk = static_cast<size_t>(k); | |
| 203 | 28 | const int nn = half; | |
| 204 | 28 | const auto sz = static_cast<size_t>(nn) * static_cast<size_t>(nn); | |
| 205 | 28 | std::vector<double> c(sz, 0.0); | |
| 206 |
2/2✓ Branch 0 taken 1792 times.
✓ Branch 1 taken 28 times.
|
1820 | for (int i = 0; i < nn; ++i) { |
| 207 |
2/2✓ Branch 0 taken 114688 times.
✓ Branch 1 taken 1792 times.
|
116480 | for (int ki = 0; ki < nn; ++ki) { |
| 208 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 114688 times.
|
114688 | const double aik = lhs.at(uk)[static_cast<size_t>((static_cast<ptrdiff_t>(i) * nn) + ki)]; |
| 209 |
2/2✓ Branch 0 taken 7340032 times.
✓ Branch 1 taken 114688 times.
|
7454720 | for (int j = 0; j < nn; ++j) { |
| 210 | 7340032 | c[static_cast<size_t>((static_cast<ptrdiff_t>(i) * nn) + j)] += | |
| 211 | 7340032 | aik * rhs.at(uk)[static_cast<size_t>((static_cast<ptrdiff_t>(ki) * nn) + j)]; | |
| 212 | } | ||
| 213 | } | ||
| 214 | } | ||
| 215 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 28 times.
|
28 | m.at(uk) = std::move(c); |
| 216 | } | ||
| 217 | 28 | }); | |
| 218 | |||
| 219 |
3/6✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 4 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 4 times.
✗ Branch 8 not taken.
|
4 | auto c11 = Add(Sub(Add(m.at(0), m.at(3), half), m.at(4), half), m.at(6), half); |
| 220 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | auto c12 = Add(m.at(2), m.at(4), half); |
| 221 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | auto c21 = Add(m.at(1), m.at(3), half); |
| 222 |
3/6✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 4 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 4 times.
✗ Branch 8 not taken.
|
4 | auto c22 = Add(Sub(Add(m.at(0), m.at(2), half), m.at(1), half), m.at(5), half); |
| 223 | |||
| 224 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | return Merge(c11, c12, c21, c22, half); |
| 225 | 4 | } | |
| 226 | |||
| 227 | } // namespace lazareva_a_matrix_mult_strassen | ||
| 228 |