| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "lazareva_a_matrix_mult_strassen/stl/include/ops_stl.hpp" | ||
| 2 | |||
| 3 | #include <algorithm> | ||
| 4 | #include <array> | ||
| 5 | #include <cstddef> | ||
| 6 | #include <functional> | ||
| 7 | #include <future> | ||
| 8 | #include <thread> | ||
| 9 | #include <utility> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "lazareva_a_matrix_mult_strassen/common/include/common.hpp" | ||
| 13 | #include "util/include/util.hpp" | ||
| 14 | |||
| 15 | namespace lazareva_a_matrix_mult_strassen { | ||
| 16 | |||
| 17 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | LazarevaATestTaskSTL::LazarevaATestTaskSTL(const InType &in) { |
| 18 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 19 | GetInput() = in; | ||
| 20 | GetOutput() = {}; | ||
| 21 | 48 | } | |
| 22 | |||
| 23 | 48 | bool LazarevaATestTaskSTL::ValidationImpl() { | |
| 24 | 48 | const int n = GetInput().n; | |
| 25 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 48 times.
|
48 | if (n <= 0) { |
| 26 | return false; | ||
| 27 | } | ||
| 28 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 48 times.
|
48 | const auto expected = static_cast<size_t>(n) * static_cast<size_t>(n); |
| 29 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 48 times.
|
48 | return std::cmp_equal(GetInput().a.size(), expected) && std::cmp_equal(GetInput().b.size(), expected); |
| 30 | } | ||
| 31 | |||
| 32 | 48 | bool LazarevaATestTaskSTL::PreProcessingImpl() { | |
| 33 | 48 | n_ = GetInput().n; | |
| 34 | 48 | padded_n_ = NextPowerOfTwo(n_); | |
| 35 | 48 | a_ = PadMatrix(GetInput().a, n_, padded_n_); | |
| 36 | 48 | b_ = PadMatrix(GetInput().b, n_, padded_n_); | |
| 37 | 48 | const auto padded_size = static_cast<size_t>(padded_n_) * static_cast<size_t>(padded_n_); | |
| 38 | 48 | result_.assign(padded_size, 0.0); | |
| 39 | 48 | return true; | |
| 40 | } | ||
| 41 | |||
| 42 | 48 | bool LazarevaATestTaskSTL::RunImpl() { | |
| 43 | 48 | result_ = Strassen(a_, b_, padded_n_); | |
| 44 | 48 | return true; | |
| 45 | } | ||
| 46 | |||
| 47 | 48 | bool LazarevaATestTaskSTL::PostProcessingImpl() { | |
| 48 | 48 | GetOutput() = UnpadMatrix(result_, padded_n_, n_); | |
| 49 | 48 | return true; | |
| 50 | } | ||
| 51 | |||
| 52 | ✗ | int LazarevaATestTaskSTL::NextPowerOfTwo(int n) { | |
| 53 |
1/4✗ Branch 0 not taken.
✗ Branch 1 not taken.
✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
|
48 | if (n <= 0) { |
| 54 | return 1; | ||
| 55 | } | ||
| 56 | int p = 1; | ||
| 57 |
2/4✗ Branch 0 not taken.
✗ Branch 1 not taken.
✓ Branch 2 taken 160 times.
✓ Branch 3 taken 48 times.
|
208 | while (p < n) { |
| 58 | 160 | p <<= 1; | |
| 59 | } | ||
| 60 | return p; | ||
| 61 | } | ||
| 62 | |||
| 63 | 96 | std::vector<double> LazarevaATestTaskSTL::PadMatrix(const std::vector<double> &m, int old_n, int new_n) { | |
| 64 | 96 | const auto new_size = static_cast<size_t>(new_n) * static_cast<size_t>(new_n); | |
| 65 | 96 | std::vector<double> result(new_size, 0.0); | |
| 66 | |||
| 67 |
2/2✓ Branch 0 taken 2576 times.
✓ Branch 1 taken 96 times.
|
2672 | for (int i = 0; i < old_n; ++i) { |
| 68 | 2576 | const auto src_start = m.begin() + (static_cast<ptrdiff_t>(i) * old_n); | |
| 69 | const auto src_end = src_start + old_n; | ||
| 70 | 2576 | const auto dst_start = result.begin() + (static_cast<ptrdiff_t>(i) * new_n); | |
| 71 | 2576 | std::copy(src_start, src_end, dst_start); | |
| 72 | } | ||
| 73 | |||
| 74 | 96 | return result; | |
| 75 | } | ||
| 76 | |||
| 77 | 48 | std::vector<double> LazarevaATestTaskSTL::UnpadMatrix(const std::vector<double> &m, int old_n, int new_n) { | |
| 78 | 48 | const auto new_size = static_cast<size_t>(new_n) * static_cast<size_t>(new_n); | |
| 79 | 48 | std::vector<double> result(new_size); | |
| 80 | |||
| 81 |
2/2✓ Branch 0 taken 1288 times.
✓ Branch 1 taken 48 times.
|
1336 | for (int i = 0; i < new_n; ++i) { |
| 82 | 1288 | const auto src_start = m.begin() + (static_cast<ptrdiff_t>(i) * old_n); | |
| 83 | const auto src_end = src_start + new_n; | ||
| 84 | 1288 | const auto dst_start = result.begin() + (static_cast<ptrdiff_t>(i) * new_n); | |
| 85 | 1288 | std::copy(src_start, src_end, dst_start); | |
| 86 | } | ||
| 87 | |||
| 88 | 48 | return result; | |
| 89 | } | ||
| 90 | |||
| 91 | 96 | std::vector<double> LazarevaATestTaskSTL::Add(const std::vector<double> &a, const std::vector<double> &b, int n) { | |
| 92 | 96 | const auto size = static_cast<size_t>(n) * static_cast<size_t>(n); | |
| 93 | 96 | std::vector<double> result(size); | |
| 94 | |||
| 95 | std::transform(a.begin(), a.begin() + static_cast<ptrdiff_t>(size), b.begin(), result.begin(), std::plus<>()); | ||
| 96 | |||
| 97 | 96 | return result; | |
| 98 | } | ||
| 99 | |||
| 100 | 48 | std::vector<double> LazarevaATestTaskSTL::Sub(const std::vector<double> &a, const std::vector<double> &b, int n) { | |
| 101 | 48 | const auto size = static_cast<size_t>(n) * static_cast<size_t>(n); | |
| 102 | 48 | std::vector<double> result(size); | |
| 103 | |||
| 104 | std::transform(a.begin(), a.begin() + static_cast<ptrdiff_t>(size), b.begin(), result.begin(), std::minus<>()); | ||
| 105 | |||
| 106 | 48 | return result; | |
| 107 | } | ||
| 108 | |||
| 109 | 16 | void LazarevaATestTaskSTL::Split(const std::vector<double> &parent, int n, std::vector<double> &a11, | |
| 110 | std::vector<double> &a12, std::vector<double> &a21, std::vector<double> &a22) { | ||
| 111 | 16 | const int half = n / 2; | |
| 112 | 16 | const auto half_size = static_cast<size_t>(half) * static_cast<size_t>(half); | |
| 113 | 16 | a11.resize(half_size); | |
| 114 | 16 | a12.resize(half_size); | |
| 115 | 16 | a21.resize(half_size); | |
| 116 | 16 | a22.resize(half_size); | |
| 117 | |||
| 118 |
2/2✓ Branch 0 taken 1024 times.
✓ Branch 1 taken 16 times.
|
1040 | for (int i = 0; i < half; ++i) { |
| 119 | 1024 | const auto parent_row_start = parent.begin() + (static_cast<ptrdiff_t>(i) * n); | |
| 120 | 1024 | const auto a11_row_start = a11.begin() + (static_cast<ptrdiff_t>(i) * half); | |
| 121 | const auto a12_row_start = a12.begin() + (static_cast<ptrdiff_t>(i) * half); | ||
| 122 | |||
| 123 | 1024 | std::copy(parent_row_start, parent_row_start + half, a11_row_start); | |
| 124 | 1024 | std::copy(parent_row_start + half, parent_row_start + n, a12_row_start); | |
| 125 | } | ||
| 126 | |||
| 127 |
2/2✓ Branch 0 taken 1024 times.
✓ Branch 1 taken 16 times.
|
1040 | for (int i = 0; i < half; ++i) { |
| 128 | 1024 | const auto parent_row_start = parent.begin() + (static_cast<ptrdiff_t>(i + half) * n); | |
| 129 | 1024 | const auto a21_row_start = a21.begin() + (static_cast<ptrdiff_t>(i) * half); | |
| 130 | const auto a22_row_start = a22.begin() + (static_cast<ptrdiff_t>(i) * half); | ||
| 131 | |||
| 132 | 1024 | std::copy(parent_row_start, parent_row_start + half, a21_row_start); | |
| 133 | 1024 | std::copy(parent_row_start + half, parent_row_start + n, a22_row_start); | |
| 134 | } | ||
| 135 | 16 | } | |
| 136 | |||
| 137 | 8 | std::vector<double> LazarevaATestTaskSTL::Merge(const std::vector<double> &c11, const std::vector<double> &c12, | |
| 138 | const std::vector<double> &c21, const std::vector<double> &c22, int n) { | ||
| 139 | 8 | const int full = n * 2; | |
| 140 | 8 | const auto full_size = static_cast<size_t>(full) * static_cast<size_t>(full); | |
| 141 | 8 | std::vector<double> result(full_size); | |
| 142 | |||
| 143 |
2/2✓ Branch 0 taken 512 times.
✓ Branch 1 taken 8 times.
|
520 | for (int i = 0; i < n; ++i) { |
| 144 | 512 | const auto c11_row_start = c11.begin() + (static_cast<ptrdiff_t>(i) * n); | |
| 145 | const auto c12_row_start = c12.begin() + (static_cast<ptrdiff_t>(i) * n); | ||
| 146 | const auto c21_row_start = c21.begin() + (static_cast<ptrdiff_t>(i) * n); | ||
| 147 | const auto c22_row_start = c22.begin() + (static_cast<ptrdiff_t>(i) * n); | ||
| 148 | |||
| 149 | 512 | auto result_row_start = result.begin() + (static_cast<ptrdiff_t>(i) * full); | |
| 150 | 512 | std::copy(c11_row_start, c11_row_start + n, result_row_start); | |
| 151 | 512 | std::copy(c12_row_start, c12_row_start + n, result_row_start + n); | |
| 152 | |||
| 153 | 512 | result_row_start = result.begin() + (static_cast<ptrdiff_t>(i + n) * full); | |
| 154 | 512 | std::copy(c21_row_start, c21_row_start + n, result_row_start); | |
| 155 | 512 | std::copy(c22_row_start, c22_row_start + n, result_row_start + n); | |
| 156 | } | ||
| 157 | |||
| 158 | 8 | return result; | |
| 159 | } | ||
| 160 | |||
| 161 | 40 | std::vector<double> LazarevaATestTaskSTL::NaiveMult(const std::vector<double> &a, const std::vector<double> &b, int n) { | |
| 162 | 40 | const auto size = static_cast<size_t>(n) * static_cast<size_t>(n); | |
| 163 | 40 | std::vector<double> c(size, 0.0); | |
| 164 | |||
| 165 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | const int num_threads = ppc::util::GetNumThreads(); |
| 166 | 40 | std::vector<std::thread> threads; | |
| 167 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | threads.reserve(num_threads); |
| 168 | |||
| 169 | 100 | auto worker = [&](int thread_id) { | |
| 170 |
2/2✓ Branch 0 taken 304 times.
✓ Branch 1 taken 100 times.
|
404 | for (int i = thread_id; i < n; i += num_threads) { |
| 171 |
2/2✓ Branch 0 taken 3232 times.
✓ Branch 1 taken 304 times.
|
3536 | for (int k = 0; k < n; ++k) { |
| 172 | 3232 | const double aik = a[static_cast<size_t>((static_cast<ptrdiff_t>(i) * n) + k)]; | |
| 173 |
2/2✓ Branch 0 taken 41536 times.
✓ Branch 1 taken 3232 times.
|
44768 | for (int j = 0; j < n; ++j) { |
| 174 | 41536 | c[static_cast<size_t>((static_cast<ptrdiff_t>(i) * n) + j)] += | |
| 175 | 41536 | aik * b[static_cast<size_t>((static_cast<ptrdiff_t>(k) * n) + j)]; | |
| 176 | } | ||
| 177 | } | ||
| 178 | } | ||
| 179 | 140 | }; | |
| 180 | |||
| 181 |
2/2✓ Branch 0 taken 100 times.
✓ Branch 1 taken 40 times.
|
140 | for (int thread_idx = 0; thread_idx < num_threads; ++thread_idx) { |
| 182 |
1/2✓ Branch 1 taken 100 times.
✗ Branch 2 not taken.
|
100 | threads.emplace_back(worker, thread_idx); |
| 183 | } | ||
| 184 | |||
| 185 |
1/2✓ Branch 1 taken 100 times.
✗ Branch 2 not taken.
|
100 | std::ranges::for_each(threads, [](std::thread &thr) { thr.join(); }); |
| 186 | |||
| 187 | 40 | return c; | |
| 188 | 40 | } | |
| 189 | |||
| 190 | 48 | std::vector<double> LazarevaATestTaskSTL::Strassen(const std::vector<double> &root_a, const std::vector<double> &root_b, | |
| 191 | int root_n) { | ||
| 192 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 8 times.
|
48 | if (root_n <= 64) { |
| 193 | 40 | return NaiveMult(root_a, root_b, root_n); | |
| 194 | } | ||
| 195 | |||
| 196 | 8 | const int half = root_n / 2; | |
| 197 | |||
| 198 | 8 | std::vector<double> a11; | |
| 199 | 8 | std::vector<double> a12; | |
| 200 | 8 | std::vector<double> a21; | |
| 201 | 8 | std::vector<double> a22; | |
| 202 | 8 | std::vector<double> b11; | |
| 203 | 8 | std::vector<double> b12; | |
| 204 | 8 | std::vector<double> b21; | |
| 205 | 8 | std::vector<double> b22; | |
| 206 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | Split(root_a, root_n, a11, a12, a21, a22); |
| 207 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | Split(root_b, root_n, b11, b12, b21, b22); |
| 208 | |||
| 209 | 8 | std::array<std::vector<double>, 7> lhs; | |
| 210 | 8 | std::array<std::vector<double>, 7> rhs; | |
| 211 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | lhs.at(0) = Add(a11, a22, half); |
| 212 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | rhs.at(0) = Add(b11, b22, half); |
| 213 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | lhs.at(1) = Add(a21, a22, half); |
| 214 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | rhs.at(1) = b11; |
| 215 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | lhs.at(2) = a11; |
| 216 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | rhs.at(2) = Sub(b12, b22, half); |
| 217 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | lhs.at(3) = a22; |
| 218 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | rhs.at(3) = Sub(b21, b11, half); |
| 219 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | lhs.at(4) = Add(a11, a12, half); |
| 220 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | rhs.at(4) = b22; |
| 221 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | lhs.at(5) = Sub(a21, a11, half); |
| 222 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | rhs.at(5) = Add(b11, b12, half); |
| 223 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | lhs.at(6) = Sub(a12, a22, half); |
| 224 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | rhs.at(6) = Add(b21, b22, half); |
| 225 | |||
| 226 | 8 | std::array<std::vector<double>, 7> m; | |
| 227 | |||
| 228 | 8 | std::array<std::future<std::vector<double>>, 7> futures; | |
| 229 | |||
| 230 |
2/2✓ Branch 0 taken 56 times.
✓ Branch 1 taken 8 times.
|
64 | for (size_t k = 0; k < 7; ++k) { |
| 231 |
1/2✓ Branch 1 taken 56 times.
✗ Branch 2 not taken.
|
112 | futures.at(k) = std::async(std::launch::async, [&lhs, &rhs, k, half]() { |
| 232 | 56 | const int nn = half; | |
| 233 | 56 | const auto sz = static_cast<size_t>(nn) * static_cast<size_t>(nn); | |
| 234 | 56 | std::vector<double> c(sz, 0.0); | |
| 235 |
2/2✓ Branch 0 taken 3584 times.
✓ Branch 1 taken 56 times.
|
3640 | for (int i = 0; i < nn; ++i) { |
| 236 |
2/2✓ Branch 0 taken 229376 times.
✓ Branch 1 taken 3584 times.
|
232960 | for (int ki = 0; ki < nn; ++ki) { |
| 237 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 229376 times.
|
229376 | const double aik = lhs.at(k)[static_cast<size_t>((static_cast<ptrdiff_t>(i) * nn) + ki)]; |
| 238 |
2/2✓ Branch 0 taken 14680064 times.
✓ Branch 1 taken 229376 times.
|
14909440 | for (int j = 0; j < nn; ++j) { |
| 239 | 14680064 | c[static_cast<size_t>((static_cast<ptrdiff_t>(i) * nn) + j)] += | |
| 240 | 14680064 | aik * rhs.at(k)[static_cast<size_t>((static_cast<ptrdiff_t>(ki) * nn) + j)]; | |
| 241 | } | ||
| 242 | } | ||
| 243 | } | ||
| 244 | 56 | return c; | |
| 245 | }); | ||
| 246 | } | ||
| 247 | |||
| 248 | 56 | std::ranges::transform(futures, m.begin(), [](std::future<std::vector<double>> &f) { return f.get(); }); | |
| 249 | |||
| 250 |
3/6✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 8 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 8 times.
✗ Branch 8 not taken.
|
8 | auto c11 = Add(Sub(Add(m.at(0), m.at(3), half), m.at(4), half), m.at(6), half); |
| 251 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | auto c12 = Add(m.at(2), m.at(4), half); |
| 252 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | auto c21 = Add(m.at(1), m.at(3), half); |
| 253 |
3/6✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 8 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 8 times.
✗ Branch 8 not taken.
|
8 | auto c22 = Add(Sub(Add(m.at(0), m.at(2), half), m.at(1), half), m.at(5), half); |
| 254 | |||
| 255 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | return Merge(c11, c12, c21, c22, half); |
| 256 | 8 | } | |
| 257 | |||
| 258 | } // namespace lazareva_a_matrix_mult_strassen | ||
| 259 |