| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "lazareva_a_matrix_mult_strassen/seq/include/ops_seq.hpp" | ||
| 2 | |||
| 3 | #include <array> | ||
| 4 | #include <cstddef> | ||
| 5 | #include <utility> | ||
| 6 | #include <vector> | ||
| 7 | |||
| 8 | #include "lazareva_a_matrix_mult_strassen/common/include/common.hpp" | ||
| 9 | |||
| 10 | namespace lazareva_a_matrix_mult_strassen { | ||
| 11 | |||
| 12 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | LazarevaATestTaskSEQ::LazarevaATestTaskSEQ(const InType &in) { |
| 13 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 14 | GetInput() = in; | ||
| 15 | GetOutput() = {}; | ||
| 16 | 48 | } | |
| 17 | |||
| 18 | 48 | bool LazarevaATestTaskSEQ::ValidationImpl() { | |
| 19 | 48 | const int n = GetInput().n; | |
| 20 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 48 times.
|
48 | if (n <= 0) { |
| 21 | return false; | ||
| 22 | } | ||
| 23 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 48 times.
|
48 | const auto expected = static_cast<size_t>(n) * static_cast<size_t>(n); |
| 24 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 48 times.
|
48 | return std::cmp_equal(GetInput().a.size(), expected) && std::cmp_equal(GetInput().b.size(), expected); |
| 25 | } | ||
| 26 | |||
| 27 | 48 | bool LazarevaATestTaskSEQ::PreProcessingImpl() { | |
| 28 | 48 | n_ = GetInput().n; | |
| 29 | 48 | padded_n_ = NextPowerOfTwo(n_); | |
| 30 | 48 | a_ = PadMatrix(GetInput().a, n_, padded_n_); | |
| 31 | 48 | b_ = PadMatrix(GetInput().b, n_, padded_n_); | |
| 32 | 48 | const auto padded_size = static_cast<size_t>(padded_n_) * static_cast<size_t>(padded_n_); | |
| 33 | 48 | result_.assign(padded_size, 0.0); | |
| 34 | 48 | return true; | |
| 35 | } | ||
| 36 | |||
| 37 | 48 | bool LazarevaATestTaskSEQ::RunImpl() { | |
| 38 | 48 | result_ = Strassen(a_, b_, padded_n_); | |
| 39 | 48 | return true; | |
| 40 | } | ||
| 41 | |||
| 42 | 48 | bool LazarevaATestTaskSEQ::PostProcessingImpl() { | |
| 43 | 48 | GetOutput() = UnpadMatrix(result_, padded_n_, n_); | |
| 44 | 48 | return true; | |
| 45 | } | ||
| 46 | |||
| 47 | ✗ | int LazarevaATestTaskSEQ::NextPowerOfTwo(int n) { | |
| 48 |
1/4✗ Branch 0 not taken.
✗ Branch 1 not taken.
✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
|
48 | if (n <= 0) { |
| 49 | return 1; | ||
| 50 | } | ||
| 51 | int p = 1; | ||
| 52 |
2/4✗ Branch 0 not taken.
✗ Branch 1 not taken.
✓ Branch 2 taken 160 times.
✓ Branch 3 taken 48 times.
|
208 | while (p < n) { |
| 53 | 160 | p <<= 1; | |
| 54 | } | ||
| 55 | return p; | ||
| 56 | } | ||
| 57 | |||
| 58 | 96 | std::vector<double> LazarevaATestTaskSEQ::PadMatrix(const std::vector<double> &m, int old_n, int new_n) { | |
| 59 | 96 | const auto new_size = static_cast<size_t>(new_n) * static_cast<size_t>(new_n); | |
| 60 | 96 | std::vector<double> result(new_size, 0.0); | |
| 61 |
2/2✓ Branch 0 taken 2576 times.
✓ Branch 1 taken 96 times.
|
2672 | for (int i = 0; i < old_n; ++i) { |
| 62 |
2/2✓ Branch 0 taken 267632 times.
✓ Branch 1 taken 2576 times.
|
270208 | for (int j = 0; j < old_n; ++j) { |
| 63 | 267632 | const auto dst = (static_cast<ptrdiff_t>(i) * new_n) + j; | |
| 64 | 267632 | const auto src = (static_cast<ptrdiff_t>(i) * old_n) + j; | |
| 65 | 267632 | result[static_cast<size_t>(dst)] = m[static_cast<size_t>(src)]; | |
| 66 | } | ||
| 67 | } | ||
| 68 | 96 | return result; | |
| 69 | } | ||
| 70 | |||
| 71 | 48 | std::vector<double> LazarevaATestTaskSEQ::UnpadMatrix(const std::vector<double> &m, int old_n, int new_n) { | |
| 72 | 48 | const auto new_size = static_cast<size_t>(new_n) * static_cast<size_t>(new_n); | |
| 73 | 48 | std::vector<double> result(new_size); | |
| 74 |
2/2✓ Branch 0 taken 1288 times.
✓ Branch 1 taken 48 times.
|
1336 | for (int i = 0; i < new_n; ++i) { |
| 75 |
2/2✓ Branch 0 taken 133816 times.
✓ Branch 1 taken 1288 times.
|
135104 | for (int j = 0; j < new_n; ++j) { |
| 76 | 133816 | const auto dst = (static_cast<ptrdiff_t>(i) * new_n) + j; | |
| 77 | 133816 | const auto src = (static_cast<ptrdiff_t>(i) * old_n) + j; | |
| 78 | 133816 | result[static_cast<size_t>(dst)] = m[static_cast<size_t>(src)]; | |
| 79 | } | ||
| 80 | } | ||
| 81 | 48 | return result; | |
| 82 | } | ||
| 83 | |||
| 84 | 96 | std::vector<double> LazarevaATestTaskSEQ::Add(const std::vector<double> &a, const std::vector<double> &b, int n) { | |
| 85 | 96 | const auto size = static_cast<size_t>(n) * static_cast<size_t>(n); | |
| 86 | 96 | std::vector<double> result(size); | |
| 87 |
2/2✓ Branch 0 taken 393216 times.
✓ Branch 1 taken 96 times.
|
393312 | for (size_t i = 0; i < size; ++i) { |
| 88 | 393216 | result[i] = a[i] + b[i]; | |
| 89 | } | ||
| 90 | 96 | return result; | |
| 91 | } | ||
| 92 | |||
| 93 | 48 | std::vector<double> LazarevaATestTaskSEQ::Sub(const std::vector<double> &a, const std::vector<double> &b, int n) { | |
| 94 | 48 | const auto size = static_cast<size_t>(n) * static_cast<size_t>(n); | |
| 95 | 48 | std::vector<double> result(size); | |
| 96 |
2/2✓ Branch 0 taken 196608 times.
✓ Branch 1 taken 48 times.
|
196656 | for (size_t i = 0; i < size; ++i) { |
| 97 | 196608 | result[i] = a[i] - b[i]; | |
| 98 | } | ||
| 99 | 48 | return result; | |
| 100 | } | ||
| 101 | |||
| 102 | 16 | void LazarevaATestTaskSEQ::Split(const std::vector<double> &parent, int n, std::vector<double> &a11, | |
| 103 | std::vector<double> &a12, std::vector<double> &a21, std::vector<double> &a22) { | ||
| 104 | 16 | const int half = n / 2; | |
| 105 | 16 | const auto half_size = static_cast<size_t>(half) * static_cast<size_t>(half); | |
| 106 | 16 | a11.resize(half_size); | |
| 107 | 16 | a12.resize(half_size); | |
| 108 | 16 | a21.resize(half_size); | |
| 109 | 16 | a22.resize(half_size); | |
| 110 | |||
| 111 |
2/2✓ Branch 0 taken 1024 times.
✓ Branch 1 taken 16 times.
|
1040 | for (int i = 0; i < half; ++i) { |
| 112 |
2/2✓ Branch 0 taken 65536 times.
✓ Branch 1 taken 1024 times.
|
66560 | for (int j = 0; j < half; ++j) { |
| 113 | 65536 | const auto idx = static_cast<size_t>((static_cast<ptrdiff_t>(i) * half) + j); | |
| 114 | 65536 | a11[idx] = parent[static_cast<size_t>((static_cast<ptrdiff_t>(i) * n) + j)]; | |
| 115 | 65536 | a12[idx] = parent[static_cast<size_t>((static_cast<ptrdiff_t>(i) * n) + j + half)]; | |
| 116 | 65536 | a21[idx] = parent[static_cast<size_t>((static_cast<ptrdiff_t>(i + half) * n) + j)]; | |
| 117 | 65536 | a22[idx] = parent[static_cast<size_t>((static_cast<ptrdiff_t>(i + half) * n) + j + half)]; | |
| 118 | } | ||
| 119 | } | ||
| 120 | 16 | } | |
| 121 | |||
| 122 | 8 | std::vector<double> LazarevaATestTaskSEQ::Merge(const std::vector<double> &c11, const std::vector<double> &c12, | |
| 123 | const std::vector<double> &c21, const std::vector<double> &c22, int n) { | ||
| 124 | 8 | const int full = n * 2; | |
| 125 | 8 | const auto full_size = static_cast<size_t>(full) * static_cast<size_t>(full); | |
| 126 | 8 | std::vector<double> result(full_size); | |
| 127 | |||
| 128 |
2/2✓ Branch 0 taken 512 times.
✓ Branch 1 taken 8 times.
|
520 | for (int i = 0; i < n; ++i) { |
| 129 |
2/2✓ Branch 0 taken 32768 times.
✓ Branch 1 taken 512 times.
|
33280 | for (int j = 0; j < n; ++j) { |
| 130 | 32768 | const auto src = static_cast<size_t>((static_cast<ptrdiff_t>(i) * n) + j); | |
| 131 | 32768 | result[static_cast<size_t>((static_cast<ptrdiff_t>(i) * full) + j)] = c11[src]; | |
| 132 | 32768 | result[static_cast<size_t>((static_cast<ptrdiff_t>(i) * full) + j + n)] = c12[src]; | |
| 133 | 32768 | result[static_cast<size_t>((static_cast<ptrdiff_t>(i + n) * full) + j)] = c21[src]; | |
| 134 | 32768 | result[static_cast<size_t>((static_cast<ptrdiff_t>(i + n) * full) + j + n)] = c22[src]; | |
| 135 | } | ||
| 136 | } | ||
| 137 | 8 | return result; | |
| 138 | } | ||
| 139 | |||
| 140 | 96 | std::vector<double> LazarevaATestTaskSEQ::NaiveMult(const std::vector<double> &a, const std::vector<double> &b, int n) { | |
| 141 | 96 | const auto size = static_cast<size_t>(n) * static_cast<size_t>(n); | |
| 142 | 96 | std::vector<double> c(size, 0.0); | |
| 143 |
2/2✓ Branch 0 taken 3888 times.
✓ Branch 1 taken 96 times.
|
3984 | for (int i = 0; i < n; ++i) { |
| 144 |
2/2✓ Branch 0 taken 232608 times.
✓ Branch 1 taken 3888 times.
|
236496 | for (int k = 0; k < n; ++k) { |
| 145 | 232608 | const double aik = a[static_cast<size_t>((static_cast<ptrdiff_t>(i) * n) + k)]; | |
| 146 |
2/2✓ Branch 0 taken 14721600 times.
✓ Branch 1 taken 232608 times.
|
14954208 | for (int j = 0; j < n; ++j) { |
| 147 | 14721600 | c[static_cast<size_t>((static_cast<ptrdiff_t>(i) * n) + j)] += | |
| 148 | 14721600 | aik * b[static_cast<size_t>((static_cast<ptrdiff_t>(k) * n) + j)]; | |
| 149 | } | ||
| 150 | } | ||
| 151 | } | ||
| 152 | 96 | return c; | |
| 153 | } | ||
| 154 | |||
| 155 | 48 | std::vector<double> LazarevaATestTaskSEQ::Strassen(const std::vector<double> &root_a, const std::vector<double> &root_b, | |
| 156 | int root_n) { | ||
| 157 | 48 | std::vector<std::vector<double>> results; | |
| 158 | 48 | std::vector<StrassenNode> nodes; | |
| 159 | 48 | std::vector<int> call_stack; | |
| 160 | |||
| 161 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | results.emplace_back(); |
| 162 | { | ||
| 163 | 48 | StrassenNode root; | |
| 164 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | root.a = root_a; |
| 165 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | root.b = root_b; |
| 166 | 48 | root.n = root_n; | |
| 167 | 48 | root.result_slot = 0; | |
| 168 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | root.expanded = false; |
| 169 | nodes.push_back(std::move(root)); | ||
| 170 | 48 | } | |
| 171 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | call_stack.push_back(0); |
| 172 | |||
| 173 |
2/2✓ Branch 0 taken 112 times.
✓ Branch 1 taken 48 times.
|
160 | while (!call_stack.empty()) { |
| 174 |
2/2✓ Branch 0 taken 96 times.
✓ Branch 1 taken 16 times.
|
112 | const int node_idx = call_stack.back(); |
| 175 | call_stack.pop_back(); | ||
| 176 | |||
| 177 |
2/2✓ Branch 0 taken 96 times.
✓ Branch 1 taken 16 times.
|
112 | const auto nidx = static_cast<size_t>(node_idx); |
| 178 | 112 | const int cur_n = nodes[nidx].n; | |
| 179 | 112 | const int cur_slot = nodes[nidx].result_slot; | |
| 180 | |||
| 181 |
2/2✓ Branch 0 taken 96 times.
✓ Branch 1 taken 16 times.
|
112 | if (cur_n <= 64) { |
| 182 |
2/4✓ Branch 1 taken 96 times.
✗ Branch 2 not taken.
✓ Branch 5 taken 96 times.
✗ Branch 6 not taken.
|
192 | results[static_cast<size_t>(cur_slot)] = NaiveMult(nodes[nidx].a, nodes[nidx].b, cur_n); |
| 183 | nodes[nidx].a = {}; | ||
| 184 | nodes[nidx].b = {}; | ||
| 185 | 96 | continue; | |
| 186 | } | ||
| 187 | |||
| 188 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | if (!nodes[nidx].expanded) { |
| 189 | 8 | const int half = cur_n / 2; | |
| 190 | |||
| 191 | 8 | std::vector<double> a11; | |
| 192 | 8 | std::vector<double> a12; | |
| 193 | 8 | std::vector<double> a21; | |
| 194 | 8 | std::vector<double> a22; | |
| 195 | 8 | std::vector<double> b11; | |
| 196 | 8 | std::vector<double> b12; | |
| 197 | 8 | std::vector<double> b21; | |
| 198 | 8 | std::vector<double> b22; | |
| 199 | |||
| 200 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | Split(nodes[nidx].a, cur_n, a11, a12, a21, a22); |
| 201 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | Split(nodes[nidx].b, cur_n, b11, b12, b21, b22); |
| 202 | |||
| 203 | nodes[nidx].a = {}; | ||
| 204 | nodes[nidx].b = {}; | ||
| 205 | 8 | nodes[nidx].expanded = true; | |
| 206 | |||
| 207 | std::array<std::pair<std::vector<double>, std::vector<double>>, 7> args = { | ||
| 208 |
2/6✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 8 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
|
8 | std::make_pair(Add(a11, a22, half), Add(b11, b22, half)), |
| 209 |
2/8✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 8 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
|
8 | std::make_pair(Add(a21, a22, half), std::vector<double>(b11)), |
| 210 |
2/8✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 8 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
|
8 | std::make_pair(std::vector<double>(a11), Sub(b12, b22, half)), |
| 211 |
2/8✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 8 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
|
8 | std::make_pair(std::vector<double>(a22), Sub(b21, b11, half)), |
| 212 |
2/8✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 8 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
|
8 | std::make_pair(Add(a11, a12, half), std::vector<double>(b22)), |
| 213 |
2/8✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 8 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
|
8 | std::make_pair(Sub(a21, a11, half), Add(b11, b12, half)), |
| 214 |
2/8✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 8 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
|
8 | std::make_pair(Sub(a12, a22, half), Add(b21, b22, half)), |
| 215 | ✗ | }; | |
| 216 | |||
| 217 | 8 | const int base_slot = static_cast<int>(results.size()); | |
| 218 |
2/2✓ Branch 0 taken 56 times.
✓ Branch 1 taken 8 times.
|
64 | for (size_t k = 0; k < 7; ++k) { |
| 219 | 56 | nodes[nidx].child_slots.at(k) = base_slot + static_cast<int>(k); | |
| 220 |
1/2✓ Branch 1 taken 56 times.
✗ Branch 2 not taken.
|
56 | results.emplace_back(); |
| 221 | } | ||
| 222 | |||
| 223 | call_stack.push_back(node_idx); | ||
| 224 | |||
| 225 |
2/2✓ Branch 0 taken 56 times.
✓ Branch 1 taken 8 times.
|
64 | for (int k = 6; k >= 0; --k) { |
| 226 | 56 | const auto uk = static_cast<size_t>(k); | |
| 227 | 56 | StrassenNode child; | |
| 228 | 56 | child.a = std::move(args.at(uk).first); | |
| 229 | 56 | child.b = std::move(args.at(uk).second); | |
| 230 | 56 | child.n = half; | |
| 231 | 56 | child.result_slot = base_slot + k; | |
| 232 |
1/2✓ Branch 1 taken 56 times.
✗ Branch 2 not taken.
|
56 | child.expanded = false; |
| 233 |
1/2✓ Branch 1 taken 56 times.
✗ Branch 2 not taken.
|
56 | const int child_idx = static_cast<int>(nodes.size()); |
| 234 | nodes.push_back(std::move(child)); | ||
| 235 | call_stack.push_back(child_idx); | ||
| 236 | 56 | } | |
| 237 | |||
| 238 | } else { | ||
| 239 | 8 | const int half = cur_n / 2; | |
| 240 | const std::array<int, 7> &cs = nodes[nidx].child_slots; | ||
| 241 | |||
| 242 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | const auto &m1 = results[static_cast<size_t>(cs.at(0))]; |
| 243 | 8 | const auto &m2 = results[static_cast<size_t>(cs.at(1))]; | |
| 244 | 8 | const auto &m3 = results[static_cast<size_t>(cs.at(2))]; | |
| 245 | 8 | const auto &m4 = results[static_cast<size_t>(cs.at(3))]; | |
| 246 | 8 | const auto &m5 = results[static_cast<size_t>(cs.at(4))]; | |
| 247 | 8 | const auto &m6 = results[static_cast<size_t>(cs.at(5))]; | |
| 248 | 8 | const auto &m7 = results[static_cast<size_t>(cs.at(6))]; | |
| 249 | |||
| 250 |
3/8✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 8 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 8 times.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
|
8 | auto c11 = Add(Sub(Add(m1, m4, half), m5, half), m7, half); |
| 251 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | auto c12 = Add(m3, m5, half); |
| 252 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | auto c21 = Add(m2, m4, half); |
| 253 |
3/8✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 8 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 8 times.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
|
8 | auto c22 = Add(Sub(Add(m1, m3, half), m2, half), m6, half); |
| 254 | |||
| 255 |
2/6✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 8 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
|
16 | results[static_cast<size_t>(cur_slot)] = Merge(c11, c12, c21, c22, half); |
| 256 | } | ||
| 257 | } | ||
| 258 | |||
| 259 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
96 | return results[0]; |
| 260 | 48 | } | |
| 261 | |||
| 262 | } // namespace lazareva_a_matrix_mult_strassen | ||
| 263 |