| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "zorin_d_strassen_alg_matrix_seq/seq/include/ops_seq.hpp" | ||
| 2 | |||
| 3 | #include <algorithm> | ||
| 4 | #include <cassert> | ||
| 5 | #include <cstddef> | ||
| 6 | #include <utility> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "zorin_d_strassen_alg_matrix_seq/common/include/common.hpp" | ||
| 10 | |||
| 11 | namespace zorin_d_strassen_alg_matrix_seq { | ||
| 12 | |||
| 13 | namespace { | ||
| 14 | |||
| 15 | constexpr std::size_t kCutoff = 64; | ||
| 16 | |||
| 17 | std::size_t NextPow2(std::size_t x) { | ||
| 18 | 56 | if (x <= 1) { | |
| 19 | return 1; | ||
| 20 | } | ||
| 21 | std::size_t p = 1; | ||
| 22 |
2/2✓ Branch 0 taken 136 times.
✓ Branch 1 taken 48 times.
|
184 | while (p < x) { |
| 23 | 136 | p <<= 1; | |
| 24 | } | ||
| 25 | return p; | ||
| 26 | } | ||
| 27 | |||
| 28 |
1/2✓ Branch 0 taken 56 times.
✗ Branch 1 not taken.
|
56 | void NaiveMul(const std::vector<double> &a, const std::vector<double> &b, std::vector<double> &c, std::size_t n) { |
| 29 | std::ranges::fill(c, 0.0); | ||
| 30 |
2/2✓ Branch 0 taken 440 times.
✓ Branch 1 taken 56 times.
|
496 | for (std::size_t i = 0; i < n; ++i) { |
| 31 | 440 | const std::size_t i_row = i * n; | |
| 32 |
2/2✓ Branch 0 taken 5288 times.
✓ Branch 1 taken 440 times.
|
5728 | for (std::size_t k = 0; k < n; ++k) { |
| 33 | 5288 | const double aik = a[i_row + k]; | |
| 34 | 5288 | const std::size_t k_row = k * n; | |
| 35 |
2/2✓ Branch 0 taken 74312 times.
✓ Branch 1 taken 5288 times.
|
79600 | for (std::size_t j = 0; j < n; ++j) { |
| 36 | 74312 | c[i_row + j] += aik * b[k_row + j]; | |
| 37 | } | ||
| 38 | } | ||
| 39 | } | ||
| 40 | 56 | } | |
| 41 | |||
| 42 | ✗ | std::vector<double> AddVec(const std::vector<double> &x, const std::vector<double> &y) { | |
| 43 | assert(x.size() == y.size()); | ||
| 44 | ✗ | std::vector<double> r(x.size()); | |
| 45 | ✗ | for (std::size_t i = 0; i < x.size(); ++i) { | |
| 46 | ✗ | r[i] = x[i] + y[i]; | |
| 47 | } | ||
| 48 | ✗ | return r; | |
| 49 | } | ||
| 50 | |||
| 51 | ✗ | std::vector<double> SubVec(const std::vector<double> &x, const std::vector<double> &y) { | |
| 52 | assert(x.size() == y.size()); | ||
| 53 | ✗ | std::vector<double> r(x.size()); | |
| 54 | ✗ | for (std::size_t i = 0; i < x.size(); ++i) { | |
| 55 | ✗ | r[i] = x[i] - y[i]; | |
| 56 | } | ||
| 57 | ✗ | return r; | |
| 58 | } | ||
| 59 | |||
| 60 | ✗ | void SplitMat(const std::vector<double> &m, std::size_t n, std::vector<double> &m11, std::vector<double> &m12, | |
| 61 | std::vector<double> &m21, std::vector<double> &m22) { | ||
| 62 | ✗ | const std::size_t k = n / 2; | |
| 63 | ✗ | m11.assign(k * k, 0.0); | |
| 64 | ✗ | m12.assign(k * k, 0.0); | |
| 65 | ✗ | m21.assign(k * k, 0.0); | |
| 66 | ✗ | m22.assign(k * k, 0.0); | |
| 67 | |||
| 68 | ✗ | for (std::size_t i = 0; i < k; ++i) { | |
| 69 | ✗ | for (std::size_t j = 0; j < k; ++j) { | |
| 70 | ✗ | m11[(i * k) + j] = m[(i * n) + j]; | |
| 71 | ✗ | m12[(i * k) + j] = m[(i * n) + (j + k)]; | |
| 72 | ✗ | m21[(i * k) + j] = m[((i + k) * n) + j]; | |
| 73 | ✗ | m22[(i * k) + j] = m[((i + k) * n) + (j + k)]; | |
| 74 | } | ||
| 75 | } | ||
| 76 | ✗ | } | |
| 77 | |||
| 78 | ✗ | std::vector<double> JoinMat(const std::vector<double> &c11, const std::vector<double> &c12, | |
| 79 | const std::vector<double> &c21, const std::vector<double> &c22, std::size_t n) { | ||
| 80 | ✗ | const std::size_t k = n / 2; | |
| 81 | ✗ | std::vector<double> c(n * n, 0.0); | |
| 82 | ✗ | for (std::size_t i = 0; i < k; ++i) { | |
| 83 | ✗ | for (std::size_t j = 0; j < k; ++j) { | |
| 84 | ✗ | c[(i * n) + j] = c11[(i * k) + j]; | |
| 85 | ✗ | c[(i * n) + (j + k)] = c12[(i * k) + j]; | |
| 86 | ✗ | c[((i + k) * n) + j] = c21[(i * k) + j]; | |
| 87 | ✗ | c[((i + k) * n) + (j + k)] = c22[(i * k) + j]; | |
| 88 | } | ||
| 89 | } | ||
| 90 | ✗ | return c; | |
| 91 | } | ||
| 92 | |||
| 93 | struct Frame { | ||
| 94 | std::size_t n{}; | ||
| 95 | std::size_t half{}; | ||
| 96 | int stage{}; | ||
| 97 | int parent_slot{-1}; | ||
| 98 | |||
| 99 | std::vector<double> a; | ||
| 100 | std::vector<double> b; | ||
| 101 | std::vector<double> c; | ||
| 102 | |||
| 103 | std::vector<double> a11; | ||
| 104 | std::vector<double> a12; | ||
| 105 | std::vector<double> a21; | ||
| 106 | std::vector<double> a22; | ||
| 107 | |||
| 108 | std::vector<double> b11; | ||
| 109 | std::vector<double> b12; | ||
| 110 | std::vector<double> b21; | ||
| 111 | std::vector<double> b22; | ||
| 112 | |||
| 113 | std::vector<double> m1; | ||
| 114 | std::vector<double> m2; | ||
| 115 | std::vector<double> m3; | ||
| 116 | std::vector<double> m4; | ||
| 117 | std::vector<double> m5; | ||
| 118 | std::vector<double> m6; | ||
| 119 | std::vector<double> m7; | ||
| 120 | |||
| 121 | 56 | Frame(std::vector<double> a_in, std::vector<double> b_in, std::size_t n_in, int parent_slot_in) | |
| 122 | 56 | : n(n_in), | |
| 123 | 56 | half(n_in / 2), | |
| 124 | 56 | parent_slot(parent_slot_in), | |
| 125 | a(std::move(a_in)), | ||
| 126 | b(std::move(b_in)), | ||
| 127 |
1/4✓ Branch 1 taken 56 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
56 | c(n_in * n_in, 0.0) {} |
| 128 | }; | ||
| 129 | |||
| 130 | std::vector<double> &SelectM(Frame &f, int slot) { | ||
| 131 | if (slot == 1) { | ||
| 132 | ✗ | return f.m1; | |
| 133 | } | ||
| 134 | if (slot == 2) { | ||
| 135 | ✗ | return f.m2; | |
| 136 | } | ||
| 137 | if (slot == 3) { | ||
| 138 | ✗ | return f.m3; | |
| 139 | } | ||
| 140 | if (slot == 4) { | ||
| 141 | ✗ | return f.m4; | |
| 142 | } | ||
| 143 | if (slot == 5) { | ||
| 144 | ✗ | return f.m5; | |
| 145 | } | ||
| 146 | if (slot == 6) { | ||
| 147 | ✗ | return f.m6; | |
| 148 | } | ||
| 149 | ✗ | return f.m7; | |
| 150 | } | ||
| 151 | |||
| 152 | ✗ | Frame MakeChildForSlot(const Frame &f, int slot) { | |
| 153 | ✗ | const std::size_t k = f.half; | |
| 154 | if (slot == 1) { | ||
| 155 | ✗ | return {AddVec(f.a11, f.a22), AddVec(f.b11, f.b22), k, 1}; | |
| 156 | } | ||
| 157 | if (slot == 2) { | ||
| 158 | ✗ | return {AddVec(f.a21, f.a22), f.b11, k, 2}; | |
| 159 | } | ||
| 160 | if (slot == 3) { | ||
| 161 | ✗ | return {f.a11, SubVec(f.b12, f.b22), k, 3}; | |
| 162 | } | ||
| 163 | if (slot == 4) { | ||
| 164 | ✗ | return {f.a22, SubVec(f.b21, f.b11), k, 4}; | |
| 165 | } | ||
| 166 | if (slot == 5) { | ||
| 167 | ✗ | return {AddVec(f.a11, f.a12), f.b22, k, 5}; | |
| 168 | } | ||
| 169 | if (slot == 6) { | ||
| 170 | ✗ | return {SubVec(f.a21, f.a11), AddVec(f.b11, f.b12), k, 6}; | |
| 171 | } | ||
| 172 | ✗ | return {SubVec(f.a12, f.a22), AddVec(f.b21, f.b22), k, 7}; | |
| 173 | } | ||
| 174 | |||
| 175 | 56 | std::vector<double> StrassenIter(std::vector<double> a, std::vector<double> b, std::size_t n) { | |
| 176 | 56 | std::vector<Frame> st; | |
| 177 |
1/2✓ Branch 1 taken 56 times.
✗ Branch 2 not taken.
|
56 | st.emplace_back(std::move(a), std::move(b), n, -1); |
| 178 | |||
| 179 | 56 | std::vector<double> root_out; | |
| 180 | |||
| 181 |
2/2✓ Branch 0 taken 56 times.
✓ Branch 1 taken 56 times.
|
112 | while (!st.empty()) { |
| 182 | Frame &f = st.back(); | ||
| 183 | |||
| 184 |
1/2✓ Branch 0 taken 56 times.
✗ Branch 1 not taken.
|
56 | if (f.n <= kCutoff) { |
| 185 | 56 | NaiveMul(f.a, f.b, f.c, f.n); | |
| 186 | |||
| 187 | 56 | Frame finished = std::move(f); | |
| 188 | st.pop_back(); | ||
| 189 | |||
| 190 |
1/2✓ Branch 0 taken 56 times.
✗ Branch 1 not taken.
|
56 | if (st.empty()) { |
| 191 | root_out = std::move(finished.c); | ||
| 192 | 56 | continue; | |
| 193 | } | ||
| 194 | |||
| 195 | Frame &parent = st.back(); | ||
| 196 | ✗ | SelectM(parent, finished.parent_slot) = std::move(finished.c); | |
| 197 | ✗ | parent.stage += 1; | |
| 198 | ✗ | continue; | |
| 199 | 56 | } | |
| 200 | |||
| 201 | ✗ | if (f.stage == 0) { | |
| 202 | ✗ | SplitMat(f.a, f.n, f.a11, f.a12, f.a21, f.a22); | |
| 203 | ✗ | SplitMat(f.b, f.n, f.b11, f.b12, f.b21, f.b22); | |
| 204 | ✗ | st.push_back(MakeChildForSlot(f, 1)); | |
| 205 | ✗ | continue; | |
| 206 | } | ||
| 207 | |||
| 208 | ✗ | if (f.stage >= 1 && f.stage <= 6) { | |
| 209 | ✗ | const int next_slot = f.stage + 1; | |
| 210 | ✗ | st.push_back(MakeChildForSlot(f, next_slot)); | |
| 211 | ✗ | continue; | |
| 212 | ✗ | } | |
| 213 | |||
| 214 | ✗ | const auto c11 = AddVec(SubVec(AddVec(f.m1, f.m4), f.m5), f.m7); | |
| 215 | ✗ | const auto c12 = AddVec(f.m3, f.m5); | |
| 216 | ✗ | const auto c21 = AddVec(f.m2, f.m4); | |
| 217 | ✗ | const auto c22 = AddVec(AddVec(SubVec(f.m1, f.m2), f.m3), f.m6); | |
| 218 | |||
| 219 | ✗ | f.c = JoinMat(c11, c12, c21, c22, f.n); | |
| 220 | |||
| 221 | ✗ | Frame finished = std::move(f); | |
| 222 | st.pop_back(); | ||
| 223 | |||
| 224 | ✗ | if (st.empty()) { | |
| 225 | root_out = std::move(finished.c); | ||
| 226 | continue; | ||
| 227 | } | ||
| 228 | |||
| 229 | Frame &parent = st.back(); | ||
| 230 | ✗ | SelectM(parent, finished.parent_slot) = std::move(finished.c); | |
| 231 | ✗ | parent.stage += 1; | |
| 232 | ✗ | } | |
| 233 | |||
| 234 | 56 | return root_out; | |
| 235 | 56 | } | |
| 236 | |||
| 237 | } // namespace | ||
| 238 | |||
| 239 |
1/2✓ Branch 1 taken 56 times.
✗ Branch 2 not taken.
|
56 | ZorinDStrassenAlgMatrixSEQ::ZorinDStrassenAlgMatrixSEQ(const InType &in) { |
| 240 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 241 | GetInput() = in; | ||
| 242 | GetOutput().clear(); | ||
| 243 | 56 | } | |
| 244 | |||
| 245 | 56 | bool ZorinDStrassenAlgMatrixSEQ::ValidationImpl() { | |
| 246 | const auto &in = GetInput(); | ||
| 247 |
1/2✓ Branch 0 taken 56 times.
✗ Branch 1 not taken.
|
56 | if (in.n == 0) { |
| 248 | return false; | ||
| 249 | } | ||
| 250 |
1/2✓ Branch 0 taken 56 times.
✗ Branch 1 not taken.
|
56 | if (in.a.size() != in.n * in.n) { |
| 251 | return false; | ||
| 252 | } | ||
| 253 |
1/2✓ Branch 0 taken 56 times.
✗ Branch 1 not taken.
|
56 | if (in.b.size() != in.n * in.n) { |
| 254 | return false; | ||
| 255 | } | ||
| 256 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 56 times.
|
56 | if (!GetOutput().empty()) { |
| 257 | ✗ | return false; | |
| 258 | } | ||
| 259 | return true; | ||
| 260 | } | ||
| 261 | |||
| 262 | 56 | bool ZorinDStrassenAlgMatrixSEQ::PreProcessingImpl() { | |
| 263 | 56 | const auto n = GetInput().n; | |
| 264 | 56 | GetOutput().assign(n * n, 0.0); | |
| 265 | 56 | return true; | |
| 266 | } | ||
| 267 | |||
| 268 | 56 | bool ZorinDStrassenAlgMatrixSEQ::RunImpl() { | |
| 269 | const auto &in = GetInput(); | ||
| 270 |
2/2✓ Branch 0 taken 48 times.
✓ Branch 1 taken 8 times.
|
56 | const std::size_t n = in.n; |
| 271 | const std::size_t m = NextPow2(n); | ||
| 272 | |||
| 273 | 56 | std::vector<double> a_pad(m * m, 0.0); | |
| 274 |
1/2✓ Branch 1 taken 56 times.
✗ Branch 2 not taken.
|
56 | std::vector<double> b_pad(m * m, 0.0); |
| 275 | |||
| 276 |
2/2✓ Branch 0 taken 352 times.
✓ Branch 1 taken 56 times.
|
408 | for (std::size_t i = 0; i < n; ++i) { |
| 277 | 352 | std::copy_n(&in.a[i * n], n, &a_pad[i * m]); | |
| 278 | 352 | std::copy_n(&in.b[i * n], n, &b_pad[i * m]); | |
| 279 | } | ||
| 280 | |||
| 281 |
2/8✓ Branch 1 taken 56 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 56 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
|
112 | const auto c_pad = StrassenIter(std::move(a_pad), std::move(b_pad), m); |
| 282 | |||
| 283 | auto &out = GetOutput(); | ||
| 284 |
2/2✓ Branch 0 taken 352 times.
✓ Branch 1 taken 56 times.
|
408 | for (std::size_t i = 0; i < n; ++i) { |
| 285 | 352 | std::copy_n(&c_pad[i * m], n, &out[i * n]); | |
| 286 | } | ||
| 287 | |||
| 288 | 56 | return true; | |
| 289 | } | ||
| 290 | |||
| 291 | 56 | bool ZorinDStrassenAlgMatrixSEQ::PostProcessingImpl() { | |
| 292 | 56 | return true; | |
| 293 | } | ||
| 294 | |||
| 295 | } // namespace zorin_d_strassen_alg_matrix_seq | ||
| 296 |