| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "akhmetov_daniil_strassen_dense_double_seq/seq/include/ops_seq.hpp" | ||
| 2 | |||
| 3 | #include <cstddef> | ||
| 4 | #include <utility> | ||
| 5 | #include <vector> | ||
| 6 | |||
| 7 | #include "akhmetov_daniil_strassen_dense_double_seq/common/include/common.hpp" | ||
| 8 | |||
| 9 | namespace akhmetov_daniil_strassen_dense_double_seq { | ||
| 10 | |||
| 11 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | AkhmetovDStrassenDenseDoubleSEQ::AkhmetovDStrassenDenseDoubleSEQ(const InType &in) { |
| 12 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 13 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | GetInput() = in; |
| 14 | 24 | } | |
| 15 | |||
| 16 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
|
24 | bool AkhmetovDStrassenDenseDoubleSEQ::ValidationImpl() { |
| 17 | const auto &input = GetInput(); | ||
| 18 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
|
24 | if (input.empty()) { |
| 19 | return false; | ||
| 20 | } | ||
| 21 | 24 | size_t n = format::GetN(input); | |
| 22 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
|
24 | if (n == 0) { |
| 23 | return false; | ||
| 24 | } | ||
| 25 | 24 | size_t expected_size = 1 + (2 * n * n); | |
| 26 | 24 | return input.size() == expected_size; | |
| 27 | } | ||
| 28 | |||
| 29 | 24 | bool AkhmetovDStrassenDenseDoubleSEQ::PreProcessingImpl() { | |
| 30 | const auto &input = GetInput(); | ||
| 31 | 24 | size_t n = format::GetN(input); | |
| 32 | 24 | GetOutput().resize(n * n); | |
| 33 | 24 | return true; | |
| 34 | } | ||
| 35 | |||
| 36 | namespace { | ||
| 37 | const size_t kThreshold = 64; | ||
| 38 | |||
| 39 | 456 | Matrix StandardMultiply(const Matrix &a, const Matrix &b, size_t size) { | |
| 40 | 456 | Matrix c(size * size, 0.0); | |
| 41 |
2/2✓ Branch 0 taken 29184 times.
✓ Branch 1 taken 456 times.
|
29640 | for (size_t i = 0; i < size; ++i) { |
| 42 |
2/2✓ Branch 0 taken 1867776 times.
✓ Branch 1 taken 29184 times.
|
1896960 | for (size_t j = 0; j < size; ++j) { |
| 43 | double sum = 0.0; | ||
| 44 |
2/2✓ Branch 0 taken 119537664 times.
✓ Branch 1 taken 1867776 times.
|
121405440 | for (size_t k = 0; k < size; ++k) { |
| 45 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 119537664 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 119537664 times.
|
119537664 | sum += a.at((i * size) + k) * b.at((k * size) + j); |
| 46 | } | ||
| 47 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1867776 times.
|
1867776 | c.at((i * size) + j) = sum; |
| 48 | } | ||
| 49 | } | ||
| 50 | 456 | return c; | |
| 51 | } | ||
| 52 | |||
| 53 | 432 | inline void Add(const Matrix &a, const Matrix &b, Matrix &c) { | |
| 54 | const size_t n = a.size(); | ||
| 55 |
2/2✓ Branch 0 taken 2359296 times.
✓ Branch 1 taken 432 times.
|
2359728 | for (size_t i = 0; i < n; ++i) { |
| 56 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 2359296 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2359296 times.
|
2359296 | c.at(i) = a.at(i) + b.at(i); |
| 57 | } | ||
| 58 | 432 | } | |
| 59 | |||
| 60 | 288 | inline void Sub(const Matrix &a, const Matrix &b, Matrix &c) { | |
| 61 | const size_t n = a.size(); | ||
| 62 |
2/2✓ Branch 0 taken 1572864 times.
✓ Branch 1 taken 288 times.
|
1573152 | for (size_t i = 0; i < n; ++i) { |
| 63 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 1572864 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 1572864 times.
|
1572864 | c.at(i) = a.at(i) - b.at(i); |
| 64 | } | ||
| 65 | 288 | } | |
| 66 | |||
| 67 | 144 | void SplitMatrix(const Matrix &src, Matrix &a11, Matrix &a12, Matrix &a21, Matrix &a22, size_t size, size_t half) { | |
| 68 |
2/2✓ Branch 0 taken 10240 times.
✓ Branch 1 taken 144 times.
|
10384 | for (size_t i = 0; i < half; ++i) { |
| 69 |
2/2✓ Branch 0 taken 786432 times.
✓ Branch 1 taken 10240 times.
|
796672 | for (size_t j = 0; j < half; ++j) { |
| 70 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 786432 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 786432 times.
|
786432 | a11.at((i * half) + j) = src.at((i * size) + j); |
| 71 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 786432 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 786432 times.
|
786432 | a12.at((i * half) + j) = src.at((i * size) + j + half); |
| 72 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 786432 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 786432 times.
|
786432 | a21.at((i * half) + j) = src.at(((i + half) * size) + j); |
| 73 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 786432 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 786432 times.
|
786432 | a22.at((i * half) + j) = src.at(((i + half) * size) + j + half); |
| 74 | } | ||
| 75 | } | ||
| 76 | 144 | } | |
| 77 | |||
| 78 | 72 | void MergeMatrix(Matrix &dst, const Matrix &c11, const Matrix &c12, const Matrix &c21, const Matrix &c22, size_t size, | |
| 79 | size_t half) { | ||
| 80 |
2/2✓ Branch 0 taken 5120 times.
✓ Branch 1 taken 72 times.
|
5192 | for (size_t i = 0; i < half; ++i) { |
| 81 |
2/2✓ Branch 0 taken 393216 times.
✓ Branch 1 taken 5120 times.
|
398336 | for (size_t j = 0; j < half; ++j) { |
| 82 |
3/6✗ Branch 0 not taken.
✓ Branch 1 taken 393216 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 393216 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 393216 times.
|
393216 | dst.at((i * size) + j) = c11.at((i * half) + j); |
| 83 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 393216 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 393216 times.
|
393216 | dst.at((i * size) + j + half) = c12.at((i * half) + j); |
| 84 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 393216 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 393216 times.
|
393216 | dst.at(((i + half) * size) + j) = c21.at((i * half) + j); |
| 85 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 393216 times.
|
393216 | dst.at(((i + half) * size) + j + half) = c22.at((i * half) + j); |
| 86 | } | ||
| 87 | } | ||
| 88 | 72 | } | |
| 89 | |||
| 90 | 16 | void CreatePaddedMatrices(const Matrix &a, const Matrix &b, size_t n, size_t new_n, Matrix &a_padded, | |
| 91 | Matrix &b_padded) { | ||
| 92 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
|
16 | if (new_n != n) { |
| 93 | ✗ | a_padded.assign(new_n * new_n, 0.0); | |
| 94 | ✗ | b_padded.assign(new_n * new_n, 0.0); | |
| 95 | ✗ | for (size_t i = 0; i < n; ++i) { | |
| 96 | ✗ | for (size_t j = 0; j < n; ++j) { | |
| 97 | ✗ | a_padded.at((i * new_n) + j) = a.at((i * n) + j); | |
| 98 | ✗ | b_padded.at((i * new_n) + j) = b.at((i * n) + j); | |
| 99 | } | ||
| 100 | } | ||
| 101 | } else { | ||
| 102 | 16 | a_padded = a; | |
| 103 | 16 | b_padded = b; | |
| 104 | } | ||
| 105 | 16 | } | |
| 106 | |||
| 107 | struct Frame { | ||
| 108 | Matrix a; | ||
| 109 | Matrix b; | ||
| 110 | Matrix result; | ||
| 111 | size_t size; | ||
| 112 | size_t stage{0}; | ||
| 113 | |||
| 114 | Matrix a11, a12, a21, a22; | ||
| 115 | Matrix b11, b12, b21, b22; | ||
| 116 | |||
| 117 | Matrix m1, m2, m3, m4, m5, m6, m7; | ||
| 118 | Matrix temp_a, temp_b; | ||
| 119 | |||
| 120 | 520 | Frame(Matrix a, Matrix b, size_t s) : a(std::move(a)), b(std::move(b)), size(s) {} | |
| 121 | }; | ||
| 122 | |||
| 123 |
1/2✓ Branch 0 taken 1096 times.
✗ Branch 1 not taken.
|
1096 | void ProcessTopFrame(std::vector<Frame> &stack, Matrix &final_result) { |
| 124 |
1/2✓ Branch 0 taken 1096 times.
✗ Branch 1 not taken.
|
1096 | if (stack.empty()) { |
| 125 | return; | ||
| 126 | } | ||
| 127 | |||
| 128 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1096 times.
|
1096 | size_t current_index = stack.size() - 1; |
| 129 | Frame &frame = stack.at(current_index); | ||
| 130 | |||
| 131 |
2/2✓ Branch 0 taken 448 times.
✓ Branch 1 taken 648 times.
|
1096 | if (frame.size <= kThreshold) { |
| 132 | 448 | Matrix base = StandardMultiply(frame.a, frame.b, frame.size); | |
| 133 | stack.pop_back(); | ||
| 134 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 448 times.
|
448 | if (stack.empty()) { |
| 135 | final_result = std::move(base); | ||
| 136 | } else { | ||
| 137 | 448 | stack.back().temp_a = std::move(base); | |
| 138 | } | ||
| 139 | return; | ||
| 140 | } | ||
| 141 | |||
| 142 | 648 | const size_t half = frame.size / 2; | |
| 143 | 648 | const size_t block_size = half * half; | |
| 144 | |||
| 145 |
9/10✓ Branch 0 taken 72 times.
✓ Branch 1 taken 72 times.
✓ Branch 2 taken 72 times.
✓ Branch 3 taken 72 times.
✓ Branch 4 taken 72 times.
✓ Branch 5 taken 72 times.
✓ Branch 6 taken 72 times.
✓ Branch 7 taken 72 times.
✓ Branch 8 taken 72 times.
✗ Branch 9 not taken.
|
648 | switch (frame.stage) { |
| 146 | 72 | case 0: { | |
| 147 | 72 | frame.a11.resize(block_size); | |
| 148 | 72 | frame.a12.resize(block_size); | |
| 149 | 72 | frame.a21.resize(block_size); | |
| 150 | 72 | frame.a22.resize(block_size); | |
| 151 | 72 | frame.b11.resize(block_size); | |
| 152 | 72 | frame.b12.resize(block_size); | |
| 153 | 72 | frame.b21.resize(block_size); | |
| 154 | 72 | frame.b22.resize(block_size); | |
| 155 | |||
| 156 | 72 | SplitMatrix(frame.a, frame.a11, frame.a12, frame.a21, frame.a22, frame.size, half); | |
| 157 | 72 | SplitMatrix(frame.b, frame.b11, frame.b12, frame.b21, frame.b22, frame.size, half); | |
| 158 | |||
| 159 | 72 | frame.temp_a.resize(block_size); | |
| 160 | 72 | frame.temp_b.resize(block_size); | |
| 161 | 72 | frame.m1.resize(block_size); | |
| 162 | 72 | frame.m2.resize(block_size); | |
| 163 | 72 | frame.m3.resize(block_size); | |
| 164 | 72 | frame.m4.resize(block_size); | |
| 165 | 72 | frame.m5.resize(block_size); | |
| 166 | 72 | frame.m6.resize(block_size); | |
| 167 | 72 | frame.m7.resize(block_size); | |
| 168 | |||
| 169 | 72 | frame.stage = 1; | |
| 170 | 72 | return; | |
| 171 | } | ||
| 172 | |||
| 173 | 72 | case 1: { | |
| 174 | 72 | Add(frame.a11, frame.a22, frame.temp_a); | |
| 175 | 72 | Add(frame.b11, frame.b22, frame.temp_b); | |
| 176 | |||
| 177 | 72 | Matrix temp_a_copy = frame.temp_a; | |
| 178 |
1/2✓ Branch 1 taken 72 times.
✗ Branch 2 not taken.
|
72 | Matrix temp_b_copy = frame.temp_b; |
| 179 | |||
| 180 |
1/2✓ Branch 1 taken 72 times.
✗ Branch 2 not taken.
|
72 | stack.emplace_back(std::move(temp_a_copy), std::move(temp_b_copy), half); |
| 181 | |||
| 182 | Frame &updated_frame = stack.at(current_index); | ||
| 183 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 72 times.
|
72 | updated_frame.stage = 2; |
| 184 | return; | ||
| 185 | } | ||
| 186 | |||
| 187 | 72 | case 2: { | |
| 188 | 72 | frame.m1 = frame.temp_a; | |
| 189 | 72 | Add(frame.a21, frame.a22, frame.temp_a); | |
| 190 | |||
| 191 | 72 | Matrix temp_a_copy = frame.temp_a; | |
| 192 |
1/2✓ Branch 1 taken 72 times.
✗ Branch 2 not taken.
|
72 | stack.emplace_back(std::move(temp_a_copy), frame.b11, half); |
| 193 | |||
| 194 | Frame &updated_frame = stack.at(current_index); | ||
| 195 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 72 times.
|
72 | updated_frame.stage = 3; |
| 196 | return; | ||
| 197 | } | ||
| 198 | |||
| 199 | 72 | case 3: { | |
| 200 | 72 | frame.m2 = frame.temp_a; | |
| 201 | 72 | Sub(frame.b12, frame.b22, frame.temp_b); | |
| 202 | |||
| 203 | 72 | Matrix temp_b_copy = frame.temp_b; | |
| 204 |
1/2✓ Branch 1 taken 72 times.
✗ Branch 2 not taken.
|
72 | stack.emplace_back(frame.a11, std::move(temp_b_copy), half); |
| 205 | |||
| 206 | Frame &updated_frame = stack.at(current_index); | ||
| 207 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 72 times.
|
72 | updated_frame.stage = 4; |
| 208 | return; | ||
| 209 | } | ||
| 210 | |||
| 211 | 72 | case 4: { | |
| 212 | 72 | frame.m3 = frame.temp_a; | |
| 213 | 72 | Sub(frame.b21, frame.b11, frame.temp_b); | |
| 214 | |||
| 215 | 72 | Matrix temp_b_copy = frame.temp_b; | |
| 216 |
1/2✓ Branch 1 taken 72 times.
✗ Branch 2 not taken.
|
72 | stack.emplace_back(frame.a22, std::move(temp_b_copy), half); |
| 217 | |||
| 218 | Frame &updated_frame = stack.at(current_index); | ||
| 219 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 72 times.
|
72 | updated_frame.stage = 5; |
| 220 | return; | ||
| 221 | } | ||
| 222 | |||
| 223 | 72 | case 5: { | |
| 224 | 72 | frame.m4 = frame.temp_a; | |
| 225 | 72 | Add(frame.a11, frame.a12, frame.temp_a); | |
| 226 | |||
| 227 | 72 | Matrix temp_a_copy = frame.temp_a; | |
| 228 |
1/2✓ Branch 1 taken 72 times.
✗ Branch 2 not taken.
|
72 | stack.emplace_back(std::move(temp_a_copy), frame.b22, half); |
| 229 | |||
| 230 | Frame &updated_frame = stack.at(current_index); | ||
| 231 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 72 times.
|
72 | updated_frame.stage = 6; |
| 232 | return; | ||
| 233 | } | ||
| 234 | |||
| 235 | 72 | case 6: { | |
| 236 | 72 | frame.m5 = frame.temp_a; | |
| 237 | 72 | Sub(frame.a21, frame.a11, frame.temp_a); | |
| 238 | 72 | Add(frame.b11, frame.b12, frame.temp_b); | |
| 239 | |||
| 240 | 72 | Matrix temp_a_copy = frame.temp_a; | |
| 241 |
1/2✓ Branch 1 taken 72 times.
✗ Branch 2 not taken.
|
72 | Matrix temp_b_copy = frame.temp_b; |
| 242 |
1/2✓ Branch 1 taken 72 times.
✗ Branch 2 not taken.
|
72 | stack.emplace_back(std::move(temp_a_copy), std::move(temp_b_copy), half); |
| 243 | |||
| 244 | Frame &updated_frame = stack.at(current_index); | ||
| 245 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 72 times.
|
72 | updated_frame.stage = 7; |
| 246 | return; | ||
| 247 | } | ||
| 248 | |||
| 249 | 72 | case 7: { | |
| 250 | 72 | frame.m6 = frame.temp_a; | |
| 251 | 72 | Sub(frame.a12, frame.a22, frame.temp_a); | |
| 252 | 72 | Add(frame.b21, frame.b22, frame.temp_b); | |
| 253 | |||
| 254 | 72 | Matrix temp_a_copy = frame.temp_a; | |
| 255 |
1/2✓ Branch 1 taken 72 times.
✗ Branch 2 not taken.
|
72 | Matrix temp_b_copy = frame.temp_b; |
| 256 |
1/2✓ Branch 1 taken 72 times.
✗ Branch 2 not taken.
|
72 | stack.emplace_back(std::move(temp_a_copy), std::move(temp_b_copy), half); |
| 257 | |||
| 258 | Frame &updated_frame = stack.at(current_index); | ||
| 259 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 72 times.
|
72 | updated_frame.stage = 8; |
| 260 | return; | ||
| 261 | } | ||
| 262 | |||
| 263 | 72 | case 8: { | |
| 264 | 72 | frame.m7 = frame.temp_a; | |
| 265 | |||
| 266 | 72 | Matrix c11(block_size); | |
| 267 |
1/4✓ Branch 1 taken 72 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
72 | Matrix c12(block_size); |
| 268 |
1/4✓ Branch 1 taken 72 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
72 | Matrix c21(block_size); |
| 269 |
1/4✓ Branch 1 taken 72 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
72 | Matrix c22(block_size); |
| 270 | |||
| 271 |
2/2✓ Branch 0 taken 393216 times.
✓ Branch 1 taken 72 times.
|
393288 | for (size_t i = 0; i < block_size; ++i) { |
| 272 |
5/10✗ Branch 0 not taken.
✓ Branch 1 taken 393216 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 393216 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 393216 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 393216 times.
✗ Branch 8 not taken.
✓ Branch 9 taken 393216 times.
|
393216 | c11.at(i) = frame.m1.at(i) + frame.m4.at(i) - frame.m5.at(i) + frame.m7.at(i); |
| 273 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 393216 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 393216 times.
|
393216 | c12.at(i) = frame.m3.at(i) + frame.m5.at(i); |
| 274 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 393216 times.
|
393216 | c21.at(i) = frame.m2.at(i) + frame.m4.at(i); |
| 275 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 393216 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 393216 times.
|
393216 | c22.at(i) = frame.m1.at(i) - frame.m2.at(i) + frame.m3.at(i) + frame.m6.at(i); |
| 276 | } | ||
| 277 | |||
| 278 |
1/4✓ Branch 1 taken 72 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
72 | Matrix merged(frame.size * frame.size); |
| 279 |
1/2✓ Branch 1 taken 72 times.
✗ Branch 2 not taken.
|
72 | MergeMatrix(merged, c11, c12, c21, c22, frame.size, half); |
| 280 | |||
| 281 | stack.pop_back(); | ||
| 282 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 56 times.
|
72 | if (stack.empty()) { |
| 283 | final_result = std::move(merged); | ||
| 284 | } else { | ||
| 285 | 56 | stack.back().temp_a = std::move(merged); | |
| 286 | } | ||
| 287 | return; | ||
| 288 | } | ||
| 289 | |||
| 290 | default: | ||
| 291 | return; | ||
| 292 | } | ||
| 293 | } | ||
| 294 | |||
| 295 | 16 | Matrix StrassenMultiply(const Matrix &a_init, const Matrix &b_init, size_t size_init) { | |
| 296 | 16 | std::vector<Frame> stack; | |
| 297 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | stack.emplace_back(a_init, b_init, size_init); |
| 298 | |||
| 299 | 16 | Matrix result; | |
| 300 | |||
| 301 |
2/2✓ Branch 0 taken 1096 times.
✓ Branch 1 taken 16 times.
|
1112 | while (!stack.empty()) { |
| 302 |
1/2✓ Branch 1 taken 1096 times.
✗ Branch 2 not taken.
|
1096 | ProcessTopFrame(stack, result); |
| 303 | } | ||
| 304 | |||
| 305 | 16 | return result; | |
| 306 | 16 | } | |
| 307 | |||
| 308 | } // namespace | ||
| 309 | |||
| 310 | 24 | bool AkhmetovDStrassenDenseDoubleSEQ::RunImpl() { | |
| 311 | const auto &input = GetInput(); | ||
| 312 | auto &output = GetOutput(); | ||
| 313 | |||
| 314 | 24 | const size_t n = format::GetN(input); | |
| 315 | 24 | const Matrix a = format::GetA(input); | |
| 316 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | const Matrix b = format::GetB(input); |
| 317 | |||
| 318 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 16 times.
|
24 | if (n <= kThreshold) { |
| 319 |
1/4✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
8 | output = StandardMultiply(a, b, n); |
| 320 | 8 | return true; | |
| 321 | } | ||
| 322 | |||
| 323 | size_t new_n = 1; | ||
| 324 |
2/2✓ Branch 0 taken 120 times.
✓ Branch 1 taken 16 times.
|
136 | while (new_n < n) { |
| 325 | 120 | new_n <<= 1; | |
| 326 | } | ||
| 327 | |||
| 328 | 16 | Matrix a_padded; | |
| 329 | 16 | Matrix b_padded; | |
| 330 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | CreatePaddedMatrices(a, b, n, new_n, a_padded, b_padded); |
| 331 | |||
| 332 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | Matrix result_padded = StrassenMultiply(a_padded, b_padded, new_n); |
| 333 | |||
| 334 |
1/4✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
16 | output.assign(n * n, 0.0); |
| 335 |
2/2✓ Branch 0 taken 3072 times.
✓ Branch 1 taken 16 times.
|
3088 | for (size_t i = 0; i < n; ++i) { |
| 336 |
2/2✓ Branch 0 taken 655360 times.
✓ Branch 1 taken 3072 times.
|
658432 | for (size_t j = 0; j < n; ++j) { |
| 337 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 655360 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 655360 times.
|
655360 | output.at((i * n) + j) = result_padded.at((i * new_n) + j); |
| 338 | } | ||
| 339 | } | ||
| 340 | |||
| 341 | return true; | ||
| 342 | } | ||
| 343 | |||
| 344 | 24 | bool AkhmetovDStrassenDenseDoubleSEQ::PostProcessingImpl() { | |
| 345 | const auto &input = GetInput(); | ||
| 346 | 24 | size_t n = format::GetN(input); | |
| 347 | 24 | return GetOutput().size() == n * n; | |
| 348 | } | ||
| 349 | |||
| 350 | } // namespace akhmetov_daniil_strassen_dense_double_seq | ||
| 351 |