| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "korolev_k_matrix_mult/omp/include/ops_omp.hpp" | ||
| 2 | |||
| 3 | #include <cstddef> | ||
| 4 | #include <functional> | ||
| 5 | #include <vector> | ||
| 6 | |||
| 7 | #include "korolev_k_matrix_mult/common/include/common.hpp" | ||
| 8 | #include "korolev_k_matrix_mult/common/include/strassen_impl.hpp" | ||
| 9 | #include "util/include/util.hpp" | ||
| 10 | |||
| 11 | namespace korolev_k_matrix_mult { | ||
| 12 | |||
| 13 |
1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
|
12 | KorolevKMatrixMultOMP::KorolevKMatrixMultOMP(const InType &in) { |
| 14 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 15 | GetInput() = in; | ||
| 16 | GetOutput().clear(); | ||
| 17 | 12 | } | |
| 18 | |||
| 19 | 12 | bool KorolevKMatrixMultOMP::ValidationImpl() { | |
| 20 | const auto &in = GetInput(); | ||
| 21 |
4/8✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 12 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 12 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 12 times.
|
12 | return in.n > 0 && in.A.size() == in.n * in.n && in.B.size() == in.n * in.n && GetOutput().empty(); |
| 22 | } | ||
| 23 | |||
| 24 | 12 | bool KorolevKMatrixMultOMP::PreProcessingImpl() { | |
| 25 | 12 | GetOutput().resize(GetInput().n * GetInput().n); | |
| 26 | 12 | return true; | |
| 27 | } | ||
| 28 | |||
| 29 | 12 | bool KorolevKMatrixMultOMP::RunImpl() { | |
| 30 | const auto &in = GetInput(); | ||
| 31 |
1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
|
12 | size_t n = in.n; |
| 32 | size_t np2 = strassen_impl::NextPowerOf2(n); | ||
| 33 | |||
| 34 | ✗ | auto parallel_run = [](std::vector<std::function<void()>> &tasks) { | |
| 35 | ✗ | #pragma omp parallel default(none) num_threads(ppc::util::GetNumThreads()) shared(tasks) | |
| 36 | #pragma omp single | ||
| 37 | { | ||
| 38 | for (size_t i = 0; i < tasks.size(); ++i) { | ||
| 39 | #pragma omp task default(none) firstprivate(i) shared(tasks) | ||
| 40 | tasks[i](); | ||
| 41 | } | ||
| 42 | #pragma omp taskwait | ||
| 43 | } | ||
| 44 | ✗ | }; | |
| 45 | |||
| 46 |
1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
|
12 | if (np2 == n) { |
| 47 |
1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
|
24 | strassen_impl::StrassenMultiply(in.A, in.B, GetOutput(), n, parallel_run); |
| 48 | } else { | ||
| 49 | ✗ | std::vector<double> a_pad(np2 * np2, 0); | |
| 50 | ✗ | std::vector<double> b_pad(np2 * np2, 0); | |
| 51 | ✗ | std::vector<double> c_pad(np2 * np2, 0); | |
| 52 | ✗ | for (size_t i = 0; i < n; ++i) { | |
| 53 | ✗ | for (size_t j = 0; j < n; ++j) { | |
| 54 | ✗ | a_pad[(i * np2) + j] = in.A[(i * n) + j]; | |
| 55 | ✗ | b_pad[(i * np2) + j] = in.B[(i * n) + j]; | |
| 56 | } | ||
| 57 | } | ||
| 58 | ✗ | strassen_impl::StrassenMultiply(a_pad, b_pad, c_pad, np2, parallel_run); | |
| 59 | ✗ | for (size_t i = 0; i < n; ++i) { | |
| 60 | ✗ | for (size_t j = 0; j < n; ++j) { | |
| 61 | ✗ | GetOutput()[(i * n) + j] = c_pad[(i * np2) + j]; | |
| 62 | } | ||
| 63 | } | ||
| 64 | } | ||
| 65 | 12 | return true; | |
| 66 | } | ||
| 67 | |||
| 68 | 12 | bool KorolevKMatrixMultOMP::PostProcessingImpl() { | |
| 69 | 12 | return !GetOutput().empty(); | |
| 70 | } | ||
| 71 | |||
| 72 | } // namespace korolev_k_matrix_mult | ||
| 73 |