| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "chyokotov_a_dense_matrix_mul_foxs_algorithm/omp/include/ops_omp.hpp" | ||
| 2 | |||
| 3 | #include <omp.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cmath> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "chyokotov_a_dense_matrix_mul_foxs_algorithm/common/include/common.hpp" | ||
| 10 | |||
| 11 | namespace chyokotov_a_dense_matrix_mul_foxs_algorithm { | ||
| 12 | |||
| 13 | ✗ | ChyokotovADenseMatMulFoxAlgorithmOMP::ChyokotovADenseMatMulFoxAlgorithmOMP(const InType &in) { | |
| 14 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 15 | GetInput() = in; | ||
| 16 | GetOutput().clear(); | ||
| 17 | ✗ | } | |
| 18 | |||
| 19 | ✗ | bool ChyokotovADenseMatMulFoxAlgorithmOMP::ValidationImpl() { | |
| 20 | ✗ | return (GetInput().first.size() == GetInput().second.size()); | |
| 21 | } | ||
| 22 | |||
| 23 | ✗ | bool ChyokotovADenseMatMulFoxAlgorithmOMP::PreProcessingImpl() { | |
| 24 | GetOutput().clear(); | ||
| 25 | ✗ | GetOutput().resize(GetInput().first.size(), 0.0); | |
| 26 | ✗ | return true; | |
| 27 | } | ||
| 28 | |||
| 29 | ✗ | int ChyokotovADenseMatMulFoxAlgorithmOMP::CalculateBlockSize(int n) { | |
| 30 | ✗ | return static_cast<int>(std::sqrt(static_cast<double>(n))); | |
| 31 | } | ||
| 32 | |||
| 33 | ✗ | int ChyokotovADenseMatMulFoxAlgorithmOMP::CountBlock(int n, int size) { | |
| 34 | ✗ | return (n + size - 1) / size; | |
| 35 | } | ||
| 36 | |||
| 37 | ✗ | void ChyokotovADenseMatMulFoxAlgorithmOMP::Matmul(std::vector<double> &a, std::vector<double> &b, int n, int istart, | |
| 38 | int iend, int jstart, int jend, int kstart, int kend) { | ||
| 39 | ✗ | #pragma omp parallel for collapse(2) default(none) shared(a, b, n, istart, iend, jstart, jend, kstart, kend) | |
| 40 | for (int i = istart; i < iend; i++) { | ||
| 41 | for (int j = jstart; j < jend; j++) { | ||
| 42 | double sum = 0.0; | ||
| 43 | for (int k = kstart; k < kend; k++) { | ||
| 44 | sum += a[(i * n) + k] * b[(k * n) + j]; | ||
| 45 | } | ||
| 46 | #pragma omp atomic | ||
| 47 | GetOutput()[(i * n) + j] += sum; | ||
| 48 | } | ||
| 49 | } | ||
| 50 | ✗ | } | |
| 51 | |||
| 52 | ✗ | bool ChyokotovADenseMatMulFoxAlgorithmOMP::RunImpl() { | |
| 53 | ✗ | std::vector<double> a = GetInput().first; | |
| 54 | ✗ | std::vector<double> b = GetInput().second; | |
| 55 | ✗ | int n = static_cast<int>(std::sqrt(static_cast<double>(a.size()))); | |
| 56 | ✗ | if (n == 0) { | |
| 57 | return true; | ||
| 58 | } | ||
| 59 | |||
| 60 | int block_size = CalculateBlockSize(n); | ||
| 61 | int count_block = CountBlock(n, block_size); | ||
| 62 | |||
| 63 | ✗ | for (int ic = 0; ic < count_block; ic++) { | |
| 64 | ✗ | for (int jc = 0; jc < count_block; jc++) { | |
| 65 | ✗ | for (int kc = 0; kc < count_block; kc++) { | |
| 66 | ✗ | int istart = ic * block_size; | |
| 67 | ✗ | int jstart = jc * block_size; | |
| 68 | ✗ | int kstart = kc * block_size; | |
| 69 | |||
| 70 | ✗ | int iend = std::min(istart + block_size, n); | |
| 71 | ✗ | int jend = std::min(jstart + block_size, n); | |
| 72 | ✗ | int kend = std::min(kstart + block_size, n); | |
| 73 | |||
| 74 | Matmul(a, b, n, istart, iend, jstart, jend, kstart, kend); | ||
| 75 | } | ||
| 76 | } | ||
| 77 | } | ||
| 78 | |||
| 79 | return true; | ||
| 80 | } | ||
| 81 | |||
| 82 | ✗ | bool ChyokotovADenseMatMulFoxAlgorithmOMP::PostProcessingImpl() { | |
| 83 | ✗ | return true; | |
| 84 | } | ||
| 85 | |||
| 86 | } // namespace chyokotov_a_dense_matrix_mul_foxs_algorithm | ||
| 87 |