| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "klimovich_v_crs_complex_mat_mul/omp/include/ops_omp.hpp" | ||
| 2 | |||
| 3 | #include <algorithm> | ||
| 4 | #include <cmath> | ||
| 5 | #include <cstddef> | ||
| 6 | #include <vector> | ||
| 7 | |||
| 8 | #include "klimovich_v_crs_complex_mat_mul/common/include/common.hpp" | ||
| 9 | |||
| 10 | namespace klimovich_v_crs_complex_mat_mul { | ||
| 11 | namespace { | ||
| 12 | |||
| 13 | struct RowStage { | ||
| 14 | std::vector<int> cols; | ||
| 15 | std::vector<Cplx> vals; | ||
| 16 | }; | ||
| 17 | |||
| 18 |
2/2✓ Branch 0 taken 168 times.
✓ Branch 1 taken 60 times.
|
228 | void GustavsonRow(const CrsMatrix &lhs, const CrsMatrix &rhs, int row, std::vector<Cplx> &spa, |
| 19 | std::vector<int> &touched_by_row, std::vector<int> &touched_cols, RowStage &stage) { | ||
| 20 | touched_cols.clear(); | ||
| 21 | |||
| 22 |
2/2✓ Branch 0 taken 336 times.
✓ Branch 1 taken 228 times.
|
564 | for (int lp = lhs.row_offsets[row]; lp < lhs.row_offsets[row + 1]; ++lp) { |
| 23 | 336 | const int k = lhs.col_indices[lp]; | |
| 24 | 336 | const Cplx a_ik = lhs.data[lp]; | |
| 25 |
2/2✓ Branch 0 taken 604 times.
✓ Branch 1 taken 336 times.
|
940 | for (int rq = rhs.row_offsets[k]; rq < rhs.row_offsets[k + 1]; ++rq) { |
| 26 |
2/2✓ Branch 0 taken 336 times.
✓ Branch 1 taken 268 times.
|
604 | const int j = rhs.col_indices[rq]; |
| 27 |
2/2✓ Branch 0 taken 336 times.
✓ Branch 1 taken 268 times.
|
604 | if (touched_by_row[j] != row) { |
| 28 |
1/2✓ Branch 0 taken 336 times.
✗ Branch 1 not taken.
|
336 | touched_by_row[j] = row; |
| 29 | touched_cols.push_back(j); | ||
| 30 | 336 | spa[j] = a_ik * rhs.data[rq]; | |
| 31 | } else { | ||
| 32 | spa[j] += a_ik * rhs.data[rq]; | ||
| 33 | } | ||
| 34 | } | ||
| 35 | } | ||
| 36 | |||
| 37 | std::ranges::sort(touched_cols); | ||
| 38 | |||
| 39 | stage.cols.clear(); | ||
| 40 | stage.vals.clear(); | ||
| 41 | 228 | stage.cols.reserve(touched_cols.size()); | |
| 42 | 228 | stage.vals.reserve(touched_cols.size()); | |
| 43 | |||
| 44 |
2/2✓ Branch 0 taken 336 times.
✓ Branch 1 taken 228 times.
|
564 | for (const int j : touched_cols) { |
| 45 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 320 times.
|
336 | const Cplx v = spa[j]; |
| 46 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 320 times.
|
336 | spa[j] = Cplx(0.0, 0.0); |
| 47 |
3/4✓ Branch 0 taken 16 times.
✓ Branch 1 taken 320 times.
✓ Branch 2 taken 16 times.
✗ Branch 3 not taken.
|
336 | if (std::abs(v.real()) > kZeroDropTol || std::abs(v.imag()) > kZeroDropTol) { |
| 48 | stage.cols.push_back(j); | ||
| 49 | stage.vals.push_back(v); | ||
| 50 | } | ||
| 51 | } | ||
| 52 | 228 | } | |
| 53 | |||
| 54 | 40 | CrsMatrix Assemble(int rows, int cols, const std::vector<RowStage> &per_row) { | |
| 55 | CrsMatrix out(rows, cols); | ||
| 56 |
2/2✓ Branch 0 taken 228 times.
✓ Branch 1 taken 40 times.
|
268 | for (int i = 0; i < rows; ++i) { |
| 57 | 228 | out.row_offsets[i + 1] = out.row_offsets[i] + static_cast<int>(per_row[i].cols.size()); | |
| 58 | } | ||
| 59 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | out.col_indices.reserve(static_cast<std::size_t>(out.row_offsets[rows])); |
| 60 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | out.data.reserve(static_cast<std::size_t>(out.row_offsets[rows])); |
| 61 |
2/2✓ Branch 0 taken 228 times.
✓ Branch 1 taken 40 times.
|
268 | for (int i = 0; i < rows; ++i) { |
| 62 |
2/4✓ Branch 1 taken 228 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 228 times.
✗ Branch 5 not taken.
|
228 | out.col_indices.insert(out.col_indices.end(), per_row[i].cols.begin(), per_row[i].cols.end()); |
| 63 | 228 | out.data.insert(out.data.end(), per_row[i].vals.begin(), per_row[i].vals.end()); | |
| 64 | } | ||
| 65 | 40 | return out; | |
| 66 | ✗ | } | |
| 67 | |||
| 68 | } // namespace | ||
| 69 | |||
| 70 |
1/2✓ Branch 2 taken 40 times.
✗ Branch 3 not taken.
|
40 | KlimovichVCrsComplexMatMulOmp::KlimovichVCrsComplexMatMulOmp(const InType &in) { |
| 71 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 72 | GetInput() = in; | ||
| 73 | 40 | GetOutput() = CrsMatrix(); | |
| 74 | 40 | } | |
| 75 | |||
| 76 | 40 | bool KlimovichVCrsComplexMatMulOmp::ValidationImpl() { | |
| 77 | const auto &lhs = std::get<0>(GetInput()); | ||
| 78 | const auto &rhs = std::get<1>(GetInput()); | ||
| 79 | 40 | return lhs.n_cols == rhs.n_rows; | |
| 80 | } | ||
| 81 | |||
| 82 | 40 | bool KlimovichVCrsComplexMatMulOmp::PreProcessingImpl() { | |
| 83 | 40 | return true; | |
| 84 | } | ||
| 85 | |||
| 86 | 40 | CrsMatrix KlimovichVCrsComplexMatMulOmp::MultiplyCrs(const CrsMatrix &lhs, const CrsMatrix &rhs) { | |
| 87 | 40 | std::vector<RowStage> per_row(static_cast<std::size_t>(lhs.n_rows)); | |
| 88 | |||
| 89 | 40 | #pragma omp parallel default(none) shared(lhs, rhs, per_row) | |
| 90 | { | ||
| 91 | std::vector<Cplx> spa(static_cast<std::size_t>(rhs.n_cols)); | ||
| 92 | std::vector<int> touched_by_row(static_cast<std::size_t>(rhs.n_cols), -1); | ||
| 93 | std::vector<int> touched_cols; | ||
| 94 | touched_cols.reserve(static_cast<std::size_t>(rhs.n_cols)); | ||
| 95 | |||
| 96 | #pragma omp for schedule(dynamic, 16) | ||
| 97 | for (int i = 0; i < lhs.n_rows; ++i) { | ||
| 98 | GustavsonRow(lhs, rhs, i, spa, touched_by_row, touched_cols, per_row[i]); | ||
| 99 | } | ||
| 100 | } | ||
| 101 | |||
| 102 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
80 | return Assemble(lhs.n_rows, rhs.n_cols, per_row); |
| 103 | 40 | } | |
| 104 | |||
| 105 | 40 | bool KlimovichVCrsComplexMatMulOmp::RunImpl() { | |
| 106 | const auto &lhs = std::get<0>(GetInput()); | ||
| 107 | const auto &rhs = std::get<1>(GetInput()); | ||
| 108 | 40 | GetOutput() = MultiplyCrs(lhs, rhs); | |
| 109 | 40 | return true; | |
| 110 | } | ||
| 111 | |||
| 112 | 40 | bool KlimovichVCrsComplexMatMulOmp::PostProcessingImpl() { | |
| 113 | 40 | return true; | |
| 114 | } | ||
| 115 | |||
| 116 | } // namespace klimovich_v_crs_complex_mat_mul | ||
| 117 |