| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "klimovich_v_crs_complex_mat_mul/stl/include/ops_stl.hpp" | ||
| 2 | |||
| 3 | #include <algorithm> | ||
| 4 | #include <cmath> | ||
| 5 | #include <cstddef> | ||
| 6 | #include <thread> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "klimovich_v_crs_complex_mat_mul/common/include/common.hpp" | ||
| 10 | #include "util/include/util.hpp" | ||
| 11 | |||
| 12 | namespace klimovich_v_crs_complex_mat_mul { | ||
| 13 | namespace { | ||
| 14 | |||
| 15 | struct RowStage { | ||
| 16 | std::vector<int> cols; | ||
| 17 | std::vector<Cplx> vals; | ||
| 18 | }; | ||
| 19 | |||
| 20 |
2/2✓ Branch 0 taken 248 times.
✓ Branch 1 taken 208 times.
|
456 | void GustavsonRow(const CrsMatrix &lhs, const CrsMatrix &rhs, int row, std::vector<Cplx> &spa, |
| 21 | std::vector<int> &touched_by_row, std::vector<int> &touched_cols, RowStage &stage) { | ||
| 22 | touched_cols.clear(); | ||
| 23 | |||
| 24 |
2/2✓ Branch 0 taken 672 times.
✓ Branch 1 taken 456 times.
|
1128 | for (int lp = lhs.row_offsets[row]; lp < lhs.row_offsets[row + 1]; ++lp) { |
| 25 | 672 | const int k = lhs.col_indices[lp]; | |
| 26 | 672 | const Cplx a_ik = lhs.data[lp]; | |
| 27 |
2/2✓ Branch 0 taken 1208 times.
✓ Branch 1 taken 672 times.
|
1880 | for (int rq = rhs.row_offsets[k]; rq < rhs.row_offsets[k + 1]; ++rq) { |
| 28 |
2/2✓ Branch 0 taken 672 times.
✓ Branch 1 taken 536 times.
|
1208 | const int j = rhs.col_indices[rq]; |
| 29 |
2/2✓ Branch 0 taken 672 times.
✓ Branch 1 taken 536 times.
|
1208 | if (touched_by_row[j] != row) { |
| 30 |
1/2✓ Branch 0 taken 672 times.
✗ Branch 1 not taken.
|
672 | touched_by_row[j] = row; |
| 31 | touched_cols.push_back(j); | ||
| 32 | 672 | spa[j] = a_ik * rhs.data[rq]; | |
| 33 | } else { | ||
| 34 | spa[j] += a_ik * rhs.data[rq]; | ||
| 35 | } | ||
| 36 | } | ||
| 37 | } | ||
| 38 | |||
| 39 | std::ranges::sort(touched_cols); | ||
| 40 | |||
| 41 | stage.cols.clear(); | ||
| 42 | stage.vals.clear(); | ||
| 43 | 456 | stage.cols.reserve(touched_cols.size()); | |
| 44 | 456 | stage.vals.reserve(touched_cols.size()); | |
| 45 | |||
| 46 |
2/2✓ Branch 0 taken 672 times.
✓ Branch 1 taken 456 times.
|
1128 | for (const int j : touched_cols) { |
| 47 |
2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 640 times.
|
672 | const Cplx v = spa[j]; |
| 48 |
2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 640 times.
|
672 | spa[j] = Cplx(0.0, 0.0); |
| 49 |
3/4✓ Branch 0 taken 32 times.
✓ Branch 1 taken 640 times.
✓ Branch 2 taken 32 times.
✗ Branch 3 not taken.
|
672 | if (std::abs(v.real()) > kZeroDropTol || std::abs(v.imag()) > kZeroDropTol) { |
| 50 | stage.cols.push_back(j); | ||
| 51 | stage.vals.push_back(v); | ||
| 52 | } | ||
| 53 | } | ||
| 54 | 456 | } | |
| 55 | |||
| 56 | 80 | CrsMatrix Assemble(int rows, int cols, const std::vector<RowStage> &per_row) { | |
| 57 | CrsMatrix out(rows, cols); | ||
| 58 |
2/2✓ Branch 0 taken 456 times.
✓ Branch 1 taken 80 times.
|
536 | for (int i = 0; i < rows; ++i) { |
| 59 | 456 | out.row_offsets[i + 1] = out.row_offsets[i] + static_cast<int>(per_row[i].cols.size()); | |
| 60 | } | ||
| 61 |
1/2✓ Branch 1 taken 80 times.
✗ Branch 2 not taken.
|
80 | out.col_indices.reserve(static_cast<std::size_t>(out.row_offsets[rows])); |
| 62 |
1/2✓ Branch 1 taken 80 times.
✗ Branch 2 not taken.
|
80 | out.data.reserve(static_cast<std::size_t>(out.row_offsets[rows])); |
| 63 |
2/2✓ Branch 0 taken 456 times.
✓ Branch 1 taken 80 times.
|
536 | for (int i = 0; i < rows; ++i) { |
| 64 |
2/4✓ Branch 1 taken 456 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 456 times.
✗ Branch 5 not taken.
|
456 | out.col_indices.insert(out.col_indices.end(), per_row[i].cols.begin(), per_row[i].cols.end()); |
| 65 | 456 | out.data.insert(out.data.end(), per_row[i].vals.begin(), per_row[i].vals.end()); | |
| 66 | } | ||
| 67 | 80 | return out; | |
| 68 | ✗ | } | |
| 69 | |||
| 70 | } // namespace | ||
| 71 | |||
| 72 |
1/2✓ Branch 2 taken 80 times.
✗ Branch 3 not taken.
|
80 | KlimovichVCrsComplexMatMulStl::KlimovichVCrsComplexMatMulStl(const InType &in) { |
| 73 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 74 | GetInput() = in; | ||
| 75 | 80 | GetOutput() = CrsMatrix(); | |
| 76 | 80 | } | |
| 77 | |||
| 78 | 80 | bool KlimovichVCrsComplexMatMulStl::ValidationImpl() { | |
| 79 | const auto &lhs = std::get<0>(GetInput()); | ||
| 80 | const auto &rhs = std::get<1>(GetInput()); | ||
| 81 | 80 | return lhs.n_cols == rhs.n_rows; | |
| 82 | } | ||
| 83 | |||
| 84 | 80 | bool KlimovichVCrsComplexMatMulStl::PreProcessingImpl() { | |
| 85 | 80 | return true; | |
| 86 | } | ||
| 87 | |||
| 88 | 80 | CrsMatrix KlimovichVCrsComplexMatMulStl::MultiplyCrs(const CrsMatrix &lhs, const CrsMatrix &rhs) { | |
| 89 | 80 | std::vector<RowStage> per_row(static_cast<std::size_t>(lhs.n_rows)); | |
| 90 | |||
| 91 |
1/2✓ Branch 1 taken 80 times.
✗ Branch 2 not taken.
|
80 | const int requested_threads = std::max(1, ppc::util::GetNumThreads()); |
| 92 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 68 times.
|
80 | const int total_rows = lhs.n_rows; |
| 93 |
2/2✓ Branch 0 taken 54 times.
✓ Branch 1 taken 26 times.
|
80 | const int worker_count = std::max(1, std::min(requested_threads, total_rows)); |
| 94 | |||
| 95 | 180 | auto worker = [&](int begin, int end) { | |
| 96 | 180 | std::vector<Cplx> spa(static_cast<std::size_t>(rhs.n_cols), Cplx(0.0, 0.0)); | |
| 97 |
1/4✓ Branch 1 taken 180 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
180 | std::vector<int> touched_by_row(static_cast<std::size_t>(rhs.n_cols), -1); |
| 98 | 180 | std::vector<int> touched_cols; | |
| 99 |
1/2✓ Branch 1 taken 180 times.
✗ Branch 2 not taken.
|
180 | touched_cols.reserve(static_cast<std::size_t>(rhs.n_cols)); |
| 100 | |||
| 101 |
2/2✓ Branch 0 taken 456 times.
✓ Branch 1 taken 180 times.
|
636 | for (int i = begin; i < end; ++i) { |
| 102 |
1/2✓ Branch 1 taken 456 times.
✗ Branch 2 not taken.
|
456 | GustavsonRow(lhs, rhs, i, spa, touched_by_row, touched_cols, per_row[i]); |
| 103 | } | ||
| 104 | 180 | }; | |
| 105 | |||
| 106 | 80 | const int chunk = total_rows / worker_count; | |
| 107 | 80 | const int leftover = total_rows % worker_count; | |
| 108 | |||
| 109 | 80 | std::vector<std::thread> pool; | |
| 110 |
1/2✓ Branch 1 taken 80 times.
✗ Branch 2 not taken.
|
80 | pool.reserve(static_cast<std::size_t>(worker_count)); |
| 111 | |||
| 112 | int cursor = 0; | ||
| 113 |
2/2✓ Branch 0 taken 180 times.
✓ Branch 1 taken 80 times.
|
260 | for (int worker_index = 0; worker_index < worker_count; ++worker_index) { |
| 114 | 180 | const int begin = cursor; | |
| 115 |
2/2✓ Branch 0 taken 132 times.
✓ Branch 1 taken 48 times.
|
180 | const int end = begin + chunk + (worker_index < leftover ? 1 : 0); |
| 116 | cursor = end; | ||
| 117 |
1/2✓ Branch 0 taken 180 times.
✗ Branch 1 not taken.
|
180 | if (begin < end) { |
| 118 |
1/2✓ Branch 1 taken 180 times.
✗ Branch 2 not taken.
|
180 | pool.emplace_back(worker, begin, end); |
| 119 | } | ||
| 120 | } | ||
| 121 |
2/2✓ Branch 0 taken 180 times.
✓ Branch 1 taken 80 times.
|
260 | for (auto &th : pool) { |
| 122 |
1/2✓ Branch 1 taken 180 times.
✗ Branch 2 not taken.
|
180 | th.join(); |
| 123 | } | ||
| 124 | |||
| 125 |
1/2✓ Branch 1 taken 80 times.
✗ Branch 2 not taken.
|
160 | return Assemble(lhs.n_rows, rhs.n_cols, per_row); |
| 126 | 80 | } | |
| 127 | |||
| 128 | 80 | bool KlimovichVCrsComplexMatMulStl::RunImpl() { | |
| 129 | const auto &lhs = std::get<0>(GetInput()); | ||
| 130 | const auto &rhs = std::get<1>(GetInput()); | ||
| 131 | 80 | GetOutput() = MultiplyCrs(lhs, rhs); | |
| 132 | 80 | return true; | |
| 133 | } | ||
| 134 | |||
| 135 | 80 | bool KlimovichVCrsComplexMatMulStl::PostProcessingImpl() { | |
| 136 | 80 | return true; | |
| 137 | } | ||
| 138 | |||
| 139 | } // namespace klimovich_v_crs_complex_mat_mul | ||
| 140 |