| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "klimovich_v_crs_complex_mat_mul/tbb/include/ops_tbb.hpp" | ||
| 2 | |||
| 3 | #include <algorithm> | ||
| 4 | #include <cmath> | ||
| 5 | #include <cstddef> | ||
| 6 | #include <vector> | ||
| 7 | |||
| 8 | #include "klimovich_v_crs_complex_mat_mul/common/include/common.hpp" | ||
| 9 | #include "oneapi/tbb/blocked_range.h" | ||
| 10 | #include "oneapi/tbb/enumerable_thread_specific.h" | ||
| 11 | #include "oneapi/tbb/parallel_for.h" | ||
| 12 | |||
| 13 | namespace klimovich_v_crs_complex_mat_mul { | ||
| 14 | namespace { | ||
| 15 | |||
| 16 | struct RowStage { | ||
| 17 | std::vector<int> cols; | ||
| 18 | std::vector<Cplx> vals; | ||
| 19 | }; | ||
| 20 | |||
| 21 | struct ThreadCtx { | ||
| 22 | std::vector<Cplx> spa; | ||
| 23 | std::vector<int> touched_by_row; | ||
| 24 | std::vector<int> touched_cols; | ||
| 25 | }; | ||
| 26 | |||
| 27 | 228 | void GustavsonRow(const CrsMatrix &lhs, const CrsMatrix &rhs, int row, ThreadCtx &ctx, RowStage &stage) { | |
| 28 | auto &spa = ctx.spa; | ||
| 29 | auto &touched_by_row = ctx.touched_by_row; | ||
| 30 |
2/2✓ Branch 0 taken 168 times.
✓ Branch 1 taken 60 times.
|
228 | auto &touched_cols = ctx.touched_cols; |
| 31 | touched_cols.clear(); | ||
| 32 | |||
| 33 |
2/2✓ Branch 0 taken 336 times.
✓ Branch 1 taken 228 times.
|
564 | for (int lp = lhs.row_offsets[row]; lp < lhs.row_offsets[row + 1]; ++lp) { |
| 34 | 336 | const int k = lhs.col_indices[lp]; | |
| 35 | 336 | const Cplx a_ik = lhs.data[lp]; | |
| 36 |
2/2✓ Branch 0 taken 604 times.
✓ Branch 1 taken 336 times.
|
940 | for (int rq = rhs.row_offsets[k]; rq < rhs.row_offsets[k + 1]; ++rq) { |
| 37 |
2/2✓ Branch 0 taken 336 times.
✓ Branch 1 taken 268 times.
|
604 | const int j = rhs.col_indices[rq]; |
| 38 |
2/2✓ Branch 0 taken 336 times.
✓ Branch 1 taken 268 times.
|
604 | if (touched_by_row[j] != row) { |
| 39 |
1/2✓ Branch 0 taken 336 times.
✗ Branch 1 not taken.
|
336 | touched_by_row[j] = row; |
| 40 | touched_cols.push_back(j); | ||
| 41 | 336 | spa[j] = a_ik * rhs.data[rq]; | |
| 42 | } else { | ||
| 43 | spa[j] += a_ik * rhs.data[rq]; | ||
| 44 | } | ||
| 45 | } | ||
| 46 | } | ||
| 47 | |||
| 48 | std::ranges::sort(touched_cols); | ||
| 49 | |||
| 50 | stage.cols.clear(); | ||
| 51 | stage.vals.clear(); | ||
| 52 | 228 | stage.cols.reserve(touched_cols.size()); | |
| 53 | 228 | stage.vals.reserve(touched_cols.size()); | |
| 54 | |||
| 55 |
2/2✓ Branch 0 taken 336 times.
✓ Branch 1 taken 228 times.
|
564 | for (const int j : touched_cols) { |
| 56 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 320 times.
|
336 | const Cplx v = spa[j]; |
| 57 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 320 times.
|
336 | spa[j] = Cplx(0.0, 0.0); |
| 58 |
3/4✓ Branch 0 taken 16 times.
✓ Branch 1 taken 320 times.
✓ Branch 2 taken 16 times.
✗ Branch 3 not taken.
|
336 | if (std::abs(v.real()) > kZeroDropTol || std::abs(v.imag()) > kZeroDropTol) { |
| 59 | stage.cols.push_back(j); | ||
| 60 | stage.vals.push_back(v); | ||
| 61 | } | ||
| 62 | } | ||
| 63 | 228 | } | |
| 64 | |||
| 65 | 40 | CrsMatrix Assemble(int rows, int cols, const std::vector<RowStage> &per_row) { | |
| 66 | CrsMatrix out(rows, cols); | ||
| 67 |
2/2✓ Branch 0 taken 228 times.
✓ Branch 1 taken 40 times.
|
268 | for (int i = 0; i < rows; ++i) { |
| 68 | 228 | out.row_offsets[i + 1] = out.row_offsets[i] + static_cast<int>(per_row[i].cols.size()); | |
| 69 | } | ||
| 70 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | out.col_indices.reserve(static_cast<std::size_t>(out.row_offsets[rows])); |
| 71 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | out.data.reserve(static_cast<std::size_t>(out.row_offsets[rows])); |
| 72 |
2/2✓ Branch 0 taken 228 times.
✓ Branch 1 taken 40 times.
|
268 | for (int i = 0; i < rows; ++i) { |
| 73 |
2/4✓ Branch 1 taken 228 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 228 times.
✗ Branch 5 not taken.
|
228 | out.col_indices.insert(out.col_indices.end(), per_row[i].cols.begin(), per_row[i].cols.end()); |
| 74 | 228 | out.data.insert(out.data.end(), per_row[i].vals.begin(), per_row[i].vals.end()); | |
| 75 | } | ||
| 76 | 40 | return out; | |
| 77 | ✗ | } | |
| 78 | |||
| 79 | } // namespace | ||
| 80 | |||
| 81 |
1/2✓ Branch 2 taken 40 times.
✗ Branch 3 not taken.
|
40 | KlimovichVCrsComplexMatMulTbb::KlimovichVCrsComplexMatMulTbb(const InType &in) { |
| 82 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 83 | GetInput() = in; | ||
| 84 | 40 | GetOutput() = CrsMatrix(); | |
| 85 | 40 | } | |
| 86 | |||
| 87 | 40 | bool KlimovichVCrsComplexMatMulTbb::ValidationImpl() { | |
| 88 | const auto &lhs = std::get<0>(GetInput()); | ||
| 89 | const auto &rhs = std::get<1>(GetInput()); | ||
| 90 | 40 | return lhs.n_cols == rhs.n_rows; | |
| 91 | } | ||
| 92 | |||
| 93 | 40 | bool KlimovichVCrsComplexMatMulTbb::PreProcessingImpl() { | |
| 94 | 40 | return true; | |
| 95 | } | ||
| 96 | |||
| 97 | 40 | CrsMatrix KlimovichVCrsComplexMatMulTbb::MultiplyCrs(const CrsMatrix &lhs, const CrsMatrix &rhs) { | |
| 98 | 40 | std::vector<RowStage> per_row(static_cast<std::size_t>(lhs.n_rows)); | |
| 99 | |||
| 100 | 40 | oneapi::tbb::enumerable_thread_specific<ThreadCtx> tls([&rhs] { | |
| 101 | 40 | ThreadCtx c; | |
| 102 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | c.spa.assign(static_cast<std::size_t>(rhs.n_cols), Cplx(0.0, 0.0)); |
| 103 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | c.touched_by_row.assign(static_cast<std::size_t>(rhs.n_cols), -1); |
| 104 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | c.touched_cols.reserve(static_cast<std::size_t>(rhs.n_cols)); |
| 105 | 40 | return c; | |
| 106 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | }); |
| 107 | |||
| 108 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | oneapi::tbb::parallel_for(oneapi::tbb::blocked_range<int>(0, lhs.n_rows), [&](const auto &range) { |
| 109 | 228 | auto &ctx = tls.local(); | |
| 110 |
2/2✓ Branch 0 taken 228 times.
✓ Branch 1 taken 228 times.
|
456 | for (int i = range.begin(); i < range.end(); ++i) { |
| 111 | 228 | GustavsonRow(lhs, rhs, i, ctx, per_row[i]); | |
| 112 | } | ||
| 113 | 228 | }); | |
| 114 | |||
| 115 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
80 | return Assemble(lhs.n_rows, rhs.n_cols, per_row); |
| 116 | 40 | } | |
| 117 | |||
| 118 | 40 | bool KlimovichVCrsComplexMatMulTbb::RunImpl() { | |
| 119 | const auto &lhs = std::get<0>(GetInput()); | ||
| 120 | const auto &rhs = std::get<1>(GetInput()); | ||
| 121 | 40 | GetOutput() = MultiplyCrs(lhs, rhs); | |
| 122 | 40 | return true; | |
| 123 | } | ||
| 124 | |||
| 125 | 40 | bool KlimovichVCrsComplexMatMulTbb::PostProcessingImpl() { | |
| 126 | 40 | return true; | |
| 127 | } | ||
| 128 | |||
| 129 | } // namespace klimovich_v_crs_complex_mat_mul | ||
| 130 |