| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "romanov_m_matrix_ccs/omp/include/ops_omp.hpp" | ||
| 2 | |||
| 3 | #include <omp.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cmath> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <utility> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "romanov_m_matrix_ccs/common/include/common.hpp" | ||
| 12 | #include "util/include/util.hpp" | ||
| 13 | |||
| 14 | namespace romanov_m_matrix_ccs { | ||
| 15 | |||
| 16 |
1/2✓ Branch 2 taken 12 times.
✗ Branch 3 not taken.
|
12 | RomanovMMatrixCCSOMP::RomanovMMatrixCCSOMP(const InType &in) { |
| 17 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 18 | GetInput() = in; | ||
| 19 | 12 | } | |
| 20 | |||
| 21 | 12 | bool RomanovMMatrixCCSOMP::ValidationImpl() { | |
| 22 | 12 | return GetInput().first.cols_num == GetInput().second.rows_num; | |
| 23 | } | ||
| 24 | |||
| 25 | 12 | bool RomanovMMatrixCCSOMP::PreProcessingImpl() { | |
| 26 | 12 | return true; | |
| 27 | } | ||
| 28 | |||
| 29 | 20 | void RomanovMMatrixCCSOMP::MultiplyColumn(size_t col_index, const MatrixCCS &a, const MatrixCCS &b, | |
| 30 | std::vector<double> &temp_v, std::vector<size_t> &temp_r) { | ||
| 31 | 20 | std::vector<double> accumulator(a.rows_num, 0.0); | |
| 32 |
1/4✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
20 | std::vector<bool> row_mask(a.rows_num, false); |
| 33 | 20 | std::vector<size_t> active_rows; | |
| 34 | |||
| 35 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 20 times.
|
40 | for (size_t kb = b.col_ptrs[col_index]; kb < b.col_ptrs[col_index + 1]; ++kb) { |
| 36 | 20 | size_t k = b.row_inds[kb]; | |
| 37 | 20 | double v_b = b.vals[kb]; | |
| 38 | |||
| 39 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 20 times.
|
40 | for (size_t ka = a.col_ptrs[k]; ka < a.col_ptrs[k + 1]; ++ka) { |
| 40 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
|
20 | size_t i = a.row_inds[ka]; |
| 41 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 8 times.
|
20 | if (!row_mask[i]) { |
| 42 | row_mask[i] = true; | ||
| 43 | active_rows.push_back(i); | ||
| 44 | } | ||
| 45 | 20 | accumulator[i] += a.vals[ka] * v_b; | |
| 46 | } | ||
| 47 | } | ||
| 48 | |||
| 49 | std::ranges::sort(active_rows); | ||
| 50 |
3/4✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 20 times.
|
32 | for (size_t row_idx : active_rows) { |
| 51 |
1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
|
12 | if (std::abs(accumulator[row_idx]) > 1e-12) { |
| 52 | temp_v.push_back(accumulator[row_idx]); | ||
| 53 | temp_r.push_back(row_idx); | ||
| 54 | } | ||
| 55 | } | ||
| 56 | 20 | } | |
| 57 | |||
| 58 | 12 | bool RomanovMMatrixCCSOMP::RunImpl() { | |
| 59 | 12 | const auto &a = GetInput().first; | |
| 60 | 12 | const auto &b = GetInput().second; | |
| 61 | auto &c = GetOutput(); | ||
| 62 | |||
| 63 | 12 | c.rows_num = a.rows_num; | |
| 64 | 12 | c.cols_num = b.cols_num; | |
| 65 | 12 | c.col_ptrs.assign(c.cols_num + 1, 0); | |
| 66 | |||
| 67 | 12 | std::vector<std::vector<double>> temp_vals(c.cols_num); | |
| 68 |
1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
|
12 | std::vector<std::vector<size_t>> temp_rows(c.cols_num); |
| 69 | |||
| 70 |
1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
|
12 | #pragma omp parallel num_threads(ppc::util::GetNumThreads()) default(none) shared(a, b, temp_vals, temp_rows) |
| 71 | { | ||
| 72 | #pragma omp for schedule(dynamic) | ||
| 73 | for (size_t j = 0; j < b.cols_num; ++j) { | ||
| 74 | MultiplyColumn(j, a, b, temp_vals[j], temp_rows[j]); | ||
| 75 | } | ||
| 76 | } | ||
| 77 | |||
| 78 | size_t total_nnz = 0; | ||
| 79 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 12 times.
|
32 | for (size_t j = 0; j < b.cols_num; ++j) { |
| 80 | 20 | c.col_ptrs[j] = total_nnz; | |
| 81 | 20 | total_nnz += temp_vals[j].size(); | |
| 82 | } | ||
| 83 | 12 | c.col_ptrs[b.cols_num] = total_nnz; | |
| 84 | 12 | c.nnz = total_nnz; | |
| 85 | |||
| 86 |
1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
|
12 | c.vals.reserve(total_nnz); |
| 87 |
1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
|
12 | c.row_inds.reserve(total_nnz); |
| 88 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 12 times.
|
32 | for (size_t j = 0; j < b.cols_num; ++j) { |
| 89 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | c.vals.insert(c.vals.end(), temp_vals[j].begin(), temp_vals[j].end()); |
| 90 | 20 | c.row_inds.insert(c.row_inds.end(), temp_rows[j].begin(), temp_rows[j].end()); | |
| 91 | } | ||
| 92 | |||
| 93 | 12 | return true; | |
| 94 | 12 | } | |
| 95 | |||
| 96 | 12 | bool RomanovMMatrixCCSOMP::PostProcessingImpl() { | |
| 97 | 12 | return true; | |
| 98 | } | ||
| 99 | |||
| 100 | } // namespace romanov_m_matrix_ccs | ||
| 101 |