| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "liulin_y_complex_ccs/omp/include/ops_omp.hpp" | ||
| 2 | |||
| 3 | #include <omp.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cmath> | ||
| 7 | #include <complex> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "liulin_y_complex_ccs/common/include/common.hpp" | ||
| 12 | |||
| 13 | namespace liulin_y_complex_ccs { | ||
| 14 | |||
| 15 | namespace { | ||
| 16 | constexpr double kZeroThreshold = 1e-15; | ||
| 17 | |||
| 18 | bool IsValueNonZero(const std::complex<double> &value) { | ||
| 19 | return std::abs(value) > kZeroThreshold; | ||
| 20 | } | ||
| 21 | |||
| 22 | 48 | void ProcessSingleColumn(int col_idx_b, const CCSMatrix &mat_a, const CCSMatrix &mat_b, | |
| 23 | std::vector<std::complex<double>> &accumulator, std::vector<int> &active_rows, | ||
| 24 | std::vector<int> &row_marker, std::vector<std::complex<double>> &out_values, | ||
| 25 | std::vector<int> &out_rows) { | ||
| 26 | 48 | const int col_start = mat_b.col_index[static_cast<size_t>(col_idx_b)]; | |
| 27 | 48 | const int col_end = mat_b.col_index[static_cast<size_t>(col_idx_b) + 1]; | |
| 28 | |||
| 29 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 48 times.
|
88 | for (int idx_b = col_start; idx_b < col_end; ++idx_b) { |
| 30 | 40 | const int row_idx_b = mat_b.row_index[static_cast<size_t>(idx_b)]; | |
| 31 | 40 | const std::complex<double> val_b = mat_b.values[static_cast<size_t>(idx_b)]; | |
| 32 | |||
| 33 | 40 | const int a_start = mat_a.col_index[static_cast<size_t>(row_idx_b)]; | |
| 34 | 40 | const int a_end = mat_a.col_index[static_cast<size_t>(row_idx_b) + 1]; | |
| 35 | |||
| 36 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 40 times.
|
80 | for (int idx_a = a_start; idx_a < a_end; ++idx_a) { |
| 37 |
2/2✓ Branch 0 taken 36 times.
✓ Branch 1 taken 4 times.
|
40 | const int row_idx_a = mat_a.row_index[static_cast<size_t>(idx_a)]; |
| 38 |
2/2✓ Branch 0 taken 36 times.
✓ Branch 1 taken 4 times.
|
40 | if (row_marker[static_cast<size_t>(row_idx_a)] != col_idx_b) { |
| 39 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 32 times.
|
36 | row_marker[static_cast<size_t>(row_idx_a)] = col_idx_b; |
| 40 | active_rows.push_back(row_idx_a); | ||
| 41 | 36 | accumulator[static_cast<size_t>(row_idx_a)] = mat_a.values[static_cast<size_t>(idx_a)] * val_b; | |
| 42 | } else { | ||
| 43 | accumulator[static_cast<size_t>(row_idx_a)] += mat_a.values[static_cast<size_t>(idx_a)] * val_b; | ||
| 44 | } | ||
| 45 | } | ||
| 46 | } | ||
| 47 | |||
| 48 | std::ranges::sort(active_rows); | ||
| 49 | |||
| 50 |
2/2✓ Branch 0 taken 36 times.
✓ Branch 1 taken 48 times.
|
84 | for (const int row_idx_res : active_rows) { |
| 51 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | const std::complex<double> final_val = accumulator[static_cast<size_t>(row_idx_res)]; |
| 52 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | if (IsValueNonZero(final_val)) { |
| 53 | out_values.push_back(final_val); | ||
| 54 | out_rows.push_back(row_idx_res); | ||
| 55 | } | ||
| 56 | } | ||
| 57 | 48 | } | |
| 58 | } // namespace | ||
| 59 | |||
| 60 |
1/2✓ Branch 2 taken 24 times.
✗ Branch 3 not taken.
|
24 | LiulinYComplexCcsOmp::LiulinYComplexCcsOmp(const InType &in) : BaseTask() { |
| 61 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 62 | GetInput() = in; | ||
| 63 | 24 | } | |
| 64 | |||
| 65 | 24 | bool LiulinYComplexCcsOmp::ValidationImpl() { | |
| 66 | const auto &mat_a = GetInput().first; | ||
| 67 | const auto &mat_b = GetInput().second; | ||
| 68 | 24 | return mat_a.count_cols == mat_b.count_rows; | |
| 69 | } | ||
| 70 | |||
| 71 | 24 | bool LiulinYComplexCcsOmp::PreProcessingImpl() { | |
| 72 | const auto &mat_a = GetInput().first; | ||
| 73 | const auto &mat_b = GetInput().second; | ||
| 74 | |||
| 75 | auto &mat_res = GetOutput(); | ||
| 76 | 24 | mat_res.count_rows = mat_a.count_rows; | |
| 77 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
|
24 | mat_res.count_cols = mat_b.count_cols; |
| 78 | mat_res.values.clear(); | ||
| 79 | mat_res.row_index.clear(); | ||
| 80 | 24 | mat_res.col_index.assign(static_cast<size_t>(mat_res.count_cols) + 1, 0); | |
| 81 | |||
| 82 | 24 | return true; | |
| 83 | } | ||
| 84 | |||
| 85 | 24 | bool LiulinYComplexCcsOmp::RunImpl() { | |
| 86 | 24 | const auto &mat_a = GetInput().first; | |
| 87 | 24 | const auto &mat_b = GetInput().second; | |
| 88 | auto &mat_res = GetOutput(); | ||
| 89 | |||
| 90 | 24 | const int rows_a_count = mat_a.count_rows; | |
| 91 | 24 | const int cols_b_count = mat_b.count_cols; | |
| 92 | |||
| 93 | 24 | std::vector<std::vector<std::complex<double>>> thread_values(static_cast<size_t>(cols_b_count)); | |
| 94 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | std::vector<std::vector<int>> thread_row_indices(static_cast<size_t>(cols_b_count)); |
| 95 | |||
| 96 | 24 | #pragma omp parallel default(none) shared(mat_a, mat_b, thread_values, thread_row_indices, cols_b_count, rows_a_count) | |
| 97 | { | ||
| 98 | std::vector<std::complex<double>> accumulator(static_cast<size_t>(rows_a_count), {0.0, 0.0}); | ||
| 99 | std::vector<int> active_rows; | ||
| 100 | std::vector<int> row_marker(static_cast<size_t>(rows_a_count), -1); | ||
| 101 | |||
| 102 | #pragma omp for schedule(dynamic) | ||
| 103 | for (int col_idx_b = 0; col_idx_b < cols_b_count; ++col_idx_b) { | ||
| 104 | ProcessSingleColumn(col_idx_b, mat_a, mat_b, accumulator, active_rows, row_marker, | ||
| 105 | thread_values[static_cast<size_t>(col_idx_b)], | ||
| 106 | thread_row_indices[static_cast<size_t>(col_idx_b)]); | ||
| 107 | active_rows.clear(); | ||
| 108 | } | ||
| 109 | } | ||
| 110 | |||
| 111 | 24 | mat_res.col_index[0] = 0; | |
| 112 |
2/2✓ Branch 0 taken 48 times.
✓ Branch 1 taken 24 times.
|
72 | for (int col_idx = 0; col_idx < cols_b_count; ++col_idx) { |
| 113 | 48 | const auto u_col_idx = static_cast<size_t>(col_idx); | |
| 114 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | mat_res.values.insert(mat_res.values.end(), thread_values[u_col_idx].begin(), thread_values[u_col_idx].end()); |
| 115 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | mat_res.row_index.insert(mat_res.row_index.end(), thread_row_indices[u_col_idx].begin(), |
| 116 | thread_row_indices[u_col_idx].end()); | ||
| 117 | 48 | mat_res.col_index[u_col_idx + 1] = static_cast<int>(mat_res.values.size()); | |
| 118 | } | ||
| 119 | |||
| 120 | 24 | return true; | |
| 121 | 24 | } | |
| 122 | |||
| 123 | 24 | bool LiulinYComplexCcsOmp::PostProcessingImpl() { | |
| 124 | 24 | return true; | |
| 125 | } | ||
| 126 | |||
| 127 | } // namespace liulin_y_complex_ccs | ||
| 128 |