| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "ermakov_a_spar_mat_mult/omp/include/ops_omp.hpp" | ||
| 2 | |||
| 3 | #include <algorithm> | ||
| 4 | #include <complex> | ||
| 5 | #include <cstddef> | ||
| 6 | #include <vector> | ||
| 7 | |||
| 8 | #include "ermakov_a_spar_mat_mult/common/include/common.hpp" | ||
| 9 | |||
| 10 | namespace ermakov_a_spar_mat_mult { | ||
| 11 | |||
| 12 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | ErmakovASparMatMultOMP::ErmakovASparMatMultOMP(const InType &in) { |
| 13 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 14 | GetInput() = in; | ||
| 15 | 16 | } | |
| 16 | |||
| 17 | 32 | bool ErmakovASparMatMultOMP::ValidateMatrix(const MatrixCRS &m) { | |
| 18 |
2/4✓ Branch 0 taken 32 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 32 times.
|
32 | if (m.rows < 0 || m.cols < 0) { |
| 19 | return false; | ||
| 20 | } | ||
| 21 | |||
| 22 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 32 times.
|
32 | if (m.row_ptr.size() != static_cast<std::size_t>(m.rows) + 1) { |
| 23 | return false; | ||
| 24 | } | ||
| 25 | |||
| 26 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 32 times.
|
32 | if (m.values.size() != m.col_index.size()) { |
| 27 | return false; | ||
| 28 | } | ||
| 29 | |||
| 30 | 32 | const int nnz = static_cast<int>(m.values.size()); | |
| 31 | |||
| 32 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 32 times.
|
32 | if (m.row_ptr.empty()) { |
| 33 | return false; | ||
| 34 | } | ||
| 35 | |||
| 36 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 32 times.
|
32 | if (m.row_ptr.front() != 0 || m.row_ptr.back() != nnz) { |
| 37 | return false; | ||
| 38 | } | ||
| 39 | |||
| 40 |
2/2✓ Branch 0 taken 504 times.
✓ Branch 1 taken 32 times.
|
536 | for (int i = 0; i < m.rows; ++i) { |
| 41 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 504 times.
|
504 | if (m.row_ptr[i] > m.row_ptr[i + 1]) { |
| 42 | return false; | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 |
2/2✓ Branch 0 taken 5781 times.
✓ Branch 1 taken 32 times.
|
5813 | for (int k = 0; k < nnz; ++k) { |
| 47 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 5781 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 5781 times.
|
5781 | if (m.col_index[k] < 0 || m.col_index[k] >= m.cols) { |
| 48 | return false; | ||
| 49 | } | ||
| 50 | } | ||
| 51 | |||
| 52 | return true; | ||
| 53 | } | ||
| 54 | |||
| 55 | 16 | bool ErmakovASparMatMultOMP::ValidationImpl() { | |
| 56 | 16 | const auto &a = GetInput().A; | |
| 57 | 16 | const auto &b = GetInput().B; | |
| 58 | |||
| 59 |
1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
|
16 | if (a.cols != b.rows) { |
| 60 | return false; | ||
| 61 | } | ||
| 62 | |||
| 63 |
1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
|
16 | if (!ValidateMatrix(a)) { |
| 64 | return false; | ||
| 65 | } | ||
| 66 | |||
| 67 | 16 | if (!ValidateMatrix(b)) { | |
| 68 | return false; | ||
| 69 | } | ||
| 70 | |||
| 71 | return true; | ||
| 72 | } | ||
| 73 | |||
| 74 | 16 | bool ErmakovASparMatMultOMP::PreProcessingImpl() { | |
| 75 | 16 | a_ = GetInput().A; | |
| 76 | 16 | b_ = GetInput().B; | |
| 77 | |||
| 78 | 16 | c_.rows = a_.rows; | |
| 79 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
|
16 | c_.cols = b_.cols; |
| 80 | |||
| 81 | c_.values.clear(); | ||
| 82 | c_.col_index.clear(); | ||
| 83 | 16 | c_.row_ptr.assign(static_cast<std::size_t>(c_.rows) + 1, 0); | |
| 84 | |||
| 85 | 16 | return true; | |
| 86 | } | ||
| 87 | |||
| 88 |
2/2✓ Branch 0 taken 187 times.
✓ Branch 1 taken 65 times.
|
252 | void ErmakovASparMatMultOMP::AccumulateRowProducts(int row_index, std::vector<std::complex<double>> &row_vals, |
| 89 | std::vector<int> &row_mark, std::vector<int> &used_cols) { | ||
| 90 | used_cols.clear(); | ||
| 91 | |||
| 92 | 252 | const int a_start = a_.row_ptr[row_index]; | |
| 93 | 252 | const int a_end = a_.row_ptr[row_index + 1]; | |
| 94 | |||
| 95 |
2/2✓ Branch 0 taken 2931 times.
✓ Branch 1 taken 252 times.
|
3183 | for (int ak = a_start; ak < a_end; ++ak) { |
| 96 | 2931 | const int j = a_.col_index[ak]; | |
| 97 | 2931 | const auto a_ij = a_.values[ak]; | |
| 98 | |||
| 99 | 2931 | const int b_start = b_.row_ptr[j]; | |
| 100 | 2931 | const int b_end = b_.row_ptr[j + 1]; | |
| 101 | |||
| 102 |
2/2✓ Branch 0 taken 54673 times.
✓ Branch 1 taken 2931 times.
|
57604 | for (int bk = b_start; bk < b_end; ++bk) { |
| 103 |
2/2✓ Branch 0 taken 4570 times.
✓ Branch 1 taken 50103 times.
|
54673 | const int k = b_.col_index[bk]; |
| 104 | 54673 | const auto b_jk = b_.values[bk]; | |
| 105 | |||
| 106 |
2/2✓ Branch 0 taken 4570 times.
✓ Branch 1 taken 50103 times.
|
54673 | if (row_mark[k] != row_index) { |
| 107 |
1/2✓ Branch 0 taken 4570 times.
✗ Branch 1 not taken.
|
4570 | row_mark[k] = row_index; |
| 108 |
1/2✓ Branch 0 taken 4570 times.
✗ Branch 1 not taken.
|
4570 | row_vals[k] = a_ij * b_jk; |
| 109 | used_cols.push_back(k); | ||
| 110 | } else { | ||
| 111 | row_vals[k] += a_ij * b_jk; | ||
| 112 | } | ||
| 113 | } | ||
| 114 | } | ||
| 115 | 252 | } | |
| 116 | |||
| 117 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 252 times.
|
252 | void ErmakovASparMatMultOMP::CollectRowValues(const std::vector<std::complex<double>> &row_vals, |
| 118 | const std::vector<int> &used_cols, std::vector<int> &cols, | ||
| 119 | std::vector<std::complex<double>> &vals) { | ||
| 120 | cols.clear(); | ||
| 121 | vals.clear(); | ||
| 122 | |||
| 123 | 252 | cols.reserve(used_cols.size()); | |
| 124 | 252 | vals.reserve(used_cols.size()); | |
| 125 | |||
| 126 |
2/2✓ Branch 0 taken 4570 times.
✓ Branch 1 taken 252 times.
|
4822 | for (int col : used_cols) { |
| 127 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4570 times.
|
4570 | const auto &v = row_vals[static_cast<std::size_t>(col)]; |
| 128 | if (v != std::complex<double>(0.0, 0.0)) { | ||
| 129 | cols.push_back(col); | ||
| 130 | vals.push_back(v); | ||
| 131 | } | ||
| 132 | } | ||
| 133 | 252 | } | |
| 134 | |||
| 135 | ✗ | void ErmakovASparMatMultOMP::SortUsedCols(std::vector<int> &cols) { | |
| 136 | std::ranges::sort(cols); | ||
| 137 | ✗ | } | |
| 138 | |||
| 139 | 16 | bool ErmakovASparMatMultOMP::RunImpl() { | |
| 140 | 16 | const int m = a_.rows; | |
| 141 | 16 | const int p = b_.cols; | |
| 142 | |||
| 143 |
1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
|
16 | if (a_.cols != b_.rows) { |
| 144 | return false; | ||
| 145 | } | ||
| 146 | |||
| 147 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
|
16 | if (m == 0 || p == 0) { |
| 148 | return true; | ||
| 149 | } | ||
| 150 | |||
| 151 | 16 | std::vector<std::vector<std::complex<double>>> row_values(static_cast<std::size_t>(m)); | |
| 152 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | std::vector<std::vector<int>> row_cols(static_cast<std::size_t>(m)); |
| 153 | |||
| 154 | 16 | #pragma omp parallel default(none) shared(m, p, row_values, row_cols) | |
| 155 | { | ||
| 156 | std::vector<std::complex<double>> row_vals(static_cast<std::size_t>(p), std::complex<double>(0.0, 0.0)); | ||
| 157 | |||
| 158 | std::vector<int> row_mark(static_cast<std::size_t>(p), -1); | ||
| 159 | |||
| 160 | std::vector<int> used_cols; | ||
| 161 | used_cols.reserve(256); | ||
| 162 | |||
| 163 | #pragma omp for | ||
| 164 | for (int i = 0; i < m; ++i) { | ||
| 165 | AccumulateRowProducts(i, row_vals, row_mark, used_cols); | ||
| 166 | |||
| 167 | SortUsedCols(used_cols); | ||
| 168 | |||
| 169 | const auto row_i = static_cast<std::size_t>(i); | ||
| 170 | |||
| 171 | CollectRowValues(row_vals, used_cols, row_cols[row_i], row_values[row_i]); | ||
| 172 | } | ||
| 173 | } | ||
| 174 | |||
| 175 | int nnz = 0; | ||
| 176 | |||
| 177 |
2/2✓ Branch 0 taken 252 times.
✓ Branch 1 taken 16 times.
|
268 | for (int i = 0; i < m; ++i) { |
| 178 | 252 | const auto row_i = static_cast<std::size_t>(i); | |
| 179 | 252 | c_.row_ptr[row_i] = nnz; | |
| 180 | 252 | nnz += static_cast<int>(row_values[row_i].size()); | |
| 181 | } | ||
| 182 | |||
| 183 | 16 | c_.row_ptr[static_cast<std::size_t>(m)] = nnz; | |
| 184 | |||
| 185 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | c_.values.reserve(static_cast<std::size_t>(nnz)); |
| 186 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | c_.col_index.reserve(static_cast<std::size_t>(nnz)); |
| 187 | |||
| 188 |
2/2✓ Branch 0 taken 252 times.
✓ Branch 1 taken 16 times.
|
268 | for (int i = 0; i < m; ++i) { |
| 189 |
1/2✓ Branch 1 taken 252 times.
✗ Branch 2 not taken.
|
252 | const auto row_i = static_cast<std::size_t>(i); |
| 190 | |||
| 191 |
1/2✓ Branch 1 taken 252 times.
✗ Branch 2 not taken.
|
252 | c_.values.insert(c_.values.end(), row_values[row_i].begin(), row_values[row_i].end()); |
| 192 | |||
| 193 | 252 | c_.col_index.insert(c_.col_index.end(), row_cols[row_i].begin(), row_cols[row_i].end()); | |
| 194 | } | ||
| 195 | |||
| 196 | return true; | ||
| 197 | 16 | } | |
| 198 | |||
| 199 | 16 | bool ErmakovASparMatMultOMP::PostProcessingImpl() { | |
| 200 | 16 | GetOutput() = c_; | |
| 201 | 16 | return true; | |
| 202 | } | ||
| 203 | |||
| 204 | } // namespace ermakov_a_spar_mat_mult | ||
| 205 |