| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "borunov_v_complex_ccs/stl/include/ops_stl.hpp" | ||
| 2 | |||
| 3 | #include <algorithm> | ||
| 4 | #include <cmath> | ||
| 5 | #include <complex> | ||
| 6 | #include <cstddef> | ||
| 7 | #include <thread> | ||
| 8 | #include <utility> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "borunov_v_complex_ccs/common/include/common.hpp" | ||
| 12 | |||
| 13 | namespace borunov_v_complex_ccs { | ||
| 14 | |||
| 15 | namespace { | ||
| 16 | |||
| 17 | 96 | void WorkerThread(int thread_id, int num_threads, int num_cols, const SparseMatrix &a, const SparseMatrix &b, | |
| 18 | std::vector<std::complex<double>> &thread_val, std::vector<int> &thread_row_idx, | ||
| 19 | std::vector<int> &thread_col_ptr) { | ||
| 20 | 96 | int start_col = (num_cols * thread_id) / num_threads; | |
| 21 | 96 | int end_col = (num_cols * (thread_id + 1)) / num_threads; | |
| 22 | |||
| 23 | 96 | int num_cols_thread = end_col - start_col; | |
| 24 | 96 | thread_col_ptr.assign(num_cols_thread + 1, 0); | |
| 25 | |||
| 26 | 96 | std::vector<std::complex<double>> col_accumulator(a.num_rows, {0.0, 0.0}); | |
| 27 | 96 | std::vector<int> non_zero_indices; | |
| 28 |
1/4✓ Branch 1 taken 96 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
96 | std::vector<bool> is_non_zero(a.num_rows, false); |
| 29 | |||
| 30 | int current_nnz = 0; | ||
| 31 |
2/2✓ Branch 0 taken 320 times.
✓ Branch 1 taken 96 times.
|
416 | for (int j = start_col; j < end_col; ++j) { |
| 32 |
2/2✓ Branch 0 taken 1232 times.
✓ Branch 1 taken 320 times.
|
1552 | for (int b_idx = b.col_ptrs[j]; b_idx < b.col_ptrs[j + 1]; ++b_idx) { |
| 33 | 1232 | int p = b.row_indices[b_idx]; | |
| 34 | 1232 | std::complex<double> b_val = b.values[b_idx]; | |
| 35 | |||
| 36 |
2/2✓ Branch 0 taken 3784 times.
✓ Branch 1 taken 1232 times.
|
5016 | for (int a_idx = a.col_ptrs[p]; a_idx < a.col_ptrs[p + 1]; ++a_idx) { |
| 37 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3784 times.
|
3784 | int i = a.row_indices[a_idx]; |
| 38 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3784 times.
|
3784 | std::complex<double> a_val = a.values[a_idx]; |
| 39 | |||
| 40 |
2/2✓ Branch 0 taken 2672 times.
✓ Branch 1 taken 1112 times.
|
3784 | if (!is_non_zero[i]) { |
| 41 | is_non_zero[i] = true; | ||
| 42 | non_zero_indices.push_back(i); | ||
| 43 | } | ||
| 44 | 3784 | col_accumulator[i] += a_val * b_val; | |
| 45 | } | ||
| 46 | } | ||
| 47 | |||
| 48 | std::ranges::sort(non_zero_indices); | ||
| 49 | |||
| 50 |
2/2✓ Branch 0 taken 2672 times.
✓ Branch 1 taken 320 times.
|
2992 | for (int i : non_zero_indices) { |
| 51 |
1/2✓ Branch 0 taken 2672 times.
✗ Branch 1 not taken.
|
2672 | if (std::abs(col_accumulator[i]) > 1e-9) { |
| 52 | thread_val.push_back(col_accumulator[i]); | ||
| 53 | thread_row_idx.push_back(i); | ||
| 54 | 2672 | current_nnz++; | |
| 55 | } | ||
| 56 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2672 times.
|
2672 | col_accumulator[i] = {0.0, 0.0}; |
| 57 | is_non_zero[i] = false; | ||
| 58 | } | ||
| 59 | non_zero_indices.clear(); | ||
| 60 | |||
| 61 | 320 | thread_col_ptr[j - start_col + 1] = current_nnz; | |
| 62 | } | ||
| 63 | 96 | } | |
| 64 | |||
| 65 | } // namespace | ||
| 66 | |||
| 67 |
1/2✓ Branch 2 taken 24 times.
✗ Branch 3 not taken.
|
24 | BorunovVComplexCcsSTL::BorunovVComplexCcsSTL(const InType &in) { |
| 68 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 69 | GetInput() = in; | ||
| 70 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | GetOutput().resize(1); |
| 71 | 24 | } | |
| 72 | |||
| 73 | 24 | bool BorunovVComplexCcsSTL::ValidationImpl() { | |
| 74 | const auto &a = GetInput().first; | ||
| 75 | const auto &b = GetInput().second; | ||
| 76 |
1/2✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
|
24 | if (a.num_cols != b.num_rows) { |
| 77 | return false; | ||
| 78 | } | ||
| 79 |
1/2✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
|
24 | if (a.col_ptrs.size() != static_cast<size_t>(a.num_cols) + 1 || |
| 80 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
|
24 | b.col_ptrs.size() != static_cast<size_t>(b.num_cols) + 1) { |
| 81 | ✗ | return false; | |
| 82 | } | ||
| 83 | return true; | ||
| 84 | } | ||
| 85 | |||
| 86 | 24 | bool BorunovVComplexCcsSTL::PreProcessingImpl() { | |
| 87 | const auto &a = GetInput().first; | ||
| 88 | const auto &b = GetInput().second; | ||
| 89 | auto &c = GetOutput()[0]; | ||
| 90 | |||
| 91 | 24 | c.num_rows = a.num_rows; | |
| 92 | 24 | c.num_cols = b.num_cols; | |
| 93 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 24 times.
|
24 | c.col_ptrs.assign(c.num_cols + 1, 0); |
| 94 | c.values.clear(); | ||
| 95 | c.row_indices.clear(); | ||
| 96 | |||
| 97 | 24 | return true; | |
| 98 | } | ||
| 99 | |||
| 100 | 24 | bool BorunovVComplexCcsSTL::RunImpl() { | |
| 101 | 24 | const auto &a = GetInput().first; | |
| 102 | 24 | const auto &b = GetInput().second; | |
| 103 | auto &c = GetOutput()[0]; | ||
| 104 | |||
| 105 | 24 | int num_cols = b.num_cols; | |
| 106 | 24 | unsigned int num_threads = std::thread::hardware_concurrency(); | |
| 107 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
|
24 | if (num_threads == 0) { |
| 108 | ✗ | num_threads = 4; | |
| 109 | } | ||
| 110 | |||
| 111 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
|
24 | if (std::cmp_greater(num_threads, num_cols)) { |
| 112 | ✗ | num_threads = num_cols; | |
| 113 | } | ||
| 114 | |||
| 115 |
1/2✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
|
24 | if (num_cols == 0) { |
| 116 | return true; | ||
| 117 | } | ||
| 118 | |||
| 119 | 24 | std::vector<std::vector<std::complex<double>>> thread_values(num_threads); | |
| 120 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | std::vector<std::vector<int>> thread_row_indices(num_threads); |
| 121 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | std::vector<std::vector<int>> thread_col_ptrs(num_threads); |
| 122 | |||
| 123 | 96 | auto worker = [&](int thread_id) { | |
| 124 | 96 | WorkerThread(thread_id, static_cast<int>(num_threads), num_cols, a, b, thread_values[thread_id], | |
| 125 | 96 | thread_row_indices[thread_id], thread_col_ptrs[thread_id]); | |
| 126 | 120 | }; | |
| 127 | |||
| 128 | 24 | std::vector<std::thread> threads; | |
| 129 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | threads.reserve(num_threads); |
| 130 |
2/2✓ Branch 0 taken 96 times.
✓ Branch 1 taken 24 times.
|
120 | for (unsigned int i = 0; i < num_threads; ++i) { |
| 131 |
1/2✓ Branch 1 taken 96 times.
✗ Branch 2 not taken.
|
96 | threads.emplace_back(worker, i); |
| 132 | } | ||
| 133 | |||
| 134 |
2/2✓ Branch 0 taken 96 times.
✓ Branch 1 taken 24 times.
|
120 | for (auto &t : threads) { |
| 135 |
1/2✓ Branch 1 taken 96 times.
✗ Branch 2 not taken.
|
96 | t.join(); |
| 136 | } | ||
| 137 | |||
| 138 | // merge results | ||
| 139 | int total_nnz = 0; | ||
| 140 |
2/2✓ Branch 0 taken 96 times.
✓ Branch 1 taken 24 times.
|
120 | for (unsigned int i = 0; i < num_threads; ++i) { |
| 141 | 96 | total_nnz += static_cast<int>(thread_values[i].size()); | |
| 142 | } | ||
| 143 | |||
| 144 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | c.values.reserve(total_nnz); |
| 145 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | c.row_indices.reserve(total_nnz); |
| 146 | |||
| 147 | int current_global_ptr = 0; | ||
| 148 | 24 | int num_threads_int = static_cast<int>(num_threads); | |
| 149 |
2/2✓ Branch 0 taken 96 times.
✓ Branch 1 taken 24 times.
|
120 | for (unsigned int i = 0; i < num_threads; ++i) { |
| 150 | 96 | int i_int = static_cast<int>(i); | |
| 151 | 96 | int start_col = (num_cols * i_int) / num_threads_int; | |
| 152 | 96 | int end_col = (num_cols * (i_int + 1)) / num_threads_int; | |
| 153 | |||
| 154 |
2/2✓ Branch 0 taken 320 times.
✓ Branch 1 taken 96 times.
|
416 | for (int j = 0; j < end_col - start_col; ++j) { |
| 155 | 320 | c.col_ptrs[start_col + j + 1] = current_global_ptr + thread_col_ptrs[i][j + 1]; | |
| 156 | } | ||
| 157 | |||
| 158 |
1/2✓ Branch 1 taken 96 times.
✗ Branch 2 not taken.
|
96 | current_global_ptr += static_cast<int>(thread_values[i].size()); |
| 159 | |||
| 160 |
1/2✓ Branch 1 taken 96 times.
✗ Branch 2 not taken.
|
96 | c.values.insert(c.values.end(), thread_values[i].begin(), thread_values[i].end()); |
| 161 | 96 | c.row_indices.insert(c.row_indices.end(), thread_row_indices[i].begin(), thread_row_indices[i].end()); | |
| 162 | } | ||
| 163 | |||
| 164 | return true; | ||
| 165 | 24 | } | |
| 166 | |||
| 167 | 24 | bool BorunovVComplexCcsSTL::PostProcessingImpl() { | |
| 168 | 24 | return true; | |
| 169 | } | ||
| 170 | |||
| 171 | } // namespace borunov_v_complex_ccs | ||
| 172 |