| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "romanov_m_matrix_ccs/stl/include/ops_stl.hpp" | ||
| 2 | |||
| 3 | #include <algorithm> | ||
| 4 | #include <cmath> | ||
| 5 | #include <cstddef> | ||
| 6 | #include <thread> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "romanov_m_matrix_ccs/common/include/common.hpp" | ||
| 10 | #include "util/include/util.hpp" | ||
| 11 | |||
| 12 | namespace romanov_m_matrix_ccs { | ||
| 13 | |||
| 14 |
1/2✓ Branch 2 taken 24 times.
✗ Branch 3 not taken.
|
24 | RomanovMMatrixCCSSTL::RomanovMMatrixCCSSTL(const InType &in) { |
| 15 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 16 | GetInput() = in; | ||
| 17 | 24 | } | |
| 18 | |||
| 19 | 24 | bool RomanovMMatrixCCSSTL::ValidationImpl() { | |
| 20 | 24 | return GetInput().first.cols_num == GetInput().second.rows_num; | |
| 21 | } | ||
| 22 | |||
| 23 | 24 | bool RomanovMMatrixCCSSTL::PreProcessingImpl() { | |
| 24 | 24 | return true; | |
| 25 | } | ||
| 26 | |||
| 27 | 40 | void RomanovMMatrixCCSSTL::MultiplyColumn(size_t col_index, const MatrixCCS &a, const MatrixCCS &b, | |
| 28 | std::vector<double> &temp_v, std::vector<size_t> &temp_r) { | ||
| 29 | 40 | std::vector<double> accumulator(a.rows_num, 0.0); | |
| 30 |
1/4✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
40 | std::vector<bool> row_mask(a.rows_num, false); |
| 31 | 40 | std::vector<size_t> active_rows; | |
| 32 | |||
| 33 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 40 times.
|
80 | for (size_t kb = b.col_ptrs[col_index]; kb < b.col_ptrs[col_index + 1]; ++kb) { |
| 34 | 40 | size_t k = b.row_inds[kb]; | |
| 35 | 40 | double v_b = b.vals[kb]; | |
| 36 | |||
| 37 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 40 times.
|
80 | for (size_t ka = a.col_ptrs[k]; ka < a.col_ptrs[k + 1]; ++ka) { |
| 38 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 40 times.
|
40 | size_t i = a.row_inds[ka]; |
| 39 |
2/2✓ Branch 0 taken 24 times.
✓ Branch 1 taken 16 times.
|
40 | if (!row_mask[i]) { |
| 40 | row_mask[i] = true; | ||
| 41 | active_rows.push_back(i); | ||
| 42 | } | ||
| 43 | 40 | accumulator[i] += a.vals[ka] * v_b; | |
| 44 | } | ||
| 45 | } | ||
| 46 | |||
| 47 | std::ranges::sort(active_rows); | ||
| 48 |
3/4✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 24 times.
✓ Branch 3 taken 40 times.
|
64 | for (size_t row_idx : active_rows) { |
| 49 |
1/2✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
|
24 | if (std::abs(accumulator[row_idx]) > 1e-12) { |
| 50 | temp_v.push_back(accumulator[row_idx]); | ||
| 51 | temp_r.push_back(row_idx); | ||
| 52 | } | ||
| 53 | } | ||
| 54 | 40 | } | |
| 55 | |||
| 56 | 24 | bool RomanovMMatrixCCSSTL::RunImpl() { | |
| 57 | 24 | const auto &a = GetInput().first; | |
| 58 | 24 | const auto &b = GetInput().second; | |
| 59 | auto &c = GetOutput(); | ||
| 60 | |||
| 61 | 24 | c.rows_num = a.rows_num; | |
| 62 | 24 | c.cols_num = b.cols_num; | |
| 63 | 24 | c.col_ptrs.assign(c.cols_num + 1, 0); | |
| 64 | |||
| 65 | 24 | std::vector<std::vector<double>> temp_vals(c.cols_num); | |
| 66 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | std::vector<std::vector<size_t>> temp_rows(c.cols_num); |
| 67 | |||
| 68 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | unsigned int num_threads = ppc::util::GetNumThreads(); |
| 69 | 24 | if (num_threads == 0) { | |
| 70 | num_threads = 1; | ||
| 71 | } | ||
| 72 | |||
| 73 | 24 | std::vector<std::thread> threads; | |
| 74 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | threads.reserve(num_threads); |
| 75 | |||
| 76 | 36 | auto worker = [&](size_t start, size_t end) { | |
| 77 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 36 times.
|
76 | for (size_t j = start; j < end; ++j) { |
| 78 | 40 | MultiplyColumn(j, a, b, temp_vals[j], temp_rows[j]); | |
| 79 | } | ||
| 80 | 60 | }; | |
| 81 | |||
| 82 | 24 | size_t chunk_size = b.cols_num / num_threads; | |
| 83 | 24 | size_t remainder = b.cols_num % num_threads; | |
| 84 | 24 | size_t current_start = 0; | |
| 85 | |||
| 86 |
2/2✓ Branch 0 taken 60 times.
✓ Branch 1 taken 24 times.
|
84 | for (unsigned int i = 0; i < num_threads; ++i) { |
| 87 |
2/2✓ Branch 0 taken 38 times.
✓ Branch 1 taken 22 times.
|
60 | size_t current_end = current_start + chunk_size + (i < remainder ? 1 : 0); |
| 88 |
2/2✓ Branch 0 taken 36 times.
✓ Branch 1 taken 24 times.
|
60 | if (current_start < current_end) { |
| 89 |
1/2✓ Branch 1 taken 36 times.
✗ Branch 2 not taken.
|
36 | threads.emplace_back(worker, current_start, current_end); |
| 90 | } | ||
| 91 | 60 | current_start = current_end; | |
| 92 | } | ||
| 93 | |||
| 94 |
2/2✓ Branch 0 taken 36 times.
✓ Branch 1 taken 24 times.
|
60 | for (auto &t : threads) { |
| 95 |
1/2✓ Branch 0 taken 36 times.
✗ Branch 1 not taken.
|
36 | if (t.joinable()) { |
| 96 |
1/2✓ Branch 1 taken 36 times.
✗ Branch 2 not taken.
|
36 | t.join(); |
| 97 | } | ||
| 98 | } | ||
| 99 | |||
| 100 | size_t total_nnz = 0; | ||
| 101 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 24 times.
|
64 | for (size_t j = 0; j < b.cols_num; ++j) { |
| 102 | 40 | c.col_ptrs[j] = total_nnz; | |
| 103 | 40 | total_nnz += temp_vals[j].size(); | |
| 104 | } | ||
| 105 | 24 | c.col_ptrs[b.cols_num] = total_nnz; | |
| 106 | 24 | c.nnz = total_nnz; | |
| 107 | |||
| 108 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | c.vals.reserve(total_nnz); |
| 109 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | c.row_inds.reserve(total_nnz); |
| 110 |
2/2✓ Branch 0 taken 40 times.
✓ Branch 1 taken 24 times.
|
64 | for (size_t j = 0; j < b.cols_num; ++j) { |
| 111 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | c.vals.insert(c.vals.end(), temp_vals[j].begin(), temp_vals[j].end()); |
| 112 | 40 | c.row_inds.insert(c.row_inds.end(), temp_rows[j].begin(), temp_rows[j].end()); | |
| 113 | } | ||
| 114 | |||
| 115 | 24 | return true; | |
| 116 | 24 | } | |
| 117 | |||
| 118 | 24 | bool RomanovMMatrixCCSSTL::PostProcessingImpl() { | |
| 119 | 24 | return true; | |
| 120 | } | ||
| 121 | |||
| 122 | } // namespace romanov_m_matrix_ccs | ||
| 123 |