| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "volkov_a_sparse_mat_mul_ccs/stl/include/ops_stl.hpp" | ||
| 2 | |||
| 3 | #include <cmath> | ||
| 4 | #include <cstddef> | ||
| 5 | #include <thread> | ||
| 6 | #include <tuple> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "volkov_a_sparse_mat_mul_ccs/common/include/common.hpp" | ||
| 10 | |||
| 11 | namespace volkov_a_sparse_mat_mul_ccs { | ||
| 12 | |||
| 13 | namespace { | ||
| 14 | |||
| 15 | template <typename Func> | ||
| 16 | 160 | void RunParallelChunks(int total_size, const Func &worker_func) { | |
| 17 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 80 times.
|
160 | if (total_size <= 0) { |
| 18 | ✗ | return; | |
| 19 | } | ||
| 20 | |||
| 21 | 160 | int num_threads = static_cast<int>(std::thread::hardware_concurrency()); | |
| 22 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 80 times.
|
160 | if (num_threads == 0) { |
| 23 | num_threads = 4; | ||
| 24 | } | ||
| 25 | |||
| 26 | 160 | int chunk_size = total_size / num_threads; | |
| 27 | 160 | int remainder = total_size % num_threads; | |
| 28 | |||
| 29 | 160 | std::vector<std::thread> threads; | |
| 30 |
1/2✓ Branch 1 taken 80 times.
✗ Branch 2 not taken.
|
160 | threads.reserve(static_cast<size_t>(num_threads)); |
| 31 | |||
| 32 | 160 | int current_start = 0; | |
| 33 |
2/2✓ Branch 0 taken 320 times.
✓ Branch 1 taken 80 times.
|
800 | for (int i = 0; i < num_threads; ++i) { |
| 34 |
2/2✓ Branch 0 taken 176 times.
✓ Branch 1 taken 144 times.
|
640 | int current_end = current_start + chunk_size + (i < remainder ? 1 : 0); |
| 35 |
2/2✓ Branch 0 taken 144 times.
✓ Branch 1 taken 176 times.
|
640 | if (current_start < current_end) { |
| 36 |
1/2✓ Branch 1 taken 144 times.
✗ Branch 2 not taken.
|
288 | threads.emplace_back(worker_func, current_start, current_end); |
| 37 | } | ||
| 38 | 640 | current_start = current_end; | |
| 39 | } | ||
| 40 | |||
| 41 |
2/2✓ Branch 0 taken 144 times.
✓ Branch 1 taken 80 times.
|
448 | for (auto &t : threads) { |
| 42 |
1/2✓ Branch 0 taken 144 times.
✗ Branch 1 not taken.
|
288 | if (t.joinable()) { |
| 43 |
1/2✓ Branch 1 taken 144 times.
✗ Branch 2 not taken.
|
288 | t.join(); |
| 44 | } | ||
| 45 | } | ||
| 46 | 160 | } | |
| 47 | |||
| 48 | template <typename MatrixType> | ||
| 49 | 72 | void ProcessColumn(int col_idx, const MatrixType &matrix_a, const MatrixType &matrix_b, | |
| 50 | std::vector<double> &col_accumulator, std::vector<int> &local_row_indices, | ||
| 51 | std::vector<double> &local_values) { | ||
| 52 | 72 | int b_start = matrix_b.col_ptrs[col_idx]; | |
| 53 | 72 | int b_end = matrix_b.col_ptrs[col_idx + 1]; | |
| 54 | |||
| 55 |
2/2✓ Branch 0 taken 96 times.
✓ Branch 1 taken 72 times.
|
168 | for (int k = b_start; k < b_end; ++k) { |
| 56 | 96 | int b_row = matrix_b.row_indices[k]; | |
| 57 | 96 | double b_val = matrix_b.values[k]; | |
| 58 | |||
| 59 | 96 | int a_start = matrix_a.col_ptrs[b_row]; | |
| 60 | 96 | int a_end = matrix_a.col_ptrs[b_row + 1]; | |
| 61 | |||
| 62 |
2/2✓ Branch 0 taken 112 times.
✓ Branch 1 taken 96 times.
|
208 | for (int idx = a_start; idx < a_end; ++idx) { |
| 63 | 112 | int a_row = matrix_a.row_indices[idx]; | |
| 64 | 112 | double a_val = matrix_a.values[idx]; | |
| 65 | 112 | col_accumulator[a_row] += a_val * b_val; | |
| 66 | } | ||
| 67 | } | ||
| 68 | |||
| 69 |
2/2✓ Branch 0 taken 160 times.
✓ Branch 1 taken 72 times.
|
232 | for (int i = 0; i < matrix_a.rows_count; ++i) { |
| 70 |
2/2✓ Branch 0 taken 96 times.
✓ Branch 1 taken 64 times.
|
160 | if (std::abs(col_accumulator[i]) > 1e-10) { |
| 71 | local_row_indices.push_back(i); | ||
| 72 | local_values.push_back(col_accumulator[i]); | ||
| 73 | } | ||
| 74 | 160 | col_accumulator[i] = 0.0; | |
| 75 | } | ||
| 76 | 72 | } | |
| 77 | |||
| 78 | } // namespace | ||
| 79 | |||
| 80 |
1/2✓ Branch 2 taken 40 times.
✗ Branch 3 not taken.
|
40 | VolkovASparseMatMulCcsStl::VolkovASparseMatMulCcsStl(const InType &in) { |
| 81 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 82 | GetInput() = in; | ||
| 83 | 40 | } | |
| 84 | |||
| 85 | 40 | bool VolkovASparseMatMulCcsStl::ValidationImpl() { | |
| 86 | const auto &matrix_a = std::get<0>(GetInput()); | ||
| 87 | const auto &matrix_b = std::get<1>(GetInput()); | ||
| 88 | 40 | return (matrix_a.cols_count == matrix_b.rows_count); | |
| 89 | } | ||
| 90 | |||
| 91 | 40 | bool VolkovASparseMatMulCcsStl::PreProcessingImpl() { | |
| 92 | 40 | return true; | |
| 93 | } | ||
| 94 | |||
| 95 | 40 | bool VolkovASparseMatMulCcsStl::RunImpl() { | |
| 96 | const auto &matrix_a = std::get<0>(GetInput()); | ||
| 97 | const auto &matrix_b = std::get<1>(GetInput()); | ||
| 98 | auto &matrix_c = GetOutput(); | ||
| 99 | |||
| 100 | 40 | matrix_c.rows_count = matrix_a.rows_count; | |
| 101 | 40 | matrix_c.cols_count = matrix_b.cols_count; | |
| 102 | |||
| 103 | 40 | std::vector<std::vector<int>> local_row_indices(matrix_b.cols_count); | |
| 104 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | std::vector<std::vector<double>> local_values(matrix_b.cols_count); |
| 105 | |||
| 106 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | RunParallelChunks(matrix_b.cols_count, [&](int start, int end) { |
| 107 | 72 | std::vector<double> col_accumulator(matrix_a.rows_count, 0.0); | |
| 108 |
2/2✓ Branch 0 taken 72 times.
✓ Branch 1 taken 72 times.
|
144 | for (int j = start; j < end; ++j) { |
| 109 |
1/2✓ Branch 1 taken 72 times.
✗ Branch 2 not taken.
|
72 | ProcessColumn(j, matrix_a, matrix_b, col_accumulator, local_row_indices[j], local_values[j]); |
| 110 | } | ||
| 111 | 72 | }); | |
| 112 | |||
| 113 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | matrix_c.col_ptrs.assign(matrix_c.cols_count + 1, 0); |
| 114 |
2/2✓ Branch 0 taken 72 times.
✓ Branch 1 taken 40 times.
|
112 | for (int j = 0; j < matrix_b.cols_count; ++j) { |
| 115 | 72 | matrix_c.col_ptrs[j + 1] = matrix_c.col_ptrs[j] + static_cast<int>(local_row_indices[j].size()); | |
| 116 | } | ||
| 117 | |||
| 118 | 40 | matrix_c.non_zeros = matrix_c.col_ptrs.back(); | |
| 119 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | matrix_c.row_indices.resize(matrix_c.non_zeros); |
| 120 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | matrix_c.values.resize(matrix_c.non_zeros); |
| 121 | |||
| 122 |
1/2✓ Branch 1 taken 40 times.
✗ Branch 2 not taken.
|
40 | RunParallelChunks(matrix_b.cols_count, [&](int start, int end) { |
| 123 |
2/2✓ Branch 0 taken 72 times.
✓ Branch 1 taken 72 times.
|
144 | for (int j = start; j < end; ++j) { |
| 124 | 72 | int offset = matrix_c.col_ptrs[j]; | |
| 125 | 72 | int current_col_size = static_cast<int>(local_row_indices[j].size()); | |
| 126 | |||
| 127 |
2/2✓ Branch 0 taken 96 times.
✓ Branch 1 taken 72 times.
|
168 | for (int k = 0; k < current_col_size; ++k) { |
| 128 | 96 | matrix_c.row_indices[offset + k] = local_row_indices[j][k]; | |
| 129 | 96 | matrix_c.values[offset + k] = local_values[j][k]; | |
| 130 | } | ||
| 131 | } | ||
| 132 | 72 | }); | |
| 133 | |||
| 134 | 40 | return true; | |
| 135 | 40 | } | |
| 136 | |||
| 137 | 40 | bool VolkovASparseMatMulCcsStl::PostProcessingImpl() { | |
| 138 | 40 | return true; | |
| 139 | } | ||
| 140 | |||
| 141 | } // namespace volkov_a_sparse_mat_mul_ccs | ||
| 142 |