| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "volkov_a_sparse_mat_mul_ccs/tbb/include/ops_tbb.hpp" | ||
| 2 | |||
| 3 | #include <tbb/blocked_range.h> | ||
| 4 | #include <tbb/parallel_for.h> | ||
| 5 | |||
| 6 | #include <cmath> | ||
| 7 | #include <tuple> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "volkov_a_sparse_mat_mul_ccs/common/include/common.hpp" | ||
| 11 | |||
| 12 | namespace volkov_a_sparse_mat_mul_ccs { | ||
| 13 | |||
| 14 | namespace { | ||
/// Computes column `col_idx` of the product A * B for matrices kept in a
/// column-compressed layout (`col_ptrs` / `row_indices` / `values`).
///
/// @param col_idx            Column of `matrix_b` (and of the product) to compute.
/// @param matrix_a           Left operand; its `rows_count` fixes the height of the result column.
/// @param matrix_b           Right operand; only the entries of column `col_idx` are read.
/// @param col_accumulator    Dense scratch buffer indexed by row. Products are added on top of its
///                           current contents, so it is expected to hold zeros on entry. It is grown
///                           to `matrix_a.rows_count` if shorter, and its first `rows_count` entries
///                           are reset to zero before returning so it can be reused for the next column.
/// @param local_row_indices  Receives (appended, in ascending row order) the rows of the kept entries.
/// @param local_values       Receives (appended, same order) the values of the kept entries.
template <typename MatrixType>
void ProcessColumn(int col_idx, const MatrixType &matrix_a, const MatrixType &matrix_b,
                   std::vector<double> &col_accumulator, std::vector<int> &local_row_indices,
                   std::vector<double> &local_values) {
  // Accumulated entries whose magnitude does not exceed this tolerance are dropped from the output.
  constexpr double kZeroTolerance = 1e-10;

  // A scratch buffer shorter than the row count would be indexed out of bounds below;
  // extend it with zeros instead of relying on the caller to have sized it correctly.
  const auto row_count = matrix_a.rows_count;
  if (row_count > 0 && col_accumulator.size() < static_cast<std::vector<double>::size_type>(row_count)) {
    col_accumulator.resize(static_cast<std::vector<double>::size_type>(row_count), 0.0);
  }

  // Each stored entry B(b_row, col_idx) scales column `b_row` of A into the accumulator.
  const int b_begin = matrix_b.col_ptrs[col_idx];
  const int b_end = matrix_b.col_ptrs[col_idx + 1];
  for (int k = b_begin; k < b_end; ++k) {
    const int b_row = matrix_b.row_indices[k];
    const double b_val = matrix_b.values[k];

    const int a_begin = matrix_a.col_ptrs[b_row];
    const int a_end = matrix_a.col_ptrs[b_row + 1];
    for (int idx = a_begin; idx < a_end; ++idx) {
      col_accumulator[matrix_a.row_indices[idx]] += matrix_a.values[idx] * b_val;
    }
  }

  // Harvest the kept entries in row order and zero the buffer for the next column.
  for (int i = 0; i < row_count; ++i) {
    const double value = col_accumulator[i];
    if (std::abs(value) > kZeroTolerance) {
      local_row_indices.push_back(i);
      local_values.push_back(value);
    }
    col_accumulator[i] = 0.0;
  }
}
| 44 | |||
| 45 | } // namespace | ||
| 46 | |||
| 47 |
1/2✓ Branch 2 taken 20 times.
✗ Branch 3 not taken.
|
20 | VolkovASparseMatMulCcsTbb::VolkovASparseMatMulCcsTbb(const InType &in) { |
| 48 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 49 | GetInput() = in; | ||
| 50 | 20 | } | |
| 51 | |||
| 52 | 20 | bool VolkovASparseMatMulCcsTbb::ValidationImpl() { | |
| 53 | const auto &matrix_a = std::get<0>(GetInput()); | ||
| 54 | const auto &matrix_b = std::get<1>(GetInput()); | ||
| 55 | 20 | return (matrix_a.cols_count == matrix_b.rows_count); | |
| 56 | } | ||
| 57 | |||
| 58 | 20 | bool VolkovASparseMatMulCcsTbb::PreProcessingImpl() { | |
| 59 | 20 | return true; | |
| 60 | } | ||
| 61 | |||
| 62 | 20 | bool VolkovASparseMatMulCcsTbb::RunImpl() { | |
| 63 | const auto &matrix_a = std::get<0>(GetInput()); | ||
| 64 | const auto &matrix_b = std::get<1>(GetInput()); | ||
| 65 | auto &matrix_c = GetOutput(); | ||
| 66 | |||
| 67 | 20 | matrix_c.rows_count = matrix_a.rows_count; | |
| 68 | 20 | matrix_c.cols_count = matrix_b.cols_count; | |
| 69 | |||
| 70 | 20 | std::vector<std::vector<int>> local_row_indices(matrix_b.cols_count); | |
| 71 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | std::vector<std::vector<double>> local_values(matrix_b.cols_count); |
| 72 | |||
| 73 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | tbb::parallel_for(tbb::blocked_range<int>(0, matrix_b.cols_count), [&](const tbb::blocked_range<int> &range) { |
| 74 | 36 | std::vector<double> col_accumulator(matrix_a.rows_count, 0.0); | |
| 75 | |||
| 76 |
2/2✓ Branch 0 taken 36 times.
✓ Branch 1 taken 36 times.
|
72 | for (int j = range.begin(); j != range.end(); ++j) { |
| 77 |
1/2✓ Branch 1 taken 36 times.
✗ Branch 2 not taken.
|
36 | ProcessColumn(j, matrix_a, matrix_b, col_accumulator, local_row_indices[j], local_values[j]); |
| 78 | } | ||
| 79 | 36 | }); | |
| 80 | |||
| 81 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | matrix_c.col_ptrs.assign(matrix_c.cols_count + 1, 0); |
| 82 |
2/2✓ Branch 0 taken 36 times.
✓ Branch 1 taken 20 times.
|
56 | for (int j = 0; j < matrix_b.cols_count; ++j) { |
| 83 | 36 | matrix_c.col_ptrs[j + 1] = matrix_c.col_ptrs[j] + static_cast<int>(local_row_indices[j].size()); | |
| 84 | } | ||
| 85 | |||
| 86 | 20 | matrix_c.non_zeros = matrix_c.col_ptrs.back(); | |
| 87 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | matrix_c.row_indices.resize(matrix_c.non_zeros); |
| 88 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | matrix_c.values.resize(matrix_c.non_zeros); |
| 89 | |||
| 90 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
56 | tbb::parallel_for(tbb::blocked_range<int>(0, matrix_b.cols_count), [&](const tbb::blocked_range<int> &range) { |
| 91 |
2/2✓ Branch 0 taken 36 times.
✓ Branch 1 taken 36 times.
|
72 | for (int j = range.begin(); j != range.end(); ++j) { |
| 92 | 36 | int offset = matrix_c.col_ptrs[j]; | |
| 93 | 36 | int current_col_size = static_cast<int>(local_row_indices[j].size()); | |
| 94 | |||
| 95 |
2/2✓ Branch 0 taken 48 times.
✓ Branch 1 taken 36 times.
|
84 | for (int k = 0; k < current_col_size; ++k) { |
| 96 | 48 | matrix_c.row_indices[offset + k] = local_row_indices[j][k]; | |
| 97 | 48 | matrix_c.values[offset + k] = local_values[j][k]; | |
| 98 | } | ||
| 99 | } | ||
| 100 | 36 | }); | |
| 101 | |||
| 102 | 20 | return true; | |
| 103 | 20 | } | |
| 104 | |||
| 105 | 20 | bool VolkovASparseMatMulCcsTbb::PostProcessingImpl() { | |
| 106 | 20 | return true; | |
| 107 | } | ||
| 108 | |||
| 109 | } // namespace volkov_a_sparse_mat_mul_ccs | ||
| 110 |