| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "ashihmin_d_mult_matr_crs/stl/include/ops_stl.hpp" | ||
| 2 | |||
| 3 | #include <algorithm> | ||
| 4 | #include <cmath> | ||
| 5 | #include <future> | ||
| 6 | #include <map> | ||
| 7 | #include <thread> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "ashihmin_d_mult_matr_crs/common/include/common.hpp" | ||
| 11 | |||
| 12 | namespace ashihmin_d_mult_matr_crs { | ||
| 13 | |||
| 14 |
1/2✓ Branch 2 taken 48 times.
✗ Branch 3 not taken.
|
48 | AshihminDMultMatrCrsSTL::AshihminDMultMatrCrsSTL(const InType &in) { |
| 15 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 16 | GetInput() = in; | ||
| 17 | 48 | } | |
| 18 | |||
| 19 | 48 | bool AshihminDMultMatrCrsSTL::ValidationImpl() { | |
| 20 | 48 | return GetInput().first.cols == GetInput().second.rows; | |
| 21 | } | ||
| 22 | |||
| 23 | 48 | bool AshihminDMultMatrCrsSTL::PreProcessingImpl() { | |
| 24 | auto &matrix_c = GetOutput(); | ||
| 25 | 48 | matrix_c.rows = GetInput().first.rows; | |
| 26 | 48 | matrix_c.cols = GetInput().second.cols; | |
| 27 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 48 times.
|
48 | matrix_c.row_ptr.assign(matrix_c.rows + 1, 0); |
| 28 | matrix_c.values.clear(); | ||
| 29 | matrix_c.col_index.clear(); | ||
| 30 | 48 | return true; | |
| 31 | } | ||
| 32 | |||
| 33 | 96 | void AshihminDMultMatrCrsSTL::MultiplyRow(int row_idx, const CRSMatrix &matrix_a, const CRSMatrix &matrix_b, | |
| 34 | std::vector<int> &row_cols, std::vector<double> &row_vals) { | ||
| 35 | std::map<int, double> row_accumulator; | ||
| 36 |
2/2✓ Branch 0 taken 104 times.
✓ Branch 1 taken 96 times.
|
200 | for (int j = matrix_a.row_ptr[row_idx]; j < matrix_a.row_ptr[row_idx + 1]; ++j) { |
| 37 | 104 | int col_a = matrix_a.col_index[j]; | |
| 38 | 104 | double val_a = matrix_a.values[j]; | |
| 39 |
2/2✓ Branch 0 taken 160 times.
✓ Branch 1 taken 104 times.
|
264 | for (int k = matrix_b.row_ptr[col_a]; k < matrix_b.row_ptr[col_a + 1]; ++k) { |
| 40 |
1/2✓ Branch 1 taken 160 times.
✗ Branch 2 not taken.
|
160 | row_accumulator[matrix_b.col_index[k]] += val_a * matrix_b.values[k]; |
| 41 | } | ||
| 42 | } | ||
| 43 |
2/2✓ Branch 0 taken 120 times.
✓ Branch 1 taken 96 times.
|
216 | for (const auto &entry : row_accumulator) { |
| 44 |
1/2✓ Branch 0 taken 120 times.
✗ Branch 1 not taken.
|
120 | if (std::abs(entry.second) > 1e-15) { |
| 45 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 120 times.
|
120 | row_cols.push_back(entry.first); |
| 46 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 120 times.
|
120 | row_vals.push_back(entry.second); |
| 47 | } | ||
| 48 | } | ||
| 49 | 96 | } | |
| 50 | |||
| 51 | 48 | bool AshihminDMultMatrCrsSTL::RunImpl() { | |
| 52 | 48 | const auto &matrix_a = GetInput().first; | |
| 53 | 48 | const auto &matrix_b = GetInput().second; | |
| 54 | auto &matrix_c = GetOutput(); | ||
| 55 | 48 | int rows_a = matrix_a.rows; | |
| 56 | |||
| 57 | 48 | std::vector<std::vector<int>> local_cols(rows_a); | |
| 58 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | std::vector<std::vector<double>> local_vals(rows_a); |
| 59 | |||
| 60 | 48 | unsigned int hardware_threads = std::thread::hardware_concurrency(); | |
| 61 |
1/2✓ Branch 0 taken 48 times.
✗ Branch 1 not taken.
|
48 | int num_threads = (hardware_threads == 0) ? 2 : static_cast<int>(hardware_threads); |
| 62 | |||
| 63 | 48 | int chunk_size = (rows_a + num_threads - 1) / num_threads; | |
| 64 | 48 | std::vector<std::future<void>> futures; | |
| 65 | |||
| 66 |
1/2✓ Branch 0 taken 144 times.
✗ Branch 1 not taken.
|
144 | for (int thread_idx = 0; thread_idx < num_threads; ++thread_idx) { |
| 67 | 144 | int start_row = thread_idx * chunk_size; | |
| 68 |
2/2✓ Branch 0 taken 96 times.
✓ Branch 1 taken 48 times.
|
144 | int end_row = std::min(start_row + chunk_size, rows_a); |
| 69 | |||
| 70 |
2/2✓ Branch 0 taken 96 times.
✓ Branch 1 taken 48 times.
|
144 | if (start_row >= end_row) { |
| 71 | break; | ||
| 72 | } | ||
| 73 | |||
| 74 | futures.push_back( | ||
| 75 |
1/2✓ Branch 1 taken 96 times.
✗ Branch 2 not taken.
|
192 | std::async(std::launch::async, [start_row, end_row, &matrix_a, &matrix_b, &local_cols, &local_vals] { |
| 76 |
2/2✓ Branch 0 taken 96 times.
✓ Branch 1 taken 96 times.
|
192 | for (int i = start_row; i < end_row; ++i) { |
| 77 | 96 | MultiplyRow(i, matrix_a, matrix_b, local_cols[i], local_vals[i]); | |
| 78 | } | ||
| 79 | 96 | })); | |
| 80 | } | ||
| 81 | |||
| 82 |
2/2✓ Branch 0 taken 96 times.
✓ Branch 1 taken 48 times.
|
144 | for (auto &fut : futures) { |
| 83 |
1/2✓ Branch 1 taken 96 times.
✗ Branch 2 not taken.
|
96 | fut.get(); |
| 84 | } | ||
| 85 | |||
| 86 |
2/2✓ Branch 0 taken 96 times.
✓ Branch 1 taken 48 times.
|
144 | for (int i = 0; i < rows_a; ++i) { |
| 87 |
1/2✓ Branch 1 taken 96 times.
✗ Branch 2 not taken.
|
96 | matrix_c.col_index.insert(matrix_c.col_index.end(), local_cols[i].begin(), local_cols[i].end()); |
| 88 |
1/2✓ Branch 1 taken 96 times.
✗ Branch 2 not taken.
|
96 | matrix_c.values.insert(matrix_c.values.end(), local_vals[i].begin(), local_vals[i].end()); |
| 89 | 96 | matrix_c.row_ptr[i + 1] = static_cast<int>(matrix_c.values.size()); | |
| 90 | } | ||
| 91 | |||
| 92 | 48 | return true; | |
| 93 | 48 | } | |
| 94 | |||
| 95 | 48 | bool AshihminDMultMatrCrsSTL::PostProcessingImpl() { | |
| 96 | 48 | return true; | |
| 97 | } | ||
| 98 | |||
| 99 | } // namespace ashihmin_d_mult_matr_crs | ||
| 100 |