GCC Code Coverage Report


Directory: ./
File: tasks/ashihmin_d_mult_matr_crs/all/src/ops_all.cpp
Date: 2026-06-04 20:25:32
Exec Total Coverage
Lines: 77 77 100.0%
Functions: 8 8 100.0%
Branches: 55 88 62.5%

Line Branch Exec Source
1 #include "ashihmin_d_mult_matr_crs/all/include/ops_all.hpp"
2
3 #include <mpi.h>
4 #include <omp.h>
5 #include <tbb/tbb.h>
6
7 #include <algorithm>
8 #include <cmath>
9 #include <map>
10 #include <thread>
11 #include <vector>
12
13 #include "ashihmin_d_mult_matr_crs/common/include/common.hpp"
14 #include "util/include/util.hpp"
15
16 namespace ashihmin_d_mult_matr_crs {
17
18
1/2
✓ Branch 2 taken 12 times.
✗ Branch 3 not taken.
12 AshihminDMultMatrCrsALL::AshihminDMultMatrCrsALL(const InType &in) {
19 SetTypeOfTask(GetStaticTypeOfTask());
20 GetInput() = in;
21 12 }
22
23 12 bool AshihminDMultMatrCrsALL::ValidationImpl() {
24 12 return GetInput().first.cols == GetInput().second.rows;
25 }
26
27 12 bool AshihminDMultMatrCrsALL::PreProcessingImpl() {
28 auto &matrix_c = GetOutput();
29
30 12 matrix_c.rows = GetInput().first.rows;
31 12 matrix_c.cols = GetInput().second.cols;
32 12 return true;
33 }
34
35 12 void AshihminDMultMatrCrsALL::MultiplyRow(int global_row_idx, int local_idx, const CRSMatrix &matrix_a,
36 const CRSMatrix &matrix_b, std::vector<std::vector<int>> &local_cols,
37 std::vector<std::vector<double>> &local_vals) {
38 std::map<int, double> row_accumulator;
39
2/2
✓ Branch 0 taken 13 times.
✓ Branch 1 taken 12 times.
25 for (int j = matrix_a.row_ptr[global_row_idx]; j < matrix_a.row_ptr[global_row_idx + 1]; ++j) {
40 13 int col_a = matrix_a.col_index[j];
41 13 double val_a = matrix_a.values[j];
42
2/2
✓ Branch 0 taken 20 times.
✓ Branch 1 taken 13 times.
33 for (int k = matrix_b.row_ptr[col_a]; k < matrix_b.row_ptr[col_a + 1]; ++k) {
43
1/2
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
20 row_accumulator[matrix_b.col_index[k]] += val_a * matrix_b.values[k];
44 }
45 }
46
47
2/2
✓ Branch 0 taken 15 times.
✓ Branch 1 taken 12 times.
27 for (const auto &entry : row_accumulator) {
48
1/2
✓ Branch 0 taken 15 times.
✗ Branch 1 not taken.
15 if (std::abs(entry.second) > 1e-15) {
49
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 15 times.
15 local_cols[local_idx].push_back(entry.first);
50
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 15 times.
15 local_vals[local_idx].push_back(entry.second);
51 }
52 }
53 12 }
54
55 12 bool AshihminDMultMatrCrsALL::RunImpl() {
56 12 const auto &matrix_a = GetInput().first;
57 12 const auto &matrix_b = GetInput().second;
58 auto &matrix_c = GetOutput();
59
60 12 int rank = 0;
61 12 int size = 0;
62 12 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
63 12 MPI_Comm_size(MPI_COMM_WORLD, &size);
64
65 12 int rows_a = matrix_a.rows;
66 12 int base_rows = rows_a / size;
67 12 int rem = rows_a % size;
68
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 2 times.
12 int my_start = (rank * base_rows) + std::min(rank, rem);
69
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 2 times.
12 int my_end = my_start + base_rows + (rank < rem ? 1 : 0);
70 12 int my_row_count = my_end - my_start;
71
72 12 std::vector<std::vector<int>> local_cols(my_row_count);
73
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 std::vector<std::vector<double>> local_vals(my_row_count);
74
75
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 int thread_count = ppc::util::GetNumThreads();
76 12 std::vector<std::thread> threads;
77
78 12 auto compute_rows = [&](int start_idx, int end_idx) {
79 tbb::parallel_for(start_idx, end_idx,
80 24 [&](int i) { MultiplyRow(my_start + i, i, matrix_a, matrix_b, local_cols, local_vals); });
81 24 };
82
83 12 int stl_chunk = (my_row_count + thread_count - 1) / thread_count;
84
2/2
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 12 times.
36 for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
85 24 int start_chunk = thread_idx * stl_chunk;
86
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
24 int end_chunk = std::min(start_chunk + stl_chunk, my_row_count);
87
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
24 if (start_chunk < end_chunk) {
88
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 threads.emplace_back(compute_rows, start_chunk, end_chunk);
89 }
90 }
91
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
24 for (auto &th : threads) {
92
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 th.join();
93 }
94
95
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 std::vector<int> my_nnz_per_row(my_row_count);
96 12 #pragma omp parallel for default(none) shared(my_nnz_per_row, local_cols, my_row_count)
97 for (int i = 0; i < my_row_count; ++i) {
98 my_nnz_per_row[i] = static_cast<int>(local_cols[i].size());
99 }
100
101 12 std::vector<int> my_flat_cols;
102 12 std::vector<double> my_flat_vals;
103
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
24 for (int i = 0; i < my_row_count; ++i) {
104
2/4
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 12 times.
✗ Branch 5 not taken.
12 my_flat_cols.insert(my_flat_cols.end(), local_cols[i].begin(), local_cols[i].end());
105 12 my_flat_vals.insert(my_flat_vals.end(), local_vals[i].begin(), local_vals[i].end());
106 }
107
108
1/4
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
12 std::vector<int> all_nnz_per_row(rows_a);
109
1/4
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
12 std::vector<int> recv_counts(size);
110
1/4
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
12 std::vector<int> displs(size);
111
2/2
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 12 times.
36 for (int i = 0; i < size; ++i) {
112
4/4
✓ Branch 0 taken 20 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 12 times.
44 recv_counts[i] = (rows_a / size) + (i < (rows_a % size) ? 1 : 0);
113
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.
24 displs[i] = (i == 0) ? 0 : displs[i - 1] + recv_counts[i - 1];
114 }
115
116
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 MPI_Allgatherv(my_nnz_per_row.data(), my_row_count, MPI_INT, all_nnz_per_row.data(), recv_counts.data(),
117 displs.data(), MPI_INT, MPI_COMM_WORLD);
118
119
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 matrix_c.row_ptr.assign(rows_a + 1, 0);
120
2/2
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 12 times.
36 for (int i = 0; i < rows_a; ++i) {
121 24 matrix_c.row_ptr[i + 1] = matrix_c.row_ptr[i] + all_nnz_per_row[i];
122 }
123
124
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 matrix_c.col_index.resize(matrix_c.row_ptr[rows_a]);
125
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 matrix_c.values.resize(matrix_c.row_ptr[rows_a]);
126
127
1/4
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
12 std::vector<int> val_recv_counts(size);
128
1/4
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
12 std::vector<int> val_displs(size);
129
2/2
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 12 times.
36 for (int i = 0; i < size; ++i) {
130 24 val_recv_counts[i] = matrix_c.row_ptr[displs[i] + recv_counts[i]] - matrix_c.row_ptr[displs[i]];
131 24 val_displs[i] = matrix_c.row_ptr[displs[i]];
132 }
133
134
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 MPI_Allgatherv(my_flat_cols.data(), static_cast<int>(my_flat_cols.size()), MPI_INT, matrix_c.col_index.data(),
135 val_recv_counts.data(), val_displs.data(), MPI_INT, MPI_COMM_WORLD);
136
1/2
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
12 MPI_Allgatherv(my_flat_vals.data(), static_cast<int>(my_flat_vals.size()), MPI_DOUBLE, matrix_c.values.data(),
137 val_recv_counts.data(), val_displs.data(), MPI_DOUBLE, MPI_COMM_WORLD);
138
139 12 return true;
140 12 }
141
142 12 bool AshihminDMultMatrCrsALL::PostProcessingImpl() {
143 12 return true;
144 }
145
146 } // namespace ashihmin_d_mult_matr_crs
147