GCC Code Coverage Report

Directory:	./
File:	tasks/ashihmin_d_mult_matr_crs/all/src/ops_all.cpp
Date:	2026-06-04 20:25:32

	Exec	Total	Coverage
Lines:	77	77	100.0%
Functions:	8	8	100.0%
Branches:	55	88	62.5%

  
      Line
      Branch
      Exec
      Source
    
      #include "ashihmin_d_mult_matr_crs/all/include/ops_all.hpp"
    
      #include <mpi.h>
    
      #include <omp.h>
    
      #include <tbb/tbb.h>
    
      #include <algorithm>
    
      #include <cmath>
    
      #include <map>
    
      #include <thread>
    
      #include <vector>
    
      #include "ashihmin_d_mult_matr_crs/common/include/common.hpp"
    
      #include "util/include/util.hpp"
    
      namespace ashihmin_d_mult_matr_crs {
    
        1/2✓ Branch 2 taken 12 times.
✗ Branch 3 not taken.

      12
      AshihminDMultMatrCrsALL::AshihminDMultMatrCrsALL(const InType &in) {
    
        SetTypeOfTask(GetStaticTypeOfTask());
    
        GetInput() = in;
    
      12
      }
    
      12
      bool AshihminDMultMatrCrsALL::ValidationImpl() {
    
      12
        return GetInput().first.cols == GetInput().second.rows;
    
      }
    
      12
      bool AshihminDMultMatrCrsALL::PreProcessingImpl() {
    
        auto &matrix_c = GetOutput();
    
      12
        matrix_c.rows = GetInput().first.rows;
    
      12
        matrix_c.cols = GetInput().second.cols;
    
      12
        return true;
    
      }
    
      12
      void AshihminDMultMatrCrsALL::MultiplyRow(int global_row_idx, int local_idx, const CRSMatrix &matrix_a,
    
                                                const CRSMatrix &matrix_b, std::vector<std::vector<int>> &local_cols,
    
                                                std::vector<std::vector<double>> &local_vals) {
    
        std::map<int, double> row_accumulator;
    
        2/2✓ Branch 0 taken 13 times.
✓ Branch 1 taken 12 times.

      25
        for (int j = matrix_a.row_ptr[global_row_idx]; j < matrix_a.row_ptr[global_row_idx + 1]; ++j) {
    
      13
          int col_a = matrix_a.col_index[j];
    
      13
          double val_a = matrix_a.values[j];
    
        2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 13 times.

      33
          for (int k = matrix_b.row_ptr[col_a]; k < matrix_b.row_ptr[col_a + 1]; ++k) {
    
        1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.

      20
            row_accumulator[matrix_b.col_index[k]] += val_a * matrix_b.values[k];
    
          }
    
        }
    
        2/2✓ Branch 0 taken 15 times.
✓ Branch 1 taken 12 times.

      27
        for (const auto &entry : row_accumulator) {
    
        1/2✓ Branch 0 taken 15 times.
✗ Branch 1 not taken.

      15
          if (std::abs(entry.second) > 1e-15) {
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 15 times.

      15
            local_cols[local_idx].push_back(entry.first);
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 15 times.

      15
            local_vals[local_idx].push_back(entry.second);
    
          }
    
        }
    
      12
      }
    
      12
      bool AshihminDMultMatrCrsALL::RunImpl() {
    
      12
        const auto &matrix_a = GetInput().first;
    
      12
        const auto &matrix_b = GetInput().second;
    
        auto &matrix_c = GetOutput();
    
      12
        int rank = 0;
    
      12
        int size = 0;
    
      12
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    
      12
        MPI_Comm_size(MPI_COMM_WORLD, &size);
    
      12
        int rows_a = matrix_a.rows;
    
      12
        int base_rows = rows_a / size;
    
      12
        int rem = rows_a % size;
    
        2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 2 times.

      12
        int my_start = (rank * base_rows) + std::min(rank, rem);
    
        2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 2 times.

      12
        int my_end = my_start + base_rows + (rank < rem ? 1 : 0);
    
      12
        int my_row_count = my_end - my_start;
    
      12
        std::vector<std::vector<int>> local_cols(my_row_count);
    
        1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.

      12
        std::vector<std::vector<double>> local_vals(my_row_count);
    
        1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.

      12
        int thread_count = ppc::util::GetNumThreads();
    
      12
        std::vector<std::thread> threads;
    
      12
        auto compute_rows = [&](int start_idx, int end_idx) {
    
          tbb::parallel_for(start_idx, end_idx,
    
      24
                            [&](int i) { MultiplyRow(my_start + i, i, matrix_a, matrix_b, local_cols, local_vals); });
    
      24
        };
    
      12
        int stl_chunk = (my_row_count + thread_count - 1) / thread_count;
    
        2/2✓ Branch 0 taken 24 times.
✓ Branch 1 taken 12 times.

      36
        for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
    
      24
          int start_chunk = thread_idx * stl_chunk;
    
        2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.

      24
          int end_chunk = std::min(start_chunk + stl_chunk, my_row_count);
    
        2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.

      24
          if (start_chunk < end_chunk) {
    
        1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.

      12
            threads.emplace_back(compute_rows, start_chunk, end_chunk);
    
          }
    
        }
    
        2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.

      24
        for (auto &th : threads) {
    
        1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.

      12
          th.join();
    
        }
    
        1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.

      12
        std::vector<int> my_nnz_per_row(my_row_count);
    
      12
      #pragma omp parallel for default(none) shared(my_nnz_per_row, local_cols, my_row_count)
    
        for (int i = 0; i < my_row_count; ++i) {
    
          my_nnz_per_row[i] = static_cast<int>(local_cols[i].size());
    
        }
    
      12
        std::vector<int> my_flat_cols;
    
      12
        std::vector<double> my_flat_vals;
    
        2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.

      24
        for (int i = 0; i < my_row_count; ++i) {
    
        2/4✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 12 times.
✗ Branch 5 not taken.

      12
          my_flat_cols.insert(my_flat_cols.end(), local_cols[i].begin(), local_cols[i].end());
    
      12
          my_flat_vals.insert(my_flat_vals.end(), local_vals[i].begin(), local_vals[i].end());
    
        }
    
        1/4✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      12
        std::vector<int> all_nnz_per_row(rows_a);
    
        1/4✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      12
        std::vector<int> recv_counts(size);
    
        1/4✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      12
        std::vector<int> displs(size);
    
        2/2✓ Branch 0 taken 24 times.
✓ Branch 1 taken 12 times.

      36
        for (int i = 0; i < size; ++i) {
    
        4/4✓ Branch 0 taken 20 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 12 times.

      44
          recv_counts[i] = (rows_a / size) + (i < (rows_a % size) ? 1 : 0);
    
        2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 12 times.

      24
          displs[i] = (i == 0) ? 0 : displs[i - 1] + recv_counts[i - 1];
    
        }
    
        1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.

      12
        MPI_Allgatherv(my_nnz_per_row.data(), my_row_count, MPI_INT, all_nnz_per_row.data(), recv_counts.data(),
    
                       displs.data(), MPI_INT, MPI_COMM_WORLD);
    
        1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.

      12
        matrix_c.row_ptr.assign(rows_a + 1, 0);
    
        2/2✓ Branch 0 taken 24 times.
✓ Branch 1 taken 12 times.

      36
        for (int i = 0; i < rows_a; ++i) {
    
      24
          matrix_c.row_ptr[i + 1] = matrix_c.row_ptr[i] + all_nnz_per_row[i];
    
        }
    
        1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.

      12
        matrix_c.col_index.resize(matrix_c.row_ptr[rows_a]);
    
        1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.

      12
        matrix_c.values.resize(matrix_c.row_ptr[rows_a]);
    
        1/4✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      12
        std::vector<int> val_recv_counts(size);
    
        1/4✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      12
        std::vector<int> val_displs(size);
    
        2/2✓ Branch 0 taken 24 times.
✓ Branch 1 taken 12 times.

      36
        for (int i = 0; i < size; ++i) {
    
      24
          val_recv_counts[i] = matrix_c.row_ptr[displs[i] + recv_counts[i]] - matrix_c.row_ptr[displs[i]];
    
      24
          val_displs[i] = matrix_c.row_ptr[displs[i]];
    
        }
    
        1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.

      12
        MPI_Allgatherv(my_flat_cols.data(), static_cast<int>(my_flat_cols.size()), MPI_INT, matrix_c.col_index.data(),
    
                       val_recv_counts.data(), val_displs.data(), MPI_INT, MPI_COMM_WORLD);
    
        1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.

      12
        MPI_Allgatherv(my_flat_vals.data(), static_cast<int>(my_flat_vals.size()), MPI_DOUBLE, matrix_c.values.data(),
    
                       val_recv_counts.data(), val_displs.data(), MPI_DOUBLE, MPI_COMM_WORLD);
    
      12
        return true;
    
      12
      }
    
      12
      bool AshihminDMultMatrCrsALL::PostProcessingImpl() {
    
      12
        return true;
    
      }
    
      }  // namespace ashihmin_d_mult_matr_crs

Line	Branch	Exec	Source
1			#include "ashihmin_d_mult_matr_crs/all/include/ops_all.hpp"
2
3			#include <mpi.h>
4			#include <omp.h>
5			#include <tbb/tbb.h>
6
7			#include <algorithm>
8			#include <cmath>
9			#include <map>
10			#include <thread>
11			#include <vector>
12
13			#include "ashihmin_d_mult_matr_crs/common/include/common.hpp"
14			#include "util/include/util.hpp"
15
16			namespace ashihmin_d_mult_matr_crs {
17
18	1/2 ✓ Branch 2 taken 12 times. ✗ Branch 3 not taken.	12	AshihminDMultMatrCrsALL::AshihminDMultMatrCrsALL(const InType &in) {
19			SetTypeOfTask(GetStaticTypeOfTask());
20			GetInput() = in;
21		12	}
22
23		12	bool AshihminDMultMatrCrsALL::ValidationImpl() {
24		12	return GetInput().first.cols == GetInput().second.rows;
25			}
26
27		12	bool AshihminDMultMatrCrsALL::PreProcessingImpl() {
28			auto &matrix_c = GetOutput();
29
30		12	matrix_c.rows = GetInput().first.rows;
31		12	matrix_c.cols = GetInput().second.cols;
32		12	return true;
33			}
34
35		12	void AshihminDMultMatrCrsALL::MultiplyRow(int global_row_idx, int local_idx, const CRSMatrix &matrix_a,
36			const CRSMatrix &matrix_b, std::vector<std::vector<int>> &local_cols,
37			std::vector<std::vector<double>> &local_vals) {
38			std::map<int, double> row_accumulator;
39	2/2 ✓ Branch 0 taken 13 times. ✓ Branch 1 taken 12 times.	25	for (int j = matrix_a.row_ptr[global_row_idx]; j < matrix_a.row_ptr[global_row_idx + 1]; ++j) {
40		13	int col_a = matrix_a.col_index[j];
41		13	double val_a = matrix_a.values[j];
42	2/2 ✓ Branch 0 taken 20 times. ✓ Branch 1 taken 13 times.	33	for (int k = matrix_b.row_ptr[col_a]; k < matrix_b.row_ptr[col_a + 1]; ++k) {
43	1/2 ✓ Branch 1 taken 20 times. ✗ Branch 2 not taken.	20	row_accumulator[matrix_b.col_index[k]] += val_a * matrix_b.values[k];
44			}
45			}
46
47	2/2 ✓ Branch 0 taken 15 times. ✓ Branch 1 taken 12 times.	27	for (const auto &entry : row_accumulator) {
48	1/2 ✓ Branch 0 taken 15 times. ✗ Branch 1 not taken.	15	if (std::abs(entry.second) > 1e-15) {
49	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 15 times.	15	local_cols[local_idx].push_back(entry.first);
50	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 15 times.	15	local_vals[local_idx].push_back(entry.second);
51			}
52			}
53		12	}
54
55		12	bool AshihminDMultMatrCrsALL::RunImpl() {
56		12	const auto &matrix_a = GetInput().first;
57		12	const auto &matrix_b = GetInput().second;
58			auto &matrix_c = GetOutput();
59
60		12	int rank = 0;
61		12	int size = 0;
62		12	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
63		12	MPI_Comm_size(MPI_COMM_WORLD, &size);
64
65		12	int rows_a = matrix_a.rows;
66		12	int base_rows = rows_a / size;
67		12	int rem = rows_a % size;
68	2/2 ✓ Branch 0 taken 10 times. ✓ Branch 1 taken 2 times.	12	int my_start = (rank * base_rows) + std::min(rank, rem);
69	2/2 ✓ Branch 0 taken 10 times. ✓ Branch 1 taken 2 times.	12	int my_end = my_start + base_rows + (rank < rem ? 1 : 0);
70		12	int my_row_count = my_end - my_start;
71
72		12	std::vector<std::vector<int>> local_cols(my_row_count);
73	1/2 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken.	12	std::vector<std::vector<double>> local_vals(my_row_count);
74
75	1/2 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken.	12	int thread_count = ppc::util::GetNumThreads();
76		12	std::vector<std::thread> threads;
77
78		12	auto compute_rows = [&](int start_idx, int end_idx) {
79			tbb::parallel_for(start_idx, end_idx,
80		24	[&](int i) { MultiplyRow(my_start + i, i, matrix_a, matrix_b, local_cols, local_vals); });
81		24	};
82
83		12	int stl_chunk = (my_row_count + thread_count - 1) / thread_count;
84	2/2 ✓ Branch 0 taken 24 times. ✓ Branch 1 taken 12 times.	36	for (int thread_idx = 0; thread_idx < thread_count; ++thread_idx) {
85		24	int start_chunk = thread_idx * stl_chunk;
86	2/2 ✓ Branch 0 taken 12 times. ✓ Branch 1 taken 12 times.	24	int end_chunk = std::min(start_chunk + stl_chunk, my_row_count);
87	2/2 ✓ Branch 0 taken 12 times. ✓ Branch 1 taken 12 times.	24	if (start_chunk < end_chunk) {
88	1/2 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken.	12	threads.emplace_back(compute_rows, start_chunk, end_chunk);
89			}
90			}
91	2/2 ✓ Branch 0 taken 12 times. ✓ Branch 1 taken 12 times.	24	for (auto &th : threads) {
92	1/2 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken.	12	th.join();
93			}
94
95	1/2 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken.	12	std::vector<int> my_nnz_per_row(my_row_count);
96		12	#pragma omp parallel for default(none) shared(my_nnz_per_row, local_cols, my_row_count)
97			for (int i = 0; i < my_row_count; ++i) {
98			my_nnz_per_row[i] = static_cast<int>(local_cols[i].size());
99			}
100
101		12	std::vector<int> my_flat_cols;
102		12	std::vector<double> my_flat_vals;
103	2/2 ✓ Branch 0 taken 12 times. ✓ Branch 1 taken 12 times.	24	for (int i = 0; i < my_row_count; ++i) {
104	2/4 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken. ✓ Branch 4 taken 12 times. ✗ Branch 5 not taken.	12	my_flat_cols.insert(my_flat_cols.end(), local_cols[i].begin(), local_cols[i].end());
105		12	my_flat_vals.insert(my_flat_vals.end(), local_vals[i].begin(), local_vals[i].end());
106			}
107
108	1/4 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	12	std::vector<int> all_nnz_per_row(rows_a);
109	1/4 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	12	std::vector<int> recv_counts(size);
110	1/4 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	12	std::vector<int> displs(size);
111	2/2 ✓ Branch 0 taken 24 times. ✓ Branch 1 taken 12 times.	36	for (int i = 0; i < size; ++i) {
112	4/4 ✓ Branch 0 taken 20 times. ✓ Branch 1 taken 4 times. ✓ Branch 2 taken 12 times. ✓ Branch 3 taken 12 times.	44	recv_counts[i] = (rows_a / size) + (i < (rows_a % size) ? 1 : 0);
113	2/2 ✓ Branch 0 taken 12 times. ✓ Branch 1 taken 12 times.	24	displs[i] = (i == 0) ? 0 : displs[i - 1] + recv_counts[i - 1];
114			}
115
116	1/2 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken.	12	MPI_Allgatherv(my_nnz_per_row.data(), my_row_count, MPI_INT, all_nnz_per_row.data(), recv_counts.data(),
117			displs.data(), MPI_INT, MPI_COMM_WORLD);
118
119	1/2 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken.	12	matrix_c.row_ptr.assign(rows_a + 1, 0);
120	2/2 ✓ Branch 0 taken 24 times. ✓ Branch 1 taken 12 times.	36	for (int i = 0; i < rows_a; ++i) {
121		24	matrix_c.row_ptr[i + 1] = matrix_c.row_ptr[i] + all_nnz_per_row[i];
122			}
123
124	1/2 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken.	12	matrix_c.col_index.resize(matrix_c.row_ptr[rows_a]);
125	1/2 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken.	12	matrix_c.values.resize(matrix_c.row_ptr[rows_a]);
126
127	1/4 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	12	std::vector<int> val_recv_counts(size);
128	1/4 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	12	std::vector<int> val_displs(size);
129	2/2 ✓ Branch 0 taken 24 times. ✓ Branch 1 taken 12 times.	36	for (int i = 0; i < size; ++i) {
130		24	val_recv_counts[i] = matrix_c.row_ptr[displs[i] + recv_counts[i]] - matrix_c.row_ptr[displs[i]];
131		24	val_displs[i] = matrix_c.row_ptr[displs[i]];
132			}
133
134	1/2 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken.	12	MPI_Allgatherv(my_flat_cols.data(), static_cast<int>(my_flat_cols.size()), MPI_INT, matrix_c.col_index.data(),
135			val_recv_counts.data(), val_displs.data(), MPI_INT, MPI_COMM_WORLD);
136	1/2 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken.	12	MPI_Allgatherv(my_flat_vals.data(), static_cast<int>(my_flat_vals.size()), MPI_DOUBLE, matrix_c.values.data(),
137			val_recv_counts.data(), val_displs.data(), MPI_DOUBLE, MPI_COMM_WORLD);
138
139		12	return true;
140		12	}
141
142		12	bool AshihminDMultMatrCrsALL::PostProcessingImpl() {
143		12	return true;
144			}
145
146			} // namespace ashihmin_d_mult_matr_crs
147