GCC Code Coverage Report

Directory:	./
File:	tasks/volkov_a_sparse_mat_mul_ccs/all/src/ops_all.cpp
Date:	2026-06-04 20:25:32

	Exec	Total	Coverage
Lines:	132	132	100.0%
Functions:	12	12	100.0%
Branches:	62	84	73.8%

  
      Line
      Branch
      Exec
      Source
    
      #include "volkov_a_sparse_mat_mul_ccs/all/include/ops_all.hpp"
    
      #include <mpi.h>
    
      #include <omp.h>
    
      #include <algorithm>
    
      #include <cmath>
    
      #include <tuple>
    
      #include <utility>
    
      #include <vector>
    
      #include "volkov_a_sparse_mat_mul_ccs/common/include/common.hpp"
    
      namespace volkov_a_sparse_mat_mul_ccs {
    
      namespace {
    
      template <typename Matrix>
    
      30
      void BroadcastMatrix(Matrix &mat, int root, MPI_Comm comm) {
    
      30
        int rank = 0;
    
      30
        MPI_Comm_rank(comm, &rank);
    
      30
        MPI_Bcast(&mat.rows_count, 1, MPI_INT, root, comm);
    
      30
        MPI_Bcast(&mat.cols_count, 1, MPI_INT, root, comm);
    
      30
        MPI_Bcast(&mat.non_zeros, 1, MPI_INT, root, comm);
    
        2/2✓ Branch 0 taken 15 times.
✓ Branch 1 taken 15 times.

      30
        if (rank != root) {
    
      15
          mat.col_ptrs.resize(mat.cols_count + 1);
    
      15
          mat.row_indices.resize(mat.non_zeros);
    
      15
          mat.values.resize(mat.non_zeros);
    
        }
    
      30
        MPI_Bcast(mat.col_ptrs.data(), mat.cols_count + 1, MPI_INT, root, comm);
    
        2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 2 times.

      30
        if (mat.non_zeros > 0) {
    
      28
          MPI_Bcast(mat.row_indices.data(), mat.non_zeros, MPI_INT, root, comm);
    
      28
          MPI_Bcast(mat.values.data(), mat.non_zeros, MPI_DOUBLE, root, comm);
    
        }
    
      30
      }
    
      template <typename MatrixType>
    
      9
      void ProcessColumnOmp(int col_idx, const MatrixType &matrix_a, const MatrixType &matrix_b,
    
                            std::vector<double> &col_accumulator, std::vector<int> &local_row_indices,
    
                            std::vector<double> &local_values) {
    
      9
        int b_start = matrix_b.col_ptrs[col_idx];
    
      9
        int b_end = matrix_b.col_ptrs[col_idx + 1];
    
        2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 9 times.

      21
        for (int k = b_start; k < b_end; ++k) {
    
      12
          int b_row = matrix_b.row_indices[k];
    
      12
          double b_val = matrix_b.values[k];
    
      12
          int a_start = matrix_a.col_ptrs[b_row];
    
      12
          int a_end = matrix_a.col_ptrs[b_row + 1];
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 12 times.

      26
          for (int idx = a_start; idx < a_end; ++idx) {
    
      14
            int a_row = matrix_a.row_indices[idx];
    
      14
            double a_val = matrix_a.values[idx];
    
      14
            col_accumulator[a_row] += a_val * b_val;
    
          }
    
        }
    
        2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 9 times.

      29
        for (int i = 0; i < matrix_a.rows_count; ++i) {
    
        2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 8 times.

      20
          if (std::abs(col_accumulator[i]) > 1e-10) {
    
            local_row_indices.push_back(i);
    
            local_values.push_back(col_accumulator[i]);
    
          }
    
      20
          col_accumulator[i] = 0.0;
    
        }
    
      9
      }
    
      5
      void FlattenLocalData(int my_cols_count, const std::vector<std::vector<int>> &local_row_indices,
    
                            const std::vector<std::vector<double>> &local_values, std::vector<int> &my_col_sizes, int &my_nnz,
    
                            std::vector<int> &my_rows_flat, std::vector<double> &my_vals_flat) {
    
      5
        my_nnz = 0;
    
      5
        my_col_sizes.resize(my_cols_count);
    
        2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 5 times.

      9
        for (int i = 0; i < my_cols_count; ++i) {
    
      4
          my_col_sizes[i] = static_cast<int>(local_row_indices[i].size());
    
      4
          my_nnz += my_col_sizes[i];
    
        }
    
      5
        my_rows_flat.resize(my_nnz);
    
      5
        my_vals_flat.resize(my_nnz);
    
        int offset = 0;
    
        2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 5 times.

      9
        for (int i = 0; i < my_cols_count; ++i) {
    
      4
          std::copy(local_row_indices[i].begin(), local_row_indices[i].end(), my_rows_flat.begin() + offset);
    
      4
          std::copy(local_values[i].begin(), local_values[i].end(), my_vals_flat.begin() + offset);
    
      4
          offset += my_col_sizes[i];
    
        }
    
      5
      }
    
      5
      void SendResultsToRoot(int my_cols_count, const std::vector<int> &my_col_sizes, int my_nnz,
    
                             const std::vector<int> &my_rows_flat, const std::vector<double> &my_vals_flat) {
    
        2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 1 times.

      5
        if (my_cols_count > 0) {
    
      4
          MPI_Send(my_col_sizes.data(), my_cols_count, MPI_INT, 0, 0, MPI_COMM_WORLD);
    
        2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.

      4
          if (my_nnz > 0) {
    
      3
            MPI_Send(my_rows_flat.data(), my_nnz, MPI_INT, 0, 1, MPI_COMM_WORLD);
    
      3
            MPI_Send(my_vals_flat.data(), my_nnz, MPI_DOUBLE, 0, 2, MPI_COMM_WORLD);
    
          }
    
        }
    
      5
      }
    
      5
      void ReceiveWorkerResults(int world_size, int chunk, int rem, std::vector<std::vector<int>> &all_row_indices,
    
                                std::vector<std::vector<double>> &all_values) {
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      10
        for (int proc = 1; proc < world_size; ++proc) {
    
        1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.

      5
          int proc_start_col = (proc * chunk) + std::min(proc, rem);
    
        1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.

      5
          int proc_end_col = proc_start_col + chunk + (proc < rem ? 1 : 0);
    
      5
          int proc_cols_count = proc_end_col - proc_start_col;
    
        2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.

      5
          if (proc_cols_count <= 0) {
    
      1
            continue;
    
          }
    
        1/2✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.

      4
          std::vector<int> proc_col_sizes(proc_cols_count);
    
        1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

      4
          MPI_Recv(proc_col_sizes.data(), proc_cols_count, MPI_INT, proc, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    
          int proc_nnz = 0;
    
        2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.

      8
          for (int s : proc_col_sizes) {
    
      4
            proc_nnz += s;
    
          }
    
        1/4✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      4
          std::vector<int> proc_rows(proc_nnz);
    
        1/4✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      4
          std::vector<double> proc_vals(proc_nnz);
    
        2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.

      4
          if (proc_nnz > 0) {
    
        1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.

      3
            MPI_Recv(proc_rows.data(), proc_nnz, MPI_INT, proc, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    
        1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.

      3
            MPI_Recv(proc_vals.data(), proc_nnz, MPI_DOUBLE, proc, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    
          }
    
          int curr_offset = 0;
    
        2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.

      8
          for (int i = 0; i < proc_cols_count; ++i) {
    
        1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

      4
            all_row_indices[proc_start_col + i].resize(proc_col_sizes[i]);
    
        1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.

      4
            all_values[proc_start_col + i].resize(proc_col_sizes[i]);
    
      4
            std::copy(proc_rows.begin() + curr_offset, proc_rows.begin() + curr_offset + proc_col_sizes[i],
    
                      all_row_indices[proc_start_col + i].begin());
    
      4
            std::copy(proc_vals.begin() + curr_offset, proc_vals.begin() + curr_offset + proc_col_sizes[i],
    
                      all_values[proc_start_col + i].begin());
    
      4
            curr_offset += proc_col_sizes[i];
    
          }
    
        }
    
      5
      }
    
      template <typename MatrixType>
    
      5
      void AssembleFinalMatrix(int total_cols, const std::vector<std::vector<int>> &all_row_indices,
    
                               const std::vector<std::vector<double>> &all_values, MatrixType &matrix_c) {
    
      5
        matrix_c.col_ptrs.assign(total_cols + 1, 0);
    
        2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 5 times.

      14
        for (int j = 0; j < total_cols; ++j) {
    
      9
          matrix_c.col_ptrs[j + 1] = matrix_c.col_ptrs[j] + static_cast<int>(all_row_indices[j].size());
    
        }
    
      5
        matrix_c.non_zeros = matrix_c.col_ptrs.back();
    
      5
        matrix_c.row_indices.resize(matrix_c.non_zeros);
    
      5
        matrix_c.values.resize(matrix_c.non_zeros);
    
      5
      #pragma omp parallel for schedule(static) default(none) shared(total_cols, matrix_c, all_row_indices, all_values)
    
        for (int j = 0; j < total_cols; ++j) {
    
          int offset_c = matrix_c.col_ptrs[j];
    
          int size = static_cast<int>(all_row_indices[j].size());
    
          for (int k = 0; k < size; ++k) {
    
            matrix_c.row_indices[offset_c + k] = all_row_indices[j][k];
    
            matrix_c.values[offset_c + k] = all_values[j][k];
    
          }
    
        }
    
      5
      }
    
      template <typename MatrixType>
    
      5
      void GatherResultsToRoot(int total_cols, int world_size, int chunk, int rem, int start_col, int my_cols_count,
    
                               std::vector<std::vector<int>> &local_row_indices,
    
                               std::vector<std::vector<double>> &local_values, MatrixType &matrix_c) {
    
      5
        std::vector<std::vector<int>> all_row_indices(total_cols);
    
        1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.

      5
        std::vector<std::vector<double>> all_values(total_cols);
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      10
        for (int i = 0; i < my_cols_count; ++i) {
    
      5
          all_row_indices[start_col + i] = std::move(local_row_indices[i]);
    
          all_values[start_col + i] = std::move(local_values[i]);
    
        }
    
        1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.

      5
        ReceiveWorkerResults(world_size, chunk, rem, all_row_indices, all_values);
    
        1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.

      5
        AssembleFinalMatrix(total_cols, all_row_indices, all_values, matrix_c);
    
      5
      }
    
      }  // namespace
    
        1/2✓ Branch 2 taken 10 times.
✗ Branch 3 not taken.

      10
      VolkovASparseMatMulCcsAll::VolkovASparseMatMulCcsAll(const InType &in) {
    
        SetTypeOfTask(GetStaticTypeOfTask());
    
        GetInput() = in;
    
      10
      }
    
      10
      bool VolkovASparseMatMulCcsAll::ValidationImpl() {
    
      10
        int world_rank = 0;
    
      10
        MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      10
        if (world_rank == 0) {
    
          const auto &matrix_a = std::get<0>(GetInput());
    
          const auto &matrix_b = std::get<1>(GetInput());
    
      5
          return (matrix_a.cols_count == matrix_b.rows_count);
    
        }
    
        return true;
    
      }
    
      10
      bool VolkovASparseMatMulCcsAll::PreProcessingImpl() {
    
      10
        return true;
    
      }
    
      10
      bool VolkovASparseMatMulCcsAll::RunImpl() {
    
      10
        int world_rank = 0;
    
      10
        int world_size = 0;
    
      10
        MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    
      10
        MPI_Comm_size(MPI_COMM_WORLD, &world_size);
    
        auto &matrix_a = std::get<0>(GetInput());
    
        auto &matrix_b = std::get<1>(GetInput());
    
        auto &matrix_c = GetOutput();
    
      10
        BroadcastMatrix(matrix_a, 0, MPI_COMM_WORLD);
    
      10
        BroadcastMatrix(matrix_b, 0, MPI_COMM_WORLD);
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      10
        if (world_rank == 0) {
    
      5
          matrix_c.rows_count = matrix_a.rows_count;
    
      5
          matrix_c.cols_count = matrix_b.cols_count;
    
        }
    
      10
        int total_cols = matrix_b.cols_count;
    
      10
        int chunk = total_cols / world_size;
    
      10
        int rem = total_cols % world_size;
    
        2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 1 times.

      10
        int start_col = (world_rank * chunk) + std::min(world_rank, rem);
    
        2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 1 times.

      10
        int end_col = start_col + chunk + (world_rank < rem ? 1 : 0);
    
      10
        int my_cols_count = end_col - start_col;
    
      10
        std::vector<std::vector<int>> local_row_indices(my_cols_count);
    
        1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.

      10
        std::vector<std::vector<double>> local_values(my_cols_count);
    
      10
      #pragma omp parallel default(none) shared(matrix_a, matrix_b, start_col, end_col, local_row_indices, local_values)
    
        {
    
          std::vector<double> col_accumulator(matrix_a.rows_count, 0.0);
    
      #pragma omp for schedule(dynamic)
    
          for (int j = start_col; j < end_col; ++j) {
    
            int local_j = j - start_col;
    
            ProcessColumnOmp(j, matrix_a, matrix_b, col_accumulator, local_row_indices[local_j], local_values[local_j]);
    
          }
    
        }
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.

      10
        if (world_rank == 0) {
    
        1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.

      5
          GatherResultsToRoot(total_cols, world_size, chunk, rem, start_col, my_cols_count, local_row_indices, local_values,
    
                              matrix_c);
    
        } else {
    
      5
          int my_nnz = 0;
    
      5
          std::vector<int> my_col_sizes;
    
      5
          std::vector<int> my_rows_flat;
    
      5
          std::vector<double> my_vals_flat;
    
        1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.

      5
          FlattenLocalData(my_cols_count, local_row_indices, local_values, my_col_sizes, my_nnz, my_rows_flat, my_vals_flat);
    
        1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.

      5
          SendResultsToRoot(my_cols_count, my_col_sizes, my_nnz, my_rows_flat, my_vals_flat);
    
        }
    
      10
        return true;
    
      10
      }
    
      10
      bool VolkovASparseMatMulCcsAll::PostProcessingImpl() {
    
        auto &matrix_c = GetOutput();
    
      10
        BroadcastMatrix(matrix_c, 0, MPI_COMM_WORLD);
    
      10
        return true;
    
      }
    
      }  // namespace volkov_a_sparse_mat_mul_ccs

Line	Branch	Exec	Source
1			#include "volkov_a_sparse_mat_mul_ccs/all/include/ops_all.hpp"
2
3			#include <mpi.h>
4			#include <omp.h>
5
6			#include <algorithm>
7			#include <cmath>
8			#include <tuple>
9			#include <utility>
10			#include <vector>
11
12			#include "volkov_a_sparse_mat_mul_ccs/common/include/common.hpp"
13
14			namespace volkov_a_sparse_mat_mul_ccs {
15
16			namespace {
17
18			template <typename Matrix>
19		30	void BroadcastMatrix(Matrix &mat, int root, MPI_Comm comm) {
20		30	int rank = 0;
21		30	MPI_Comm_rank(comm, &rank);
22
23		30	MPI_Bcast(&mat.rows_count, 1, MPI_INT, root, comm);
24		30	MPI_Bcast(&mat.cols_count, 1, MPI_INT, root, comm);
25		30	MPI_Bcast(&mat.non_zeros, 1, MPI_INT, root, comm);
26
27	2/2 ✓ Branch 0 taken 15 times. ✓ Branch 1 taken 15 times.	30	if (rank != root) {
28		15	mat.col_ptrs.resize(mat.cols_count + 1);
29		15	mat.row_indices.resize(mat.non_zeros);
30		15	mat.values.resize(mat.non_zeros);
31			}
32
33		30	MPI_Bcast(mat.col_ptrs.data(), mat.cols_count + 1, MPI_INT, root, comm);
34	2/2 ✓ Branch 0 taken 28 times. ✓ Branch 1 taken 2 times.	30	if (mat.non_zeros > 0) {
35		28	MPI_Bcast(mat.row_indices.data(), mat.non_zeros, MPI_INT, root, comm);
36		28	MPI_Bcast(mat.values.data(), mat.non_zeros, MPI_DOUBLE, root, comm);
37			}
38		30	}
39
40			template <typename MatrixType>
41		9	void ProcessColumnOmp(int col_idx, const MatrixType &matrix_a, const MatrixType &matrix_b,
42			std::vector<double> &col_accumulator, std::vector<int> &local_row_indices,
43			std::vector<double> &local_values) {
44		9	int b_start = matrix_b.col_ptrs[col_idx];
45		9	int b_end = matrix_b.col_ptrs[col_idx + 1];
46
47	2/2 ✓ Branch 0 taken 12 times. ✓ Branch 1 taken 9 times.	21	for (int k = b_start; k < b_end; ++k) {
48		12	int b_row = matrix_b.row_indices[k];
49		12	double b_val = matrix_b.values[k];
50
51		12	int a_start = matrix_a.col_ptrs[b_row];
52		12	int a_end = matrix_a.col_ptrs[b_row + 1];
53
54	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 12 times.	26	for (int idx = a_start; idx < a_end; ++idx) {
55		14	int a_row = matrix_a.row_indices[idx];
56		14	double a_val = matrix_a.values[idx];
57		14	col_accumulator[a_row] += a_val * b_val;
58			}
59			}
60
61	2/2 ✓ Branch 0 taken 20 times. ✓ Branch 1 taken 9 times.	29	for (int i = 0; i < matrix_a.rows_count; ++i) {
62	2/2 ✓ Branch 0 taken 12 times. ✓ Branch 1 taken 8 times.	20	if (std::abs(col_accumulator[i]) > 1e-10) {
63			local_row_indices.push_back(i);
64			local_values.push_back(col_accumulator[i]);
65			}
66		20	col_accumulator[i] = 0.0;
67			}
68		9	}
69
70		5	void FlattenLocalData(int my_cols_count, const std::vector<std::vector<int>> &local_row_indices,
71			const std::vector<std::vector<double>> &local_values, std::vector<int> &my_col_sizes, int &my_nnz,
72			std::vector<int> &my_rows_flat, std::vector<double> &my_vals_flat) {
73		5	my_nnz = 0;
74		5	my_col_sizes.resize(my_cols_count);
75	2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 5 times.	9	for (int i = 0; i < my_cols_count; ++i) {
76		4	my_col_sizes[i] = static_cast<int>(local_row_indices[i].size());
77		4	my_nnz += my_col_sizes[i];
78			}
79
80		5	my_rows_flat.resize(my_nnz);
81		5	my_vals_flat.resize(my_nnz);
82			int offset = 0;
83	2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 5 times.	9	for (int i = 0; i < my_cols_count; ++i) {
84		4	std::copy(local_row_indices[i].begin(), local_row_indices[i].end(), my_rows_flat.begin() + offset);
85		4	std::copy(local_values[i].begin(), local_values[i].end(), my_vals_flat.begin() + offset);
86		4	offset += my_col_sizes[i];
87			}
88		5	}
89
90		5	void SendResultsToRoot(int my_cols_count, const std::vector<int> &my_col_sizes, int my_nnz,
91			const std::vector<int> &my_rows_flat, const std::vector<double> &my_vals_flat) {
92	2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 1 times.	5	if (my_cols_count > 0) {
93		4	MPI_Send(my_col_sizes.data(), my_cols_count, MPI_INT, 0, 0, MPI_COMM_WORLD);
94	2/2 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 1 times.	4	if (my_nnz > 0) {
95		3	MPI_Send(my_rows_flat.data(), my_nnz, MPI_INT, 0, 1, MPI_COMM_WORLD);
96		3	MPI_Send(my_vals_flat.data(), my_nnz, MPI_DOUBLE, 0, 2, MPI_COMM_WORLD);
97			}
98			}
99		5	}
100
101		5	void ReceiveWorkerResults(int world_size, int chunk, int rem, std::vector<std::vector<int>> &all_row_indices,
102			std::vector<std::vector<double>> &all_values) {
103	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	10	for (int proc = 1; proc < world_size; ++proc) {
104	1/2 ✓ Branch 0 taken 5 times. ✗ Branch 1 not taken.	5	int proc_start_col = (proc * chunk) + std::min(proc, rem);
105	1/2 ✓ Branch 0 taken 5 times. ✗ Branch 1 not taken.	5	int proc_end_col = proc_start_col + chunk + (proc < rem ? 1 : 0);
106		5	int proc_cols_count = proc_end_col - proc_start_col;
107
108	2/2 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 4 times.	5	if (proc_cols_count <= 0) {
109		1	continue;
110			}
111
112	1/2 ✓ Branch 2 taken 4 times. ✗ Branch 3 not taken.	4	std::vector<int> proc_col_sizes(proc_cols_count);
113	1/2 ✓ Branch 1 taken 4 times. ✗ Branch 2 not taken.	4	MPI_Recv(proc_col_sizes.data(), proc_cols_count, MPI_INT, proc, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
114
115			int proc_nnz = 0;
116	2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 4 times.	8	for (int s : proc_col_sizes) {
117		4	proc_nnz += s;
118			}
119
120	1/4 ✓ Branch 1 taken 4 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	4	std::vector<int> proc_rows(proc_nnz);
121	1/4 ✓ Branch 1 taken 4 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	4	std::vector<double> proc_vals(proc_nnz);
122
123	2/2 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 1 times.	4	if (proc_nnz > 0) {
124	1/2 ✓ Branch 1 taken 3 times. ✗ Branch 2 not taken.	3	MPI_Recv(proc_rows.data(), proc_nnz, MPI_INT, proc, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
125	1/2 ✓ Branch 1 taken 3 times. ✗ Branch 2 not taken.	3	MPI_Recv(proc_vals.data(), proc_nnz, MPI_DOUBLE, proc, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
126			}
127
128			int curr_offset = 0;
129	2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 4 times.	8	for (int i = 0; i < proc_cols_count; ++i) {
130	1/2 ✓ Branch 1 taken 4 times. ✗ Branch 2 not taken.	4	all_row_indices[proc_start_col + i].resize(proc_col_sizes[i]);
131	1/2 ✓ Branch 1 taken 4 times. ✗ Branch 2 not taken.	4	all_values[proc_start_col + i].resize(proc_col_sizes[i]);
132
133		4	std::copy(proc_rows.begin() + curr_offset, proc_rows.begin() + curr_offset + proc_col_sizes[i],
134			all_row_indices[proc_start_col + i].begin());
135		4	std::copy(proc_vals.begin() + curr_offset, proc_vals.begin() + curr_offset + proc_col_sizes[i],
136			all_values[proc_start_col + i].begin());
137
138		4	curr_offset += proc_col_sizes[i];
139			}
140			}
141		5	}
142
143			template <typename MatrixType>
144		5	void AssembleFinalMatrix(int total_cols, const std::vector<std::vector<int>> &all_row_indices,
145			const std::vector<std::vector<double>> &all_values, MatrixType &matrix_c) {
146		5	matrix_c.col_ptrs.assign(total_cols + 1, 0);
147	2/2 ✓ Branch 0 taken 9 times. ✓ Branch 1 taken 5 times.	14	for (int j = 0; j < total_cols; ++j) {
148		9	matrix_c.col_ptrs[j + 1] = matrix_c.col_ptrs[j] + static_cast<int>(all_row_indices[j].size());
149			}
150
151		5	matrix_c.non_zeros = matrix_c.col_ptrs.back();
152		5	matrix_c.row_indices.resize(matrix_c.non_zeros);
153		5	matrix_c.values.resize(matrix_c.non_zeros);
154
155		5	#pragma omp parallel for schedule(static) default(none) shared(total_cols, matrix_c, all_row_indices, all_values)
156			for (int j = 0; j < total_cols; ++j) {
157			int offset_c = matrix_c.col_ptrs[j];
158			int size = static_cast<int>(all_row_indices[j].size());
159			for (int k = 0; k < size; ++k) {
160			matrix_c.row_indices[offset_c + k] = all_row_indices[j][k];
161			matrix_c.values[offset_c + k] = all_values[j][k];
162			}
163			}
164		5	}
165
166			template <typename MatrixType>
167		5	void GatherResultsToRoot(int total_cols, int world_size, int chunk, int rem, int start_col, int my_cols_count,
168			std::vector<std::vector<int>> &local_row_indices,
169			std::vector<std::vector<double>> &local_values, MatrixType &matrix_c) {
170		5	std::vector<std::vector<int>> all_row_indices(total_cols);
171	1/2 ✓ Branch 1 taken 5 times. ✗ Branch 2 not taken.	5	std::vector<std::vector<double>> all_values(total_cols);
172
173	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	10	for (int i = 0; i < my_cols_count; ++i) {
174		5	all_row_indices[start_col + i] = std::move(local_row_indices[i]);
175			all_values[start_col + i] = std::move(local_values[i]);
176			}
177
178	1/2 ✓ Branch 1 taken 5 times. ✗ Branch 2 not taken.	5	ReceiveWorkerResults(world_size, chunk, rem, all_row_indices, all_values);
179
180	1/2 ✓ Branch 1 taken 5 times. ✗ Branch 2 not taken.	5	AssembleFinalMatrix(total_cols, all_row_indices, all_values, matrix_c);
181		5	}
182
183			} // namespace
184
185	1/2 ✓ Branch 2 taken 10 times. ✗ Branch 3 not taken.	10	VolkovASparseMatMulCcsAll::VolkovASparseMatMulCcsAll(const InType &in) {
186			SetTypeOfTask(GetStaticTypeOfTask());
187			GetInput() = in;
188		10	}
189
190		10	bool VolkovASparseMatMulCcsAll::ValidationImpl() {
191		10	int world_rank = 0;
192		10	MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
193
194	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	10	if (world_rank == 0) {
195			const auto &matrix_a = std::get<0>(GetInput());
196			const auto &matrix_b = std::get<1>(GetInput());
197		5	return (matrix_a.cols_count == matrix_b.rows_count);
198			}
199			return true;
200			}
201
202		10	bool VolkovASparseMatMulCcsAll::PreProcessingImpl() {
203		10	return true;
204			}
205
206		10	bool VolkovASparseMatMulCcsAll::RunImpl() {
207		10	int world_rank = 0;
208		10	int world_size = 0;
209		10	MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
210		10	MPI_Comm_size(MPI_COMM_WORLD, &world_size);
211
212			auto &matrix_a = std::get<0>(GetInput());
213			auto &matrix_b = std::get<1>(GetInput());
214			auto &matrix_c = GetOutput();
215
216		10	BroadcastMatrix(matrix_a, 0, MPI_COMM_WORLD);
217		10	BroadcastMatrix(matrix_b, 0, MPI_COMM_WORLD);
218
219	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	10	if (world_rank == 0) {
220		5	matrix_c.rows_count = matrix_a.rows_count;
221		5	matrix_c.cols_count = matrix_b.cols_count;
222			}
223
224		10	int total_cols = matrix_b.cols_count;
225		10	int chunk = total_cols / world_size;
226		10	int rem = total_cols % world_size;
227	2/2 ✓ Branch 0 taken 9 times. ✓ Branch 1 taken 1 times.	10	int start_col = (world_rank * chunk) + std::min(world_rank, rem);
228	2/2 ✓ Branch 0 taken 9 times. ✓ Branch 1 taken 1 times.	10	int end_col = start_col + chunk + (world_rank < rem ? 1 : 0);
229		10	int my_cols_count = end_col - start_col;
230
231		10	std::vector<std::vector<int>> local_row_indices(my_cols_count);
232	1/2 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken.	10	std::vector<std::vector<double>> local_values(my_cols_count);
233
234		10	#pragma omp parallel default(none) shared(matrix_a, matrix_b, start_col, end_col, local_row_indices, local_values)
235			{
236			std::vector<double> col_accumulator(matrix_a.rows_count, 0.0);
237			#pragma omp for schedule(dynamic)
238			for (int j = start_col; j < end_col; ++j) {
239			int local_j = j - start_col;
240			ProcessColumnOmp(j, matrix_a, matrix_b, col_accumulator, local_row_indices[local_j], local_values[local_j]);
241			}
242			}
243
244	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 5 times.	10	if (world_rank == 0) {
245	1/2 ✓ Branch 1 taken 5 times. ✗ Branch 2 not taken.	5	GatherResultsToRoot(total_cols, world_size, chunk, rem, start_col, my_cols_count, local_row_indices, local_values,
246			matrix_c);
247			} else {
248		5	int my_nnz = 0;
249		5	std::vector<int> my_col_sizes;
250		5	std::vector<int> my_rows_flat;
251		5	std::vector<double> my_vals_flat;
252
253	1/2 ✓ Branch 1 taken 5 times. ✗ Branch 2 not taken.	5	FlattenLocalData(my_cols_count, local_row_indices, local_values, my_col_sizes, my_nnz, my_rows_flat, my_vals_flat);
254	1/2 ✓ Branch 1 taken 5 times. ✗ Branch 2 not taken.	5	SendResultsToRoot(my_cols_count, my_col_sizes, my_nnz, my_rows_flat, my_vals_flat);
255			}
256
257		10	return true;
258		10	}
259
260		10	bool VolkovASparseMatMulCcsAll::PostProcessingImpl() {
261			auto &matrix_c = GetOutput();
262		10	BroadcastMatrix(matrix_c, 0, MPI_COMM_WORLD);
263		10	return true;
264			}
265
266			} // namespace volkov_a_sparse_mat_mul_ccs
267