GCC Code Coverage Report

Directory:	./
File:	tasks/lobanov_d_multi_matrix_crs/all/src/ops_all.cpp
Date:	2026-06-04 20:25:32

	Total	Coverage
Lines:	145	0.0%
Functions:	10	0.0%
Branches:	140	0.0%

  
      Line
      Branch
      Exec
      Source
    
      #include "lobanov_d_multi_matrix_crs/all/include/ops_all.hpp"
    
      #include <mpi.h>
    
      #include <omp.h>
    
      #include <algorithm>
    
      #include <array>
    
      #include <cmath>
    
      #include <cstddef>
    
      #include <vector>
    
      #include "lobanov_d_multi_matrix_crs/common/include/common.hpp"
    
      #include "util/include/util.hpp"
    
      namespace lobanov_d_multi_matrix_crs {
    
      ✗
      /// Sorts the given index list in place into ascending order.
      void LobanovMultyMatrixALL::SortIndices(std::vector<int> &vec) {
        std::sort(vec.begin(), vec.end());
      }
    
      ✗
      /// Registers the task type and, on MPI rank 0 only, stores a copy of the input pair.
      /// Other ranks leave their input untouched.
      LobanovMultyMatrixALL::LobanovMultyMatrixALL(const InType &in) {
        SetTypeOfTask(GetStaticTypeOfTask());

        int world_rank = 0;
        MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
        const bool is_root = (world_rank == 0);
        if (!is_root) {
          return;
        }
        GetInput() = in;
      }
    
      ✗
      bool LobanovMultyMatrixALL::ValidationImpl() {
    
      ✗
        int rank = 0;
    
      ✗
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    
      ✗
        int valid_flag = 0;
    
      ✗
        if (rank == 0) {
    
          const auto &input = GetInput();
    
          const auto &mat_a = input.first;
    
          const auto &mat_b = input.second;
    
      ✗
          if (mat_a.column_count == mat_b.row_count && mat_a.row_count > 0 && mat_b.column_count > 0) {
    
      ✗
            valid_flag = 1;
    
          }
    
        }
    
      ✗
        MPI_Bcast(&valid_flag, 1, MPI_INT, 0, MPI_COMM_WORLD);
    
      ✗
        return valid_flag == 1;
    
      }
    
      ✗
      /// Pre-processing stage: performs no work and always reports success.
      bool LobanovMultyMatrixALL::PreProcessingImpl() {
        return true;
      }
    
      ✗
      /// Broadcasts a compressed-row matrix from `root` to every rank of MPI_COMM_WORLD.
      ///
      /// @param mat   On `root`, the matrix to send; on other ranks, the storage that
      ///              is resized and filled with the received arrays.
      /// @param root  Rank that owns the data.
      /// @param rows  Row count written into `mat` on every rank.
      /// @param cols  Column count written into `mat` on every rank.
      void LobanovMultyMatrixALL::DistributeSparseMatrix(CompressedRowMatrix &mat, int root, int rows, int cols) {
        int my_rank = 0;
        MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
        const bool is_sender = (my_rank == root);

        // lengths[0]: number of row-pointer entries, lengths[1]: number of stored values.
        std::array<int, 2> lengths = {0, 0};
        if (is_sender) {
          lengths = {static_cast<int>(mat.row_pointer_data.size()), static_cast<int>(mat.value_data.size())};
        }
        MPI_Bcast(lengths.data(), 2, MPI_INT, root, MPI_COMM_WORLD);
        const int row_ptr_len = lengths[0];
        const int stored_len = lengths[1];

        // Receivers must have room for the incoming arrays before the broadcasts below.
        if (!is_sender) {
          mat.row_pointer_data.resize(row_ptr_len);
          mat.value_data.resize(stored_len);
          mat.column_index_data.resize(stored_len);
        }

        if (row_ptr_len > 0) {
          MPI_Bcast(mat.row_pointer_data.data(), row_ptr_len, MPI_INT, root, MPI_COMM_WORLD);
        }
        if (stored_len > 0) {
          MPI_Bcast(mat.value_data.data(), stored_len, MPI_DOUBLE, root, MPI_COMM_WORLD);
          MPI_Bcast(mat.column_index_data.data(), stored_len, MPI_INT, root, MPI_COMM_WORLD);
        }

        mat.non_zero_count = static_cast<int>(mat.value_data.size());
        mat.column_count = cols;
        mat.row_count = rows;
      }
    
      ✗
      /// Builds the transpose of a compressed-row matrix.
      ///
      /// Entries are counted per source column, the counts are turned into row
      /// offsets of the result, and then every source entry is dropped into the
      /// next free slot of its destination row. Source rows are visited in
      /// increasing order, so column indices inside each result row are ascending.
      ///
      /// @param src  Matrix to transpose.
      /// @return     A new matrix with rows and columns of `src` swapped.
      CompressedRowMatrix LobanovMultyMatrixALL::TransposeSparseMatrix(const CompressedRowMatrix &src) {
        CompressedRowMatrix transposed;
        transposed.row_count = src.column_count;
        transposed.column_count = src.row_count;

        // Pass 1: histogram of entries per source column, shifted by one slot.
        auto &offsets = transposed.row_pointer_data;
        offsets.assign(transposed.row_count + 1, 0);
        for (int column : src.column_index_data) {
          ++offsets[column + 1];
        }

        // Pass 2: running sum converts the histogram into row start offsets.
        for (int row = 1; row <= transposed.row_count; ++row) {
          offsets[row] += offsets[row - 1];
        }

        transposed.value_data.resize(src.value_data.size());
        transposed.column_index_data.resize(src.column_index_data.size());
        transposed.non_zero_count = src.non_zero_count;

        // Pass 3: scatter entries; next_slot[c] is the next free position in result row c.
        std::vector<int> next_slot(offsets);
        for (int src_row = 0; src_row < src.row_count; ++src_row) {
          const int row_begin = src.row_pointer_data[src_row];
          const int row_end = src.row_pointer_data[src_row + 1];
          for (int entry = row_begin; entry < row_end; ++entry) {
            const int slot = next_slot[src.column_index_data[entry]]++;
            transposed.value_data[slot] = src.value_data[entry];
            transposed.column_index_data[slot] = src_row;
          }
        }
        return transposed;
      }
    
      ✗
      void LobanovMultyMatrixALL::ComputeLocalProduct(const CompressedRowMatrix &a, const CompressedRowMatrix &b_tr,
    
                                                      int start_row, int local_rows, std::vector<int> &row_nnz_counts,
    
                                                      std::vector<double> &packed_vals, std::vector<int> &packed_cols) {
    
      ✗
        if (local_rows <= 0) {
    
      ✗
          return;
    
        }
    
      ✗
        int result_cols = b_tr.row_count;
    
      ✗
        std::vector<std::vector<double>> row_vals(local_rows);
    
      ✗
        std::vector<std::vector<int>> row_cols(local_rows);
    
      ✗
      #pragma omp parallel default(none) shared(a, b_tr, start_row, local_rows, row_vals, row_cols, row_nnz_counts, \
    
      ✗
                                                    result_cols) num_threads(ppc::util::GetNumThreads())
    
        {
    
          std::vector<int> marker(result_cols, -1);
    
          std::vector<double> accumulator(result_cols, 0.0);
    
          std::vector<int> active_columns;
    
      #pragma omp for schedule(dynamic)
    
          for (int i = 0; i < local_rows; ++i) {
    
            int global_row = start_row + i;
    
            active_columns.clear();
    
            for (int idx = a.row_pointer_data[global_row]; idx < a.row_pointer_data[global_row + 1]; ++idx) {
    
              int col_a = a.column_index_data[idx];
    
              double val_a = a.value_data[idx];
    
              for (int j = b_tr.row_pointer_data[col_a]; j < b_tr.row_pointer_data[col_a + 1]; ++j) {
    
                int col_res = b_tr.column_index_data[j];
    
                double contrib = val_a * b_tr.value_data[j];
    
                if (marker[col_res] != i) {
    
                  marker[col_res] = i;
    
                  active_columns.push_back(col_res);
    
                  accumulator[col_res] = contrib;
    
                } else {
    
                  accumulator[col_res] += contrib;
    
                }
    
              }
    
            }
    
            row_nnz_counts[i] = static_cast<int>(active_columns.size());
    
            SortIndices(active_columns);
    
            for (int col : active_columns) {
    
              row_vals[i].push_back(accumulator[col]);
    
              row_cols[i].push_back(col);
    
              accumulator[col] = 0.0;
    
            }
    
          }
    
        }
    
      ✗
        for (int i = 0; i < local_rows; ++i) {
    
      ✗
          packed_vals.insert(packed_vals.end(), row_vals[i].begin(), row_vals[i].end());
    
      ✗
          packed_cols.insert(packed_cols.end(), row_cols[i].begin(), row_cols[i].end());
    
        }
    
      ✗
      }
    
      ✗
      void LobanovMultyMatrixALL::MergeLocalResults(int rank, int comm_size, int total_rows, int result_cols, int local_rows,
    
                                                    CompressedRowMatrix &result_mat, const std::vector<int> &row_nnz_counts,
    
                                                    const std::vector<double> &packed_vals,
    
                                                    const std::vector<int> &packed_cols) {
    
      ✗
        int local_total_nnz = static_cast<int>(packed_vals.size());
    
      ✗
        std::vector<int> global_nnz_counts(comm_size);
    
      ✗
        std::vector<int> global_row_counts(comm_size);
    
      ✗
        MPI_Gather(&local_total_nnz, 1, MPI_INT, global_nnz_counts.data(), 1, MPI_INT, 0, MPI_COMM_WORLD);
    
      ✗
        MPI_Gather(&local_rows, 1, MPI_INT, global_row_counts.data(), 1, MPI_INT, 0, MPI_COMM_WORLD);
    
        // Создаем неконстантные копии для MPI (mpi не меняет данные)
    
      ✗
        std::vector<double> packed_vals_copy = packed_vals;
    
      ✗
        std::vector<int> packed_cols_copy = packed_cols;
    
      ✗
        std::vector<int> row_nnz_counts_copy = row_nnz_counts;
    
      ✗
        if (rank == 0) {
    
      ✗
          result_mat.row_count = total_rows;
    
      ✗
          result_mat.column_count = result_cols;
    
      ✗
          std::vector<int> nnz_offsets(comm_size, 0);
    
      ✗
          std::vector<int> row_offsets(comm_size, 0);
    
          int total_nnz = 0;
    
      ✗
          for (int proc = 0; proc < comm_size; ++proc) {
    
      ✗
            nnz_offsets[proc] = total_nnz;
    
      ✗
            total_nnz += global_nnz_counts[proc];
    
      ✗
            if (proc > 0) {
    
      ✗
              row_offsets[proc] = row_offsets[proc - 1] + global_row_counts[proc - 1];
    
            }
    
          }
    
      ✗
          result_mat.value_data.resize(total_nnz);
    
      ✗
          result_mat.column_index_data.resize(total_nnz);
    
      ✗
          result_mat.row_pointer_data.assign(static_cast<size_t>(total_rows) + 1, 0);
    
      ✗
          result_mat.non_zero_count = total_nnz;
    
      ✗
          MPI_Gatherv(packed_vals_copy.data(), local_total_nnz, MPI_DOUBLE, result_mat.value_data.data(),
    
                      global_nnz_counts.data(), nnz_offsets.data(), MPI_DOUBLE, 0, MPI_COMM_WORLD);
    
      ✗
          MPI_Gatherv(packed_cols_copy.data(), local_total_nnz, MPI_INT, result_mat.column_index_data.data(),
    
                      global_nnz_counts.data(), nnz_offsets.data(), MPI_INT, 0, MPI_COMM_WORLD);
    
      ✗
          std::vector<int> all_row_nnz(total_rows);
    
      ✗
          MPI_Gatherv(row_nnz_counts_copy.data(), local_rows, MPI_INT, all_row_nnz.data(), global_row_counts.data(),
    
                      row_offsets.data(), MPI_INT, 0, MPI_COMM_WORLD);
    
      ✗
          for (int i = 0; i < total_rows; ++i) {
    
      ✗
            result_mat.row_pointer_data[i + 1] = result_mat.row_pointer_data[i] + all_row_nnz[i];
    
          }
    
        } else {
    
      ✗
          MPI_Gatherv(packed_vals_copy.data(), local_total_nnz, MPI_DOUBLE, nullptr, nullptr, nullptr, MPI_DOUBLE, 0,
    
                      MPI_COMM_WORLD);
    
      ✗
          MPI_Gatherv(packed_cols_copy.data(), local_total_nnz, MPI_INT, nullptr, nullptr, nullptr, MPI_INT, 0,
    
                      MPI_COMM_WORLD);
    
      ✗
          MPI_Gatherv(row_nnz_counts_copy.data(), local_rows, MPI_INT, nullptr, nullptr, nullptr, MPI_INT, 0, MPI_COMM_WORLD);
    
        }
    
      ✗
        DistributeSparseMatrix(result_mat, 0, total_rows, result_cols);
    
      ✗
      }
    
      ✗
      bool LobanovMultyMatrixALL::RunImpl() {
    
      ✗
        int comm_size = 0;
    
      ✗
        int rank = 0;
    
      ✗
        MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
    
      ✗
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    
      ✗
        std::array<int, 3> dimensions = {0, 0, 0};
    
      ✗
        if (rank == 0) {
    
          const auto &mat_a = GetInput().first;
    
          const auto &mat_b = GetInput().second;
    
      ✗
          dimensions[0] = mat_a.row_count;
    
      ✗
          dimensions[1] = mat_a.column_count;
    
      ✗
          dimensions[2] = mat_b.column_count;
    
        }
    
      ✗
        MPI_Bcast(dimensions.data(), 3, MPI_INT, 0, MPI_COMM_WORLD);
    
      ✗
        int a_rows = dimensions[0];
    
      ✗
        int a_cols = dimensions[1];
    
      ✗
        int b_cols = dimensions[2];
    
      ✗
        CompressedRowMatrix matrix_a;
    
      ✗
        CompressedRowMatrix matrix_b;
    
      ✗
        if (rank == 0) {
    
      ✗
          matrix_a = GetInput().first;
    
      ✗
          matrix_b = GetInput().second;
    
        }
    
      ✗
        DistributeSparseMatrix(matrix_a, 0, a_rows, a_cols);
    
      ✗
        DistributeSparseMatrix(matrix_b, 0, a_cols, b_cols);
    
      ✗
        CompressedRowMatrix matrix_b_transposed = TransposeSparseMatrix(matrix_b);
    
      ✗
        int base_chunk = a_rows / comm_size;
    
      ✗
        int remainder = a_rows % comm_size;
    
      ✗
        int start_row = (rank * base_chunk) + std::min(rank, remainder);
    
      ✗
        int local_rows = base_chunk + (rank < remainder ? 1 : 0);
    
      ✗
        std::vector<int> local_row_nnz(local_rows, 0);
    
      ✗
        std::vector<double> flat_values;
    
      ✗
        std::vector<int> flat_columns;
    
      ✗
        ComputeLocalProduct(matrix_a, matrix_b_transposed, start_row, local_rows, local_row_nnz, flat_values, flat_columns);
    
      ✗
        CompressedRowMatrix result_matrix;
    
      ✗
        MergeLocalResults(rank, comm_size, a_rows, b_cols, local_rows, result_matrix, local_row_nnz, flat_values,
    
                          flat_columns);
    
      ✗
        if (rank == 0) {
    
      ✗
          GetOutput() = result_matrix;
    
        }
    
      ✗
        return true;
    
      ✗
      }
    
      ✗
      /// Post-processing stage: performs no work and always reports success.
      bool LobanovMultyMatrixALL::PostProcessingImpl() {
        return true;
      }
    
      }  // namespace lobanov_d_multi_matrix_crs

Line	Exec	Source
1		#include "lobanov_d_multi_matrix_crs/all/include/ops_all.hpp"
2
3		#include <mpi.h>
4		#include <omp.h>
5
6		#include <algorithm>
7		#include <array>
8		#include <cmath>
9		#include <cstddef>
10		#include <vector>
11
12		#include "lobanov_d_multi_matrix_crs/common/include/common.hpp"
13		#include "util/include/util.hpp"
14
15		namespace lobanov_d_multi_matrix_crs {
16
17	✗	void LobanovMultyMatrixALL::SortIndices(std::vector<int> &vec) {
18		std::ranges::sort(vec);
19	✗	}
20
21	✗	LobanovMultyMatrixALL::LobanovMultyMatrixALL(const InType &in) {
22		SetTypeOfTask(GetStaticTypeOfTask());
23	✗	int rank = 0;
24	✗	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
25	✗	if (rank == 0) {
26		GetInput() = in;
27		}
28	✗	}
29
30	✗	bool LobanovMultyMatrixALL::ValidationImpl() {
31	✗	int rank = 0;
32	✗	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
33	✗	int valid_flag = 0;
34	✗	if (rank == 0) {
35		const auto &input = GetInput();
36		const auto &mat_a = input.first;
37		const auto &mat_b = input.second;
38	✗	if (mat_a.column_count == mat_b.row_count && mat_a.row_count > 0 && mat_b.column_count > 0) {
39	✗	valid_flag = 1;
40		}
41		}
42	✗	MPI_Bcast(&valid_flag, 1, MPI_INT, 0, MPI_COMM_WORLD);
43	✗	return valid_flag == 1;
44		}
45
46	✗	bool LobanovMultyMatrixALL::PreProcessingImpl() {
47	✗	return true;
48		}
49
50	✗	void LobanovMultyMatrixALL::DistributeSparseMatrix(CompressedRowMatrix &mat, int root, int rows, int cols) {
51	✗	int rank = 0;
52	✗	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
53	✗	std::array<int, 2> sizes = {0, 0};
54	✗	if (rank == root) {
55	✗	sizes[0] = static_cast<int>(mat.row_pointer_data.size());
56	✗	sizes[1] = static_cast<int>(mat.value_data.size());
57		}
58	✗	MPI_Bcast(sizes.data(), 2, MPI_INT, root, MPI_COMM_WORLD);
59	✗	if (rank != root) {
60	✗	mat.row_pointer_data.resize(sizes[0]);
61	✗	mat.value_data.resize(sizes[1]);
62	✗	mat.column_index_data.resize(sizes[1]);
63		}
64	✗	if (sizes[0] > 0) {
65	✗	MPI_Bcast(mat.row_pointer_data.data(), sizes[0], MPI_INT, root, MPI_COMM_WORLD);
66		}
67	✗	if (sizes[1] > 0) {
68	✗	MPI_Bcast(mat.value_data.data(), sizes[1], MPI_DOUBLE, root, MPI_COMM_WORLD);
69	✗	MPI_Bcast(mat.column_index_data.data(), sizes[1], MPI_INT, root, MPI_COMM_WORLD);
70		}
71	✗	mat.row_count = rows;
72	✗	mat.column_count = cols;
73	✗	mat.non_zero_count = static_cast<int>(mat.value_data.size());
74	✗	}
75
76	✗	CompressedRowMatrix LobanovMultyMatrixALL::TransposeSparseMatrix(const CompressedRowMatrix &src) {
77	✗	CompressedRowMatrix dst;
78	✗	dst.row_count = src.column_count;
79	✗	dst.column_count = src.row_count;
80	✗	dst.row_pointer_data.assign(dst.row_count + 1, 0);
81
82	✗	for (int col : src.column_index_data) {
83	✗	dst.row_pointer_data[col + 1]++;
84		}
85	✗	for (int i = 0; i < dst.row_count; ++i) {
86	✗	dst.row_pointer_data[i + 1] += dst.row_pointer_data[i];
87		}
88	✗	dst.value_data.resize(src.value_data.size());
89	✗	dst.column_index_data.resize(src.column_index_data.size());
90	✗	dst.non_zero_count = src.non_zero_count;
91
92	✗	std::vector<int> cursor = dst.row_pointer_data;
93	✗	for (int i = 0; i < src.row_count; ++i) {
94	✗	for (int j = src.row_pointer_data[i]; j < src.row_pointer_data[i + 1]; ++j) {
95	✗	int col = src.column_index_data[j];
96	✗	int pos = cursor[col];
97	✗	cursor[col]++;
98	✗	dst.value_data[pos] = src.value_data[j];
99	✗	dst.column_index_data[pos] = i;
100		}
101		}
102	✗	return dst;
103	✗	}
104
105	✗	void LobanovMultyMatrixALL::ComputeLocalProduct(const CompressedRowMatrix &a, const CompressedRowMatrix &b_tr,
106		int start_row, int local_rows, std::vector<int> &row_nnz_counts,
107		std::vector<double> &packed_vals, std::vector<int> &packed_cols) {
108	✗	if (local_rows <= 0) {
109	✗	return;
110		}
111	✗	int result_cols = b_tr.row_count;
112
113	✗	std::vector<std::vector<double>> row_vals(local_rows);
114	✗	std::vector<std::vector<int>> row_cols(local_rows);
115
116	✗	#pragma omp parallel default(none) shared(a, b_tr, start_row, local_rows, row_vals, row_cols, row_nnz_counts, \
117	✗	result_cols) num_threads(ppc::util::GetNumThreads())
118		{
119		std::vector<int> marker(result_cols, -1);
120		std::vector<double> accumulator(result_cols, 0.0);
121		std::vector<int> active_columns;
122
123		#pragma omp for schedule(dynamic)
124		for (int i = 0; i < local_rows; ++i) {
125		int global_row = start_row + i;
126		active_columns.clear();
127
128		for (int idx = a.row_pointer_data[global_row]; idx < a.row_pointer_data[global_row + 1]; ++idx) {
129		int col_a = a.column_index_data[idx];
130		double val_a = a.value_data[idx];
131
132		for (int j = b_tr.row_pointer_data[col_a]; j < b_tr.row_pointer_data[col_a + 1]; ++j) {
133		int col_res = b_tr.column_index_data[j];
134		double contrib = val_a * b_tr.value_data[j];
135		if (marker[col_res] != i) {
136		marker[col_res] = i;
137		active_columns.push_back(col_res);
138		accumulator[col_res] = contrib;
139		} else {
140		accumulator[col_res] += contrib;
141		}
142		}
143		}
144
145		row_nnz_counts[i] = static_cast<int>(active_columns.size());
146		SortIndices(active_columns);
147		for (int col : active_columns) {
148		row_vals[i].push_back(accumulator[col]);
149		row_cols[i].push_back(col);
150		accumulator[col] = 0.0;
151		}
152		}
153		}
154
155	✗	for (int i = 0; i < local_rows; ++i) {
156	✗	packed_vals.insert(packed_vals.end(), row_vals[i].begin(), row_vals[i].end());
157	✗	packed_cols.insert(packed_cols.end(), row_cols[i].begin(), row_cols[i].end());
158		}
159	✗	}
160
161	✗	void LobanovMultyMatrixALL::MergeLocalResults(int rank, int comm_size, int total_rows, int result_cols, int local_rows,
162		CompressedRowMatrix &result_mat, const std::vector<int> &row_nnz_counts,
163		const std::vector<double> &packed_vals,
164		const std::vector<int> &packed_cols) {
165	✗	int local_total_nnz = static_cast<int>(packed_vals.size());
166	✗	std::vector<int> global_nnz_counts(comm_size);
167	✗	std::vector<int> global_row_counts(comm_size);
168
169	✗	MPI_Gather(&local_total_nnz, 1, MPI_INT, global_nnz_counts.data(), 1, MPI_INT, 0, MPI_COMM_WORLD);
170	✗	MPI_Gather(&local_rows, 1, MPI_INT, global_row_counts.data(), 1, MPI_INT, 0, MPI_COMM_WORLD);
171
172		// Создаем неконстантные копии для MPI (mpi не меняет данные)
173	✗	std::vector<double> packed_vals_copy = packed_vals;
174	✗	std::vector<int> packed_cols_copy = packed_cols;
175	✗	std::vector<int> row_nnz_counts_copy = row_nnz_counts;
176
177	✗	if (rank == 0) {
178	✗	result_mat.row_count = total_rows;
179	✗	result_mat.column_count = result_cols;
180
181	✗	std::vector<int> nnz_offsets(comm_size, 0);
182	✗	std::vector<int> row_offsets(comm_size, 0);
183		int total_nnz = 0;
184	✗	for (int proc = 0; proc < comm_size; ++proc) {
185	✗	nnz_offsets[proc] = total_nnz;
186	✗	total_nnz += global_nnz_counts[proc];
187	✗	if (proc > 0) {
188	✗	row_offsets[proc] = row_offsets[proc - 1] + global_row_counts[proc - 1];
189		}
190		}
191
192	✗	result_mat.value_data.resize(total_nnz);
193	✗	result_mat.column_index_data.resize(total_nnz);
194	✗	result_mat.row_pointer_data.assign(static_cast<size_t>(total_rows) + 1, 0);
195	✗	result_mat.non_zero_count = total_nnz;
196
197	✗	MPI_Gatherv(packed_vals_copy.data(), local_total_nnz, MPI_DOUBLE, result_mat.value_data.data(),
198		global_nnz_counts.data(), nnz_offsets.data(), MPI_DOUBLE, 0, MPI_COMM_WORLD);
199	✗	MPI_Gatherv(packed_cols_copy.data(), local_total_nnz, MPI_INT, result_mat.column_index_data.data(),
200		global_nnz_counts.data(), nnz_offsets.data(), MPI_INT, 0, MPI_COMM_WORLD);
201
202	✗	std::vector<int> all_row_nnz(total_rows);
203	✗	MPI_Gatherv(row_nnz_counts_copy.data(), local_rows, MPI_INT, all_row_nnz.data(), global_row_counts.data(),
204		row_offsets.data(), MPI_INT, 0, MPI_COMM_WORLD);
205	✗	for (int i = 0; i < total_rows; ++i) {
206	✗	result_mat.row_pointer_data[i + 1] = result_mat.row_pointer_data[i] + all_row_nnz[i];
207		}
208		} else {
209	✗	MPI_Gatherv(packed_vals_copy.data(), local_total_nnz, MPI_DOUBLE, nullptr, nullptr, nullptr, MPI_DOUBLE, 0,
210		MPI_COMM_WORLD);
211	✗	MPI_Gatherv(packed_cols_copy.data(), local_total_nnz, MPI_INT, nullptr, nullptr, nullptr, MPI_INT, 0,
212		MPI_COMM_WORLD);
213	✗	MPI_Gatherv(row_nnz_counts_copy.data(), local_rows, MPI_INT, nullptr, nullptr, nullptr, MPI_INT, 0, MPI_COMM_WORLD);
214		}
215
216	✗	DistributeSparseMatrix(result_mat, 0, total_rows, result_cols);
217	✗	}
218
219	✗	bool LobanovMultyMatrixALL::RunImpl() {
220	✗	int comm_size = 0;
221	✗	int rank = 0;
222	✗	MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
223	✗	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
224
225	✗	std::array<int, 3> dimensions = {0, 0, 0};
226	✗	if (rank == 0) {
227		const auto &mat_a = GetInput().first;
228		const auto &mat_b = GetInput().second;
229	✗	dimensions[0] = mat_a.row_count;
230	✗	dimensions[1] = mat_a.column_count;
231	✗	dimensions[2] = mat_b.column_count;
232		}
233	✗	MPI_Bcast(dimensions.data(), 3, MPI_INT, 0, MPI_COMM_WORLD);
234	✗	int a_rows = dimensions[0];
235	✗	int a_cols = dimensions[1];
236	✗	int b_cols = dimensions[2];
237
238	✗	CompressedRowMatrix matrix_a;
239	✗	CompressedRowMatrix matrix_b;
240	✗	if (rank == 0) {
241	✗	matrix_a = GetInput().first;
242	✗	matrix_b = GetInput().second;
243		}
244	✗	DistributeSparseMatrix(matrix_a, 0, a_rows, a_cols);
245	✗	DistributeSparseMatrix(matrix_b, 0, a_cols, b_cols);
246
247	✗	CompressedRowMatrix matrix_b_transposed = TransposeSparseMatrix(matrix_b);
248
249	✗	int base_chunk = a_rows / comm_size;
250	✗	int remainder = a_rows % comm_size;
251	✗	int start_row = (rank * base_chunk) + std::min(rank, remainder);
252	✗	int local_rows = base_chunk + (rank < remainder ? 1 : 0);
253
254	✗	std::vector<int> local_row_nnz(local_rows, 0);
255	✗	std::vector<double> flat_values;
256	✗	std::vector<int> flat_columns;
257
258	✗	ComputeLocalProduct(matrix_a, matrix_b_transposed, start_row, local_rows, local_row_nnz, flat_values, flat_columns);
259
260	✗	CompressedRowMatrix result_matrix;
261	✗	MergeLocalResults(rank, comm_size, a_rows, b_cols, local_rows, result_matrix, local_row_nnz, flat_values,
262		flat_columns);
263
264	✗	if (rank == 0) {
265	✗	GetOutput() = result_matrix;
266		}
267	✗	return true;
268	✗	}
269
270	✗	bool LobanovMultyMatrixALL::PostProcessingImpl() {
271	✗	return true;
272		}
273
274		} // namespace lobanov_d_multi_matrix_crs
275