GCC Code Coverage Report

Directory:	./
File:	tasks/guseva_crs/all/include/multiplier_all.hpp
Date:	2026-06-04 20:25:32

	Exec	Total	Coverage
Lines:	144	172	83.7%
Functions:	8	9	88.9%
Branches:	78	142	54.9%

  
      Line
      Branch
      Exec
      Source
    
      #pragma once
    
      #include <mpi.h>
    
      #include <omp.h>
    
      #include <algorithm>
    
      #include <cmath>
    
      #include <cstddef>
    
      #include <cstring>
    
      #include <stdexcept>
    
      #include <utility>
    
      #include <vector>
    
      #include "guseva_crs/common/include/common.hpp"
    
      #include "guseva_crs/common/include/multiplier.hpp"
    
      namespace guseva_crs {
    
      12
      class MultiplierAll : public Multiplier {
    
        static void PerformCalculation(std::size_t ind3, std::size_t ind4, const CRS &a, const CRS &bt, double &sum,
    
                                       const std::vector<int> &temp) {
    
        2/2✓ Branch 0 taken 5071 times.
✓ Branch 1 taken 2009 times.

      7080
          for (std::size_t k = ind3; k < ind4; k++) {
    
      5071
            std::size_t bcol = bt.cols[k];
    
      5071
            int aind = temp[bcol];
    
        2/2✓ Branch 0 taken 705 times.
✓ Branch 1 taken 4366 times.

      5071
            if (aind != -1) {
    
      705
              sum += a.values[aind] * bt.values[k];
    
            }
    
          }
    
        }
    
      97
        static void ComputeLocalRow(std::size_t global_i, std::size_t n, const CRS &a, const CRS &bt,
    
                                    std::vector<std::size_t> &columns, std::vector<double> &values, std::size_t &row_nnz) {
    
      97
          std::vector<int> temp(n, -1);
    
      97
          std::size_t ind1 = a.row_ptrs[global_i];
    
      97
          std::size_t ind2 = a.row_ptrs[global_i + 1];
    
        2/2✓ Branch 0 taken 293 times.
✓ Branch 1 taken 97 times.

      390
          for (std::size_t j = ind1; j < ind2; j++) {
    
      293
            std::size_t col = a.cols[j];
    
      293
            temp[col] = static_cast<int>(j);
    
          }
    
        2/2✓ Branch 0 taken 2009 times.
✓ Branch 1 taken 97 times.

      2106
          for (std::size_t j = 0; j < n; j++) {
    
      2009
            double sum = 0;
    
      2009
            std::size_t ind3 = bt.row_ptrs[j];
    
      2009
            std::size_t ind4 = bt.row_ptrs[j + 1];
    
            PerformCalculation(ind3, ind4, a, bt, sum, temp);
    
        2/2✓ Branch 0 taken 511 times.
✓ Branch 1 taken 1498 times.

      2009
            if (std::fabs(sum) > kZERO) {
    
              columns.push_back(j);
    
              values.push_back(sum);
    
      511
              row_nnz++;
    
            }
    
          }
    
      97
        }
    
        static void ComputeLocalResults(std::size_t start_row, std::size_t local_nrows, std::size_t n, const CRS &a,
    
                                        const CRS &bt, std::vector<std::vector<std::size_t>> &local_columns,
    
                                        std::vector<std::vector<double>> &local_values,
    
                                        std::vector<std::size_t> &local_row_index) {
    
      12
      #pragma omp parallel for default(none) \
    
          shared(n, a, bt, local_columns, local_values, local_row_index, start_row, local_nrows)
    
          for (std::size_t local_i = 0; local_i < local_nrows; local_i++) {
    
            std::size_t global_i = start_row + local_i;
    
            ComputeLocalRow(global_i, n, a, bt, local_columns[local_i], local_values[local_i], local_row_index[local_i]);
    
          }
    
        }
    
      12
        static void FlattenLocalData(const std::vector<std::vector<std::size_t>> &local_columns,
    
                                     const std::vector<std::vector<double>> &local_values,
    
                                     std::vector<std::size_t> &flat_columns, std::vector<double> &flat_values,
    
                                     std::vector<int> &row_sizes) {
    
        2/2✓ Branch 0 taken 97 times.
✓ Branch 1 taken 12 times.

      109
          for (std::size_t i = 0; i < local_columns.size(); i++) {
    
      97
            row_sizes[i] = static_cast<int>(local_columns[i].size());
    
      97
            flat_columns.insert(flat_columns.end(), local_columns[i].begin(), local_columns[i].end());
    
      97
            flat_values.insert(flat_values.end(), local_values[i].begin(), local_values[i].end());
    
          }
    
      12
        }
    
        struct ProcessData {
    
          std::size_t start_row{};
    
          std::size_t local_nrows{};
    
          std::vector<int> row_sizes;
    
          std::vector<std::size_t> flat_columns;
    
          std::vector<double> flat_values;
    
        };
    
      6
        static ProcessData ReceiveProcessData(int source, std::size_t p_start_row, std::size_t p_local_nrows) {
    
      6
          ProcessData data;
    
      6
          data.start_row = p_start_row;
    
      6
          data.local_nrows = p_local_nrows;
    
        1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.

      6
          data.row_sizes.resize(p_local_nrows);
    
        1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.

      6
          MPI_Recv(data.row_sizes.data(), static_cast<int>(p_local_nrows), MPI_INT, source, 0, MPI_COMM_WORLD,
    
                   MPI_STATUS_IGNORE);
    
          int total_nz = 0;
    
        2/2✓ Branch 0 taken 46 times.
✓ Branch 1 taken 6 times.

      52
          for (std::size_t i = 0; i < p_local_nrows; i++) {
    
      46
            total_nz += data.row_sizes[i];
    
          }
    
        1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.

      6
          if (total_nz > 0) {
    
        1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.

      6
            data.flat_columns.resize(total_nz);
    
        1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.

      6
            data.flat_values.resize(total_nz);
    
        1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.

      6
            MPI_Recv(data.flat_columns.data(), total_nz, MPI_UNSIGNED_LONG, source, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    
        1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.

      6
            MPI_Recv(data.flat_values.data(), total_nz, MPI_DOUBLE, source, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    
          }
    
      6
          return data;
    
      ✗
        }
    
      12
        static void AssembleResultFromProcess(const ProcessData &data, std::vector<std::vector<std::size_t>> &columns,
    
                                              std::vector<std::vector<double>> &values) {
    
          std::size_t offset = 0;
    
        2/2✓ Branch 0 taken 97 times.
✓ Branch 1 taken 12 times.

      109
          for (std::size_t local_i = 0; local_i < data.local_nrows; local_i++) {
    
        2/2✓ Branch 0 taken 85 times.
✓ Branch 1 taken 12 times.

      97
            std::size_t global_row = data.start_row + local_i;
    
      97
            int row_size = data.row_sizes[local_i];
    
        2/2✓ Branch 0 taken 85 times.
✓ Branch 1 taken 12 times.

      97
            if (row_size > 0) {
    
      85
              columns[global_row].resize(row_size);
    
      85
              values[global_row].resize(row_size);
    
        2/2✓ Branch 0 taken 511 times.
✓ Branch 1 taken 85 times.

      596
              for (int j = 0; j < row_size; j++) {
    
      511
                columns[global_row][j] = data.flat_columns[offset + j];
    
      511
                values[global_row][j] = data.flat_values[offset + j];
    
              }
    
      85
              offset += static_cast<std::size_t>(row_size);
    
            }
    
          }
    
      12
        }
    
      6
        static CRS BuildFinalMatrix(std::size_t n, std::vector<std::vector<std::size_t>> &columns,
    
                                    std::vector<std::vector<double>> &values) {
    
      6
          CRS result;
    
        1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.

      6
          result.row_ptrs.resize(n + 1, 0);
    
          std::size_t nz = 0;
    
        2/2✓ Branch 0 taken 97 times.
✓ Branch 1 taken 6 times.

      103
          for (std::size_t i = 0; i < n; i++) {
    
      97
            result.row_ptrs[i] = nz;
    
      97
            nz += columns[i].size();
    
          }
    
      6
          result.row_ptrs[n] = nz;
    
        1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.

      6
          result.cols.reserve(nz);
    
        1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.

      6
          result.values.reserve(nz);
    
        2/2✓ Branch 0 taken 97 times.
✓ Branch 1 taken 6 times.

      103
          for (std::size_t i = 0; i < n; i++) {
    
        1/2✓ Branch 1 taken 97 times.
✗ Branch 2 not taken.

      97
            result.cols.insert(result.cols.end(), columns[i].begin(), columns[i].end());
    
      97
            result.values.insert(result.values.end(), values[i].begin(), values[i].end());
    
          }
    
      6
          result.nz = nz;
    
      6
          result.ncols = n;
    
      6
          result.nrows = n;
    
      6
          return result;
    
      ✗
        }
    
      6
        static void SendLocalData(int dest, const std::vector<int> &row_sizes, const std::vector<std::size_t> &flat_columns,
    
                                  const std::vector<double> &flat_values) {
    
      6
          std::vector<int> row_sizes_copy = row_sizes;
    
        1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.

      6
          MPI_Send(row_sizes_copy.data(), static_cast<int>(row_sizes_copy.size()), MPI_INT, dest, 0, MPI_COMM_WORLD);
    
        1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.

      6
          if (!flat_columns.empty()) {
    
        1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.

      6
            std::vector<std::size_t> flat_columns_copy = flat_columns;
    
        1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.

      6
            std::vector<double> flat_values_copy = flat_values;
    
        1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.

      6
            MPI_Send(flat_columns_copy.data(), static_cast<int>(flat_columns_copy.size()), MPI_UNSIGNED_LONG, dest, 1,
    
                     MPI_COMM_WORLD);
    
        1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.

      6
            MPI_Send(flat_values_copy.data(), static_cast<int>(flat_values_copy.size()), MPI_DOUBLE, dest, 2, MPI_COMM_WORLD);
    
          }
    
      6
        }
    
       public:
    
      12
        [[nodiscard]] CRS Multiply(const CRS &a, const CRS &b) const override {
    
      12
          int rank = -1;
    
      12
          int num_procs = -1;
    
      12
          MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    
      12
          MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
    
        2/4✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 12 times.

      12
          if ((rank < 0) || (num_procs < 0)) {
    
      ✗
            throw std::runtime_error("MPI rank or world size is incorrect");
    
          }
    
      12
          std::size_t n = a.nrows;
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.

      12
          if (num_procs == 1) {
    
      ✗
            return MultiplySerial(a, b);
    
          }
    
      12
          std::size_t rows_per_proc = n / static_cast<std::size_t>(num_procs);
    
      12
          std::size_t remainder = n % static_cast<std::size_t>(num_procs);
    
          std::size_t start_row =
    
        2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 7 times.

      12
              (static_cast<std::size_t>(rank) * rows_per_proc) + std::min(static_cast<std::size_t>(rank), remainder);
    
      12
          std::size_t local_nrows = rows_per_proc + (std::cmp_less(rank, remainder) ? 1 : 0);
    
      12
          auto bt = this->Transpose(b);
    
        1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.

      12
          std::vector<std::vector<std::size_t>> local_columns(local_nrows);
    
        1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.

      12
          std::vector<std::vector<double>> local_values(local_nrows);
    
        1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.

      12
          std::vector<std::size_t> local_row_index(local_nrows, 0);
    
          ComputeLocalResults(start_row, local_nrows, n, a, bt, local_columns, local_values, local_row_index);
    
      12
          std::vector<std::size_t> flat_columns;
    
      12
          std::vector<double> flat_values;
    
        1/4✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      12
          std::vector<int> row_sizes(local_nrows);
    
        1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.

      12
          FlattenLocalData(local_columns, local_values, flat_columns, flat_values, row_sizes);
    
      12
          CRS result;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.

      12
          if (rank == 0) {
    
        1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.

      6
            std::vector<std::vector<std::size_t>> columns(n);
    
        1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.

      6
            std::vector<std::vector<double>> values(n);
    
        2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 6 times.

      18
            for (int pp = 0; pp < num_procs; pp++) {
    
              std::size_t p_start_row =
    
        2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 5 times.

      12
                  (static_cast<std::size_t>(pp) * rows_per_proc) + std::min(static_cast<std::size_t>(pp), remainder);
    
      12
              std::size_t p_local_nrows = rows_per_proc + (std::cmp_less(pp, remainder) ? 1 : 0);
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.

      12
              if (pp == 0) {
    
      6
                ProcessData self_data;
    
      6
                self_data.start_row = p_start_row;
    
      6
                self_data.local_nrows = p_local_nrows;
    
        1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.

      6
                self_data.row_sizes = row_sizes;
    
        1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.

      6
                self_data.flat_columns = flat_columns;
    
        1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.

      6
                self_data.flat_values = flat_values;
    
        1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.

      6
                AssembleResultFromProcess(self_data, columns, values);
    
      6
              } else {
    
        1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.

      6
                ProcessData received_data = ReceiveProcessData(pp, p_start_row, p_local_nrows);
    
        1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.

      6
                AssembleResultFromProcess(received_data, columns, values);
    
      6
              }
    
            }
    
        1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.

      6
            result = BuildFinalMatrix(n, columns, values);
    
      6
          } else {
    
        1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.

      6
            SendLocalData(0, row_sizes, flat_columns, flat_values);
    
          }
    
        1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.

      12
          BroadcastResult(result, rank);
    
          return result;
    
      24
        }
    
      ✗
        [[nodiscard]] CRS MultiplySerial(const CRS &a, const CRS &b) const {
    
      ✗
          std::size_t n = a.nrows;
    
      ✗
          auto bt = this->Transpose(b);
    
      ✗
          std::vector<std::vector<std::size_t>> columns(n);
    
      ✗
          std::vector<std::vector<double>> values(n);
    
      ✗
          std::vector<std::size_t> row_index(n + 1, 0);
    
      ✗
      #pragma omp parallel for default(none) shared(n, a, bt, columns, values, row_index)
    
          for (std::size_t i = 0; i < n; i++) {
    
            std::vector<int> temp(n, -1);
    
            std::size_t ind1 = a.row_ptrs[i];
    
            std::size_t ind2 = a.row_ptrs[i + 1];
    
            for (std::size_t j = ind1; j < ind2; j++) {
    
              std::size_t col = a.cols[j];
    
              temp[col] = static_cast<int>(j);
    
            }
    
            for (std::size_t j = 0; j < n; j++) {
    
              double sum = 0;
    
              std::size_t ind3 = bt.row_ptrs[j];
    
              std::size_t ind4 = bt.row_ptrs[j + 1];
    
              PerformCalculation(ind3, ind4, a, bt, sum, temp);
    
              if (std::fabs(sum) > kZERO) {
    
                columns[i].push_back(j);
    
                values[i].push_back(sum);
    
                row_index[i]++;
    
              }
    
            }
    
          }
    
          std::size_t nz = 0;
    
      ✗
          for (std::size_t i = 0; i < n; i++) {
    
      ✗
            std::size_t tmp = row_index[i];
    
      ✗
            row_index[i] = nz;
    
      ✗
            nz += tmp;
    
          }
    
      ✗
          row_index[n] = nz;
    
      ✗
          CRS result;
    
      ✗
          result.cols.reserve(nz);
    
      ✗
          result.values.reserve(nz);
    
      ✗
          for (std::size_t i = 0; i < n; i++) {
    
      ✗
            result.cols.insert(result.cols.end(), columns[i].begin(), columns[i].end());
    
      ✗
            result.values.insert(result.values.end(), values[i].begin(), values[i].end());
    
          }
    
      ✗
          result.row_ptrs = row_index;
    
      ✗
          result.nz = nz;
    
      ✗
          result.ncols = n;
    
      ✗
          result.nrows = n;
    
      ✗
          return result;
    
      ✗
        }
    
      12
        static void BroadcastResult(CRS &result, int rank) {
    
      12
          std::size_t nrows = result.nrows;
    
      12
          std::size_t ncols = result.ncols;
    
      12
          std::size_t nz = result.nz;
    
      12
          MPI_Bcast(&nrows, 1, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD);
    
      12
          MPI_Bcast(&ncols, 1, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD);
    
      12
          MPI_Bcast(&nz, 1, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD);
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.

      12
          if (rank != 0) {
    
      6
            result.nrows = nrows;
    
      6
            result.ncols = ncols;
    
      6
            result.nz = nz;
    
        1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.

      6
            if (nrows > 0) {
    
      6
              result.row_ptrs.resize(nrows + 1);
    
            }
    
        1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.

      6
            if (nz > 0) {
    
      6
              result.cols.resize(nz);
    
      6
              result.values.resize(nz);
    
            }
    
          }
    
        1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.

      12
          if (nrows > 0) {
    
      12
            MPI_Bcast(result.row_ptrs.data(), static_cast<int>(nrows + 1), MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD);
    
          }
    
        1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.

      12
          if (nz > 0) {
    
      12
            MPI_Bcast(result.cols.data(), static_cast<int>(nz), MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD);
    
      12
            MPI_Bcast(result.values.data(), static_cast<int>(nz), MPI_DOUBLE, 0, MPI_COMM_WORLD);
    
          }
    
      12
        }
    
      };
    
      }  // namespace guseva_crs

Line	Branch	Exec	Source
1			#pragma once
2			#include <mpi.h>
3			#include <omp.h>
4
5			#include <algorithm>
6			#include <cmath>
7			#include <cstddef>
8			#include <cstring>
9			#include <stdexcept>
10			#include <utility>
11			#include <vector>
12
13			#include "guseva_crs/common/include/common.hpp"
14			#include "guseva_crs/common/include/multiplier.hpp"
15
16			namespace guseva_crs {
17
18		12	class MultiplierAll : public Multiplier {
19			static void PerformCalculation(std::size_t ind3, std::size_t ind4, const CRS &a, const CRS &bt, double &sum,
20			const std::vector<int> &temp) {
21	2/2 ✓ Branch 0 taken 5071 times. ✓ Branch 1 taken 2009 times.	7080	for (std::size_t k = ind3; k < ind4; k++) {
22		5071	std::size_t bcol = bt.cols[k];
23		5071	int aind = temp[bcol];
24
25	2/2 ✓ Branch 0 taken 705 times. ✓ Branch 1 taken 4366 times.	5071	if (aind != -1) {
26		705	sum += a.values[aind] * bt.values[k];
27			}
28			}
29			}
30
31		97	static void ComputeLocalRow(std::size_t global_i, std::size_t n, const CRS &a, const CRS &bt,
32			std::vector<std::size_t> &columns, std::vector<double> &values, std::size_t &row_nnz) {
33		97	std::vector<int> temp(n, -1);
34
35		97	std::size_t ind1 = a.row_ptrs[global_i];
36		97	std::size_t ind2 = a.row_ptrs[global_i + 1];
37	2/2 ✓ Branch 0 taken 293 times. ✓ Branch 1 taken 97 times.	390	for (std::size_t j = ind1; j < ind2; j++) {
38		293	std::size_t col = a.cols[j];
39		293	temp[col] = static_cast<int>(j);
40			}
41
42	2/2 ✓ Branch 0 taken 2009 times. ✓ Branch 1 taken 97 times.	2106	for (std::size_t j = 0; j < n; j++) {
43		2009	double sum = 0;
44		2009	std::size_t ind3 = bt.row_ptrs[j];
45		2009	std::size_t ind4 = bt.row_ptrs[j + 1];
46
47			PerformCalculation(ind3, ind4, a, bt, sum, temp);
48
49	2/2 ✓ Branch 0 taken 511 times. ✓ Branch 1 taken 1498 times.	2009	if (std::fabs(sum) > kZERO) {
50			columns.push_back(j);
51			values.push_back(sum);
52		511	row_nnz++;
53			}
54			}
55		97	}
56
57			static void ComputeLocalResults(std::size_t start_row, std::size_t local_nrows, std::size_t n, const CRS &a,
58			const CRS &bt, std::vector<std::vector<std::size_t>> &local_columns,
59			std::vector<std::vector<double>> &local_values,
60			std::vector<std::size_t> &local_row_index) {
61		12	#pragma omp parallel for default(none) \
62			shared(n, a, bt, local_columns, local_values, local_row_index, start_row, local_nrows)
63			for (std::size_t local_i = 0; local_i < local_nrows; local_i++) {
64			std::size_t global_i = start_row + local_i;
65			ComputeLocalRow(global_i, n, a, bt, local_columns[local_i], local_values[local_i], local_row_index[local_i]);
66			}
67			}
68
69		12	static void FlattenLocalData(const std::vector<std::vector<std::size_t>> &local_columns,
70			const std::vector<std::vector<double>> &local_values,
71			std::vector<std::size_t> &flat_columns, std::vector<double> &flat_values,
72			std::vector<int> &row_sizes) {
73	2/2 ✓ Branch 0 taken 97 times. ✓ Branch 1 taken 12 times.	109	for (std::size_t i = 0; i < local_columns.size(); i++) {
74		97	row_sizes[i] = static_cast<int>(local_columns[i].size());
75		97	flat_columns.insert(flat_columns.end(), local_columns[i].begin(), local_columns[i].end());
76		97	flat_values.insert(flat_values.end(), local_values[i].begin(), local_values[i].end());
77			}
78		12	}
79
80			struct ProcessData {
81			std::size_t start_row{};
82			std::size_t local_nrows{};
83			std::vector<int> row_sizes;
84			std::vector<std::size_t> flat_columns;
85			std::vector<double> flat_values;
86			};
87
88		6	static ProcessData ReceiveProcessData(int source, std::size_t p_start_row, std::size_t p_local_nrows) {
89		6	ProcessData data;
90		6	data.start_row = p_start_row;
91		6	data.local_nrows = p_local_nrows;
92	1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken.	6	data.row_sizes.resize(p_local_nrows);
93
94	1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken.	6	MPI_Recv(data.row_sizes.data(), static_cast<int>(p_local_nrows), MPI_INT, source, 0, MPI_COMM_WORLD,
95			MPI_STATUS_IGNORE);
96
97			int total_nz = 0;
98	2/2 ✓ Branch 0 taken 46 times. ✓ Branch 1 taken 6 times.	52	for (std::size_t i = 0; i < p_local_nrows; i++) {
99		46	total_nz += data.row_sizes[i];
100			}
101
102	1/2 ✓ Branch 0 taken 6 times. ✗ Branch 1 not taken.	6	if (total_nz > 0) {
103	1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken.	6	data.flat_columns.resize(total_nz);
104	1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken.	6	data.flat_values.resize(total_nz);
105	1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken.	6	MPI_Recv(data.flat_columns.data(), total_nz, MPI_UNSIGNED_LONG, source, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
106	1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken.	6	MPI_Recv(data.flat_values.data(), total_nz, MPI_DOUBLE, source, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
107			}
108
109		6	return data;
110		✗	}
111
112		12	static void AssembleResultFromProcess(const ProcessData &data, std::vector<std::vector<std::size_t>> &columns,
113			std::vector<std::vector<double>> &values) {
114			std::size_t offset = 0;
115	2/2 ✓ Branch 0 taken 97 times. ✓ Branch 1 taken 12 times.	109	for (std::size_t local_i = 0; local_i < data.local_nrows; local_i++) {
116	2/2 ✓ Branch 0 taken 85 times. ✓ Branch 1 taken 12 times.	97	std::size_t global_row = data.start_row + local_i;
117		97	int row_size = data.row_sizes[local_i];
118
119	2/2 ✓ Branch 0 taken 85 times. ✓ Branch 1 taken 12 times.	97	if (row_size > 0) {
120		85	columns[global_row].resize(row_size);
121		85	values[global_row].resize(row_size);
122
123	2/2 ✓ Branch 0 taken 511 times. ✓ Branch 1 taken 85 times.	596	for (int j = 0; j < row_size; j++) {
124		511	columns[global_row][j] = data.flat_columns[offset + j];
125		511	values[global_row][j] = data.flat_values[offset + j];
126			}
127		85	offset += static_cast<std::size_t>(row_size);
128			}
129			}
130		12	}
131
132		6	static CRS BuildFinalMatrix(std::size_t n, std::vector<std::vector<std::size_t>> &columns,
133			std::vector<std::vector<double>> &values) {
134		6	CRS result;
135	1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken.	6	result.row_ptrs.resize(n + 1, 0);
136
137			std::size_t nz = 0;
138	2/2 ✓ Branch 0 taken 97 times. ✓ Branch 1 taken 6 times.	103	for (std::size_t i = 0; i < n; i++) {
139		97	result.row_ptrs[i] = nz;
140		97	nz += columns[i].size();
141			}
142		6	result.row_ptrs[n] = nz;
143
144	1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken.	6	result.cols.reserve(nz);
145	1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken.	6	result.values.reserve(nz);
146	2/2 ✓ Branch 0 taken 97 times. ✓ Branch 1 taken 6 times.	103	for (std::size_t i = 0; i < n; i++) {
147	1/2 ✓ Branch 1 taken 97 times. ✗ Branch 2 not taken.	97	result.cols.insert(result.cols.end(), columns[i].begin(), columns[i].end());
148		97	result.values.insert(result.values.end(), values[i].begin(), values[i].end());
149			}
150
151		6	result.nz = nz;
152		6	result.ncols = n;
153		6	result.nrows = n;
154
155		6	return result;
156		✗	}
157
158		6	static void SendLocalData(int dest, const std::vector<int> &row_sizes, const std::vector<std::size_t> &flat_columns,
159			const std::vector<double> &flat_values) {
160		6	std::vector<int> row_sizes_copy = row_sizes;
161	1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken.	6	MPI_Send(row_sizes_copy.data(), static_cast<int>(row_sizes_copy.size()), MPI_INT, dest, 0, MPI_COMM_WORLD);
162
163	1/2 ✓ Branch 0 taken 6 times. ✗ Branch 1 not taken.	6	if (!flat_columns.empty()) {
164	1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken.	6	std::vector<std::size_t> flat_columns_copy = flat_columns;
165	1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken.	6	std::vector<double> flat_values_copy = flat_values;
166	1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken.	6	MPI_Send(flat_columns_copy.data(), static_cast<int>(flat_columns_copy.size()), MPI_UNSIGNED_LONG, dest, 1,
167			MPI_COMM_WORLD);
168	1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken.	6	MPI_Send(flat_values_copy.data(), static_cast<int>(flat_values_copy.size()), MPI_DOUBLE, dest, 2, MPI_COMM_WORLD);
169			}
170		6	}
171
172			public:
173		12	[[nodiscard]] CRS Multiply(const CRS &a, const CRS &b) const override {
174		12	int rank = -1;
175		12	int num_procs = -1;
176		12	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
177		12	MPI_Comm_size(MPI_COMM_WORLD, &num_procs);
178
179	2/4 ✓ Branch 0 taken 12 times. ✗ Branch 1 not taken. ✗ Branch 2 not taken. ✓ Branch 3 taken 12 times.	12	if ((rank < 0) \|\| (num_procs < 0)) {
180		✗	throw std::runtime_error("MPI rank or world size is incorrect");
181			}
182
183		12	std::size_t n = a.nrows;
184
185	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 12 times.	12	if (num_procs == 1) {
186		✗	return MultiplySerial(a, b);
187			}
188
189		12	std::size_t rows_per_proc = n / static_cast<std::size_t>(num_procs);
190		12	std::size_t remainder = n % static_cast<std::size_t>(num_procs);
191			std::size_t start_row =
192	2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 7 times.	12	(static_cast<std::size_t>(rank) * rows_per_proc) + std::min(static_cast<std::size_t>(rank), remainder);
193		12	std::size_t local_nrows = rows_per_proc + (std::cmp_less(rank, remainder) ? 1 : 0);
194
195		12	auto bt = this->Transpose(b);
196
197	1/2 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken.	12	std::vector<std::vector<std::size_t>> local_columns(local_nrows);
198	1/2 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken.	12	std::vector<std::vector<double>> local_values(local_nrows);
199	1/2 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken.	12	std::vector<std::size_t> local_row_index(local_nrows, 0);
200
201			ComputeLocalResults(start_row, local_nrows, n, a, bt, local_columns, local_values, local_row_index);
202
203		12	std::vector<std::size_t> flat_columns;
204		12	std::vector<double> flat_values;
205	1/4 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	12	std::vector<int> row_sizes(local_nrows);
206	1/2 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken.	12	FlattenLocalData(local_columns, local_values, flat_columns, flat_values, row_sizes);
207
208		12	CRS result;
209
210	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 6 times.	12	if (rank == 0) {
211	1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken.	6	std::vector<std::vector<std::size_t>> columns(n);
212	1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken.	6	std::vector<std::vector<double>> values(n);
213
214	2/2 ✓ Branch 0 taken 12 times. ✓ Branch 1 taken 6 times.	18	for (int pp = 0; pp < num_procs; pp++) {
215			std::size_t p_start_row =
216	2/2 ✓ Branch 0 taken 7 times. ✓ Branch 1 taken 5 times.	12	(static_cast<std::size_t>(pp) * rows_per_proc) + std::min(static_cast<std::size_t>(pp), remainder);
217		12	std::size_t p_local_nrows = rows_per_proc + (std::cmp_less(pp, remainder) ? 1 : 0);
218
219	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 6 times.	12	if (pp == 0) {
220		6	ProcessData self_data;
221		6	self_data.start_row = p_start_row;
222		6	self_data.local_nrows = p_local_nrows;
223	1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken.	6	self_data.row_sizes = row_sizes;
224	1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken.	6	self_data.flat_columns = flat_columns;
225	1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken.	6	self_data.flat_values = flat_values;
226	1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken.	6	AssembleResultFromProcess(self_data, columns, values);
227		6	} else {
228	1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken.	6	ProcessData received_data = ReceiveProcessData(pp, p_start_row, p_local_nrows);
229	1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken.	6	AssembleResultFromProcess(received_data, columns, values);
230		6	}
231			}
232
233	1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken.	6	result = BuildFinalMatrix(n, columns, values);
234		6	} else {
235	1/2 ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken.	6	SendLocalData(0, row_sizes, flat_columns, flat_values);
236			}
237
238	1/2 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken.	12	BroadcastResult(result, rank);
239
240			return result;
241		24	}
242
243		✗	[[nodiscard]] CRS MultiplySerial(const CRS &a, const CRS &b) const {
244		✗	std::size_t n = a.nrows;
245		✗	auto bt = this->Transpose(b);
246
247		✗	std::vector<std::vector<std::size_t>> columns(n);
248		✗	std::vector<std::vector<double>> values(n);
249		✗	std::vector<std::size_t> row_index(n + 1, 0);
250
251		✗	#pragma omp parallel for default(none) shared(n, a, bt, columns, values, row_index)
252			for (std::size_t i = 0; i < n; i++) {
253			std::vector<int> temp(n, -1);
254
255			std::size_t ind1 = a.row_ptrs[i];
256			std::size_t ind2 = a.row_ptrs[i + 1];
257			for (std::size_t j = ind1; j < ind2; j++) {
258			std::size_t col = a.cols[j];
259			temp[col] = static_cast<int>(j);
260			}
261
262			for (std::size_t j = 0; j < n; j++) {
263			double sum = 0;
264			std::size_t ind3 = bt.row_ptrs[j];
265			std::size_t ind4 = bt.row_ptrs[j + 1];
266
267			PerformCalculation(ind3, ind4, a, bt, sum, temp);
268
269			if (std::fabs(sum) > kZERO) {
270			columns[i].push_back(j);
271			values[i].push_back(sum);
272			row_index[i]++;
273			}
274			}
275			}
276
277			std::size_t nz = 0;
278		✗	for (std::size_t i = 0; i < n; i++) {
279		✗	std::size_t tmp = row_index[i];
280		✗	row_index[i] = nz;
281		✗	nz += tmp;
282			}
283		✗	row_index[n] = nz;
284
285		✗	CRS result;
286		✗	result.cols.reserve(nz);
287		✗	result.values.reserve(nz);
288		✗	for (std::size_t i = 0; i < n; i++) {
289		✗	result.cols.insert(result.cols.end(), columns[i].begin(), columns[i].end());
290		✗	result.values.insert(result.values.end(), values[i].begin(), values[i].end());
291			}
292		✗	result.row_ptrs = row_index;
293		✗	result.nz = nz;
294		✗	result.ncols = n;
295		✗	result.nrows = n;
296
297		✗	return result;
298		✗	}
299
300		12	static void BroadcastResult(CRS &result, int rank) {
301		12	std::size_t nrows = result.nrows;
302		12	std::size_t ncols = result.ncols;
303		12	std::size_t nz = result.nz;
304
305		12	MPI_Bcast(&nrows, 1, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD);
306		12	MPI_Bcast(&ncols, 1, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD);
307		12	MPI_Bcast(&nz, 1, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD);
308
309	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 6 times.	12	if (rank != 0) {
310		6	result.nrows = nrows;
311		6	result.ncols = ncols;
312		6	result.nz = nz;
313	1/2 ✓ Branch 0 taken 6 times. ✗ Branch 1 not taken.	6	if (nrows > 0) {
314		6	result.row_ptrs.resize(nrows + 1);
315			}
316	1/2 ✓ Branch 0 taken 6 times. ✗ Branch 1 not taken.	6	if (nz > 0) {
317		6	result.cols.resize(nz);
318		6	result.values.resize(nz);
319			}
320			}
321
322	1/2 ✓ Branch 0 taken 12 times. ✗ Branch 1 not taken.	12	if (nrows > 0) {
323		12	MPI_Bcast(result.row_ptrs.data(), static_cast<int>(nrows + 1), MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD);
324			}
325
326	1/2 ✓ Branch 0 taken 12 times. ✗ Branch 1 not taken.	12	if (nz > 0) {
327		12	MPI_Bcast(result.cols.data(), static_cast<int>(nz), MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD);
328		12	MPI_Bcast(result.values.data(), static_cast<int>(nz), MPI_DOUBLE, 0, MPI_COMM_WORLD);
329			}
330		12	}
331			};
332
333			} // namespace guseva_crs
334