| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "maslova_u_mult_matr_crs/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | #include <omp.h> | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <array> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "maslova_u_mult_matr_crs/common/include/common.hpp" | ||
| 12 | #include "util/include/util.hpp" | ||
| 13 | |||
| 14 | namespace maslova_u_mult_matr_crs { | ||
| 15 | |||
| 16 | ✗ | void MaslovaUMultMatrALL::SortVector(std::vector<int> &vec) { // для прохождения clang-tidy | |
| 17 | std::ranges::sort(vec); | ||
| 18 | ✗ | } | |
| 19 | |||
| 20 |
1/2✓ Branch 2 taken 10 times.
✗ Branch 3 not taken.
|
10 | MaslovaUMultMatrALL::MaslovaUMultMatrALL(const InType &in) { |
| 21 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 22 | 10 | int rank = 0; | |
| 23 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); |
| 24 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (rank == 0) { |
| 25 | GetInput() = in; | ||
| 26 | } | ||
| 27 | 10 | } | |
| 28 | |||
| 29 | 10 | bool MaslovaUMultMatrALL::ValidationImpl() { | |
| 30 | 10 | int rank = 0; | |
| 31 | 10 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 32 | 10 | int ok = 0; | |
| 33 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (rank == 0) { |
| 34 | const auto &input = GetInput(); | ||
| 35 | const auto &matrix_a = std::get<0>(input); | ||
| 36 | const auto &matrix_b = std::get<1>(input); | ||
| 37 |
3/6✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 5 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 5 times.
✗ Branch 5 not taken.
|
5 | if (matrix_a.cols == matrix_b.rows && matrix_a.rows > 0 && matrix_b.cols > 0) { |
| 38 | 5 | ok = 1; | |
| 39 | } | ||
| 40 | } | ||
| 41 | 10 | MPI_Bcast(&ok, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 42 | 10 | return ok == 1; | |
| 43 | } | ||
| 44 | |||
| 45 | 10 | bool MaslovaUMultMatrALL::PreProcessingImpl() { | |
| 46 | 10 | return true; | |
| 47 | } | ||
| 48 | |||
| 49 | 30 | void MaslovaUMultMatrALL::BroadcastCRSMatrix(CRSMatrix &m, int root, int r, int c) { | |
| 50 | 30 | int rank = 0; | |
| 51 | 30 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 52 | 30 | std::array<int, 2> counts = {0, 0}; | |
| 53 |
2/2✓ Branch 0 taken 15 times.
✓ Branch 1 taken 15 times.
|
30 | if (rank == root) { |
| 54 | 15 | counts[0] = static_cast<int>(m.row_ptr.size()); | |
| 55 | 15 | counts[1] = static_cast<int>(m.values.size()); | |
| 56 | } | ||
| 57 | 30 | MPI_Bcast(counts.data(), 2, MPI_INT, root, MPI_COMM_WORLD); | |
| 58 |
2/2✓ Branch 0 taken 15 times.
✓ Branch 1 taken 15 times.
|
30 | if (rank != root) { |
| 59 | 15 | m.row_ptr.resize(counts[0]); | |
| 60 | 15 | m.values.resize(counts[1]); | |
| 61 | 15 | m.col_ind.resize(counts[1]); | |
| 62 | } | ||
| 63 |
1/2✓ Branch 0 taken 30 times.
✗ Branch 1 not taken.
|
30 | if (counts[0] > 0) { |
| 64 | 30 | MPI_Bcast(m.row_ptr.data(), counts[0], MPI_INT, root, MPI_COMM_WORLD); | |
| 65 | } | ||
| 66 |
1/2✓ Branch 0 taken 30 times.
✗ Branch 1 not taken.
|
30 | if (counts[1] > 0) { |
| 67 | 30 | MPI_Bcast(m.values.data(), counts[1], MPI_DOUBLE, root, MPI_COMM_WORLD); | |
| 68 | 30 | MPI_Bcast(m.col_ind.data(), counts[1], MPI_INT, root, MPI_COMM_WORLD); | |
| 69 | } | ||
| 70 | 30 | m.rows = r; | |
| 71 | 30 | m.cols = c; | |
| 72 | 30 | } | |
| 73 | |||
| 74 | 10 | void MaslovaUMultMatrALL::ComputeLocalPart(const CRSMatrix &a, const CRSMatrix &b, int start_row, int local_rows, | |
| 75 | std::vector<int> &local_nnz, std::vector<double> &flat_values, | ||
| 76 | std::vector<int> &flat_cols) { | ||
| 77 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
|
10 | if (local_rows <= 0) { |
| 78 | 2 | return; | |
| 79 | } | ||
| 80 | 8 | std::vector<std::vector<double>> t_vals(local_rows); | |
| 81 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | std::vector<std::vector<int>> t_cols(local_rows); |
| 82 | |||
| 83 | 8 | #pragma omp parallel default(none) shared(a, b, start_row, local_rows, local_nnz, t_vals, t_cols) \ | |
| 84 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | num_threads(ppc::util::GetNumThreads()) |
| 85 | { | ||
| 86 | std::vector<int> marker(b.cols, -1); | ||
| 87 | std::vector<double> acc(b.cols, 0.0); | ||
| 88 | std::vector<int> used; | ||
| 89 | #pragma omp for schedule(dynamic) | ||
| 90 | for (int i = 0; i < local_rows; ++i) { | ||
| 91 | int g_row = start_row + i; | ||
| 92 | used.clear(); | ||
| 93 | for (int j = a.row_ptr[g_row]; j < a.row_ptr[g_row + 1]; ++j) { | ||
| 94 | int col_a = a.col_ind[j]; | ||
| 95 | double val_a = a.values[j]; | ||
| 96 | for (int k = b.row_ptr[col_a]; k < b.row_ptr[col_a + 1]; ++k) { | ||
| 97 | int col_b = b.col_ind[k]; | ||
| 98 | if (marker[col_b] != i) { | ||
| 99 | marker[col_b] = i; | ||
| 100 | used.push_back(col_b); | ||
| 101 | acc[col_b] = val_a * b.values[k]; | ||
| 102 | } else { | ||
| 103 | acc[col_b] += val_a * b.values[k]; | ||
| 104 | } | ||
| 105 | } | ||
| 106 | } | ||
| 107 | local_nnz[i] = static_cast<int>(used.size()); | ||
| 108 | SortVector(used); | ||
| 109 | for (int col : used) { | ||
| 110 | t_vals[i].push_back(acc[col]); | ||
| 111 | t_cols[i].push_back(col); | ||
| 112 | acc[col] = 0.0; | ||
| 113 | } | ||
| 114 | } | ||
| 115 | } | ||
| 116 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | for (int i = 0; i < local_rows; ++i) { |
| 117 |
2/4✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 8 times.
✗ Branch 5 not taken.
|
8 | flat_values.insert(flat_values.end(), t_vals[i].begin(), t_vals[i].end()); |
| 118 | 8 | flat_cols.insert(flat_cols.end(), t_cols[i].begin(), t_cols[i].end()); | |
| 119 | } | ||
| 120 | 8 | } | |
| 121 | |||
| 122 | 10 | void MaslovaUMultMatrALL::GatherResults(int rank, int size, int a_rows, int b_cols, int local_rows, CRSMatrix &c, | |
| 123 | const std::vector<int> &local_nnz, const std::vector<double> &flat_values, | ||
| 124 | const std::vector<int> &flat_cols) { | ||
| 125 | 10 | int local_nnz_total = static_cast<int>(flat_values.size()); | |
| 126 | 10 | std::vector<int> all_nnz_counts(size); | |
| 127 |
2/6✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 10 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
|
10 | std::vector<int> all_row_counts(size); |
| 128 | |||
| 129 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Gather(&local_nnz_total, 1, MPI_INT, all_nnz_counts.data(), 1, MPI_INT, 0, MPI_COMM_WORLD); |
| 130 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Gather(&local_rows, 1, MPI_INT, all_row_counts.data(), 1, MPI_INT, 0, MPI_COMM_WORLD); |
| 131 | |||
| 132 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (rank == 0) { |
| 133 | 5 | c.rows = a_rows; | |
| 134 | 5 | c.cols = b_cols; | |
| 135 |
1/4✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
5 | std::vector<int> nnz_displs(size, 0); |
| 136 |
1/4✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
5 | std::vector<int> row_offsets(size, 0); |
| 137 | int total_nnz = 0; | ||
| 138 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 5 times.
|
15 | for (int i = 0; i < size; ++i) { |
| 139 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | nnz_displs[i] = total_nnz; |
| 140 | 10 | total_nnz += all_nnz_counts[i]; | |
| 141 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (i > 0) { |
| 142 | 5 | row_offsets[i] = row_offsets[i - 1] + all_row_counts[i - 1]; | |
| 143 | } | ||
| 144 | } | ||
| 145 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | c.values.resize(total_nnz); |
| 146 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | c.col_ind.resize(total_nnz); |
| 147 |
2/4✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 5 times.
✗ Branch 5 not taken.
|
5 | c.row_ptr.assign(static_cast<size_t>(a_rows) + 1, 0); |
| 148 | |||
| 149 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | MPI_Gatherv(flat_values.data(), local_nnz_total, MPI_DOUBLE, c.values.data(), all_nnz_counts.data(), |
| 150 | nnz_displs.data(), MPI_DOUBLE, 0, MPI_COMM_WORLD); | ||
| 151 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | MPI_Gatherv(flat_cols.data(), local_nnz_total, MPI_INT, c.col_ind.data(), all_nnz_counts.data(), nnz_displs.data(), |
| 152 | MPI_INT, 0, MPI_COMM_WORLD); | ||
| 153 | |||
| 154 |
2/6✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 5 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
|
5 | std::vector<int> all_nnz_per_row(a_rows); |
| 155 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | MPI_Gatherv(local_nnz.data(), local_rows, MPI_INT, all_nnz_per_row.data(), all_row_counts.data(), |
| 156 | row_offsets.data(), MPI_INT, 0, MPI_COMM_WORLD); | ||
| 157 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 5 times.
|
13 | for (int i = 0; i < a_rows; ++i) { |
| 158 | 8 | c.row_ptr[i + 1] = c.row_ptr[i] + all_nnz_per_row[i]; | |
| 159 | } | ||
| 160 | } else { | ||
| 161 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | MPI_Gatherv(flat_values.data(), local_nnz_total, MPI_DOUBLE, nullptr, nullptr, nullptr, MPI_DOUBLE, 0, |
| 162 | MPI_COMM_WORLD); | ||
| 163 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | MPI_Gatherv(flat_cols.data(), local_nnz_total, MPI_INT, nullptr, nullptr, nullptr, MPI_INT, 0, MPI_COMM_WORLD); |
| 164 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | MPI_Gatherv(local_nnz.data(), local_rows, MPI_INT, nullptr, nullptr, nullptr, MPI_INT, 0, MPI_COMM_WORLD); |
| 165 | } | ||
| 166 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | BroadcastCRSMatrix(c, 0, a_rows, b_cols); |
| 167 | 10 | } | |
| 168 | |||
| 169 | 10 | bool MaslovaUMultMatrALL::RunImpl() { | |
| 170 | 10 | int size = 0; | |
| 171 | 10 | int rank = 0; | |
| 172 | 10 | MPI_Comm_size(MPI_COMM_WORLD, &size); | |
| 173 | 10 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 174 | |||
| 175 | 10 | std::array<int, 3> dims = {0, 0, 0}; | |
| 176 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (rank == 0) { |
| 177 | const auto &a_in = std::get<0>(GetInput()); | ||
| 178 | const auto &b_in = std::get<1>(GetInput()); | ||
| 179 | 5 | dims[0] = a_in.rows; | |
| 180 | 5 | dims[1] = a_in.cols; | |
| 181 | 5 | dims[2] = b_in.cols; | |
| 182 | } | ||
| 183 | 10 | MPI_Bcast(dims.data(), 3, MPI_INT, 0, MPI_COMM_WORLD); | |
| 184 | |||
| 185 | 10 | CRSMatrix a; | |
| 186 | 10 | CRSMatrix b; | |
| 187 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (rank == 0) { |
| 188 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | a = std::get<0>(GetInput()); |
| 189 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | b = std::get<1>(GetInput()); |
| 190 | } | ||
| 191 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | BroadcastCRSMatrix(a, 0, dims[0], dims[1]); |
| 192 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | BroadcastCRSMatrix(b, 0, dims[1], dims[2]); |
| 193 | |||
| 194 | 10 | int part = dims[0] / size; | |
| 195 | 10 | int rem = dims[0] % size; | |
| 196 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2 times.
|
10 | int start_row = (rank * part) + std::min(rank, rem); |
| 197 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2 times.
|
10 | int local_rows = part + (rank < rem ? 1 : 0); |
| 198 | |||
| 199 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | std::vector<int> local_nnz(local_rows); |
| 200 | 10 | std::vector<double> flat_values; | |
| 201 | 10 | std::vector<int> flat_cols; | |
| 202 | |||
| 203 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | ComputeLocalPart(a, b, start_row, local_rows, local_nnz, flat_values, flat_cols); |
| 204 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | GatherResults(rank, size, dims[0], dims[2], local_rows, GetOutput(), local_nnz, flat_values, flat_cols); |
| 205 | |||
| 206 | 10 | return true; | |
| 207 | 10 | } | |
| 208 | |||
| 209 | 10 | bool MaslovaUMultMatrALL::PostProcessingImpl() { | |
| 210 | 10 | return true; | |
| 211 | } | ||
| 212 | |||
| 213 | } // namespace maslova_u_mult_matr_crs | ||
| 214 |