| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "ermakov_a_spar_mat_mult/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <complex> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <numeric> | ||
| 10 | #include <utility> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include "ermakov_a_spar_mat_mult/common/include/common.hpp" | ||
| 14 | #include "task/include/task.hpp" | ||
| 15 | #include "util/include/util.hpp" | ||
| 16 | |||
| 17 | namespace ermakov_a_spar_mat_mult { | ||
| 18 | |||
| 19 | namespace { | ||
| 20 | |||
| 21 | struct LocalRowData { | ||
| 22 | std::vector<int> cols; | ||
| 23 | std::vector<std::complex<double>> vals; // 1 | ||
| 24 | }; | ||
| 25 | |||
| 26 | 4 | std::vector<int> BuildRowBounds(const MatrixCRS &matrix, int proc_count) { | |
| 27 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | if (proc_count <= 0) { |
| 28 | ✗ | return {}; | |
| 29 | } | ||
| 30 | |||
| 31 | 4 | std::vector<int> bounds(static_cast<std::size_t>(proc_count) + 1ULL, 0); | |
| 32 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | bounds.back() = matrix.rows; |
| 33 | |||
| 34 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | const int total_nnz = matrix.row_ptr[static_cast<std::size_t>(matrix.rows)]; |
| 35 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (proc_count <= 1 || total_nnz == 0) { |
| 36 | ✗ | for (int proc = 0; proc <= proc_count; ++proc) { | |
| 37 | ✗ | bounds[static_cast<std::size_t>(proc)] = (proc * matrix.rows) / proc_count; | |
| 38 | } | ||
| 39 | return bounds; | ||
| 40 | } | ||
| 41 | |||
| 42 | int next_proc = 1; | ||
| 43 |
3/4✓ Branch 0 taken 32 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 28 times.
✓ Branch 3 taken 4 times.
|
32 | for (int row = 0; row < matrix.rows && next_proc < proc_count; ++row) { |
| 44 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 24 times.
|
28 | const int prefix_nnz = matrix.row_ptr[static_cast<std::size_t>(row) + 1ULL]; |
| 45 | 28 | const int target_nnz = (next_proc * total_nnz) / proc_count; | |
| 46 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 24 times.
|
28 | if (prefix_nnz >= target_nnz) { |
| 47 | 4 | bounds[static_cast<std::size_t>(next_proc)] = row + 1; | |
| 48 | 4 | ++next_proc; | |
| 49 | } | ||
| 50 | } | ||
| 51 | |||
| 52 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | while (next_proc < proc_count) { |
| 53 | ✗ | bounds[static_cast<std::size_t>(next_proc)] = matrix.rows; | |
| 54 | ✗ | ++next_proc; | |
| 55 | } | ||
| 56 | |||
| 57 | return bounds; | ||
| 58 | } | ||
| 59 | |||
| 60 | 16 | std::vector<int> BuildCountsFromBounds(const std::vector<int> &bounds) { | |
| 61 | 16 | std::vector<int> counts(bounds.size() - 1ULL, 0); | |
| 62 |
2/2✓ Branch 0 taken 32 times.
✓ Branch 1 taken 16 times.
|
48 | for (std::size_t proc = 0; proc + 1 < bounds.size(); ++proc) { |
| 63 | 32 | counts[proc] = bounds[proc + 1] - bounds[proc]; | |
| 64 | } | ||
| 65 | 16 | return counts; | |
| 66 | } | ||
| 67 | |||
| 68 | 24 | std::vector<int> BuildDisplacements(const std::vector<int> &counts) { | |
| 69 | 24 | std::vector<int> displs(counts.size(), 0); | |
| 70 |
2/2✓ Branch 0 taken 24 times.
✓ Branch 1 taken 24 times.
|
48 | for (std::size_t proc = 1; proc < counts.size(); ++proc) { |
| 71 | 24 | displs[proc] = displs[proc - 1] + counts[proc - 1]; | |
| 72 | } | ||
| 73 | 24 | return displs; | |
| 74 | } | ||
| 75 | |||
| 76 | 4 | std::vector<int> BuildNNZCounts(const MatrixCRS &matrix, const std::vector<int> &bounds) { | |
| 77 | 4 | std::vector<int> counts(bounds.size() - 1ULL, 0); | |
| 78 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 4 times.
|
12 | for (std::size_t proc = 0; proc + 1 < bounds.size(); ++proc) { |
| 79 | 8 | counts[proc] = matrix.row_ptr[static_cast<std::size_t>(bounds[proc + 1])] - | |
| 80 | 8 | matrix.row_ptr[static_cast<std::size_t>(bounds[proc])]; | |
| 81 | } | ||
| 82 | 4 | return counts; | |
| 83 | } | ||
| 84 | |||
| 85 | 20 | std::vector<double> PackComplexValues(const std::vector<std::complex<double>> &values) { | |
| 86 | 20 | std::vector<double> packed(values.size() * 2ULL, 0.0); | |
| 87 |
2/2✓ Branch 0 taken 3583 times.
✓ Branch 1 taken 20 times.
|
3603 | for (std::size_t i = 0; i < values.size(); ++i) { |
| 88 | 3583 | packed[i * 2ULL] = values[i].real(); | |
| 89 | 3583 | packed[(i * 2ULL) + 1ULL] = values[i].imag(); | |
| 90 | } | ||
| 91 | 20 | return packed; | |
| 92 | } | ||
| 93 | |||
| 94 | 20 | void UnpackComplexValues(const std::vector<double> &packed, std::vector<std::complex<double>> &values) { | |
| 95 | 20 | const std::size_t count = packed.size() / 2ULL; | |
| 96 | 20 | values.resize(count); | |
| 97 |
2/2✓ Branch 0 taken 3583 times.
✓ Branch 1 taken 20 times.
|
3603 | for (std::size_t i = 0; i < count; ++i) { |
| 98 | 3583 | values[i] = std::complex<double>(packed[i * 2ULL], packed[(i * 2ULL) + 1ULL]); | |
| 99 | } | ||
| 100 | 20 | } | |
| 101 | |||
| 102 | 8 | MatrixCRS ScatterRows(const MatrixCRS &matrix, const std::vector<int> &row_bounds, const std::vector<int> &nnz_counts, | |
| 103 | int rank, int proc_count) { | ||
| 104 | 8 | const std::vector<int> row_counts = BuildCountsFromBounds(row_bounds); | |
| 105 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | const std::vector<int> row_displs = BuildDisplacements(row_counts); |
| 106 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | const std::vector<int> nnz_displs = BuildDisplacements(nnz_counts); |
| 107 | |||
| 108 | 8 | MatrixCRS local; | |
| 109 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | local.rows = row_counts[static_cast<std::size_t>(rank)]; |
| 110 | 8 | local.cols = matrix.cols; | |
| 111 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | local.row_ptr.assign(static_cast<std::size_t>(local.rows) + 1ULL, 0); |
| 112 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | local.col_index.resize(static_cast<std::size_t>(nnz_counts[static_cast<std::size_t>(rank)])); |
| 113 | |||
| 114 | 8 | std::vector<int> all_row_lengths; | |
| 115 | 8 | std::vector<double> packed_values; | |
| 116 | 8 | std::vector<int> packed_counts; | |
| 117 | 8 | std::vector<int> packed_displs; | |
| 118 | |||
| 119 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (rank == 0) { |
| 120 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | all_row_lengths.resize(static_cast<std::size_t>(matrix.rows), 0); |
| 121 |
2/2✓ Branch 0 taken 63 times.
✓ Branch 1 taken 4 times.
|
67 | for (int row = 0; row < matrix.rows; ++row) { |
| 122 | 63 | all_row_lengths[static_cast<std::size_t>(row)] = | |
| 123 | 63 | matrix.row_ptr[static_cast<std::size_t>(row) + 1ULL] - matrix.row_ptr[static_cast<std::size_t>(row)]; | |
| 124 | } | ||
| 125 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | packed_values = PackComplexValues(matrix.values); |
| 126 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | packed_counts.resize(static_cast<std::size_t>(proc_count), 0); |
| 127 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | packed_displs.resize(static_cast<std::size_t>(proc_count), 0); |
| 128 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 4 times.
|
12 | for (int proc = 0; proc < proc_count; ++proc) { |
| 129 | 8 | packed_counts[static_cast<std::size_t>(proc)] = nnz_counts[static_cast<std::size_t>(proc)] * 2; | |
| 130 | 8 | packed_displs[static_cast<std::size_t>(proc)] = nnz_displs[static_cast<std::size_t>(proc)] * 2; | |
| 131 | } | ||
| 132 | } | ||
| 133 | |||
| 134 |
1/4✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
8 | std::vector<int> local_row_lengths(static_cast<std::size_t>(local.rows), 0); |
| 135 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | MPI_Scatterv(all_row_lengths.data(), row_counts.data(), row_displs.data(), MPI_INT, local_row_lengths.data(), |
| 136 | local.rows, MPI_INT, 0, MPI_COMM_WORLD); | ||
| 137 | |||
| 138 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | const int local_nnz = nnz_counts[static_cast<std::size_t>(rank)]; |
| 139 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | MPI_Scatterv(matrix.col_index.data(), nnz_counts.data(), nnz_displs.data(), MPI_INT, local.col_index.data(), |
| 140 | local_nnz, MPI_INT, 0, MPI_COMM_WORLD); | ||
| 141 | |||
| 142 |
1/4✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
8 | std::vector<double> local_packed(static_cast<std::size_t>(local_nnz) * 2ULL, 0.0); |
| 143 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | MPI_Scatterv(packed_values.data(), packed_counts.data(), packed_displs.data(), MPI_DOUBLE, local_packed.data(), |
| 144 | local_nnz * 2, MPI_DOUBLE, 0, MPI_COMM_WORLD); | ||
| 145 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | UnpackComplexValues(local_packed, local.values); |
| 146 | |||
| 147 | int prefix = 0; | ||
| 148 |
2/2✓ Branch 0 taken 63 times.
✓ Branch 1 taken 8 times.
|
71 | for (int row = 0; row < local.rows; ++row) { |
| 149 | 63 | local.row_ptr[static_cast<std::size_t>(row)] = prefix; | |
| 150 | 63 | prefix += local_row_lengths[static_cast<std::size_t>(row)]; | |
| 151 | } | ||
| 152 |
1/2✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
|
8 | local.row_ptr[static_cast<std::size_t>(local.rows)] = prefix; |
| 153 | |||
| 154 | 8 | return local; | |
| 155 | ✗ | } | |
| 156 | |||
| 157 | 16 | void BroadcastMatrix(MatrixCRS &matrix, int rank) { | |
| 158 | 16 | std::array<int, 3> dims = {matrix.rows, matrix.cols, static_cast<int>(matrix.values.size())}; | |
| 159 | 16 | MPI_Bcast(dims.data(), static_cast<int>(dims.size()), MPI_INT, 0, MPI_COMM_WORLD); | |
| 160 | |||
| 161 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | if (rank != 0) { |
| 162 | 8 | matrix.rows = dims[0]; | |
| 163 | 8 | matrix.cols = dims[1]; | |
| 164 | 8 | matrix.values.resize(static_cast<std::size_t>(dims[2])); | |
| 165 | 8 | matrix.col_index.resize(static_cast<std::size_t>(dims[2])); | |
| 166 | 8 | matrix.row_ptr.resize(static_cast<std::size_t>(matrix.rows) + 1ULL); | |
| 167 | } | ||
| 168 | |||
| 169 | 16 | MPI_Bcast(matrix.col_index.data(), dims[2], MPI_INT, 0, MPI_COMM_WORLD); | |
| 170 | 16 | MPI_Bcast(matrix.row_ptr.data(), matrix.rows + 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 171 | |||
| 172 | 16 | std::vector<double> packed_values; | |
| 173 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | if (rank == 0) { |
| 174 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
16 | packed_values = PackComplexValues(matrix.values); |
| 175 | } else { | ||
| 176 |
1/4✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
8 | packed_values.resize(static_cast<std::size_t>(dims[2]) * 2ULL, 0.0); |
| 177 | } | ||
| 178 | |||
| 179 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | MPI_Bcast(packed_values.data(), dims[2] * 2, MPI_DOUBLE, 0, MPI_COMM_WORLD); |
| 180 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
|
16 | if (rank != 0) { |
| 181 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | UnpackComplexValues(packed_values, matrix.values); |
| 182 | } | ||
| 183 | 16 | } | |
| 184 | |||
| 185 |
2/2✓ Branch 0 taken 42 times.
✓ Branch 1 taken 21 times.
|
63 | void AccumulateRowProducts(const MatrixCRS &a, const MatrixCRS &b, int row_index, |
| 186 | std::vector<std::complex<double>> &row_vals, std::vector<int> &row_mark, | ||
| 187 | std::vector<int> &used_cols) { | ||
| 188 | used_cols.clear(); | ||
| 189 | |||
| 190 | 767 | for (int ak = a.row_ptr[static_cast<std::size_t>(row_index)]; | |
| 191 |
2/2✓ Branch 0 taken 704 times.
✓ Branch 1 taken 63 times.
|
767 | ak < a.row_ptr[static_cast<std::size_t>(row_index) + 1ULL]; ++ak) { |
| 192 | 704 | const int b_row = a.col_index[static_cast<std::size_t>(ak)]; | |
| 193 | 704 | const auto a_val = a.values[static_cast<std::size_t>(ak)]; | |
| 194 | |||
| 195 |
2/2✓ Branch 0 taken 13539 times.
✓ Branch 1 taken 704 times.
|
14243 | for (int bk = b.row_ptr[static_cast<std::size_t>(b_row)]; bk < b.row_ptr[static_cast<std::size_t>(b_row) + 1ULL]; |
| 196 | ++bk) { | ||
| 197 |
2/2✓ Branch 0 taken 1080 times.
✓ Branch 1 taken 12459 times.
|
13539 | const int col = b.col_index[static_cast<std::size_t>(bk)]; |
| 198 | 13539 | const auto product = a_val * b.values[static_cast<std::size_t>(bk)]; | |
| 199 | |||
| 200 |
2/2✓ Branch 0 taken 1080 times.
✓ Branch 1 taken 12459 times.
|
13539 | if (row_mark[static_cast<std::size_t>(col)] != row_index) { |
| 201 |
1/2✓ Branch 0 taken 1080 times.
✗ Branch 1 not taken.
|
1080 | row_mark[static_cast<std::size_t>(col)] = row_index; |
| 202 |
1/2✓ Branch 0 taken 1080 times.
✗ Branch 1 not taken.
|
1080 | row_vals[static_cast<std::size_t>(col)] = product; |
| 203 | used_cols.push_back(col); | ||
| 204 | } else { | ||
| 205 | row_vals[static_cast<std::size_t>(col)] += product; | ||
| 206 | } | ||
| 207 | } | ||
| 208 | } | ||
| 209 | 63 | } | |
| 210 | |||
| 211 | 63 | void CollectRowValues(const std::vector<std::complex<double>> &row_vals, std::vector<int> &used_cols, | |
| 212 | LocalRowData &row) { | ||
| 213 | std::ranges::sort(used_cols); | ||
| 214 | row.cols.clear(); | ||
| 215 | row.vals.clear(); | ||
| 216 | 63 | row.cols.reserve(used_cols.size()); | |
| 217 | 63 | row.vals.reserve(used_cols.size()); | |
| 218 | |||
| 219 |
2/2✓ Branch 0 taken 1080 times.
✓ Branch 1 taken 63 times.
|
1143 | for (int col : used_cols) { |
| 220 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1080 times.
|
1080 | const auto &value = row_vals[static_cast<std::size_t>(col)]; |
| 221 | if (value != std::complex<double>(0.0, 0.0)) { | ||
| 222 | row.cols.push_back(col); | ||
| 223 | row.vals.push_back(value); | ||
| 224 | } | ||
| 225 | } | ||
| 226 | 63 | } | |
| 227 | |||
| 228 | 8 | MatrixCRS MultiplyLocalOMP(const MatrixCRS &a, const MatrixCRS &b) { | |
| 229 | 8 | MatrixCRS result; | |
| 230 | 8 | result.rows = a.rows; | |
| 231 | 8 | result.cols = b.cols; | |
| 232 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | result.row_ptr.assign(static_cast<std::size_t>(result.rows) + 1ULL, 0); |
| 233 | |||
| 234 |
2/4✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 8 times.
✗ Branch 3 not taken.
|
8 | if (a.rows == 0 || b.cols == 0) { |
| 235 | return result; | ||
| 236 | } | ||
| 237 | |||
| 238 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | std::vector<LocalRowData> rows_data(static_cast<std::size_t>(a.rows)); |
| 239 |
5/6✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
✓ Branch 4 taken 7 times.
✓ Branch 5 taken 7 times.
✓ Branch 6 taken 1 times.
|
9 | const int thread_count = std::max(1, std::min(ppc::util::GetNumThreads(), a.rows)); |
| 240 | |||
| 241 | 8 | #pragma omp parallel default(none) shared(a, b, rows_data) num_threads(thread_count) if (thread_count > 1) | |
| 242 | { | ||
| 243 | std::vector<std::complex<double>> row_vals(static_cast<std::size_t>(b.cols), std::complex<double>(0.0, 0.0)); | ||
| 244 | std::vector<int> row_mark(static_cast<std::size_t>(b.cols), -1); | ||
| 245 | std::vector<int> used_cols; | ||
| 246 | used_cols.reserve(256); | ||
| 247 | |||
| 248 | #pragma omp for | ||
| 249 | for (int row = 0; row < a.rows; ++row) { | ||
| 250 | AccumulateRowProducts(a, b, row, row_vals, row_mark, used_cols); | ||
| 251 | CollectRowValues(row_vals, used_cols, rows_data[static_cast<std::size_t>(row)]); | ||
| 252 | } | ||
| 253 | } | ||
| 254 | |||
| 255 | int total_nnz = 0; | ||
| 256 |
2/2✓ Branch 0 taken 63 times.
✓ Branch 1 taken 8 times.
|
71 | for (int row = 0; row < result.rows; ++row) { |
| 257 | 63 | result.row_ptr[static_cast<std::size_t>(row)] = total_nnz; | |
| 258 | 63 | total_nnz += static_cast<int>(rows_data[static_cast<std::size_t>(row)].vals.size()); | |
| 259 | } | ||
| 260 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | result.row_ptr[static_cast<std::size_t>(result.rows)] = total_nnz; |
| 261 | |||
| 262 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | result.values.reserve(static_cast<std::size_t>(total_nnz)); |
| 263 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | result.col_index.reserve(static_cast<std::size_t>(total_nnz)); |
| 264 | |||
| 265 |
2/2✓ Branch 0 taken 63 times.
✓ Branch 1 taken 8 times.
|
71 | for (int row = 0; row < result.rows; ++row) { |
| 266 |
1/2✓ Branch 1 taken 63 times.
✗ Branch 2 not taken.
|
63 | const auto &row_data = rows_data[static_cast<std::size_t>(row)]; |
| 267 |
1/2✓ Branch 1 taken 63 times.
✗ Branch 2 not taken.
|
63 | result.col_index.insert(result.col_index.end(), row_data.cols.begin(), row_data.cols.end()); |
| 268 | 63 | result.values.insert(result.values.end(), row_data.vals.begin(), row_data.vals.end()); | |
| 269 | } | ||
| 270 | |||
| 271 | return result; | ||
| 272 | 8 | } | |
| 273 | |||
| 274 | 8 | void GatherMatrix(const MatrixCRS &local, MatrixCRS &global, const std::vector<int> &row_bounds, int rank, int size, | |
| 275 | int total_rows) { | ||
| 276 | 8 | const std::vector<int> row_counts = BuildCountsFromBounds(row_bounds); | |
| 277 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | const std::vector<int> row_displs = BuildDisplacements(row_counts); |
| 278 |
2/6✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 8 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
|
8 | std::vector<int> nnz_counts(static_cast<std::size_t>(size), 0); |
| 279 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | const int local_nnz = static_cast<int>(local.values.size()); |
| 280 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | MPI_Gather(&local_nnz, 1, MPI_INT, nnz_counts.data(), 1, MPI_INT, 0, MPI_COMM_WORLD); |
| 281 | |||
| 282 |
1/4✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
8 | std::vector<int> local_row_lengths(static_cast<std::size_t>(local.rows), 0); |
| 283 |
2/2✓ Branch 0 taken 63 times.
✓ Branch 1 taken 8 times.
|
71 | for (int row = 0; row < local.rows; ++row) { |
| 284 | 63 | local_row_lengths[static_cast<std::size_t>(row)] = | |
| 285 | 63 | local.row_ptr[static_cast<std::size_t>(row) + 1ULL] - local.row_ptr[static_cast<std::size_t>(row)]; | |
| 286 | } | ||
| 287 | |||
| 288 | 8 | std::vector<int> nnz_displs; | |
| 289 | 8 | std::vector<int> gathered_row_lengths; | |
| 290 | 8 | std::vector<int> gathered_cols; | |
| 291 | 8 | std::vector<double> gathered_packed_values; | |
| 292 | 8 | std::vector<int> packed_counts; | |
| 293 | 8 | std::vector<int> packed_displs; | |
| 294 | |||
| 295 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (rank == 0) { |
| 296 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | nnz_displs.resize(static_cast<std::size_t>(size), 0); |
| 297 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | for (int proc = 1; proc < size; ++proc) { |
| 298 | 4 | nnz_displs[static_cast<std::size_t>(proc)] = | |
| 299 | 4 | nnz_displs[static_cast<std::size_t>(proc - 1)] + nnz_counts[static_cast<std::size_t>(proc - 1)]; | |
| 300 | } | ||
| 301 | |||
| 302 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | gathered_row_lengths.resize(static_cast<std::size_t>(total_rows), 0); |
| 303 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
8 | gathered_cols.resize(static_cast<std::size_t>(std::accumulate(nnz_counts.begin(), nnz_counts.end(), 0)), 0); |
| 304 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | gathered_packed_values.resize(gathered_cols.size() * 2ULL, 0.0); |
| 305 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | packed_counts.resize(static_cast<std::size_t>(size), 0); |
| 306 |
1/4✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
4 | packed_displs.resize(static_cast<std::size_t>(size), 0); |
| 307 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 4 times.
|
12 | for (int proc = 0; proc < size; ++proc) { |
| 308 | 8 | packed_counts[static_cast<std::size_t>(proc)] = nnz_counts[static_cast<std::size_t>(proc)] * 2; | |
| 309 | 8 | packed_displs[static_cast<std::size_t>(proc)] = nnz_displs[static_cast<std::size_t>(proc)] * 2; | |
| 310 | } | ||
| 311 | } | ||
| 312 | |||
| 313 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | MPI_Gatherv(local_row_lengths.data(), local.rows, MPI_INT, gathered_row_lengths.data(), row_counts.data(), |
| 314 | row_displs.data(), MPI_INT, 0, MPI_COMM_WORLD); | ||
| 315 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | MPI_Gatherv(local.col_index.data(), local_nnz, MPI_INT, gathered_cols.data(), nnz_counts.data(), nnz_displs.data(), |
| 316 | MPI_INT, 0, MPI_COMM_WORLD); | ||
| 317 | |||
| 318 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | const std::vector<double> local_packed_values = PackComplexValues(local.values); |
| 319 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | MPI_Gatherv(local_packed_values.data(), local_nnz * 2, MPI_DOUBLE, gathered_packed_values.data(), |
| 320 | packed_counts.data(), packed_displs.data(), MPI_DOUBLE, 0, MPI_COMM_WORLD); | ||
| 321 | |||
| 322 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (rank != 0) { |
| 323 | return; | ||
| 324 | } | ||
| 325 | |||
| 326 |
1/4✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
4 | global.row_ptr.assign(static_cast<std::size_t>(total_rows) + 1ULL, 0); |
| 327 | int prefix = 0; | ||
| 328 |
2/2✓ Branch 0 taken 63 times.
✓ Branch 1 taken 4 times.
|
67 | for (int row = 0; row < total_rows; ++row) { |
| 329 | 63 | global.row_ptr[static_cast<std::size_t>(row)] = prefix; | |
| 330 | 63 | prefix += gathered_row_lengths[static_cast<std::size_t>(row)]; | |
| 331 | } | ||
| 332 | 4 | global.row_ptr[static_cast<std::size_t>(total_rows)] = prefix; | |
| 333 | |||
| 334 | 4 | global.col_index = std::move(gathered_cols); | |
| 335 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | UnpackComplexValues(gathered_packed_values, global.values); |
| 336 | } | ||
| 337 | |||
| 338 | } // namespace | ||
| 339 | |||
| 340 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | ErmakovASparMatMultALL::ErmakovASparMatMultALL(const InType &in) { |
| 341 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 342 | GetInput() = in; | ||
| 343 | 8 | } | |
| 344 | |||
| 345 | 16 | bool ErmakovASparMatMultALL::ValidateMatrix(const MatrixCRS &m) { | |
| 346 |
2/4✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 16 times.
|
16 | if (m.rows < 0 || m.cols < 0) { |
| 347 | return false; | ||
| 348 | } | ||
| 349 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
|
16 | if (m.row_ptr.size() != static_cast<std::size_t>(m.rows) + 1ULL) { |
| 350 | return false; | ||
| 351 | } | ||
| 352 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
|
16 | if (m.values.size() != m.col_index.size()) { |
| 353 | return false; | ||
| 354 | } | ||
| 355 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
|
16 | if (m.row_ptr.empty()) { |
| 356 | return false; | ||
| 357 | } | ||
| 358 | |||
| 359 | 16 | const int nnz = static_cast<int>(m.values.size()); | |
| 360 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 16 times.
|
16 | if (m.row_ptr.front() != 0 || m.row_ptr.back() != nnz) { |
| 361 | return false; | ||
| 362 | } | ||
| 363 | |||
| 364 |
2/2✓ Branch 0 taken 252 times.
✓ Branch 1 taken 16 times.
|
268 | for (int row = 0; row < m.rows; ++row) { |
| 365 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 252 times.
|
252 | if (m.row_ptr[static_cast<std::size_t>(row)] > m.row_ptr[static_cast<std::size_t>(row) + 1ULL]) { |
| 366 | return false; | ||
| 367 | } | ||
| 368 | } | ||
| 369 | |||
| 370 |
2/2✓ Branch 0 taken 2846 times.
✓ Branch 1 taken 16 times.
|
2862 | for (int idx = 0; idx < nnz; ++idx) { |
| 371 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 2846 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2846 times.
|
2846 | if (m.col_index[static_cast<std::size_t>(idx)] < 0 || m.col_index[static_cast<std::size_t>(idx)] >= m.cols) { |
| 372 | return false; | ||
| 373 | } | ||
| 374 | } | ||
| 375 | |||
| 376 | return true; | ||
| 377 | } | ||
| 378 | |||
| 379 | 8 | bool ErmakovASparMatMultALL::ValidationImpl() { | |
| 380 | 8 | const auto &a = GetInput().A; | |
| 381 | 8 | const auto &b = GetInput().B; | |
| 382 |
2/4✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 8 times.
✗ Branch 3 not taken.
|
8 | return a.cols == b.rows && ValidateMatrix(a) && ValidateMatrix(b); |
| 383 | } | ||
| 384 | |||
| 385 | 8 | bool ErmakovASparMatMultALL::PreProcessingImpl() { | |
| 386 | 8 | a_ = GetInput().A; | |
| 387 | 8 | b_ = GetInput().B; | |
| 388 | 8 | c_.rows = a_.rows; | |
| 389 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
|
8 | c_.cols = b_.cols; |
| 390 | c_.values.clear(); | ||
| 391 | c_.col_index.clear(); | ||
| 392 | 8 | c_.row_ptr.assign(static_cast<std::size_t>(c_.rows) + 1ULL, 0); | |
| 393 | 8 | return true; | |
| 394 | } | ||
| 395 | |||
| 396 | 8 | bool ErmakovASparMatMultALL::RunImpl() { | |
| 397 | 8 | int rank = 0; | |
| 398 | 8 | int size = 1; | |
| 399 | 8 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 400 | 8 | MPI_Comm_size(MPI_COMM_WORLD, &size); | |
| 401 | |||
| 402 |
3/4✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.
|
8 | if (rank == 0 && a_.cols != b_.rows) { |
| 403 | return false; | ||
| 404 | } | ||
| 405 | |||
| 406 | 8 | BroadcastMatrix(b_, rank); | |
| 407 | |||
| 408 | 8 | c_.rows = a_.rows; | |
| 409 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
|
8 | c_.cols = b_.cols; |
| 410 | c_.values.clear(); | ||
| 411 | c_.col_index.clear(); | ||
| 412 | 8 | c_.row_ptr.assign(static_cast<std::size_t>(c_.rows) + 1ULL, 0); | |
| 413 | |||
| 414 | 8 | std::vector<int> row_bounds(static_cast<std::size_t>(size) + 1ULL, 0); | |
| 415 |
1/4✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
8 | std::vector<int> nnz_counts(static_cast<std::size_t>(size), 0); |
| 416 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (rank == 0) { |
| 417 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | row_bounds = BuildRowBounds(a_, size); |
| 418 |
1/4✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
8 | nnz_counts = BuildNNZCounts(a_, row_bounds); |
| 419 | } | ||
| 420 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | MPI_Bcast(row_bounds.data(), size + 1, MPI_INT, 0, MPI_COMM_WORLD); |
| 421 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | MPI_Bcast(nnz_counts.data(), size, MPI_INT, 0, MPI_COMM_WORLD); |
| 422 | |||
| 423 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | const MatrixCRS local_a = ScatterRows(a_, row_bounds, nnz_counts, rank, size); |
| 424 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | const MatrixCRS local_c = MultiplyLocalOMP(local_a, b_); |
| 425 | |||
| 426 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | GatherMatrix(local_c, c_, row_bounds, rank, size, a_.rows); |
| 427 | |||
| 428 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
|
8 | if (GetStateOfTesting() == ppc::task::StateOfTesting::kPerf) { |
| 429 | ✗ | if (rank != 0) { | |
| 430 | ✗ | c_.rows = a_.rows; | |
| 431 | ✗ | c_.cols = b_.cols; | |
| 432 | c_.values.clear(); | ||
| 433 | c_.col_index.clear(); | ||
| 434 | ✗ | c_.row_ptr.assign(static_cast<std::size_t>(c_.rows) + 1ULL, 0); | |
| 435 | } | ||
| 436 | ✗ | MPI_Barrier(MPI_COMM_WORLD); | |
| 437 | return true; | ||
| 438 | } | ||
| 439 | |||
| 440 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | BroadcastMatrix(c_, rank); |
| 441 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | MPI_Barrier(MPI_COMM_WORLD); |
| 442 | return true; | ||
| 443 | 8 | } | |
| 444 | |||
| 445 | 8 | bool ErmakovASparMatMultALL::PostProcessingImpl() { | |
| 446 | 8 | GetOutput() = c_; | |
| 447 | 8 | return true; | |
| 448 | } | ||
| 449 | |||
| 450 | } // namespace ermakov_a_spar_mat_mult | ||
| 451 |