| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "kotelnikova_a_double_matr_mult/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cmath> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "kotelnikova_a_double_matr_mult/common/include/common.hpp" | ||
| 11 | |||
| 12 | namespace kotelnikova_a_double_matr_mult { | ||
| 13 | |||
| 14 |
1/2✓ Branch 2 taken 10 times.
✗ Branch 3 not taken.
|
10 | KotelnikovaATaskALL::KotelnikovaATaskALL(const InType &in) { |
| 15 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 16 | GetInput() = in; | ||
| 17 | 10 | GetOutput() = SparseMatrixCCS(); | |
| 18 | 10 | } | |
| 19 | |||
| 20 | 20 | bool KotelnikovaATaskALL::IsMatrixValid(const SparseMatrixCCS &matrix) { | |
| 21 |
2/4✓ Branch 0 taken 20 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 20 times.
|
20 | if (matrix.rows < 0 || matrix.cols < 0) { |
| 22 | return false; | ||
| 23 | } | ||
| 24 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
|
20 | if (matrix.col_ptrs.size() != static_cast<size_t>(matrix.cols) + 1) { |
| 25 | return false; | ||
| 26 | } | ||
| 27 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
|
20 | if (matrix.values.size() != matrix.row_indices.size()) { |
| 28 | return false; | ||
| 29 | } | ||
| 30 | |||
| 31 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 20 times.
|
20 | if (matrix.col_ptrs.empty() || matrix.col_ptrs[0] != 0) { |
| 32 | return false; | ||
| 33 | } | ||
| 34 | |||
| 35 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
|
20 | const int total_elements = static_cast<int>(matrix.values.size()); |
| 36 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
|
20 | if (matrix.col_ptrs[matrix.cols] != total_elements) { |
| 37 | return false; | ||
| 38 | } | ||
| 39 | |||
| 40 |
2/2✓ Branch 0 taken 58 times.
✓ Branch 1 taken 20 times.
|
78 | for (size_t i = 0; i < matrix.col_ptrs.size() - 1; ++i) { |
| 41 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 58 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 58 times.
|
58 | if (matrix.col_ptrs[i] > matrix.col_ptrs[i + 1] || matrix.col_ptrs[i] < 0) { |
| 42 | return false; | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 |
2/2✓ Branch 0 taken 72 times.
✓ Branch 1 taken 20 times.
|
92 | for (size_t i = 0; i < matrix.row_indices.size(); ++i) { |
| 47 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 72 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 72 times.
|
72 | if (matrix.row_indices[i] < 0 || matrix.row_indices[i] >= matrix.rows) { |
| 48 | return false; | ||
| 49 | } | ||
| 50 | } | ||
| 51 | |||
| 52 | return true; | ||
| 53 | } | ||
| 54 | |||
| 55 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
10 | bool KotelnikovaATaskALL::ValidationImpl() { |
| 56 | const auto &[a, b] = GetInput(); | ||
| 57 | |||
| 58 |
2/4✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
✗ Branch 3 not taken.
|
10 | if (!IsMatrixValid(a) || !IsMatrixValid(b)) { |
| 59 | return false; | ||
| 60 | } | ||
| 61 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
10 | if (a.cols != b.rows) { |
| 62 | ✗ | return false; | |
| 63 | } | ||
| 64 | |||
| 65 | return true; | ||
| 66 | } | ||
| 67 | |||
| 68 | 10 | bool KotelnikovaATaskALL::PreProcessingImpl() { | |
| 69 | const auto &[a, b] = GetInput(); | ||
| 70 | 10 | GetOutput() = SparseMatrixCCS(a.rows, b.cols); | |
| 71 | 10 | return true; | |
| 72 | } | ||
| 73 | |||
| 74 | namespace { | ||
| 75 | void ComputeColumnBlock(const SparseMatrixCCS &a, const SparseMatrixCCS &b, int start_col, int end_col, | ||
| 76 | std::vector<std::vector<double>> &temp_columns, std::vector<int> &nnz_counts) { | ||
| 77 | const double epsilon = 1e-10; | ||
| 78 | |||
| 79 | 10 | #pragma omp parallel for default(none) shared(a, b, start_col, end_col, temp_columns, nnz_counts, epsilon) \ | |
| 80 | schedule(dynamic, 4) | ||
| 81 | for (int j = start_col; j < end_col; ++j) { | ||
| 82 | std::vector<double> &temp = temp_columns[j - start_col]; | ||
| 83 | temp.assign(a.rows, 0.0); | ||
| 84 | |||
| 85 | for (int b_idx = b.col_ptrs[j]; b_idx < b.col_ptrs[j + 1]; ++b_idx) { | ||
| 86 | const int k = b.row_indices[b_idx]; | ||
| 87 | const double b_val = b.values[b_idx]; | ||
| 88 | |||
| 89 | for (int a_idx = a.col_ptrs[k]; a_idx < a.col_ptrs[k + 1]; ++a_idx) { | ||
| 90 | const int i = a.row_indices[a_idx]; | ||
| 91 | temp[i] += a.values[a_idx] * b_val; | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
| 95 | int count = 0; | ||
| 96 | for (double val : temp) { | ||
| 97 | if (std::abs(val) > epsilon) { | ||
| 98 | ++count; | ||
| 99 | } | ||
| 100 | } | ||
| 101 | nnz_counts[j - start_col] = count; | ||
| 102 | } | ||
| 103 | } | ||
| 104 | |||
| 105 | 10 | void BuildLocalResult(const SparseMatrixCCS &a, int start_col, int end_col, | |
| 106 | const std::vector<std::vector<double>> &temp_columns, const std::vector<int> &nnz_counts, | ||
| 107 | std::vector<double> &local_values, std::vector<int> &local_row_indices, | ||
| 108 | std::vector<int> &local_col_ptrs) { | ||
| 109 | (void)a; | ||
| 110 | |||
| 111 | const double epsilon = 1e-10; | ||
| 112 | 10 | const int local_cols = end_col - start_col; | |
| 113 | |||
| 114 | 10 | local_col_ptrs.resize(local_cols + 1, 0); | |
| 115 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 10 times.
|
24 | for (int j = 0; j < local_cols; ++j) { |
| 116 | 14 | local_col_ptrs[j + 1] = local_col_ptrs[j] + nnz_counts[j]; | |
| 117 | } | ||
| 118 | |||
| 119 | 10 | const int total_nnz = local_col_ptrs[local_cols]; | |
| 120 | 10 | local_values.resize(total_nnz); | |
| 121 | 10 | local_row_indices.resize(total_nnz); | |
| 122 | |||
| 123 | 10 | #pragma omp parallel for default(none) shared(a, start_col, end_col, temp_columns, local_values, local_row_indices, \ | |
| 124 | local_col_ptrs, epsilon, local_cols) schedule(dynamic, 4) | ||
| 125 | for (int j = 0; j < local_cols; ++j) { | ||
| 126 | const std::vector<double> &temp = temp_columns[j]; | ||
| 127 | int pos = local_col_ptrs[j]; | ||
| 128 | for (size_t i = 0; i < temp.size(); ++i) { | ||
| 129 | if (std::abs(temp[i]) > epsilon) { | ||
| 130 | local_row_indices[pos] = static_cast<int>(i); | ||
| 131 | local_values[pos] = temp[i]; | ||
| 132 | ++pos; | ||
| 133 | } | ||
| 134 | } | ||
| 135 | } | ||
| 136 | 10 | } | |
| 137 | |||
| 138 | 10 | void GatherAndBroadcastResult(int rank, int size, const std::vector<double> &local_values, | |
| 139 | const std::vector<int> &local_row_indices, const std::vector<int> &local_col_ptrs, | ||
| 140 | int total_cols, int rows, SparseMatrixCCS &result) { | ||
| 141 | 10 | std::vector<int> recv_counts(size, 0); | |
| 142 |
2/6✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 10 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
|
10 | std::vector<int> recv_offsets(size, 0); |
| 143 | |||
| 144 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | int local_nnz = static_cast<int>(local_values.size()); |
| 145 | |||
| 146 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Gather(&local_nnz, 1, MPI_INT, recv_counts.data(), 1, MPI_INT, 0, MPI_COMM_WORLD); |
| 147 | |||
| 148 | 10 | std::vector<double> global_values; | |
| 149 | 10 | std::vector<int> global_row_indices; | |
| 150 | 10 | std::vector<int> global_col_ptrs; | |
| 151 |
1/4✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
10 | std::vector<int> global_col_counts(total_cols, 0); |
| 152 | |||
| 153 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (rank == 0) { |
| 154 | int total_nnz = 0; | ||
| 155 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 5 times.
|
15 | for (int i = 0; i < size; ++i) { |
| 156 | 10 | recv_offsets[i] = total_nnz; | |
| 157 | 10 | total_nnz += recv_counts[i]; | |
| 158 | } | ||
| 159 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | global_values.resize(total_nnz); |
| 160 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | global_row_indices.resize(total_nnz); |
| 161 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | global_col_ptrs.resize(total_cols + 1, 0); |
| 162 | } | ||
| 163 | |||
| 164 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Gatherv(local_values.data(), local_nnz, MPI_DOUBLE, global_values.data(), recv_counts.data(), recv_offsets.data(), |
| 165 | MPI_DOUBLE, 0, MPI_COMM_WORLD); | ||
| 166 | |||
| 167 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Gatherv(local_row_indices.data(), local_nnz, MPI_INT, global_row_indices.data(), recv_counts.data(), |
| 168 | recv_offsets.data(), MPI_INT, 0, MPI_COMM_WORLD); | ||
| 169 | |||
| 170 |
1/4✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
10 | std::vector<int> local_col_counts(total_cols, 0); |
| 171 | 10 | int cols_per_proc = total_cols / size; | |
| 172 | 10 | int remainder = total_cols % size; | |
| 173 | |||
| 174 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2 times.
|
10 | int start = (rank * cols_per_proc) + std::min(rank, remainder); |
| 175 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2 times.
|
10 | int end = start + cols_per_proc + (rank < remainder ? 1 : 0); |
| 176 | |||
| 177 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 10 times.
|
24 | for (int j = start; j < end; ++j) { |
| 178 |
1/2✓ Branch 0 taken 14 times.
✗ Branch 1 not taken.
|
28 | local_col_counts[j] = (j - start + 1 < static_cast<int>(local_col_ptrs.size())) |
| 179 |
1/2✓ Branch 0 taken 14 times.
✗ Branch 1 not taken.
|
14 | ? (local_col_ptrs[j - start + 1] - local_col_ptrs[j - start]) |
| 180 | : 0; | ||
| 181 | } | ||
| 182 | |||
| 183 |
2/6✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 10 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
|
10 | std::vector<int> global_col_counts_tmp(total_cols, 0); |
| 184 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Reduce(local_col_counts.data(), global_col_counts_tmp.data(), total_cols, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); |
| 185 | |||
| 186 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (rank == 0) { |
| 187 | 5 | global_col_ptrs[0] = 0; | |
| 188 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 5 times.
|
19 | for (int j = 0; j < total_cols; ++j) { |
| 189 | 14 | global_col_ptrs[j + 1] = global_col_ptrs[j] + global_col_counts_tmp[j]; | |
| 190 | } | ||
| 191 | |||
| 192 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | result.values = global_values; |
| 193 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | result.row_indices = global_row_indices; |
| 194 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | result.col_ptrs = global_col_ptrs; |
| 195 | 5 | result.rows = rows; | |
| 196 | 5 | result.cols = total_cols; | |
| 197 | } | ||
| 198 | |||
| 199 | 10 | int result_values_size = 0; | |
| 200 | 10 | int result_row_indices_size = 0; | |
| 201 | 10 | int result_col_ptrs_size = 0; | |
| 202 | |||
| 203 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (rank == 0) { |
| 204 | 5 | result_values_size = static_cast<int>(result.values.size()); | |
| 205 | 5 | result_row_indices_size = static_cast<int>(result.row_indices.size()); | |
| 206 | 5 | result_col_ptrs_size = static_cast<int>(result.col_ptrs.size()); | |
| 207 | } | ||
| 208 | |||
| 209 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Bcast(&result_values_size, 1, MPI_INT, 0, MPI_COMM_WORLD); |
| 210 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Bcast(&result_row_indices_size, 1, MPI_INT, 0, MPI_COMM_WORLD); |
| 211 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Bcast(&result_col_ptrs_size, 1, MPI_INT, 0, MPI_COMM_WORLD); |
| 212 | |||
| 213 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (rank != 0) { |
| 214 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | result.values.resize(result_values_size); |
| 215 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | result.row_indices.resize(result_row_indices_size); |
| 216 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | result.col_ptrs.resize(result_col_ptrs_size); |
| 217 | 5 | result.rows = rows; | |
| 218 | 5 | result.cols = total_cols; | |
| 219 | } | ||
| 220 | |||
| 221 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Bcast(result.values.data(), result_values_size, MPI_DOUBLE, 0, MPI_COMM_WORLD); |
| 222 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Bcast(result.row_indices.data(), result_row_indices_size, MPI_INT, 0, MPI_COMM_WORLD); |
| 223 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Bcast(result.col_ptrs.data(), result_col_ptrs_size, MPI_INT, 0, MPI_COMM_WORLD); |
| 224 | 10 | } | |
| 225 | |||
| 226 | } // namespace | ||
| 227 | |||
| 228 | 10 | SparseMatrixCCS KotelnikovaATaskALL::MultiplyMatricesMPIOMP(const SparseMatrixCCS &a, const SparseMatrixCCS &b) { | |
| 229 | 10 | int rank = -1; | |
| 230 | 10 | int size = -1; | |
| 231 | 10 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 232 | 10 | MPI_Comm_size(MPI_COMM_WORLD, &size); | |
| 233 | |||
| 234 | 10 | const int total_cols = b.cols; | |
| 235 | 10 | const int cols_per_proc = total_cols / size; | |
| 236 | 10 | const int remainder = total_cols % size; | |
| 237 | |||
| 238 |
4/4✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 2 times.
|
20 | int start_col = (rank * cols_per_proc) + std::min(rank, remainder); |
| 239 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2 times.
|
10 | int end_col = start_col + cols_per_proc + (rank < remainder ? 1 : 0); |
| 240 | |||
| 241 | 10 | const int local_cols = end_col - start_col; | |
| 242 | |||
| 243 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
10 | if (local_cols > 0) { |
| 244 | 10 | std::vector<std::vector<double>> temp_columns(local_cols); | |
| 245 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | std::vector<int> nnz_counts(local_cols, 0); |
| 246 | |||
| 247 | ComputeColumnBlock(a, b, start_col, end_col, temp_columns, nnz_counts); | ||
| 248 | |||
| 249 | 10 | std::vector<double> local_values; | |
| 250 | 10 | std::vector<int> local_row_indices; | |
| 251 | 10 | std::vector<int> local_col_ptrs; | |
| 252 | |||
| 253 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | BuildLocalResult(a, start_col, end_col, temp_columns, nnz_counts, local_values, local_row_indices, local_col_ptrs); |
| 254 | |||
| 255 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | SparseMatrixCCS result(a.rows, total_cols); |
| 256 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | GatherAndBroadcastResult(rank, size, local_values, local_row_indices, local_col_ptrs, total_cols, a.rows, result); |
| 257 | return result; | ||
| 258 | 10 | } | |
| 259 | ✗ | SparseMatrixCCS result(a.rows, total_cols); | |
| 260 | ✗ | std::vector<double> empty_values; | |
| 261 | ✗ | std::vector<int> empty_row_indices; | |
| 262 | ✗ | std::vector<int> empty_col_ptrs(1, 0); | |
| 263 | ✗ | GatherAndBroadcastResult(rank, size, empty_values, empty_row_indices, empty_col_ptrs, total_cols, a.rows, result); | |
| 264 | return result; | ||
| 265 | ✗ | } | |
| 266 | |||
| 267 | 10 | bool KotelnikovaATaskALL::RunImpl() { | |
| 268 | const auto &[a, b] = GetInput(); | ||
| 269 | 10 | GetOutput() = MultiplyMatricesMPIOMP(a, b); | |
| 270 | 10 | return true; | |
| 271 | } | ||
| 272 | |||
| 273 | 10 | bool KotelnikovaATaskALL::PostProcessingImpl() { | |
| 274 | 10 | return true; | |
| 275 | } | ||
| 276 | |||
| 277 | } // namespace kotelnikova_a_double_matr_mult | ||
| 278 |