| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "viderman_a_sparse_matrix_mult_crs_complex/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | #include <omp.h> | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <complex> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "viderman_a_sparse_matrix_mult_crs_complex/common/include/common.hpp" | ||
| 11 | |||
| 12 | namespace viderman_a_sparse_matrix_mult_crs_complex { | ||
| 13 | |||
| 14 |
1/2✓ Branch 2 taken 12 times.
✗ Branch 3 not taken.
|
12 | VidermanASparseMatrixMultCRSComplexALL::VidermanASparseMatrixMultCRSComplexALL(const InType &in) { |
| 15 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 16 | GetInput() = in; | ||
| 17 |
1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
|
12 | GetOutput() = CRSMatrix(0, 0); |
| 18 | 12 | } | |
| 19 | |||
| 20 | 12 | bool VidermanASparseMatrixMultCRSComplexALL::ValidationImpl() { | |
| 21 | 12 | MPI_Initialized(&initialized_pipiline_); | |
| 22 | |||
| 23 | 12 | rank_ = 0; | |
| 24 |
1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
|
12 | if (initialized_pipiline_ != 0) { |
| 25 | 12 | MPI_Comm_rank(MPI_COMM_WORLD, &rank_); | |
| 26 | } | ||
| 27 | |||
| 28 | 12 | int is_valid = 1; | |
| 29 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
|
12 | if (rank_ == 0) { |
| 30 | const auto &input = GetInput(); | ||
| 31 | const auto &a = std::get<0>(input); | ||
| 32 | const auto &b = std::get<1>(input); | ||
| 33 | |||
| 34 |
4/6✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 6 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✓ Branch 5 taken 5 times.
|
6 | if (!a.IsValid() || !b.IsValid() || a.cols != b.rows) { |
| 35 | 1 | is_valid = 0; | |
| 36 | } | ||
| 37 | } | ||
| 38 | |||
| 39 |
1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
|
12 | if (initialized_pipiline_ != 0) { |
| 40 | 12 | MPI_Bcast(&is_valid, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 41 | } | ||
| 42 | |||
| 43 | 12 | return is_valid == 1; | |
| 44 | } | ||
| 45 | |||
| 46 | 10 | bool VidermanASparseMatrixMultCRSComplexALL::PreProcessingImpl() { | |
| 47 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
10 | if (initialized_pipiline_ != 0) { |
| 48 | 10 | MPI_Comm_rank(MPI_COMM_WORLD, &rank_); | |
| 49 | 10 | MPI_Comm_size(MPI_COMM_WORLD, &world_size_); | |
| 50 | } else { | ||
| 51 | ✗ | rank_ = 0; | |
| 52 | ✗ | world_size_ = 1; | |
| 53 | } | ||
| 54 | |||
| 55 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (rank_ == 0) { |
| 56 | const auto &input = GetInput(); | ||
| 57 | const auto &a = std::get<0>(input); | ||
| 58 | 5 | b_ = std::get<1>(input); | |
| 59 | |||
| 60 | 5 | a_rows_ = a.rows; | |
| 61 | 5 | a_cols_ = a.cols; | |
| 62 | } | ||
| 63 | |||
| 64 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
10 | if (initialized_pipiline_ != 0) { |
| 65 | 10 | MPI_Bcast(&a_rows_, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 66 | 10 | MPI_Bcast(&a_cols_, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 67 | } | ||
| 68 | |||
| 69 | 10 | BroadcastData(); | |
| 70 | 10 | ScatterData(); | |
| 71 | |||
| 72 | 10 | return true; | |
| 73 | } | ||
| 74 | |||
| 75 | 10 | bool VidermanASparseMatrixMultCRSComplexALL::RunImpl() { | |
| 76 | 10 | MultiplyLocal(); | |
| 77 | |||
| 78 | 10 | return true; | |
| 79 | } | ||
| 80 | |||
| 81 | 10 | bool VidermanASparseMatrixMultCRSComplexALL::PostProcessingImpl() { | |
| 82 | 10 | GatherData(); | |
| 83 | |||
| 84 | 10 | return true; | |
| 85 | } | ||
| 86 | |||
| 87 | 5 | void VidermanASparseMatrixMultCRSComplexALL::MasterScatter() { | |
| 88 | 5 | SendDataProcesses(); | |
| 89 | |||
| 90 | 5 | int count = a_rows_ / world_size_; | |
| 91 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 3 times.
|
5 | if (a_rows_ % world_size_ > 0) { |
| 92 | 2 | count++; | |
| 93 | } | ||
| 94 | |||
| 95 | 5 | local_a_ = CRSMatrix(count, a_cols_); | |
| 96 | const auto &a = std::get<0>(GetInput()); | ||
| 97 |
2/4✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 5 times.
✗ Branch 3 not taken.
|
5 | if (count > 0 && !a.row_ptr.empty()) { |
| 98 | 5 | int nnz = a.row_ptr[count] - a.row_ptr[0]; | |
| 99 | 5 | std::copy(a.row_ptr.begin(), a.row_ptr.begin() + count + 1, local_a_.row_ptr.begin()); | |
| 100 | |||
| 101 | 5 | local_a_.col_indices.assign(a.col_indices.begin(), a.col_indices.begin() + nnz); | |
| 102 | 5 | local_a_.values.assign(a.values.begin(), a.values.begin() + nnz); | |
| 103 | } | ||
| 104 | 5 | } | |
| 105 | |||
| 106 | 5 | void VidermanASparseMatrixMultCRSComplexALL::WorkerScatter() { | |
| 107 | 5 | int count = 0; | |
| 108 | 5 | MPI_Recv(&count, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); | |
| 109 | |||
| 110 | 5 | local_a_ = CRSMatrix(count, a_cols_); | |
| 111 | |||
| 112 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 1 times.
|
5 | if (count > 0) { |
| 113 | 4 | int nnz = 0; | |
| 114 | 4 | MPI_Recv(&nnz, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); | |
| 115 | |||
| 116 | 4 | local_a_.row_ptr.resize(count + 1); | |
| 117 | 4 | MPI_Recv(local_a_.row_ptr.data(), count + 1, MPI_INT, 0, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE); | |
| 118 | |||
| 119 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
|
4 | if (nnz > 0) { |
| 120 | 3 | local_a_.col_indices.resize(nnz); | |
| 121 | 3 | local_a_.values.resize(nnz); | |
| 122 | |||
| 123 | 3 | MPI_Recv(local_a_.col_indices.data(), nnz, MPI_INT, 0, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE); | |
| 124 | 3 | MPI_Recv(reinterpret_cast<double *>(local_a_.values.data()), nnz * 2, MPI_DOUBLE, 0, 4, MPI_COMM_WORLD, | |
| 125 | MPI_STATUS_IGNORE); | ||
| 126 | } | ||
| 127 | } | ||
| 128 | 5 | } | |
| 129 | |||
| 130 | 10 | void VidermanASparseMatrixMultCRSComplexALL::ScatterData() { | |
| 131 |
2/4✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 10 times.
|
10 | if (initialized_pipiline_ == 0 || world_size_ == 1) { |
| 132 | ✗ | if (rank_ == 0) { | |
| 133 | ✗ | local_a_ = std::get<0>(GetInput()); | |
| 134 | } | ||
| 135 | |||
| 136 | ✗ | return; | |
| 137 | } | ||
| 138 | |||
| 139 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (rank_ == 0) { |
| 140 | 5 | MasterScatter(); | |
| 141 | } else { | ||
| 142 | 5 | WorkerScatter(); | |
| 143 | } | ||
| 144 | } | ||
| 145 | |||
| 146 | 5 | void VidermanASparseMatrixMultCRSComplexALL::MasterGather() { | |
| 147 | CRSMatrix &out = GetOutput(); | ||
| 148 | 5 | out = CRSMatrix(a_rows_, b_.cols); | |
| 149 | |||
| 150 | int rows_to_copy = std::min(local_c_.rows, a_rows_); | ||
| 151 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 5 times.
|
16 | for (int i = 0; i <= rows_to_copy; ++i) { |
| 152 | 11 | out.row_ptr[i] = local_c_.row_ptr[i]; | |
| 153 | } | ||
| 154 | |||
| 155 | 5 | out.col_indices = local_c_.col_indices; | |
| 156 | 5 | out.values = local_c_.values; | |
| 157 | |||
| 158 | 5 | ReceiveDataProcesses(out); | |
| 159 | 5 | } | |
| 160 | |||
| 161 | 5 | void VidermanASparseMatrixMultCRSComplexALL::WorkerGather() { | |
| 162 | 5 | int send_rows = local_c_.rows; | |
| 163 | 5 | int send_nnz = static_cast<int>(local_c_.values.size()); | |
| 164 | |||
| 165 | 5 | MPI_Send(&send_rows, 1, MPI_INT, 0, 0, MPI_COMM_WORLD); | |
| 166 | |||
| 167 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 1 times.
|
5 | if (send_rows > 0) { |
| 168 | 4 | MPI_Send(&send_nnz, 1, MPI_INT, 0, 1, MPI_COMM_WORLD); | |
| 169 | 4 | MPI_Send(local_c_.row_ptr.data(), send_rows + 1, MPI_INT, 0, 2, MPI_COMM_WORLD); | |
| 170 | |||
| 171 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (send_nnz > 0) { |
| 172 | 2 | MPI_Send(local_c_.col_indices.data(), send_nnz, MPI_INT, 0, 3, MPI_COMM_WORLD); | |
| 173 | 2 | MPI_Send(reinterpret_cast<const double *>(local_c_.values.data()), send_nnz * 2, MPI_DOUBLE, 0, 4, | |
| 174 | MPI_COMM_WORLD); | ||
| 175 | } | ||
| 176 | } | ||
| 177 | 5 | } | |
| 178 | |||
| 179 | 10 | void VidermanASparseMatrixMultCRSComplexALL::GatherData() { | |
| 180 |
2/4✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 10 times.
|
10 | if (initialized_pipiline_ == 0 || world_size_ == 1) { |
| 181 | ✗ | if (rank_ == 0) { | |
| 182 | ✗ | GetOutput() = local_c_; | |
| 183 | } | ||
| 184 | |||
| 185 | ✗ | return; | |
| 186 | } | ||
| 187 | |||
| 188 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (rank_ == 0) { |
| 189 | 5 | MasterGather(); | |
| 190 | } else { | ||
| 191 | 5 | WorkerGather(); | |
| 192 | } | ||
| 193 | |||
| 194 | CRSMatrix &out = GetOutput(); | ||
| 195 | |||
| 196 | 10 | std::vector<int> g_info(3, 0); | |
| 197 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (rank_ == 0) { |
| 198 | 5 | g_info[0] = out.rows; | |
| 199 | 5 | g_info[1] = out.cols; | |
| 200 | 5 | g_info[2] = static_cast<int>(out.values.size()); | |
| 201 | } | ||
| 202 | |||
| 203 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Bcast(g_info.data(), 3, MPI_INT, 0, MPI_COMM_WORLD); |
| 204 | |||
| 205 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (rank_ != 0) { |
| 206 | 5 | out.rows = g_info[0]; | |
| 207 | 5 | out.cols = g_info[1]; | |
| 208 |
1/4✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
5 | out.row_ptr.assign(out.rows + 1, 0); |
| 209 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | out.col_indices.resize(g_info[2]); |
| 210 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | out.values.resize(g_info[2]); |
| 211 | } | ||
| 212 | |||
| 213 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
10 | if (out.rows >= 0) { |
| 214 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Bcast(out.row_ptr.data(), out.rows + 1, MPI_INT, 0, MPI_COMM_WORLD); |
| 215 | } | ||
| 216 | |||
| 217 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 4 times.
|
10 | if (g_info[2] > 0) { |
| 218 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | MPI_Bcast(out.col_indices.data(), g_info[2], MPI_INT, 0, MPI_COMM_WORLD); |
| 219 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | MPI_Bcast(reinterpret_cast<double *>(out.values.data()), g_info[2] * 2, MPI_DOUBLE, 0, MPI_COMM_WORLD); |
| 220 | } | ||
| 221 | } | ||
| 222 | |||
| 223 | 5 | void VidermanASparseMatrixMultCRSComplexALL::SendDataProcesses() { | |
| 224 | const auto &a = std::get<0>(GetInput()); | ||
| 225 | 5 | int rows_per_proc = a_rows_ / world_size_; | |
| 226 | 5 | int remainder = a_rows_ % world_size_; | |
| 227 | |||
| 228 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | for (int process = 1; process < world_size_; ++process) { |
| 229 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
5 | int start = (process * rows_per_proc) + std::min(process, remainder); |
| 230 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
5 | int count = rows_per_proc + (process < remainder ? 1 : 0); |
| 231 | 5 | MPI_Send(&count, 1, MPI_INT, process, 0, MPI_COMM_WORLD); | |
| 232 | |||
| 233 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 1 times.
|
5 | if (count > 0) { |
| 234 | 4 | int nnz = a.row_ptr[start + count] - a.row_ptr[start]; | |
| 235 | 4 | MPI_Send(&nnz, 1, MPI_INT, process, 1, MPI_COMM_WORLD); | |
| 236 | |||
| 237 | 4 | std::vector<int> p_row_ptr(count + 1); | |
| 238 | |||
| 239 | 4 | int offset = a.row_ptr[start]; | |
| 240 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 4 times.
|
12 | for (int i = 0; i <= count; ++i) { |
| 241 | 8 | p_row_ptr[i] = a.row_ptr[start + i] - offset; | |
| 242 | } | ||
| 243 | |||
| 244 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | MPI_Send(p_row_ptr.data(), count + 1, MPI_INT, process, 2, MPI_COMM_WORLD); |
| 245 | |||
| 246 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
|
4 | if (nnz > 0) { |
| 247 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | MPI_Send(&a.col_indices[offset], nnz, MPI_INT, process, 3, MPI_COMM_WORLD); |
| 248 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | MPI_Send(reinterpret_cast<const double *>(&a.values[offset]), nnz * 2, MPI_DOUBLE, process, 4, MPI_COMM_WORLD); |
| 249 | } | ||
| 250 | } | ||
| 251 | } | ||
| 252 | 5 | } | |
| 253 | |||
| 254 | 5 | void VidermanASparseMatrixMultCRSComplexALL::ReceiveDataProcesses(CRSMatrix &out) const { | |
| 255 | 5 | int current_global_row = local_c_.rows; | |
| 256 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | for (int process = 1; process < world_size_; ++process) { |
| 257 | 5 | int p_rows = 0; | |
| 258 | 5 | MPI_Recv(&p_rows, 1, MPI_INT, process, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); | |
| 259 | |||
| 260 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 1 times.
|
5 | if (p_rows > 0) { |
| 261 | 4 | int p_nnz = 0; | |
| 262 | 4 | MPI_Recv(&p_nnz, 1, MPI_INT, process, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); | |
| 263 | |||
| 264 | 4 | std::vector<int> p_ptr(p_rows + 1); | |
| 265 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | MPI_Recv(p_ptr.data(), p_rows + 1, MPI_INT, process, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE); |
| 266 | |||
| 267 | 4 | int offset = static_cast<int>(out.values.size()); | |
| 268 | |||
| 269 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (p_nnz > 0) { |
| 270 |
1/4✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
2 | std::vector<int> p_cols(p_nnz); |
| 271 |
1/4✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
2 | std::vector<Complex> p_vals(p_nnz); |
| 272 | |||
| 273 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | MPI_Recv(p_cols.data(), p_nnz, MPI_INT, process, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE); |
| 274 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | MPI_Recv(reinterpret_cast<double *>(p_vals.data()), p_nnz * 2, MPI_DOUBLE, process, 4, MPI_COMM_WORLD, |
| 275 | MPI_STATUS_IGNORE); | ||
| 276 | |||
| 277 |
1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
|
2 | out.col_indices.insert(out.col_indices.end(), p_cols.begin(), p_cols.end()); |
| 278 |
2/4✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✗ Branch 4 not taken.
|
2 | out.values.insert(out.values.end(), p_vals.begin(), p_vals.end()); |
| 279 | } | ||
| 280 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | for (int i = 1; i <= p_rows; ++i) { |
| 281 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (current_global_row + i <= a_rows_) { |
| 282 | 4 | out.row_ptr[current_global_row + i] = offset + p_ptr[i]; | |
| 283 | } | ||
| 284 | } | ||
| 285 | |||
| 286 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | current_global_row += p_rows; |
| 287 | } | ||
| 288 | } | ||
| 289 | 5 | } | |
| 290 | |||
| 291 | 10 | void VidermanASparseMatrixMultCRSComplexALL::BroadcastData() { | |
| 292 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
10 | if (initialized_pipiline_ == 0) { |
| 293 | ✗ | return; | |
| 294 | } | ||
| 295 | |||
| 296 | 10 | MPI_Bcast(&b_.rows, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 297 | 10 | MPI_Bcast(&b_.cols, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 298 | |||
| 299 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | int nnz = (rank_ == 0) ? static_cast<int>(b_.values.size()) : 0; |
| 300 | 10 | MPI_Bcast(&nnz, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 301 | |||
| 302 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (rank_ != 0) { |
| 303 | 5 | b_.row_ptr.assign(b_.rows + 1, 0); | |
| 304 | 5 | b_.col_indices.resize(nnz); | |
| 305 | 5 | b_.values.resize(nnz); | |
| 306 | } | ||
| 307 | |||
| 308 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
10 | if (!b_.row_ptr.empty()) { |
| 309 | 10 | MPI_Bcast(b_.row_ptr.data(), b_.rows + 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 310 | } | ||
| 311 | |||
| 312 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 4 times.
|
10 | if (nnz > 0) { |
| 313 | 6 | MPI_Bcast(b_.col_indices.data(), nnz, MPI_INT, 0, MPI_COMM_WORLD); | |
| 314 | 6 | MPI_Bcast(reinterpret_cast<double *>(b_.values.data()), nnz * 2, MPI_DOUBLE, 0, MPI_COMM_WORLD); | |
| 315 | } | ||
| 316 | } | ||
| 317 | |||
| 318 | 10 | void VidermanASparseMatrixMultCRSComplexALL::ComputeMultiply(int i, std::vector<int> &cols, | |
| 319 | std::vector<Complex> &vals) { | ||
| 320 | 10 | std::vector<Complex> accumulator(b_.cols, Complex(0, 0)); | |
| 321 | bool has_data = false; | ||
| 322 | |||
| 323 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 10 times.
|
17 | for (int j = local_a_.row_ptr[i]; j < local_a_.row_ptr[i + 1]; ++j) { |
| 324 | 7 | int a_col = local_a_.col_indices[j]; | |
| 325 | 7 | Complex a_val = local_a_.values[j]; | |
| 326 | |||
| 327 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
|
14 | for (int k = b_.row_ptr[a_col]; k < b_.row_ptr[a_col + 1]; ++k) { |
| 328 | 7 | accumulator[b_.col_indices[k]] += a_val * b_.values[k]; | |
| 329 | |||
| 330 | has_data = true; | ||
| 331 | } | ||
| 332 | } | ||
| 333 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (has_data) { |
| 334 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 5 times.
|
14 | for (int j = 0; j < b_.cols; ++j) { |
| 335 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 2 times.
|
9 | if (std::norm(accumulator[j]) > 1e-18) { |
| 336 | cols.push_back(j); | ||
| 337 | vals.push_back(accumulator[j]); | ||
| 338 | } | ||
| 339 | } | ||
| 340 | } | ||
| 341 | 10 | } | |
| 342 | |||
| 343 | 10 | void VidermanASparseMatrixMultCRSComplexALL::MultiplyLocal() { | |
| 344 | 10 | local_c_ = CRSMatrix(local_a_.rows, b_.cols); | |
| 345 |
3/4✓ Branch 0 taken 9 times.
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 9 times.
|
10 | if (local_a_.rows <= 0 || b_.cols <= 0) { |
| 346 | 1 | return; | |
| 347 | } | ||
| 348 | |||
| 349 | 9 | std::vector<std::vector<int>> row_cols(local_a_.rows); | |
| 350 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | std::vector<std::vector<Complex>> row_vals(local_a_.rows); |
| 351 | |||
| 352 | 9 | #pragma omp parallel for default(none) shared(row_cols, row_vals) | |
| 353 | for (int i = 0; i < local_a_.rows; ++i) { | ||
| 354 | ComputeMultiply(i, row_cols[i], row_vals[i]); | ||
| 355 | } | ||
| 356 | |||
| 357 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 9 times.
|
19 | for (int i = 0; i < local_a_.rows; ++i) { |
| 358 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | local_c_.col_indices.insert(local_c_.col_indices.end(), row_cols[i].begin(), row_cols[i].end()); |
| 359 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | local_c_.values.insert(local_c_.values.end(), row_vals[i].begin(), row_vals[i].end()); |
| 360 | 10 | local_c_.row_ptr[i + 1] = static_cast<int>(local_c_.values.size()); | |
| 361 | } | ||
| 362 | 9 | } | |
| 363 | |||
| 364 | } // namespace viderman_a_sparse_matrix_mult_crs_complex | ||
| 365 |