| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "posternak_a_crs_mul_complex_matrix/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cmath> | ||
| 7 | #include <complex> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <unordered_map> | ||
| 10 | #include <utility> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include "posternak_a_crs_mul_complex_matrix/common/include/common.hpp" | ||
| 14 | |||
| 15 | namespace { | ||
| 16 | |||
| 17 | 10 | size_t ComputeRowNoZeroCount(const posternak_a_crs_mul_complex_matrix::CRSMatrix &a, | |
| 18 | const posternak_a_crs_mul_complex_matrix::CRSMatrix &b, int row, double threshold) { | ||
| 19 | std::unordered_map<int, std::complex<double>> row_sum; | ||
| 20 | |||
| 21 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 10 times.
|
22 | for (int idx_a = a.index_row[row]; idx_a < a.index_row[row + 1]; ++idx_a) { |
| 22 | 12 | int col_a = a.index_col[idx_a]; | |
| 23 | 12 | auto val_a = a.values[idx_a]; | |
| 24 | |||
| 25 |
2/2✓ Branch 0 taken 17 times.
✓ Branch 1 taken 12 times.
|
29 | for (int idx_b = b.index_row[col_a]; idx_b < b.index_row[col_a + 1]; ++idx_b) { |
| 26 |
1/2✓ Branch 1 taken 17 times.
✗ Branch 2 not taken.
|
17 | int col_b = b.index_col[idx_b]; |
| 27 |
1/2✓ Branch 1 taken 17 times.
✗ Branch 2 not taken.
|
17 | auto val_b = b.values[idx_b]; |
| 28 | row_sum[col_b] += val_a * val_b; | ||
| 29 | } | ||
| 30 | } | ||
| 31 | |||
| 32 | size_t local = 0; | ||
| 33 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 10 times.
|
24 | for (const auto &[col, val] : row_sum) { |
| 34 |
1/2✓ Branch 0 taken 14 times.
✗ Branch 1 not taken.
|
14 | if (std::abs(val) > threshold) { |
| 35 | 14 | ++local; | |
| 36 | } | ||
| 37 | } | ||
| 38 | 10 | return local; | |
| 39 | } | ||
| 40 | |||
| 41 | 4 | void BuildResultStructure(posternak_a_crs_mul_complex_matrix::CRSMatrix &res, std::vector<size_t> &row_prefix) { | |
| 42 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 4 times.
|
10 | for (int i = 1; i < res.rows; ++i) { |
| 43 | 6 | row_prefix[i] += row_prefix[i - 1]; | |
| 44 | } | ||
| 45 | |||
| 46 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | const size_t total = row_prefix.empty() ? 0 : row_prefix.back(); |
| 47 | 4 | res.values.resize(total); | |
| 48 | 4 | res.index_col.resize(total); | |
| 49 | 4 | res.index_row.resize(res.rows + 1); | |
| 50 | |||
| 51 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 4 times.
|
18 | for (int i = 0; i <= res.rows; ++i) { |
| 52 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 4 times.
|
14 | res.index_row[i] = (i == 0 ? 0 : static_cast<int>(row_prefix[i - 1])); |
| 53 | } | ||
| 54 | 4 | } | |
| 55 | |||
| 56 | 10 | void ComputeAndWriteRow(const posternak_a_crs_mul_complex_matrix::CRSMatrix &a, | |
| 57 | const posternak_a_crs_mul_complex_matrix::CRSMatrix &b, | ||
| 58 | posternak_a_crs_mul_complex_matrix::CRSMatrix &res, int row, double threshold) { | ||
| 59 | std::unordered_map<int, std::complex<double>> row_sum; | ||
| 60 | |||
| 61 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 10 times.
|
22 | for (int idx_a = a.index_row[row]; idx_a < a.index_row[row + 1]; ++idx_a) { |
| 62 | 12 | int col_a = a.index_col[idx_a]; | |
| 63 | 12 | auto val_a = a.values[idx_a]; | |
| 64 | |||
| 65 |
2/2✓ Branch 0 taken 17 times.
✓ Branch 1 taken 12 times.
|
29 | for (int idx_b = b.index_row[col_a]; idx_b < b.index_row[col_a + 1]; ++idx_b) { |
| 66 |
1/2✓ Branch 1 taken 17 times.
✗ Branch 2 not taken.
|
17 | int col_b = b.index_col[idx_b]; |
| 67 |
1/2✓ Branch 1 taken 17 times.
✗ Branch 2 not taken.
|
17 | auto val_b = b.values[idx_b]; |
| 68 | row_sum[col_b] += val_a * val_b; | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | std::vector<std::pair<int, std::complex<double>>> sorted(row_sum.begin(), row_sum.end()); |
| 73 | |||
| 74 |
1/22✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✓ Branch 12 taken 4 times.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
|
4 | std::ranges::sort(sorted, [](const auto &p1, const auto &p2) { return p1.first < p2.first; }); |
| 75 | |||
| 76 | 10 | size_t pos = res.index_row[row]; | |
| 77 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 10 times.
|
24 | for (const auto &[col_idx, value] : sorted) { |
| 78 |
1/2✓ Branch 0 taken 14 times.
✗ Branch 1 not taken.
|
14 | if (std::abs(value) > threshold) { |
| 79 | 14 | res.values[pos] = value; | |
| 80 | 14 | res.index_col[pos] = col_idx; | |
| 81 | 14 | ++pos; | |
| 82 | } | ||
| 83 | } | ||
| 84 | 10 | } | |
| 85 | |||
| 86 | void ComputeLocalCounts(const posternak_a_crs_mul_complex_matrix::CRSMatrix &a, | ||
| 87 | const posternak_a_crs_mul_complex_matrix::CRSMatrix &b, std::vector<size_t> &local_counts, | ||
| 88 | int local_start, int local_count, double threshold) { | ||
| 89 | 8 | #pragma omp parallel for schedule(dynamic) default(none) shared(a, b, local_counts, local_start, local_count, threshold) | |
| 90 | for (int i = 0; i < local_count; ++i) { | ||
| 91 | local_counts[i] = ComputeRowNoZeroCount(a, b, local_start + i, threshold); | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
| 95 | 8 | void GatherCountsToRoot(const std::vector<size_t> &local_counts, int local_count, std::vector<size_t> &global_counts, | |
| 96 | const std::vector<int> &recv_counts, const std::vector<int> &displs, int rank) { | ||
| 97 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (rank == 0) { |
| 98 | 4 | std::vector<size_t> send_buf = local_counts; | |
| 99 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | MPI_Gatherv(send_buf.data(), local_count, MPI_UNSIGNED_LONG_LONG, global_counts.data(), recv_counts.data(), |
| 100 | displs.data(), MPI_UNSIGNED_LONG_LONG, 0, MPI_COMM_WORLD); | ||
| 101 | } else { | ||
| 102 | 4 | MPI_Gatherv(local_counts.data(), local_count, MPI_UNSIGNED_LONG_LONG, nullptr, nullptr, nullptr, | |
| 103 | MPI_UNSIGNED_LONG_LONG, 0, MPI_COMM_WORLD); | ||
| 104 | } | ||
| 105 | 8 | } | |
| 106 | |||
| 107 | 8 | void BuildAndBroadcastStructure(posternak_a_crs_mul_complex_matrix::CRSMatrix &res, | |
| 108 | std::vector<size_t> &global_counts_copy, int rank) { | ||
| 109 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (rank == 0) { |
| 110 | 4 | BuildResultStructure(res, global_counts_copy); | |
| 111 | } | ||
| 112 | 8 | res.index_row.resize(res.rows + 1); | |
| 113 | 8 | MPI_Bcast(res.index_row.data(), static_cast<int>(res.index_row.size()), MPI_INT, 0, MPI_COMM_WORLD); | |
| 114 | 8 | } | |
| 115 | |||
| 116 | void ComputeLocalRows(const posternak_a_crs_mul_complex_matrix::CRSMatrix &a, | ||
| 117 | const posternak_a_crs_mul_complex_matrix::CRSMatrix &b, | ||
| 118 | posternak_a_crs_mul_complex_matrix::CRSMatrix &res, int local_start, int local_count, | ||
| 119 | double threshold) { | ||
| 120 | 8 | #pragma omp parallel for schedule(dynamic) default(none) shared(a, b, res, local_start, local_count, threshold) | |
| 121 | for (int i = 0; i < local_count; ++i) { | ||
| 122 | ComputeAndWriteRow(a, b, res, local_start + i, threshold); | ||
| 123 | } | ||
| 124 | } | ||
| 125 | |||
| 126 | 8 | void PrepareGatherParams(std::vector<int> &g_counts, std::vector<int> &g_displs, | |
| 127 | const posternak_a_crs_mul_complex_matrix::CRSMatrix &res, int rows_per_proc, int rem, | ||
| 128 | int size) { | ||
| 129 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 8 times.
|
24 | for (int process = 0; process < size; ++process) { |
| 130 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 4 times.
|
16 | int process_start = (process * rows_per_proc) + std::min(process, rem); |
| 131 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 4 times.
|
16 | int process_end = process_start + rows_per_proc + (process < rem ? 1 : 0); |
| 132 | 16 | g_displs[process] = res.index_row[process_start]; | |
| 133 | 16 | g_counts[process] = res.index_row[process_end] - res.index_row[process_start]; | |
| 134 | } | ||
| 135 | 8 | } | |
| 136 | |||
| 137 | 8 | void GatherResultData(posternak_a_crs_mul_complex_matrix::CRSMatrix &res, int local_start, int local_nnz, | |
| 138 | const std::vector<int> &g_counts, const std::vector<int> &g_displs, int rank) { | ||
| 139 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (rank == 0) { |
| 140 | 4 | std::vector<std::complex<double>> local_values_copy(res.values.data() + res.index_row[local_start], | |
| 141 |
1/2✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.
|
4 | res.values.data() + res.index_row[local_start] + local_nnz); |
| 142 | 4 | std::vector<int> local_index_copy(res.index_col.data() + res.index_row[local_start], | |
| 143 |
2/6✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 4 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
|
4 | res.index_col.data() + res.index_row[local_start] + local_nnz); |
| 144 | |||
| 145 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | MPI_Gatherv(local_values_copy.data(), local_nnz, MPI_C_DOUBLE_COMPLEX, res.values.data(), g_counts.data(), |
| 146 | g_displs.data(), MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); | ||
| 147 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | MPI_Gatherv(local_index_copy.data(), local_nnz, MPI_INT, res.index_col.data(), g_counts.data(), g_displs.data(), |
| 148 | MPI_INT, 0, MPI_COMM_WORLD); | ||
| 149 | } else { | ||
| 150 | 4 | MPI_Gatherv(res.values.data() + res.index_row[local_start], local_nnz, MPI_C_DOUBLE_COMPLEX, nullptr, nullptr, | |
| 151 | nullptr, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); | ||
| 152 | 4 | MPI_Gatherv(res.index_col.data() + res.index_row[local_start], local_nnz, MPI_INT, nullptr, nullptr, nullptr, | |
| 153 | MPI_INT, 0, MPI_COMM_WORLD); | ||
| 154 | } | ||
| 155 | 8 | } | |
| 156 | |||
| 157 | 8 | void BroadcastResult(posternak_a_crs_mul_complex_matrix::CRSMatrix &res, int total_nnz) { | |
| 158 | 8 | MPI_Bcast(res.values.data(), total_nnz, MPI_C_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD); | |
| 159 | 8 | MPI_Bcast(res.index_col.data(), total_nnz, MPI_INT, 0, MPI_COMM_WORLD); | |
| 160 | 8 | } | |
| 161 | |||
| 162 | } // namespace | ||
| 163 | |||
| 164 | namespace posternak_a_crs_mul_complex_matrix { | ||
| 165 | |||
| 166 |
1/2✓ Branch 2 taken 12 times.
✗ Branch 3 not taken.
|
12 | PosternakACRSMulComplexMatrixALL::PosternakACRSMulComplexMatrixALL(const InType &in) { |
| 167 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 168 | GetInput() = in; | ||
| 169 | 12 | GetOutput() = CRSMatrix{}; | |
| 170 | 12 | } | |
| 171 | |||
| 172 | 12 | bool PosternakACRSMulComplexMatrixALL::ValidationImpl() { | |
| 173 | const auto &input = GetInput(); | ||
| 174 | 12 | const auto &a = input.first; | |
| 175 | 12 | const auto &b = input.second; | |
| 176 |
3/6✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 12 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 12 times.
|
12 | return a.IsValid() && b.IsValid() && a.cols == b.rows; |
| 177 | } | ||
| 178 | |||
| 179 | 12 | bool PosternakACRSMulComplexMatrixALL::PreProcessingImpl() { | |
| 180 | const auto &input = GetInput(); | ||
| 181 | const auto &a = input.first; | ||
| 182 | const auto &b = input.second; | ||
| 183 | auto &res = GetOutput(); | ||
| 184 | |||
| 185 | 12 | res.rows = a.rows; | |
| 186 | 12 | res.cols = b.cols; | |
| 187 | 12 | return true; | |
| 188 | } | ||
| 189 | |||
| 190 | 12 | bool PosternakACRSMulComplexMatrixALL::RunImpl() { | |
| 191 | const auto &input = GetInput(); | ||
| 192 | 12 | const auto &a = input.first; | |
| 193 | 12 | const auto &b = input.second; | |
| 194 | auto &res = GetOutput(); | ||
| 195 | |||
| 196 | 12 | int rank = 0; | |
| 197 | 12 | int size = 1; | |
| 198 | 12 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 199 | 12 | MPI_Comm_size(MPI_COMM_WORLD, &size); | |
| 200 | |||
| 201 |
4/4✓ Branch 0 taken 10 times.
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
|
12 | if (a.values.empty() || b.values.empty()) { |
| 202 | res.values.clear(); | ||
| 203 | res.index_col.clear(); | ||
| 204 | 4 | res.index_row.assign(res.rows + 1, 0); | |
| 205 | 4 | return true; | |
| 206 | } | ||
| 207 | |||
| 208 | constexpr double kThreshold = 1e-12; | ||
| 209 | |||
| 210 | 8 | int rows_per_proc = res.rows / size; | |
| 211 | 8 | int rem = res.rows % size; | |
| 212 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
|
8 | int local_start = (rank * rows_per_proc) + std::min(rank, rem); |
| 213 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
|
8 | int local_end = local_start + rows_per_proc + (rank < rem ? 1 : 0); |
| 214 | 8 | int local_count = local_end - local_start; | |
| 215 | |||
| 216 | 8 | std::vector<size_t> local_counts(local_count); | |
| 217 |
1/4✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
8 | std::vector<size_t> global_counts(res.rows); |
| 218 | ComputeLocalCounts(a, b, local_counts, local_start, local_count, kThreshold); | ||
| 219 | |||
| 220 |
1/4✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
8 | std::vector<int> recv_counts(size); |
| 221 |
1/4✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
8 | std::vector<int> displs(size); |
| 222 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 8 times.
|
24 | for (int process = 0; process < size; ++process) { |
| 223 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 4 times.
|
16 | int process_start = (process * rows_per_proc) + std::min(process, rem); |
| 224 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 4 times.
|
16 | int process_end = process_start + rows_per_proc + (process < rem ? 1 : 0); |
| 225 | 16 | recv_counts[process] = process_end - process_start; | |
| 226 | 16 | displs[process] = process_start; | |
| 227 | } | ||
| 228 | |||
| 229 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | GatherCountsToRoot(local_counts, local_count, global_counts, recv_counts, displs, rank); |
| 230 | |||
| 231 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | std::vector<size_t> global_counts_copy = global_counts; |
| 232 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | BuildAndBroadcastStructure(res, global_counts_copy, rank); |
| 233 | |||
| 234 | 8 | int total_nnz = res.index_row.back(); | |
| 235 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | res.values.resize(total_nnz); |
| 236 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | res.index_col.resize(total_nnz); |
| 237 | |||
| 238 | ComputeLocalRows(a, b, res, local_start, local_count, kThreshold); | ||
| 239 | |||
| 240 |
1/4✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
8 | std::vector<int> g_counts(size); |
| 241 |
1/4✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
8 | std::vector<int> g_displs(size); |
| 242 | 8 | PrepareGatherParams(g_counts, g_displs, res, rows_per_proc, rem, size); | |
| 243 | |||
| 244 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | int local_nnz = res.index_row[local_end] - res.index_row[local_start]; |
| 245 | |||
| 246 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | GatherResultData(res, local_start, local_nnz, g_counts, g_displs, rank); |
| 247 |
1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
|
8 | BroadcastResult(res, total_nnz); |
| 248 | |||
| 249 |
1/2✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
|
8 | return res.IsValid(); |
| 250 | } | ||
| 251 | |||
| 252 | 12 | bool PosternakACRSMulComplexMatrixALL::PostProcessingImpl() { | |
| 253 | 12 | return GetOutput().IsValid(); | |
| 254 | } | ||
| 255 | |||
| 256 | } // namespace posternak_a_crs_mul_complex_matrix | ||
| 257 |