| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "dilshodov_a_spmm_double_css/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | #include <omp.h> | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <cmath> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <cstdint> | ||
| 10 | #include <utility> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include "dilshodov_a_spmm_double_css/common/include/common.hpp" | ||
| 14 | |||
| 15 | namespace dilshodov_a_spmm_double_css { | ||
| 16 | |||
| 17 | namespace { | ||
| 18 | constexpr double kEps = 1e-10; | ||
| 19 | |||
| 20 | bool HasValidDimensions(const SparseMatrixCCS &m) { | ||
| 21 |
1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
|
12 | return m.rows_count > 0 && m.cols_count > 0; |
| 22 | } | ||
| 23 | |||
| 24 |
1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
|
12 | bool HasValidContainers(const SparseMatrixCCS &m) { |
| 25 |
1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
|
12 | if (m.col_ptrs.size() != static_cast<size_t>(m.cols_count) + 1) { |
| 26 | return false; | ||
| 27 | } | ||
| 28 |
1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
|
12 | if (m.row_indices.size() != m.values.size()) { |
| 29 | return false; | ||
| 30 | } | ||
| 31 |
2/4✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 12 times.
✗ Branch 3 not taken.
|
12 | if (m.col_ptrs.empty() || m.col_ptrs.front() != 0) { |
| 32 | return false; | ||
| 33 | } | ||
| 34 |
1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
|
12 | if (m.col_ptrs.back() < 0) { |
| 35 | return false; | ||
| 36 | } | ||
| 37 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.
|
12 | if (static_cast<size_t>(m.col_ptrs.back()) != m.values.size()) { |
| 38 | ✗ | return false; | |
| 39 | } | ||
| 40 | return true; | ||
| 41 | } | ||
| 42 | |||
| 43 | 12 | bool HasValidColumnOrdering(const SparseMatrixCCS &m) { | |
| 44 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 12 times.
|
42 | for (int j = 0; j < m.cols_count; ++j) { |
| 45 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 30 times.
|
30 | if (m.col_ptrs[j] > m.col_ptrs[j + 1]) { |
| 46 | return false; | ||
| 47 | } | ||
| 48 | int prev_row = -1; | ||
| 49 |
2/2✓ Branch 0 taken 46 times.
✓ Branch 1 taken 30 times.
|
76 | for (int idx = m.col_ptrs[j]; idx < m.col_ptrs[j + 1]; ++idx) { |
| 50 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 46 times.
|
46 | const int row = m.row_indices[idx]; |
| 51 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 46 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 46 times.
|
46 | if (row < 0 || row >= m.rows_count) { |
| 52 | return false; | ||
| 53 | } | ||
| 54 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 46 times.
|
46 | if (row <= prev_row) { |
| 55 | return false; | ||
| 56 | } | ||
| 57 | prev_row = row; | ||
| 58 | } | ||
| 59 | } | ||
| 60 | return true; | ||
| 61 | } | ||
| 62 | |||
| 63 |
1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
|
12 | bool IsValidCCS(const SparseMatrixCCS &m) { |
| 64 |
1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
|
12 | return HasValidDimensions(m) && HasValidContainers(m) && HasValidColumnOrdering(m); |
| 65 | } | ||
| 66 | |||
| 67 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 6 times.
|
7 | void AccumulateColumnProduct(const SparseMatrixCCS &lhs, const SparseMatrixCCS &rhs, int rhs_col, |
| 68 | std::vector<double> &acc, std::vector<int> &marker, std::vector<int> &used_rows, | ||
| 69 | std::vector<std::pair<int, double>> &col) { | ||
| 70 | used_rows.clear(); | ||
| 71 | col.clear(); | ||
| 72 | |||
| 73 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 7 times.
|
19 | for (int idx_b = rhs.col_ptrs[rhs_col]; idx_b < rhs.col_ptrs[rhs_col + 1]; ++idx_b) { |
| 74 | 12 | const int k = rhs.row_indices[idx_b]; | |
| 75 | 12 | const double rhs_value = rhs.values[idx_b]; | |
| 76 | |||
| 77 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 12 times.
|
28 | for (int idx_a = lhs.col_ptrs[k]; idx_a < lhs.col_ptrs[k + 1]; ++idx_a) { |
| 78 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 4 times.
|
16 | const int row = lhs.row_indices[idx_a]; |
| 79 | 16 | const double value = lhs.values[idx_a] * rhs_value; | |
| 80 | |||
| 81 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 4 times.
|
16 | if (marker[row] != rhs_col) { |
| 82 |
1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
|
12 | marker[row] = rhs_col; |
| 83 |
1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
|
12 | acc[row] = value; |
| 84 | used_rows.push_back(row); | ||
| 85 | } else { | ||
| 86 | 4 | acc[row] += value; | |
| 87 | } | ||
| 88 | } | ||
| 89 | } | ||
| 90 | |||
| 91 | 7 | col.reserve(used_rows.size()); | |
| 92 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 7 times.
|
19 | for (int row : used_rows) { |
| 93 |
1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
|
12 | const double value = acc[row]; |
| 94 |
1/2✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
|
12 | if (std::abs(value) > kEps) { |
| 95 | 12 | col.emplace_back(row, value); | |
| 96 | } | ||
| 97 | } | ||
| 98 | |||
| 99 | std::ranges::sort(col, {}, &std::pair<int, double>::first); | ||
| 100 | 7 | } | |
| 101 | |||
| 102 | 6 | void ComputeLocalSlab(const SparseMatrixCCS &lhs, const SparseMatrixCCS &rhs, int j_start, int j_end, | |
| 103 | std::vector<int> &local_col_ptrs, std::vector<int> &local_row_indices, | ||
| 104 | std::vector<double> &local_values) { | ||
| 105 | 6 | const int local_cols = j_end - j_start; | |
| 106 | 6 | std::vector<std::vector<std::pair<int, double>>> column_results(local_cols); | |
| 107 | |||
| 108 | 6 | #pragma omp parallel default(none) shared(lhs, rhs, column_results, j_start, j_end) | |
| 109 | { | ||
| 110 | std::vector<double> acc(lhs.rows_count, 0.0); | ||
| 111 | std::vector<int> marker(lhs.rows_count, -1); | ||
| 112 | std::vector<int> used_rows; | ||
| 113 | used_rows.reserve(256); | ||
| 114 | |||
| 115 | #pragma omp for schedule(dynamic) | ||
| 116 | for (int j = j_start; j < j_end; ++j) { | ||
| 117 | AccumulateColumnProduct(lhs, rhs, j, acc, marker, used_rows, column_results[j - j_start]); | ||
| 118 | } | ||
| 119 | } | ||
| 120 | |||
| 121 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | local_col_ptrs.assign(static_cast<size_t>(local_cols) + 1, 0); |
| 122 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 6 times.
|
13 | for (int col_idx = 0; col_idx < local_cols; ++col_idx) { |
| 123 | 7 | local_col_ptrs[col_idx + 1] = local_col_ptrs[col_idx] + static_cast<int>(column_results[col_idx].size()); | |
| 124 | } | ||
| 125 | 6 | const int local_nnz = local_col_ptrs[local_cols]; | |
| 126 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | local_row_indices.resize(local_nnz); |
| 127 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | local_values.resize(local_nnz); |
| 128 | |||
| 129 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 6 times.
|
13 | for (int col_idx = 0; col_idx < local_cols; ++col_idx) { |
| 130 | 7 | int write = local_col_ptrs[col_idx]; | |
| 131 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 7 times.
|
19 | for (const auto &[row, value] : column_results[col_idx]) { |
| 132 | 12 | local_row_indices[write] = row; | |
| 133 | 12 | local_values[write] = value; | |
| 134 | 12 | ++write; | |
| 135 | } | ||
| 136 | } | ||
| 137 | 6 | } | |
| 138 | |||
| 139 | } // namespace | ||
| 140 | |||
| 141 |
1/2✓ Branch 2 taken 6 times.
✗ Branch 3 not taken.
|
6 | DilshodovASpmmDoubleCssAll::DilshodovASpmmDoubleCssAll(const InType &in) { |
| 142 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 143 | GetInput() = in; | ||
| 144 | 6 | } | |
| 145 | |||
| 146 |
1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
|
6 | bool DilshodovASpmmDoubleCssAll::ValidationImpl() { |
| 147 | const auto &lhs = std::get<0>(GetInput()); | ||
| 148 | const auto &rhs = std::get<1>(GetInput()); | ||
| 149 |
3/6✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 6 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 6 times.
|
6 | return IsValidCCS(lhs) && IsValidCCS(rhs) && lhs.cols_count == rhs.rows_count; |
| 150 | } | ||
| 151 | |||
| 152 | 6 | bool DilshodovASpmmDoubleCssAll::PreProcessingImpl() { | |
| 153 | 6 | GetOutput() = SparseMatrixCCS{}; | |
| 154 | 6 | return true; | |
| 155 | } | ||
| 156 | |||
| 157 | 6 | bool DilshodovASpmmDoubleCssAll::RunImpl() { | |
| 158 | const auto &lhs = std::get<0>(GetInput()); | ||
| 159 | const auto &rhs = std::get<1>(GetInput()); | ||
| 160 | auto &out = GetOutput(); | ||
| 161 | |||
| 162 | 6 | int rank = 0; | |
| 163 | 6 | int size = 1; | |
| 164 | 6 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 165 | 6 | MPI_Comm_size(MPI_COMM_WORLD, &size); | |
| 166 | |||
| 167 | 6 | out.rows_count = lhs.rows_count; | |
| 168 | 6 | out.cols_count = rhs.cols_count; | |
| 169 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 6 times.
|
6 | out.col_ptrs.assign(static_cast<size_t>(out.cols_count) + 1, 0); |
| 170 | out.row_indices.clear(); | ||
| 171 | out.values.clear(); | ||
| 172 | |||
| 173 | 6 | std::vector<int> col_starts(static_cast<size_t>(size) + 1); | |
| 174 |
2/2✓ Branch 0 taken 18 times.
✓ Branch 1 taken 6 times.
|
24 | for (int proc = 0; proc <= size; ++proc) { |
| 175 | 18 | col_starts[proc] = static_cast<int>((static_cast<int64_t>(proc) * rhs.cols_count) / size); | |
| 176 | } | ||
| 177 | |||
| 178 |
1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
|
6 | const int j_start = col_starts[rank]; |
| 179 | 6 | const int j_end = col_starts[rank + 1]; | |
| 180 | 6 | const int local_cols = j_end - j_start; | |
| 181 | |||
| 182 | 6 | std::vector<int> local_col_ptrs; | |
| 183 | 6 | std::vector<int> local_row_indices; | |
| 184 | 6 | std::vector<double> local_values; | |
| 185 | |||
| 186 |
1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
|
6 | if (local_cols > 0) { |
| 187 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | ComputeLocalSlab(lhs, rhs, j_start, j_end, local_col_ptrs, local_row_indices, local_values); |
| 188 | } else { | ||
| 189 | ✗ | local_col_ptrs.assign(1, 0); | |
| 190 | } | ||
| 191 | |||
| 192 |
1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
|
6 | const int local_nnz = local_col_ptrs.empty() ? 0 : local_col_ptrs.back(); |
| 193 | |||
| 194 |
2/6✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 6 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
|
6 | std::vector<int> nnz_per_proc(size, 0); |
| 195 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | MPI_Allgather(&local_nnz, 1, MPI_INT, nnz_per_proc.data(), 1, MPI_INT, MPI_COMM_WORLD); |
| 196 | |||
| 197 |
1/4✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
6 | std::vector<int> nnz_displs(size, 0); |
| 198 | int total_nnz = 0; | ||
| 199 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 6 times.
|
18 | for (int proc = 0; proc < size; ++proc) { |
| 200 | 12 | nnz_displs[proc] = total_nnz; | |
| 201 | 12 | total_nnz += nnz_per_proc[proc]; | |
| 202 | } | ||
| 203 | |||
| 204 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | out.row_indices.resize(total_nnz); |
| 205 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | out.values.resize(total_nnz); |
| 206 | |||
| 207 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | MPI_Allgatherv(local_row_indices.data(), local_nnz, MPI_INT, out.row_indices.data(), nnz_per_proc.data(), |
| 208 | nnz_displs.data(), MPI_INT, MPI_COMM_WORLD); | ||
| 209 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | MPI_Allgatherv(local_values.data(), local_nnz, MPI_DOUBLE, out.values.data(), nnz_per_proc.data(), nnz_displs.data(), |
| 210 | MPI_DOUBLE, MPI_COMM_WORLD); | ||
| 211 | |||
| 212 |
1/4✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
6 | std::vector<int> cols_per_proc(size, 0); |
| 213 |
1/4✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
6 | std::vector<int> col_displs(size, 0); |
| 214 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 6 times.
|
18 | for (int proc = 0; proc < size; ++proc) { |
| 215 | 12 | cols_per_proc[proc] = col_starts[proc + 1] - col_starts[proc]; | |
| 216 | 12 | col_displs[proc] = col_starts[proc]; | |
| 217 | } | ||
| 218 | |||
| 219 |
1/4✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
6 | std::vector<int> local_inner(static_cast<size_t>(local_cols)); |
| 220 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 6 times.
|
13 | for (int col_idx = 0; col_idx < local_cols; ++col_idx) { |
| 221 | 7 | local_inner[col_idx] = local_col_ptrs[col_idx] + nnz_displs[rank]; | |
| 222 | } | ||
| 223 | |||
| 224 |
1/2✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
|
6 | MPI_Allgatherv(local_inner.data(), local_cols, MPI_INT, out.col_ptrs.data(), cols_per_proc.data(), col_displs.data(), |
| 225 | MPI_INT, MPI_COMM_WORLD); | ||
| 226 |
1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
|
6 | out.col_ptrs[out.cols_count] = total_nnz; |
| 227 | |||
| 228 |
1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
|
6 | out.non_zeros = total_nnz; |
| 229 | 6 | return true; | |
| 230 | } | ||
| 231 | |||
| 232 | 6 | bool DilshodovASpmmDoubleCssAll::PostProcessingImpl() { | |
| 233 | 6 | GetOutput().non_zeros = static_cast<int>(GetOutput().values.size()); | |
| 234 | 6 | return true; | |
| 235 | } | ||
| 236 | |||
| 237 | } // namespace dilshodov_a_spmm_double_css | ||
| 238 |