| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #pragma once | ||
| 2 | |||
| 3 | #include <algorithm> | ||
| 4 | #include <cmath> | ||
| 5 | #include <cstddef> | ||
| 6 | #include <cstdint> | ||
| 7 | #include <random> | ||
| 8 | #include <stdexcept> | ||
| 9 | #include <tuple> | ||
| 10 | #include <utility> | ||
| 11 | #include <vector> | ||
| 12 | |||
| 13 | #include "task/include/task.hpp" | ||
| 14 | |||
| 15 | namespace romanov_a_crs_product { | ||
| 16 | |||
| 17 | constexpr double kEps = 1e-6; | ||
| 18 | |||
| 19 | 420 | struct Dense { | |
| 20 | uint64_t n; | ||
| 21 | uint64_t m; | ||
| 22 | |||
| 23 | std::vector<double> data; | ||
| 24 | |||
| 25 | Dense(uint64_t n, uint64_t m) : n(n), m(m), data(n * m, 0.0) {} | ||
| 26 | 420 | explicit Dense(uint64_t n) : n(n), m(n), data(n * n, 0.0) {} | |
| 27 | |||
| 28 | [[nodiscard]] uint64_t GetRows() const { | ||
| 29 | 420 | return n; | |
| 30 | } | ||
| 31 | |||
| 32 | [[nodiscard]] uint64_t GetCols() const { | ||
| 33 | 420 | return m; | |
| 34 | } | ||
| 35 | |||
| 36 | double &operator()(uint64_t i, uint64_t j) { | ||
| 37 | 3276 | return data[(i * m) + j]; | |
| 38 | } | ||
| 39 | |||
| 40 | double operator()(uint64_t i, uint64_t j) const { | ||
| 41 |
2/2✓ Branch 0 taken 1680 times.
✓ Branch 1 taken 1596 times.
|
3276 | return data[(i * m) + j]; |
| 42 | } | ||
| 43 | }; | ||
| 44 | |||
| 45 | 2544 | struct CRS { | |
| 46 | uint64_t n; | ||
| 47 | uint64_t m; | ||
| 48 | |||
| 49 | std::vector<double> value; | ||
| 50 | std::vector<uint64_t> column; | ||
| 51 | std::vector<uint64_t> row_index; | ||
| 52 | |||
| 53 |
1/4✓ Branch 1 taken 355 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
355 | CRS() : n(0), m(0), row_index(1, 0) {} |
| 54 |
1/4✓ Branch 1 taken 485 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
485 | CRS(uint64_t n, uint64_t m) : n(n), m(m), row_index(n + 1, 0) {} |
| 55 |
1/4✓ Branch 1 taken 50 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
50 | explicit CRS(uint64_t n) : n(n), m(n), row_index(n + 1, 0) {} |
| 56 | |||
| 57 | [[nodiscard]] uint64_t GetRows() const { | ||
| 58 |
1/2✓ Branch 1 taken 280 times.
✗ Branch 2 not taken.
|
428 | return n; |
| 59 | } | ||
| 60 | |||
| 61 | [[nodiscard]] uint64_t GetCols() const { | ||
| 62 | 148 | return m; | |
| 63 | } | ||
| 64 | |||
| 65 | [[nodiscard]] uint64_t Nnz() const { | ||
| 66 | return value.size(); | ||
| 67 | } | ||
| 68 | |||
| 69 | 50 | bool operator==(const CRS &other) const { | |
| 70 |
2/4✓ Branch 0 taken 50 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 50 times.
|
50 | if (n != other.n || m != other.m) { |
| 71 | return false; | ||
| 72 | } | ||
| 73 | |||
| 74 |
2/4✗ Branch 0 not taken.
✓ Branch 1 taken 50 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 50 times.
|
50 | if (row_index != other.row_index || column != other.column) { |
| 75 | return false; | ||
| 76 | } | ||
| 77 | |||
| 78 |
2/2✓ Branch 0 taken 200 times.
✓ Branch 1 taken 50 times.
|
250 | for (uint64_t i = 0; i < value.size(); ++i) { |
| 79 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 200 times.
|
200 | if (std::abs(value[i] - other.value[i]) > kEps) { |
| 80 | return false; | ||
| 81 | } | ||
| 82 | } | ||
| 83 | |||
| 84 | return true; | ||
| 85 | } | ||
| 86 | |||
| 87 | 49 | void Transpose() { | |
| 88 | uint64_t nnz_val = this->Nnz(); | ||
| 89 | |||
| 90 | 49 | std::vector<double> new_value(nnz_val); | |
| 91 |
1/4✓ Branch 1 taken 49 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
49 | std::vector<uint64_t> new_column(nnz_val); |
| 92 |
1/4✓ Branch 1 taken 49 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
49 | std::vector<uint64_t> new_row_index(m + 1, 0); |
| 93 | |||
| 94 |
2/2✓ Branch 0 taken 199 times.
✓ Branch 1 taken 49 times.
|
248 | for (uint64_t i = 0; i < nnz_val; ++i) { |
| 95 | 199 | ++new_row_index[column[i] + 1]; | |
| 96 | } | ||
| 97 | |||
| 98 |
2/2✓ Branch 0 taken 129 times.
✓ Branch 1 taken 49 times.
|
178 | for (uint64_t i = 1; i <= m; ++i) { |
| 99 | 129 | new_row_index[i] += new_row_index[i - 1]; | |
| 100 | } | ||
| 101 | |||
| 102 |
1/2✓ Branch 1 taken 49 times.
✗ Branch 2 not taken.
|
49 | std::vector<uint64_t> offset = new_row_index; |
| 103 |
2/2✓ Branch 0 taken 129 times.
✓ Branch 1 taken 49 times.
|
178 | for (uint64_t row = 0; row < n; ++row) { |
| 104 |
2/2✓ Branch 0 taken 199 times.
✓ Branch 1 taken 129 times.
|
328 | for (uint64_t idx = row_index[row]; idx < row_index[row + 1]; ++idx) { |
| 105 | 199 | uint64_t col = column[idx]; | |
| 106 | 199 | uint64_t pos = offset[col]++; | |
| 107 | |||
| 108 | 199 | new_value[pos] = value[idx]; | |
| 109 | 199 | new_column[pos] = row; | |
| 110 | } | ||
| 111 | } | ||
| 112 | |||
| 113 | value.swap(new_value); | ||
| 114 | column.swap(new_column); | ||
| 115 | row_index.swap(new_row_index); | ||
| 116 | |||
| 117 | std::swap(n, m); | ||
| 118 | 49 | } | |
| 119 | |||
| 120 | [[nodiscard]] CRS GetTransposed() const { | ||
| 121 |
1/2✓ Branch 1 taken 49 times.
✗ Branch 2 not taken.
|
49 | CRS result = *this; |
| 122 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 49 times.
|
49 | result.Transpose(); |
| 123 | return result; | ||
| 124 | ✗ | } | |
| 125 | |||
| 126 | ✗ | void FillRandom(double density, unsigned int seed = 0) { | |
| 127 | ✗ | if (density < 0.0 || density > 1.0) { | |
| 128 | ✗ | throw std::invalid_argument("Density must be within [0, 1]!"); | |
| 129 | } | ||
| 130 | |||
| 131 | value.clear(); | ||
| 132 | column.clear(); | ||
| 133 | ✗ | row_index.assign(n + 1, 0); | |
| 134 | |||
| 135 | ✗ | std::mt19937 gen(seed); | |
| 136 | std::uniform_real_distribution<double> prob(0.0, 1.0); | ||
| 137 | std::uniform_real_distribution<double> val(-1.0, 1.0); | ||
| 138 | |||
| 139 | ✗ | for (uint64_t row = 0; row < n; ++row) { | |
| 140 | ✗ | row_index[row + 1] = row_index[row]; | |
| 141 | ✗ | for (uint64_t col = 0; col < m; ++col) { | |
| 142 | ✗ | if (prob(gen) < density) { | |
| 143 | ✗ | value.push_back(val(gen)); | |
| 144 | ✗ | column.push_back(col); | |
| 145 | ✗ | ++row_index[row + 1]; | |
| 146 | } | ||
| 147 | } | ||
| 148 | } | ||
| 149 | ✗ | } | |
| 150 | |||
| 151 | 9 | [[nodiscard]] CRS ExtractRows(uint64_t start_r, uint64_t end_r) const { | |
| 152 |
2/4✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 9 times.
|
9 | if (start_r >= end_r || start_r >= n) { |
| 153 | ✗ | return CRS{0, m}; | |
| 154 | } | ||
| 155 | |||
| 156 | 9 | end_r = std::min(end_r, n); | |
| 157 | 9 | uint64_t new_n = end_r - start_r; | |
| 158 | |||
| 159 | 9 | uint64_t nnz_start = row_index[start_r]; | |
| 160 | 9 | uint64_t nnz_end = row_index[end_r]; | |
| 161 | 9 | uint64_t nnz_count = nnz_end - nnz_start; | |
| 162 | |||
| 163 | 9 | CRS result(new_n, m); | |
| 164 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | result.value.resize(nnz_count); |
| 165 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | result.column.resize(nnz_count); |
| 166 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | result.row_index.resize(new_n + 1); |
| 167 | |||
| 168 | 9 | std::copy(value.begin() + static_cast<std::ptrdiff_t>(nnz_start), | |
| 169 | value.begin() + static_cast<std::ptrdiff_t>(nnz_end), result.value.begin()); | ||
| 170 | 9 | std::copy(column.begin() + static_cast<std::ptrdiff_t>(nnz_start), | |
| 171 | column.begin() + static_cast<std::ptrdiff_t>(nnz_end), result.column.begin()); | ||
| 172 | |||
| 173 |
2/2✓ Branch 0 taken 22 times.
✓ Branch 1 taken 9 times.
|
31 | for (uint64_t i = 0; i <= new_n; ++i) { |
| 174 | 22 | result.row_index[i] = row_index[start_r + i] - nnz_start; | |
| 175 | } | ||
| 176 | |||
| 177 | return result; | ||
| 178 | 9 | } | |
| 179 | |||
| 180 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | static CRS ConcatRows(const std::vector<CRS> &parts) { |
| 181 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | if (parts.empty()) { |
| 182 | ✗ | return CRS{}; | |
| 183 | } | ||
| 184 | |||
| 185 | uint64_t total_n = 0; | ||
| 186 | uint64_t total_nnz = 0; | ||
| 187 | 5 | uint64_t m = parts[0].m; | |
| 188 | |||
| 189 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 5 times.
|
15 | for (const auto &part : parts) { |
| 190 | 10 | total_n += part.n; | |
| 191 | 10 | total_nnz += part.Nnz(); | |
| 192 | } | ||
| 193 | |||
| 194 | 5 | CRS result(total_n, m); | |
| 195 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | result.value.resize(total_nnz); |
| 196 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | result.column.resize(total_nnz); |
| 197 |
1/2✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
|
5 | result.row_index.resize(total_n + 1); |
| 198 | |||
| 199 | uint64_t row_offset = 0; | ||
| 200 | uint64_t nnz_offset = 0; | ||
| 201 | |||
| 202 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 5 times.
|
15 | for (const auto &part : parts) { |
| 203 | std::ranges::copy(part.value, result.value.begin() + static_cast<std::ptrdiff_t>(nnz_offset)); | ||
| 204 | std::ranges::copy(part.column, result.column.begin() + static_cast<std::ptrdiff_t>(nnz_offset)); | ||
| 205 | |||
| 206 |
2/2✓ Branch 0 taken 23 times.
✓ Branch 1 taken 10 times.
|
33 | for (uint64_t i = 0; i <= part.n; ++i) { |
| 207 | 23 | result.row_index[row_offset + i] = part.row_index[i] + nnz_offset; | |
| 208 | } | ||
| 209 | |||
| 210 | 10 | row_offset += part.n; | |
| 211 | 10 | nnz_offset += part.Nnz(); | |
| 212 | } | ||
| 213 | |||
| 214 | return result; | ||
| 215 | 5 | } | |
| 216 | }; | ||
| 217 | |||
| 218 | 252 | inline double ComputeDotProduct(uint64_t a_begin, uint64_t a_end, const std::vector<uint64_t> &a_col, | |
| 219 | const std::vector<double> &a_val, uint64_t b_begin, uint64_t b_end, | ||
| 220 | const std::vector<uint64_t> &b_col, const std::vector<double> &b_val) { | ||
| 221 | double sum = 0.0; | ||
| 222 | uint64_t a_pos = a_begin; | ||
| 223 | uint64_t b_pos = b_begin; | ||
| 224 | |||
| 225 |
2/2✓ Branch 0 taken 468 times.
✓ Branch 1 taken 252 times.
|
720 | while (a_pos < a_end && b_pos < b_end) { |
| 226 | 468 | uint64_t a_col_idx = a_col[a_pos]; | |
| 227 | 468 | uint64_t b_col_idx = b_col[b_pos]; | |
| 228 | |||
| 229 |
2/2✓ Branch 0 taken 360 times.
✓ Branch 1 taken 108 times.
|
468 | if (a_col_idx == b_col_idx) { |
| 230 | 360 | sum += a_val[a_pos] * b_val[b_pos]; | |
| 231 | 360 | ++a_pos; | |
| 232 | 360 | ++b_pos; | |
| 233 |
2/2✓ Branch 0 taken 54 times.
✓ Branch 1 taken 54 times.
|
108 | } else if (a_col_idx < b_col_idx) { |
| 234 | 54 | ++a_pos; | |
| 235 | } else { | ||
| 236 | 54 | ++b_pos; | |
| 237 | } | ||
| 238 | } | ||
| 239 | |||
| 240 | 252 | return sum; | |
| 241 | } | ||
| 242 | |||
| 243 | 117 | inline std::vector<std::pair<uint64_t, double>> ComputeResultRow(const CRS &a, uint64_t row_idx, | |
| 244 | const CRS &b_transposed, uint64_t num_cols) { | ||
| 245 | 117 | std::vector<std::pair<uint64_t, double>> result_row; | |
| 246 |
1/2✓ Branch 1 taken 117 times.
✗ Branch 2 not taken.
|
117 | result_row.reserve(num_cols); |
| 247 | |||
| 248 | 117 | uint64_t a_begin = a.row_index[row_idx]; | |
| 249 | 117 | uint64_t a_end = a.row_index[row_idx + 1]; | |
| 250 | |||
| 251 |
2/2✓ Branch 0 taken 99 times.
✓ Branch 1 taken 18 times.
|
117 | if (a_begin == a_end) { |
| 252 | return result_row; | ||
| 253 | } | ||
| 254 | |||
| 255 |
2/2✓ Branch 0 taken 288 times.
✓ Branch 1 taken 99 times.
|
387 | for (uint64_t j = 0; j < num_cols; ++j) { |
| 256 | 288 | uint64_t b_begin = b_transposed.row_index[j]; | |
| 257 | 288 | uint64_t b_end = b_transposed.row_index[j + 1]; | |
| 258 | |||
| 259 |
2/2✓ Branch 0 taken 36 times.
✓ Branch 1 taken 252 times.
|
288 | if (b_begin == b_end) { |
| 260 | 36 | continue; | |
| 261 | } | ||
| 262 | |||
| 263 | double sum = | ||
| 264 |
2/2✓ Branch 0 taken 180 times.
✓ Branch 1 taken 72 times.
|
252 | ComputeDotProduct(a_begin, a_end, a.column, a.value, b_begin, b_end, b_transposed.column, b_transposed.value); |
| 265 | |||
| 266 |
2/2✓ Branch 0 taken 180 times.
✓ Branch 1 taken 72 times.
|
252 | if (std::abs(sum) > kEps) { |
| 267 |
1/2✓ Branch 1 taken 180 times.
✗ Branch 2 not taken.
|
180 | result_row.emplace_back(j, sum); |
| 268 | } | ||
| 269 | } | ||
| 270 | |||
| 271 | 99 | return result_row; | |
| 272 | } | ||
| 273 | |||
| 274 | 117 | inline void AppendRowToCRS(CRS &result, const std::vector<std::pair<uint64_t, double>> &row_data, uint64_t row_index) { | |
| 275 | 117 | result.row_index[row_index] = result.column.size(); | |
| 276 | |||
| 277 |
2/2✓ Branch 0 taken 180 times.
✓ Branch 1 taken 117 times.
|
297 | for (const auto &[col, val] : row_data) { |
| 278 |
2/2✓ Branch 0 taken 50 times.
✓ Branch 1 taken 130 times.
|
180 | result.column.push_back(col); |
| 279 |
2/2✓ Branch 0 taken 50 times.
✓ Branch 1 taken 130 times.
|
180 | result.value.push_back(val); |
| 280 | } | ||
| 281 | 117 | } | |
| 282 | |||
| 283 | 49 | inline CRS operator*(const CRS &a, const CRS &b) { | |
| 284 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 49 times.
|
49 | if (a.GetCols() != b.GetRows()) { |
| 285 | ✗ | throw std::runtime_error("Matrix dimensions do not match for multiplication!"); | |
| 286 | } | ||
| 287 | |||
| 288 | uint64_t n_rows = a.GetRows(); | ||
| 289 | uint64_t n_cols = b.GetCols(); | ||
| 290 | |||
| 291 | 49 | CRS result{n_rows, n_cols}; | |
| 292 | CRS b_transposed = b.GetTransposed(); | ||
| 293 | |||
| 294 |
1/2✓ Branch 1 taken 49 times.
✗ Branch 2 not taken.
|
49 | result.row_index.resize(n_rows + 1); |
| 295 | 49 | result.row_index[0] = 0; | |
| 296 | |||
| 297 |
2/2✓ Branch 0 taken 117 times.
✓ Branch 1 taken 49 times.
|
166 | for (uint64_t i = 0; i < n_rows; ++i) { |
| 298 |
1/2✓ Branch 1 taken 117 times.
✗ Branch 2 not taken.
|
117 | auto row_result = ComputeResultRow(a, i, b_transposed, n_cols); |
| 299 |
1/2✓ Branch 1 taken 117 times.
✗ Branch 2 not taken.
|
117 | AppendRowToCRS(result, row_result, i); |
| 300 | } | ||
| 301 | |||
| 302 | 49 | result.row_index[n_rows] = result.column.size(); | |
| 303 | |||
| 304 | 49 | return result; | |
| 305 | 49 | } | |
| 306 | |||
| 307 | 420 | inline CRS ToCRS(const Dense &a) { | |
| 308 | uint64_t rows = a.GetRows(); | ||
| 309 | uint64_t cols = a.GetCols(); | ||
| 310 | 420 | CRS crs(rows, cols); | |
| 311 | |||
| 312 |
2/2✓ Branch 0 taken 1092 times.
✓ Branch 1 taken 420 times.
|
1512 | for (uint64_t i = 0; i < rows; ++i) { |
| 313 | 1092 | crs.row_index[i + 1] = crs.row_index[i]; | |
| 314 |
2/2✓ Branch 0 taken 3276 times.
✓ Branch 1 taken 1092 times.
|
4368 | for (uint64_t j = 0; j < cols; ++j) { |
| 315 |
2/2✓ Branch 0 taken 1680 times.
✓ Branch 1 taken 1596 times.
|
3276 | if (std::abs(a(i, j)) > kEps) { |
| 316 |
1/2✓ Branch 1 taken 1680 times.
✗ Branch 2 not taken.
|
1680 | crs.value.push_back(a(i, j)); |
| 317 |
2/2✓ Branch 0 taken 504 times.
✓ Branch 1 taken 1176 times.
|
1680 | crs.column.push_back(j); |
| 318 | 1680 | ++crs.row_index[i + 1]; | |
| 319 | } | ||
| 320 | } | ||
| 321 | } | ||
| 322 | 420 | return crs; | |
| 323 | ✗ | } | |
| 324 | |||
| 325 | using InType = std::tuple<CRS, CRS>; | ||
| 326 | using OutType = CRS; | ||
| 327 | using TestType = std::tuple<CRS, CRS, CRS>; | ||
| 328 | using BaseTask = ppc::task::Task<InType, OutType>; | ||
| 329 | |||
| 330 | } // namespace romanov_a_crs_product | ||
| 331 |