| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #pragma once | ||
| 2 | #include <cmath> | ||
| 3 | #include <cstddef> | ||
| 4 | #include <functional> | ||
| 5 | #include <numeric> | ||
| 6 | #include <thread> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "guseva_crs/common/include/common.hpp" | ||
| 10 | #include "guseva_crs/common/include/multiplier.hpp" | ||
| 11 | #include "util/include/util.hpp" | ||
| 12 | |||
| 13 | namespace guseva_crs { | ||
| 14 | |||
| 15 | 48 | class MultiplierStl : public Multiplier { | |
| 16 | static void PerformCalculation(std::size_t k, std::size_t ind3, std::size_t ind4, const CRS &a, const CRS &bt, | ||
| 17 | double &sum, std::vector<int> &temp) { | ||
| 18 |
2/2✓ Branch 0 taken 40568 times.
✓ Branch 1 taken 16072 times.
|
56640 | for (k = ind3; k < ind4; k++) { |
| 19 |
2/2✓ Branch 0 taken 5640 times.
✓ Branch 1 taken 34928 times.
|
40568 | std::size_t bcol = bt.cols[k]; |
| 20 | 40568 | int aind = temp[bcol]; | |
| 21 |
2/2✓ Branch 0 taken 5640 times.
✓ Branch 1 taken 34928 times.
|
40568 | if (aind != -1) { |
| 22 | 5640 | sum += a.values[aind] * bt.values[k]; | |
| 23 | } | ||
| 24 | } | ||
| 25 | } | ||
| 26 | |||
| 27 | 776 | static void ProcessRows(std::size_t i, const CRS &a, const CRS &bt, std::vector<std::vector<std::size_t>> &columns, | |
| 28 | std::vector<std::vector<double>> &values, std::vector<std::size_t> &row_index) { | ||
| 29 | 776 | std::size_t n = a.nrows; | |
| 30 | 776 | std::vector<int> temp(n); | |
| 31 | |||
| 32 |
2/2✓ Branch 0 taken 16072 times.
✓ Branch 1 taken 776 times.
|
16848 | for (int &l : temp) { |
| 33 | 16072 | l = -1; | |
| 34 | } | ||
| 35 | |||
| 36 | 776 | std::size_t ind1 = a.row_ptrs[i]; | |
| 37 | 776 | std::size_t ind2 = a.row_ptrs[i + 1]; | |
| 38 |
2/2✓ Branch 0 taken 2344 times.
✓ Branch 1 taken 776 times.
|
3120 | for (std::size_t j = ind1; j < ind2; j++) { |
| 39 | 2344 | std::size_t col = a.cols[j]; | |
| 40 | 2344 | temp[col] = static_cast<int>(j); | |
| 41 | } | ||
| 42 | |||
| 43 |
2/2✓ Branch 0 taken 16072 times.
✓ Branch 1 taken 776 times.
|
16848 | for (std::size_t j = 0; j < n; j++) { |
| 44 | 16072 | double sum = 0; | |
| 45 | 16072 | std::size_t ind3 = bt.row_ptrs[j]; | |
| 46 | 16072 | std::size_t ind4 = bt.row_ptrs[j + 1]; | |
| 47 | |||
| 48 | PerformCalculation(0, ind3, ind4, a, bt, sum, temp); | ||
| 49 | |||
| 50 |
2/2✓ Branch 0 taken 4088 times.
✓ Branch 1 taken 11984 times.
|
16072 | if (std::fabs(sum) > kZERO) { |
| 51 | columns[i].push_back(j); | ||
| 52 | values[i].push_back(sum); | ||
| 53 | 4088 | row_index[i]++; | |
| 54 | } | ||
| 55 | } | ||
| 56 | 776 | } | |
| 57 | |||
| 58 | 120 | static void ProcessRowsRange(const std::vector<std::size_t> &indices, std::size_t start, std::size_t end, | |
| 59 | const CRS &a, const CRS &bt, std::vector<std::vector<std::size_t>> &columns, | ||
| 60 | std::vector<std::vector<double>> &values, std::vector<std::size_t> &row_index) { | ||
| 61 |
2/2✓ Branch 0 taken 776 times.
✓ Branch 1 taken 120 times.
|
896 | for (std::size_t idx = start; idx < end; ++idx) { |
| 62 | 776 | std::size_t i = indices[idx]; | |
| 63 | 776 | ProcessRows(i, a, bt, columns, values, row_index); | |
| 64 | } | ||
| 65 | 120 | } | |
| 66 | |||
| 67 | public: | ||
| 68 | 48 | [[nodiscard]] CRS Multiply(const CRS &a, const CRS &b) const override { | |
| 69 | 48 | std::size_t n = a.nrows; | |
| 70 | |||
| 71 | 48 | auto bt = this->Transpose(b); | |
| 72 | |||
| 73 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | std::vector<std::vector<std::size_t>> columns(n); |
| 74 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | std::vector<std::vector<double>> values(n); |
| 75 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | std::vector<std::size_t> row_index(n + 1, 0); |
| 76 | |||
| 77 |
1/4✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
48 | std::vector<std::size_t> indices(n); |
| 78 | #ifdef __APPLE__ | ||
| 79 | std::iota(indices.begin(), indices.end(), 0); | ||
| 80 | #else | ||
| 81 | std::ranges::iota(indices, 0); | ||
| 82 | #endif | ||
| 83 | |||
| 84 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | std::size_t num_threads = ppc::util::GetNumThreads(); |
| 85 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 48 times.
|
48 | if (num_threads == 0) { |
| 86 | num_threads = 2; | ||
| 87 | } | ||
| 88 | |||
| 89 | 48 | std::vector<std::thread> threads; | |
| 90 | 48 | std::size_t chunk_size = n / num_threads; | |
| 91 | 48 | std::size_t remainder = n % num_threads; | |
| 92 | |||
| 93 | 48 | std::size_t start = 0; | |
| 94 |
2/2✓ Branch 0 taken 120 times.
✓ Branch 1 taken 48 times.
|
168 | for (std::size_t thread = 0; thread < num_threads; ++thread) { |
| 95 |
2/2✓ Branch 0 taken 70 times.
✓ Branch 1 taken 50 times.
|
120 | std::size_t end = start + chunk_size + (thread < remainder ? 1 : 0); |
| 96 | |||
| 97 | 240 | threads.emplace_back(ProcessRowsRange, std::ref(indices), start, end, std::cref(a), std::cref(bt), | |
| 98 |
1/2✓ Branch 1 taken 120 times.
✗ Branch 2 not taken.
|
120 | std::ref(columns), std::ref(values), std::ref(row_index)); |
| 99 | 120 | start = end; | |
| 100 | } | ||
| 101 | |||
| 102 |
2/2✓ Branch 0 taken 120 times.
✓ Branch 1 taken 48 times.
|
168 | for (auto &thread : threads) { |
| 103 |
1/2✓ Branch 1 taken 120 times.
✗ Branch 2 not taken.
|
120 | thread.join(); |
| 104 | } | ||
| 105 | |||
| 106 | std::size_t nz = 0; | ||
| 107 |
2/2✓ Branch 0 taken 776 times.
✓ Branch 1 taken 48 times.
|
824 | for (std::size_t i = 0; i < n; i++) { |
| 108 | 776 | std::size_t tmp = row_index[i]; | |
| 109 | 776 | row_index[i] = nz; | |
| 110 | 776 | nz += tmp; | |
| 111 | } | ||
| 112 | 48 | row_index[n] = nz; | |
| 113 | |||
| 114 | 48 | CRS result; | |
| 115 |
1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
|
48 | result.row_ptrs = row_index; |
| 116 | 48 | result.nrows = n; | |
| 117 | 48 | result.ncols = n; | |
| 118 | |||
| 119 |
2/2✓ Branch 0 taken 776 times.
✓ Branch 1 taken 48 times.
|
824 | for (std::size_t i = 0; i < n; i++) { |
| 120 |
1/2✓ Branch 1 taken 776 times.
✗ Branch 2 not taken.
|
776 | result.cols.insert(result.cols.end(), columns[i].begin(), columns[i].end()); |
| 121 |
1/2✓ Branch 1 taken 776 times.
✗ Branch 2 not taken.
|
776 | result.values.insert(result.values.end(), values[i].begin(), values[i].end()); |
| 122 | } | ||
| 123 | |||
| 124 | 48 | result.nz = result.values.size(); | |
| 125 | 48 | return result; | |
| 126 | 96 | } | |
| 127 | }; | ||
| 128 | |||
| 129 | } // namespace guseva_crs | ||
| 130 |