| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #pragma once | ||
| 2 | #include <cmath> | ||
| 3 | #include <cstddef> | ||
| 4 | #include <vector> | ||
| 5 | |||
| 6 | #include "guseva_crs/common/include/common.hpp" | ||
| 7 | #include "guseva_crs/common/include/multiplier.hpp" | ||
| 8 | #include "oneapi/tbb/blocked_range.h" | ||
| 9 | #include "oneapi/tbb/parallel_for.h" | ||
| 10 | |||
| 11 | namespace guseva_crs { | ||
| 12 | |||
| 13 | 24 | class MultiplierTbb : public Multiplier { | |
| 14 | static void PerformCalculation(std::size_t k, std::size_t ind3, std::size_t ind4, const CRS &a, const CRS &bt, | ||
| 15 | double &sum, std::vector<int> &temp) { | ||
| 16 |
2/2✓ Branch 0 taken 20284 times.
✓ Branch 1 taken 8036 times.
|
28320 | for (k = ind3; k < ind4; k++) { |
| 17 |
2/2✓ Branch 0 taken 2820 times.
✓ Branch 1 taken 17464 times.
|
20284 | std::size_t bcol = bt.cols[k]; |
| 18 | 20284 | int aind = temp[bcol]; | |
| 19 |
2/2✓ Branch 0 taken 2820 times.
✓ Branch 1 taken 17464 times.
|
20284 | if (aind != -1) { |
| 20 | 2820 | sum += a.values[aind] * bt.values[k]; | |
| 21 | } | ||
| 22 | } | ||
| 23 | } | ||
| 24 | |||
| 25 | 388 | static void ProcessRows(const tbb::blocked_range<std::size_t> &range, const CRS &a, const CRS &bt, | |
| 26 | std::vector<std::vector<std::size_t>> &columns, std::vector<std::vector<double>> &values, | ||
| 27 | std::vector<std::size_t> &row_index) { | ||
| 28 | 388 | std::size_t n = a.nrows; | |
| 29 | 388 | std::vector<int> temp(n); | |
| 30 | |||
| 31 |
2/2✓ Branch 0 taken 388 times.
✓ Branch 1 taken 388 times.
|
776 | for (std::size_t i = range.begin(); i != range.end(); ++i) { |
| 32 |
2/2✓ Branch 0 taken 8036 times.
✓ Branch 1 taken 388 times.
|
8424 | for (int &l : temp) { |
| 33 | 8036 | l = -1; | |
| 34 | } | ||
| 35 | 388 | std::size_t ind1 = a.row_ptrs[i]; | |
| 36 | 388 | std::size_t ind2 = a.row_ptrs[i + 1]; | |
| 37 |
2/2✓ Branch 0 taken 1172 times.
✓ Branch 1 taken 388 times.
|
1560 | for (std::size_t j = ind1; j < ind2; j++) { |
| 38 | 1172 | std::size_t col = a.cols[j]; | |
| 39 | 1172 | temp[col] = static_cast<int>(j); | |
| 40 | } | ||
| 41 | |||
| 42 |
2/2✓ Branch 0 taken 8036 times.
✓ Branch 1 taken 388 times.
|
8424 | for (std::size_t j = 0; j < n; j++) { |
| 43 | 8036 | double sum = 0; | |
| 44 | 8036 | std::size_t ind3 = bt.row_ptrs[j]; | |
| 45 | 8036 | std::size_t ind4 = bt.row_ptrs[j + 1]; | |
| 46 | |||
| 47 | PerformCalculation(0, ind3, ind4, a, bt, sum, temp); | ||
| 48 | |||
| 49 |
2/2✓ Branch 0 taken 2044 times.
✓ Branch 1 taken 5992 times.
|
8036 | if (std::fabs(sum) > kZERO) { |
| 50 | columns[i].push_back(j); | ||
| 51 | values[i].push_back(sum); | ||
| 52 | 2044 | row_index[i]++; | |
| 53 | } | ||
| 54 | } | ||
| 55 | } | ||
| 56 | 388 | } | |
| 57 | |||
| 58 | public: | ||
| 59 | 24 | [[nodiscard]] CRS Multiply(const CRS &a, const CRS &b) const override { | |
| 60 | 24 | std::size_t n = a.nrows; | |
| 61 | |||
| 62 | 24 | auto bt = this->Transpose(b); | |
| 63 | |||
| 64 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | std::vector<std::vector<std::size_t>> columns(n); |
| 65 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | std::vector<std::vector<double>> values(n); |
| 66 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | std::vector<std::size_t> row_index(n + 1, 0); |
| 67 | |||
| 68 | 24 | tbb::parallel_for(tbb::blocked_range<std::size_t>(0, n), | |
| 69 |
1/4✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
24 | [&a, &bt, &columns, &values, &row_index](const tbb::blocked_range<std::size_t> &range) { |
| 70 |
1/2✓ Branch 2 taken 56 times.
✗ Branch 3 not taken.
|
388 | guseva_crs::MultiplierTbb::ProcessRows(range, a, bt, columns, values, row_index); |
| 71 | }); | ||
| 72 | |||
| 73 | std::size_t nz = 0; | ||
| 74 |
2/2✓ Branch 0 taken 388 times.
✓ Branch 1 taken 24 times.
|
412 | for (std::size_t i = 0; i < n; i++) { |
| 75 | 388 | std::size_t tmp = row_index[i]; | |
| 76 | 388 | row_index[i] = nz; | |
| 77 | 388 | nz += tmp; | |
| 78 | } | ||
| 79 | 24 | row_index[n] = nz; | |
| 80 | |||
| 81 | 24 | CRS result; | |
| 82 |
1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
|
24 | result.row_ptrs = row_index; |
| 83 | 24 | result.nrows = n; | |
| 84 | 24 | result.ncols = n; | |
| 85 | |||
| 86 |
2/2✓ Branch 0 taken 388 times.
✓ Branch 1 taken 24 times.
|
412 | for (std::size_t i = 0; i < n; i++) { |
| 87 |
1/2✓ Branch 1 taken 388 times.
✗ Branch 2 not taken.
|
388 | result.cols.insert(result.cols.end(), columns[i].begin(), columns[i].end()); |
| 88 |
1/2✓ Branch 1 taken 388 times.
✗ Branch 2 not taken.
|
388 | result.values.insert(result.values.end(), values[i].begin(), values[i].end()); |
| 89 | } | ||
| 90 | |||
| 91 |
1/2✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
|
24 | result.nz = result.values.size(); |
| 92 | 24 | return result; | |
| 93 | 24 | } | |
| 94 | }; | ||
| 95 | |||
| 96 | } // namespace guseva_crs | ||
| 97 |