| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "lazareva_a_matrix_mult_strassen/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cmath> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "lazareva_a_matrix_mult_strassen/common/include/common.hpp" | ||
| 11 | |||
| 12 | namespace lazareva_a_matrix_mult_strassen { | ||
| 13 | |||
| 14 |
1/2✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
|
18 | LazarevaATestTaskALL::LazarevaATestTaskALL(const InType &in) { |
| 15 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 16 | GetInput() = in; | ||
| 17 | 18 | } | |
| 18 | |||
| 19 | 18 | bool LazarevaATestTaskALL::ValidationImpl() { | |
| 20 | 18 | int rank = 0; | |
| 21 | 18 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 22 | 18 | int ok = 0; | |
| 23 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
|
18 | if (rank == 0) { |
| 24 | const auto &input = GetInput(); | ||
| 25 |
3/6✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 9 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 9 times.
✗ Branch 5 not taken.
|
9 | if (input.n > 0 && input.a.size() == static_cast<size_t>(input.n) * input.n && |
| 26 | input.b.size() == static_cast<size_t>(input.n) * input.n) { | ||
| 27 | 9 | ok = 1; | |
| 28 | } | ||
| 29 | } | ||
| 30 | 18 | MPI_Bcast(&ok, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 31 | 18 | return ok == 1; | |
| 32 | } | ||
| 33 | |||
| 34 | 18 | bool LazarevaATestTaskALL::PreProcessingImpl() { | |
| 35 | 18 | int rank = 0; | |
| 36 | 18 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 37 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
|
18 | if (rank == 0) { |
| 38 | 9 | n_ = GetInput().n; | |
| 39 | 9 | padded_n_ = NextPowerOfTwo(n_); | |
| 40 | 9 | a_ = PadMatrix(GetInput().a, n_, padded_n_); | |
| 41 | 18 | b_ = PadMatrix(GetInput().b, n_, padded_n_); | |
| 42 | } | ||
| 43 | 18 | MPI_Bcast(&n_, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 44 | 18 | MPI_Bcast(&padded_n_, 1, MPI_INT, 0, MPI_COMM_WORLD); | |
| 45 | 18 | return true; | |
| 46 | } | ||
| 47 | |||
| 48 | 18 | bool LazarevaATestTaskALL::RunImpl() { | |
| 49 | 18 | result_ = StrassenALL(a_, b_, padded_n_); | |
| 50 | 18 | return true; | |
| 51 | } | ||
| 52 | |||
| 53 | 18 | bool LazarevaATestTaskALL::PostProcessingImpl() { | |
| 54 | 18 | int rank = 0; | |
| 55 | 18 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 56 | 18 | const size_t final_sz = static_cast<size_t>(n_) * n_; | |
| 57 | |||
| 58 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 9 times.
|
18 | if (rank == 0) { |
| 59 | 18 | GetOutput() = UnpadMatrix(result_, padded_n_, n_); | |
| 60 | } else { | ||
| 61 | 9 | GetOutput().assign(final_sz, 0.0); | |
| 62 | } | ||
| 63 | |||
| 64 | 18 | MPI_Bcast(GetOutput().data(), static_cast<int>(final_sz), MPI_DOUBLE, 0, MPI_COMM_WORLD); | |
| 65 | 18 | return true; | |
| 66 | } | ||
| 67 | |||
| 68 | ✗ | int LazarevaATestTaskALL::NextPowerOfTwo(int n) { | |
| 69 | int p = 1; | ||
| 70 |
2/4✗ Branch 0 not taken.
✗ Branch 1 not taken.
✓ Branch 2 taken 41 times.
✓ Branch 3 taken 9 times.
|
50 | while (p < n) { |
| 71 | 41 | p <<= 1; | |
| 72 | } | ||
| 73 | ✗ | return p; | |
| 74 | } | ||
| 75 | |||
| 76 | 18 | std::vector<double> LazarevaATestTaskALL::PadMatrix(const std::vector<double> &m, int old_n, int new_n) { | |
| 77 | 18 | const size_t new_sz = static_cast<size_t>(new_n) * new_n; | |
| 78 | 18 | std::vector<double> res(new_sz, 0.0); | |
| 79 |
2/2✓ Branch 0 taken 1092 times.
✓ Branch 1 taken 18 times.
|
1110 | for (int i = 0; i < old_n; ++i) { |
| 80 | 1092 | std::copy(m.begin() + static_cast<ptrdiff_t>(static_cast<size_t>(i) * old_n), | |
| 81 | 1092 | m.begin() + static_cast<ptrdiff_t>(static_cast<size_t>(i + 1) * old_n), | |
| 82 | 1092 | res.begin() + static_cast<ptrdiff_t>(static_cast<size_t>(i) * new_n)); | |
| 83 | } | ||
| 84 | 18 | return res; | |
| 85 | } | ||
| 86 | |||
| 87 | 9 | std::vector<double> LazarevaATestTaskALL::UnpadMatrix(const std::vector<double> &m, int old_n, int new_n) { | |
| 88 | 9 | const size_t new_sz = static_cast<size_t>(new_n) * new_n; | |
| 89 | 9 | std::vector<double> res(new_sz); | |
| 90 |
2/2✓ Branch 0 taken 546 times.
✓ Branch 1 taken 9 times.
|
555 | for (int i = 0; i < new_n; ++i) { |
| 91 | 546 | std::copy(m.begin() + static_cast<ptrdiff_t>(static_cast<size_t>(i) * old_n), | |
| 92 | 546 | m.begin() + static_cast<ptrdiff_t>((static_cast<size_t>(i) * old_n) + new_n), | |
| 93 | 546 | res.begin() + static_cast<ptrdiff_t>(static_cast<size_t>(i) * new_n)); | |
| 94 | } | ||
| 95 | 9 | return res; | |
| 96 | } | ||
| 97 | |||
| 98 | 120 | std::vector<double> LazarevaATestTaskALL::Add(const std::vector<double> &a, const std::vector<double> &b, int n) { | |
| 99 | 120 | const size_t sz = static_cast<size_t>(n) * n; | |
| 100 | 120 | std::vector<double> res(sz); | |
| 101 |
2/2✓ Branch 0 taken 638976 times.
✓ Branch 1 taken 120 times.
|
639096 | for (size_t i = 0; i < sz; ++i) { |
| 102 | 638976 | res[i] = a[i] + b[i]; | |
| 103 | } | ||
| 104 | 120 | return res; | |
| 105 | } | ||
| 106 | |||
| 107 | 60 | std::vector<double> LazarevaATestTaskALL::Sub(const std::vector<double> &a, const std::vector<double> &b, int n) { | |
| 108 | 60 | const size_t sz = static_cast<size_t>(n) * n; | |
| 109 | 60 | std::vector<double> res(sz); | |
| 110 |
2/2✓ Branch 0 taken 319488 times.
✓ Branch 1 taken 60 times.
|
319548 | for (size_t i = 0; i < sz; ++i) { |
| 111 | 319488 | res[i] = a[i] - b[i]; | |
| 112 | } | ||
| 113 | 60 | return res; | |
| 114 | } | ||
| 115 | |||
| 116 | 20 | void LazarevaATestTaskALL::Split(const std::vector<double> &p, int n, std::vector<double> &a11, | |
| 117 | std::vector<double> &a12, std::vector<double> &a21, std::vector<double> &a22) { | ||
| 118 | 20 | const int h = n / 2; | |
| 119 | 20 | const size_t h_sz = static_cast<size_t>(h) * h; | |
| 120 | 20 | a11.resize(h_sz); | |
| 121 | 20 | a12.resize(h_sz); | |
| 122 | 20 | a21.resize(h_sz); | |
| 123 | 20 | a22.resize(h_sz); | |
| 124 | |||
| 125 |
2/2✓ Branch 0 taken 1408 times.
✓ Branch 1 taken 20 times.
|
1428 | for (int i = 0; i < h; ++i) { |
| 126 | 1408 | const double *src_top = p.data() + (static_cast<size_t>(i) * n); | |
| 127 | 1408 | const double *src_bot = p.data() + (static_cast<size_t>(i + h) * n); | |
| 128 | 1408 | std::copy(src_top, src_top + h, a11.begin() + static_cast<ptrdiff_t>(static_cast<size_t>(i) * h)); | |
| 129 | 1408 | std::copy(src_top + h, src_top + n, a12.begin() + static_cast<ptrdiff_t>(static_cast<size_t>(i) * h)); | |
| 130 | 1408 | std::copy(src_bot, src_bot + h, a21.begin() + static_cast<ptrdiff_t>(static_cast<size_t>(i) * h)); | |
| 131 | 1408 | std::copy(src_bot + h, src_bot + n, a22.begin() + static_cast<ptrdiff_t>(static_cast<size_t>(i) * h)); | |
| 132 | } | ||
| 133 | 20 | } | |
| 134 | |||
| 135 | 10 | std::vector<double> LazarevaATestTaskALL::Merge(const std::vector<double> &c11, const std::vector<double> &c12, | |
| 136 | const std::vector<double> &c21, const std::vector<double> &c22, int h) { | ||
| 137 | 10 | const int n = h * 2; | |
| 138 | 10 | std::vector<double> res(static_cast<size_t>(n) * n); | |
| 139 | |||
| 140 |
2/2✓ Branch 0 taken 704 times.
✓ Branch 1 taken 10 times.
|
714 | for (int i = 0; i < h; ++i) { |
| 141 | 704 | double *dst_top = res.data() + (static_cast<size_t>(i) * n); | |
| 142 | 704 | double *dst_bot = res.data() + (static_cast<size_t>(i + h) * n); | |
| 143 | 704 | std::copy(c11.begin() + static_cast<ptrdiff_t>(static_cast<size_t>(i) * h), | |
| 144 | 704 | c11.begin() + static_cast<ptrdiff_t>(static_cast<size_t>(i + 1) * h), dst_top); | |
| 145 | 704 | std::copy(c12.begin() + static_cast<ptrdiff_t>(static_cast<size_t>(i) * h), | |
| 146 | 704 | c12.begin() + static_cast<ptrdiff_t>(static_cast<size_t>(i + 1) * h), dst_top + h); | |
| 147 | 704 | std::copy(c21.begin() + static_cast<ptrdiff_t>(static_cast<size_t>(i) * h), | |
| 148 | c21.begin() + static_cast<ptrdiff_t>(static_cast<size_t>(i + 1) * h), dst_bot); | ||
| 149 | 704 | std::copy(c22.begin() + static_cast<ptrdiff_t>(static_cast<size_t>(i) * h), | |
| 150 | c22.begin() + static_cast<ptrdiff_t>(static_cast<size_t>(i + 1) * h), dst_bot + h); | ||
| 151 | } | ||
| 152 | 10 | return res; | |
| 153 | } | ||
| 154 | |||
| 155 | 69 | std::vector<double> LazarevaATestTaskALL::NaiveMult(const std::vector<double> &a, const std::vector<double> &b, int n) { | |
| 156 | 69 | const auto n_sz = static_cast<size_t>(n); | |
| 157 | 69 | std::vector<double> c(n_sz * n_sz, 0.0); | |
| 158 | |||
| 159 |
2/2✓ Branch 0 taken 4134 times.
✓ Branch 1 taken 69 times.
|
4203 | for (int i = 0; i < n; ++i) { |
| 160 |
2/2✓ Branch 0 taken 262548 times.
✓ Branch 1 taken 4134 times.
|
266682 | for (int k = 0; k < n; ++k) { |
| 161 |
2/2✓ Branch 0 taken 23923 times.
✓ Branch 1 taken 238625 times.
|
262548 | const double aik = a[(static_cast<size_t>(i) * n_sz) + static_cast<size_t>(k)]; |
| 162 |
2/2✓ Branch 0 taken 23923 times.
✓ Branch 1 taken 238625 times.
|
262548 | if (std::abs(aik) < 1e-18) { |
| 163 | 23923 | continue; | |
| 164 | } | ||
| 165 | 238625 | const double *b_row = b.data() + (static_cast<size_t>(k) * n_sz); | |
| 166 | double *c_row = c.data() + (static_cast<size_t>(i) * n_sz); | ||
| 167 |
2/2✓ Branch 0 taken 15254780 times.
✓ Branch 1 taken 238625 times.
|
15493405 | for (int j = 0; j < n; ++j) { |
| 168 | 15254780 | c_row[j] += aik * b_row[j]; | |
| 169 | } | ||
| 170 | } | ||
| 171 | } | ||
| 172 | 69 | return c; | |
| 173 | } | ||
| 174 | |||
| 175 | 76 | std::vector<double> LazarevaATestTaskALL::StrassenTBB(const std::vector<double> &a, const std::vector<double> &b, | |
| 176 | int n) { | ||
| 177 |
2/2✓ Branch 0 taken 69 times.
✓ Branch 1 taken 7 times.
|
76 | if (n <= 64) { |
| 178 | 69 | return NaiveMult(a, b, n); | |
| 179 | } | ||
| 180 | |||
| 181 | 7 | const int h = n / 2; | |
| 182 | 7 | std::vector<double> a11; | |
| 183 | 7 | std::vector<double> a12; | |
| 184 | 7 | std::vector<double> a21; | |
| 185 | 7 | std::vector<double> a22; | |
| 186 | 7 | std::vector<double> b11; | |
| 187 | 7 | std::vector<double> b12; | |
| 188 | 7 | std::vector<double> b21; | |
| 189 | 7 | std::vector<double> b22; | |
| 190 | |||
| 191 |
1/2✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
|
7 | Split(a, n, a11, a12, a21, a22); |
| 192 |
1/2✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
|
7 | Split(b, n, b11, b12, b21, b22); |
| 193 | |||
| 194 | 7 | std::vector<double> m0; | |
| 195 | 7 | std::vector<double> m1; | |
| 196 | 7 | std::vector<double> m2; | |
| 197 | 7 | std::vector<double> m3; | |
| 198 | 7 | std::vector<double> m4; | |
| 199 | 7 | std::vector<double> m5; | |
| 200 | 7 | std::vector<double> m6; | |
| 201 | |||
| 202 |
3/8✓ Branch 2 taken 7 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 7 times.
✗ Branch 6 not taken.
✓ Branch 8 taken 7 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
|
70 | oneapi::tbb::parallel_invoke([&]() { m0 = StrassenTBB(Add(a11, a22, h), Add(b11, b22, h), h); }, [&]() { |
| 203 |
1/2✓ Branch 2 taken 7 times.
✗ Branch 3 not taken.
|
14 | m1 = StrassenTBB(Add(a21, a22, h), b11, h); |
| 204 | 49 | }, [&]() { m2 = StrassenTBB(a11, Sub(b12, b22, h), h); }, [&]() { | |
| 205 | 7 | m3 = StrassenTBB(a22, Sub(b21, b11, h), h); | |
| 206 |
1/2✓ Branch 2 taken 7 times.
✗ Branch 3 not taken.
|
63 | }, [&]() { m4 = StrassenTBB(Add(a11, a12, h), b22, h); }, [&]() { |
| 207 |
3/8✓ Branch 2 taken 7 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 7 times.
✗ Branch 6 not taken.
✓ Branch 8 taken 7 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
|
28 | m5 = StrassenTBB(Sub(a21, a11, h), Add(b11, b12, h), h); |
| 208 |
4/10✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
✓ Branch 5 taken 7 times.
✗ Branch 6 not taken.
✓ Branch 8 taken 7 times.
✗ Branch 9 not taken.
✓ Branch 11 taken 7 times.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
|
42 | }, [&]() { m6 = StrassenTBB(Sub(a12, a22, h), Add(b21, b22, h), h); }); |
| 209 | |||
| 210 |
3/8✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 7 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 7 times.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
|
7 | const auto c11 = Add(Sub(Add(m0, m3, h), m4, h), m6, h); |
| 211 |
1/2✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
|
7 | const auto c12 = Add(m2, m4, h); |
| 212 |
1/2✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
|
7 | const auto c21 = Add(m1, m3, h); |
| 213 |
3/8✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 7 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 7 times.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
|
7 | const auto c22 = Add(Sub(Add(m0, m2, h), m1, h), m5, h); |
| 214 | |||
| 215 |
1/2✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
|
7 | return Merge(c11, c12, c21, c22, h); |
| 216 | } | ||
| 217 | |||
| 218 | 3 | std::vector<double> LazarevaATestTaskALL::StrassenMaster(const std::vector<double> &a, const std::vector<double> &b, | |
| 219 | int h, size_t h_sz, int size) { | ||
| 220 | 3 | std::vector<double> a11; | |
| 221 | 3 | std::vector<double> a12; | |
| 222 | 3 | std::vector<double> a21; | |
| 223 | 3 | std::vector<double> a22; | |
| 224 | 3 | std::vector<double> b11; | |
| 225 | 3 | std::vector<double> b12; | |
| 226 | 3 | std::vector<double> b21; | |
| 227 | 3 | std::vector<double> b22; | |
| 228 | |||
| 229 | 3 | const int n = h * 2; | |
| 230 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | Split(a, n, a11, a12, a21, a22); |
| 231 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | Split(b, n, b11, b12, b21, b22); |
| 232 | |||
| 233 |
1/4✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
3 | std::vector<std::vector<double>> lhs(7); |
| 234 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | std::vector<std::vector<double>> rhs(7); |
| 235 | |||
| 236 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | lhs[0] = Add(a11, a22, h); |
| 237 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | rhs[0] = Add(b11, b22, h); |
| 238 |
2/4✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 3 times.
✗ Branch 5 not taken.
|
6 | lhs[1] = Add(a21, a22, h); |
| 239 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | rhs[1] = b11; |
| 240 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | lhs[2] = a11; |
| 241 |
2/4✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 3 times.
✗ Branch 5 not taken.
|
6 | rhs[2] = Sub(b12, b22, h); |
| 242 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | lhs[3] = a22; |
| 243 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | rhs[3] = Sub(b21, b11, h); |
| 244 |
2/4✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 3 times.
✗ Branch 5 not taken.
|
6 | lhs[4] = Add(a11, a12, h); |
| 245 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | rhs[4] = b22; |
| 246 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | lhs[5] = Sub(a21, a11, h); |
| 247 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | rhs[5] = Add(b11, b12, h); |
| 248 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | lhs[6] = Sub(a12, a22, h); |
| 249 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | rhs[6] = Add(b21, b22, h); |
| 250 | |||
| 251 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | std::vector<MPI_Request> send_requests(14, MPI_REQUEST_NULL); |
| 252 | int req_idx = 0; | ||
| 253 | |||
| 254 |
2/2✓ Branch 0 taken 21 times.
✓ Branch 1 taken 3 times.
|
24 | for (int k = 0; k < 7; ++k) { |
| 255 | 21 | const int dest = k % size; | |
| 256 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 12 times.
|
21 | if (dest != 0) { |
| 257 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | MPI_Isend(lhs[k].data(), static_cast<int>(h_sz), MPI_DOUBLE, dest, k * 2, MPI_COMM_WORLD, |
| 258 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | &send_requests[req_idx]); |
| 259 | 9 | ++req_idx; | |
| 260 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | MPI_Isend(rhs[k].data(), static_cast<int>(h_sz), MPI_DOUBLE, dest, (k * 2) + 1, MPI_COMM_WORLD, |
| 261 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | &send_requests[req_idx]); |
| 262 | 9 | ++req_idx; | |
| 263 | } | ||
| 264 | } | ||
| 265 | |||
| 266 |
1/4✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
3 | std::vector<std::vector<double>> m(7); |
| 267 |
2/2✓ Branch 0 taken 21 times.
✓ Branch 1 taken 3 times.
|
24 | for (int k = 0; k < 7; ++k) { |
| 268 |
2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 9 times.
|
21 | if (k % size == 0) { |
| 269 |
1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
|
24 | m[k] = StrassenTBB(lhs[k], rhs[k], h); |
| 270 | } | ||
| 271 | } | ||
| 272 | |||
| 273 |
2/2✓ Branch 0 taken 21 times.
✓ Branch 1 taken 3 times.
|
24 | for (int k = 0; k < 7; ++k) { |
| 274 | 21 | const int src = k % size; | |
| 275 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 12 times.
|
21 | if (src != 0) { |
| 276 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | m[k].resize(h_sz); |
| 277 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | MPI_Recv(m[k].data(), static_cast<int>(h_sz), MPI_DOUBLE, src, k + 100, MPI_COMM_WORLD, MPI_STATUS_IGNORE); |
| 278 | } | ||
| 279 | } | ||
| 280 | |||
| 281 |
1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
|
3 | if (req_idx > 0) { |
| 282 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | MPI_Waitall(req_idx, send_requests.data(), MPI_STATUSES_IGNORE); |
| 283 | } | ||
| 284 | |||
| 285 |
4/8✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 3 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 3 times.
✗ Branch 8 not taken.
✓ Branch 10 taken 3 times.
✗ Branch 11 not taken.
|
6 | const auto c11 = Add(Sub(Add(m[0], m[3], h), m[4], h), m[6], h); |
| 286 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | const auto c12 = Add(m[2], m[4], h); |
| 287 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
3 | const auto c21 = Add(m[1], m[3], h); |
| 288 |
3/8✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 3 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 3 times.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
|
3 | const auto c22 = Add(Sub(Add(m[0], m[2], h), m[1], h), m[5], h); |
| 289 | |||
| 290 |
1/2✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
|
6 | return Merge(c11, c12, c21, c22, h); |
| 291 | 6 | } | |
| 292 | |||
| 293 | 3 | void LazarevaATestTaskALL::StrassenWorker(int rank, int h, size_t h_sz, int size) { | |
| 294 |
2/2✓ Branch 0 taken 21 times.
✓ Branch 1 taken 3 times.
|
24 | for (int k = 0; k < 7; ++k) { |
| 295 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 12 times.
|
21 | if (k % size == rank) { |
| 296 | 9 | std::vector<double> l(h_sz); | |
| 297 |
1/4✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
9 | std::vector<double> r(h_sz); |
| 298 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | MPI_Recv(l.data(), static_cast<int>(h_sz), MPI_DOUBLE, 0, k * 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE); |
| 299 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | MPI_Recv(r.data(), static_cast<int>(h_sz), MPI_DOUBLE, 0, (k * 2) + 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); |
| 300 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | const auto res = StrassenTBB(l, r, h); |
| 301 |
1/2✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
|
9 | MPI_Send(res.data(), static_cast<int>(h_sz), MPI_DOUBLE, 0, k + 100, MPI_COMM_WORLD); |
| 302 | } | ||
| 303 | } | ||
| 304 | 3 | } | |
| 305 | |||
| 306 | 18 | std::vector<double> LazarevaATestTaskALL::StrassenALL(const std::vector<double> &a, const std::vector<double> &b, | |
| 307 | int n) { | ||
| 308 | 18 | int rank = 0; | |
| 309 | 18 | int size = 0; | |
| 310 | 18 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 311 | 18 | MPI_Comm_size(MPI_COMM_WORLD, &size); | |
| 312 | |||
| 313 |
3/4✓ Branch 0 taken 6 times.
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 6 times.
|
18 | if (n <= 64 || size == 1) { |
| 314 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
|
12 | if (rank == 0) { |
| 315 | 6 | return StrassenTBB(a, b, n); | |
| 316 | } | ||
| 317 | 6 | return {}; | |
| 318 | } | ||
| 319 | |||
| 320 | 6 | const int h = n / 2; | |
| 321 | 6 | const size_t h_sz = static_cast<size_t>(h) * h; | |
| 322 | |||
| 323 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
|
6 | if (rank == 0) { |
| 324 | 3 | return StrassenMaster(a, b, h, h_sz, size); | |
| 325 | } | ||
| 326 | |||
| 327 | 3 | StrassenWorker(rank, h, h_sz, size); | |
| 328 | 3 | return {}; | |
| 329 | } | ||
| 330 | |||
| 331 | } // namespace lazareva_a_matrix_mult_strassen | ||
| 332 |