GCC Code Coverage Report

Directory:	./
File:	tasks/muhammadkhon_i_stressen_alg/stl/src/ops_stl.cpp
Date:	2026-06-04 20:25:32

	Exec	Total	Coverage
Lines:	68	193	35.2%
Functions:	10	19	52.6%
Branches:	40	204	19.6%

  
      Line
      Branch
      Exec
      Source
    
      #include "muhammadkhon_i_stressen_alg/stl/include/ops_stl.hpp"
    
      #include <algorithm>
    
      #include <cstddef>
    
      #include <functional>
    
      #include <future>
    
      #include <vector>
    
      #include "muhammadkhon_i_stressen_alg/common/include/common.hpp"
    
      #include "util/include/util.hpp"
    
      namespace muhammadkhon_i_stressen_alg {
    
      namespace {
    
      constexpr std::size_t kCutoff = 64;
    
      constexpr std::size_t kBlockSize = 64;
    
      std::size_t NextPow2(std::size_t x) {
    
      48
        if (x <= 1) {
    
          return 1;
    
        }
    
        std::size_t p = 1;
    
        2/2✓ Branch 0 taken 192 times.
✓ Branch 1 taken 48 times.

      240
        while (p < x) {
    
      192
          p <<= 1;
    
        }
    
        return p;
    
      }
    
      void ZeroMatrix(double *dst, std::size_t stride, std::size_t n) {
    
        2/2✓ Branch 0 taken 1344 times.
✓ Branch 1 taken 48 times.

      1392
        for (std::size_t i = 0; i < n; ++i) {
    
      1344
          std::fill_n(dst + (i * stride), n, 0.0);
    
        }
    
      }
    
      ✗
      void AddToBuffer(const double *a, std::size_t a_stride, const double *b, std::size_t b_stride, double *dst,
    
                       std::size_t n, double b_coeff) {
    
      ✗
        for (std::size_t i = 0; i < n; ++i) {
    
      ✗
          const double *a_row = a + (i * a_stride);
    
      ✗
          const double *b_row = b + (i * b_stride);
    
      ✗
          double *dst_row = dst + (i * n);
    
      ✗
          for (std::size_t j = 0; j < n; ++j) {
    
      ✗
            dst_row[j] = a_row[j] + (b_coeff * b_row[j]);
    
          }
    
        }
    
      ✗
      }
    
      void MulMicroBlock(const double *a, std::size_t a_stride, const double *b, std::size_t b_stride, double *c,
    
                         std::size_t c_stride, std::size_t i_begin, std::size_t i_end, std::size_t k_begin, std::size_t k_end,
    
                         std::size_t j_begin, std::size_t j_end) {
    
        2/2✓ Branch 0 taken 1344 times.
✓ Branch 1 taken 48 times.

      1392
        for (std::size_t i = i_begin; i < i_end; ++i) {
    
      1344
          double *c_row = c + (i * c_stride);
    
      1344
          const double *a_row = a + (i * a_stride);
    
        2/2✓ Branch 0 taken 69888 times.
✓ Branch 1 taken 1344 times.

      71232
          for (std::size_t k = k_begin; k < k_end; ++k) {
    
      69888
            const double aik = a_row[k];
    
      69888
            const double *b_row = b + (k * b_stride);
    
        2/2✓ Branch 0 taken 4260864 times.
✓ Branch 1 taken 69888 times.

      4330752
            for (std::size_t j = j_begin; j < j_end; ++j) {
    
      4260864
              c_row[j] += aik * b_row[j];
    
            }
    
          }
    
        }
    
      }
    
      48
      void MulKBlocks(const double *a, std::size_t a_stride, const double *b, std::size_t b_stride, double *c,
    
                      std::size_t c_stride, std::size_t n, std::size_t ii, std::size_t i_end) {
    
        2/2✓ Branch 0 taken 48 times.
✓ Branch 1 taken 48 times.

      96
        for (std::size_t kk = 0; kk < n; kk += kBlockSize) {
    
      48
          const std::size_t k_end = std::min(kk + kBlockSize, n);
    
        2/2✓ Branch 0 taken 48 times.
✓ Branch 1 taken 48 times.

      96
          for (std::size_t jj = 0; jj < n; jj += kBlockSize) {
    
      48
            const std::size_t j_end = std::min(jj + kBlockSize, n);
    
            MulMicroBlock(a, a_stride, b, b_stride, c, c_stride, ii, i_end, kk, k_end, jj, j_end);
    
          }
    
        }
    
      48
      }
    
      48
      void NaiveMulBlocked(const double *a, std::size_t a_stride, const double *b, std::size_t b_stride, double *c,
    
                           std::size_t c_stride, std::size_t n) {
    
      48
        ZeroMatrix(c, c_stride, n);
    
        2/2✓ Branch 0 taken 48 times.
✓ Branch 1 taken 48 times.

      96
        for (std::size_t ii = 0; ii < n; ii += kBlockSize) {
    
      48
          const std::size_t i_end = std::min(ii + kBlockSize, n);
    
      48
          MulKBlocks(a, a_stride, b, b_stride, c, c_stride, n, ii, i_end);
    
        }
    
      48
      }
    
      ✗
      void CombineQuadrants(const std::vector<double> &m1, const std::vector<double> &m2, const std::vector<double> &m3,
    
                            const std::vector<double> &m4, const std::vector<double> &m5, const std::vector<double> &m6,
    
                            const std::vector<double> &m7, double *c, std::size_t c_stride, std::size_t half) {
    
      ✗
        for (std::size_t i = 0; i < half; ++i) {
    
      ✗
          double *c11 = c + (i * c_stride);
    
          double *c12 = c11 + half;
    
      ✗
          double *c21 = c + ((i + half) * c_stride);
    
          double *c22 = c21 + half;
    
      ✗
          const double *m1r = m1.data() + (i * half);
    
          const double *m2r = m2.data() + (i * half);
    
          const double *m3r = m3.data() + (i * half);
    
          const double *m4r = m4.data() + (i * half);
    
          const double *m5r = m5.data() + (i * half);
    
          const double *m6r = m6.data() + (i * half);
    
          const double *m7r = m7.data() + (i * half);
    
      ✗
          for (std::size_t j = 0; j < half; ++j) {
    
      ✗
            c11[j] = m1r[j] + m4r[j] - m5r[j] + m7r[j];
    
      ✗
            c12[j] = m3r[j] + m5r[j];
    
      ✗
            c21[j] = m2r[j] + m4r[j];
    
      ✗
            c22[j] = m1r[j] - m2r[j] + m3r[j] + m6r[j];
    
          }
    
        }
    
      ✗
      }
    
        1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.

      48
      void StrassenSeq(const double *a_in, std::size_t a_stride_in, const double *b_in, std::size_t b_stride_in, double *c_in,
    
                       std::size_t c_stride_in, std::size_t n_in) {
    
        std::function<void(const double *, std::size_t, const double *, std::size_t, double *, std::size_t, std::size_t)>
    
      48
            impl = [&](const double *a, std::size_t a_stride, const double *b, std::size_t b_stride, double *c,
    
                       std::size_t c_stride, std::size_t n) {
    
        1/2✓ Branch 0 taken 48 times.
✗ Branch 1 not taken.

      48
          if (n <= kCutoff) {
    
      48
            NaiveMulBlocked(a, a_stride, b, b_stride, c, c_stride, n);
    
      48
            return;
    
          }
    
      ✗
          const std::size_t half = n / 2;
    
          const double *a11 = a;
    
      ✗
          const double *a12 = a + half;
    
      ✗
          const double *a21 = a + (half * a_stride);
    
      ✗
          const double *a22 = a21 + half;
    
          const double *b11 = b;
    
      ✗
          const double *b12 = b + half;
    
      ✗
          const double *b21 = b + (half * b_stride);
    
      ✗
          const double *b22 = b21 + half;
    
      ✗
          std::vector<double> lhs(half * half);
    
      ✗
          std::vector<double> rhs(half * half);
    
      ✗
          std::vector<double> m1(half * half);
    
      ✗
          std::vector<double> m2(half * half);
    
      ✗
          std::vector<double> m3(half * half);
    
      ✗
          std::vector<double> m4(half * half);
    
      ✗
          std::vector<double> m5(half * half);
    
      ✗
          std::vector<double> m6(half * half);
    
      ✗
          std::vector<double> m7(half * half);
    
      ✗
          AddToBuffer(a11, a_stride, a22, a_stride, lhs.data(), half, 1.0);
    
      ✗
          AddToBuffer(b11, b_stride, b22, b_stride, rhs.data(), half, 1.0);
    
      ✗
          impl(lhs.data(), half, rhs.data(), half, m1.data(), half, half);
    
      ✗
          AddToBuffer(a21, a_stride, a22, a_stride, lhs.data(), half, 1.0);
    
      ✗
          impl(lhs.data(), half, b11, b_stride, m2.data(), half, half);
    
      ✗
          AddToBuffer(b12, b_stride, b22, b_stride, rhs.data(), half, -1.0);
    
      ✗
          impl(a11, a_stride, rhs.data(), half, m3.data(), half, half);
    
      ✗
          AddToBuffer(b21, b_stride, b11, b_stride, rhs.data(), half, -1.0);
    
      ✗
          impl(a22, a_stride, rhs.data(), half, m4.data(), half, half);
    
      ✗
          AddToBuffer(a11, a_stride, a12, a_stride, lhs.data(), half, 1.0);
    
      ✗
          impl(lhs.data(), half, b22, b_stride, m5.data(), half, half);
    
      ✗
          AddToBuffer(a21, a_stride, a11, a_stride, lhs.data(), half, -1.0);
    
      ✗
          AddToBuffer(b11, b_stride, b12, b_stride, rhs.data(), half, 1.0);
    
      ✗
          impl(lhs.data(), half, rhs.data(), half, m6.data(), half, half);
    
      ✗
          AddToBuffer(a12, a_stride, a22, a_stride, lhs.data(), half, -1.0);
    
      ✗
          AddToBuffer(b21, b_stride, b22, b_stride, rhs.data(), half, 1.0);
    
      ✗
          impl(lhs.data(), half, rhs.data(), half, m7.data(), half, half);
    
      ✗
          CombineQuadrants(m1, m2, m3, m4, m5, m6, m7, c, c_stride, half);
    
        };
    
        1/2✓ Branch 0 taken 48 times.
✗ Branch 1 not taken.

      48
        impl(a_in, a_stride_in, b_in, b_stride_in, c_in, c_stride_in, n_in);
    
      48
      }
    
      48
      void StrassenTopStl(const double *a, std::size_t a_stride, const double *b, std::size_t b_stride, double *c,
    
                          std::size_t c_stride, std::size_t n) {
    
        1/4✗ Branch 0 not taken.
✓ Branch 1 taken 48 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      48
        if (n <= kCutoff || ppc::util::GetNumThreads() <= 1) {
    
      48
          StrassenSeq(a, a_stride, b, b_stride, c, c_stride, n);
    
      48
          return;
    
        }
    
      ✗
        const std::size_t half = n / 2;
    
      ✗
        const double *a11 = a;
    
      ✗
        const double *a12 = a + half;
    
      ✗
        const double *a21 = a + (half * a_stride);
    
      ✗
        const double *a22 = a21 + half;
    
      ✗
        const double *b11 = b;
    
      ✗
        const double *b12 = b + half;
    
      ✗
        const double *b21 = b + (half * b_stride);
    
      ✗
        const double *b22 = b21 + half;
    
      ✗
        std::vector<double> m1;
    
      ✗
        std::vector<double> m2;
    
      ✗
        std::vector<double> m3;
    
      ✗
        std::vector<double> m4;
    
      ✗
        std::vector<double> m5;
    
      ✗
        std::vector<double> m6;
    
      ✗
        std::vector<double> m7;
    
      ✗
        auto f1 = std::async(std::launch::async, [&] {
    
      ✗
          std::vector<double> lhs(half * half);
    
      ✗
          std::vector<double> rhs(half * half);
    
      ✗
          AddToBuffer(a11, a_stride, a22, a_stride, lhs.data(), half, 1.0);
    
      ✗
          AddToBuffer(b11, b_stride, b22, b_stride, rhs.data(), half, 1.0);
    
      ✗
          m1.assign(half * half, 0.0);
    
      ✗
          StrassenSeq(lhs.data(), half, rhs.data(), half, m1.data(), half, half);
    
      ✗
        });
    
      ✗
        auto f2 = std::async(std::launch::async, [&] {
    
      ✗
          std::vector<double> lhs(half * half);
    
      ✗
          AddToBuffer(a21, a_stride, a22, a_stride, lhs.data(), half, 1.0);
    
      ✗
          m2.assign(half * half, 0.0);
    
      ✗
          StrassenSeq(lhs.data(), half, b11, b_stride, m2.data(), half, half);
    
      ✗
        });
    
      ✗
        auto f3 = std::async(std::launch::async, [&] {
    
      ✗
          std::vector<double> rhs(half * half);
    
      ✗
          AddToBuffer(b12, b_stride, b22, b_stride, rhs.data(), half, -1.0);
    
      ✗
          m3.assign(half * half, 0.0);
    
      ✗
          StrassenSeq(a11, a_stride, rhs.data(), half, m3.data(), half, half);
    
      ✗
        });
    
      ✗
        auto f4 = std::async(std::launch::async, [&] {
    
      ✗
          std::vector<double> rhs(half * half);
    
      ✗
          AddToBuffer(b21, b_stride, b11, b_stride, rhs.data(), half, -1.0);
    
      ✗
          m4.assign(half * half, 0.0);
    
      ✗
          StrassenSeq(a22, a_stride, rhs.data(), half, m4.data(), half, half);
    
      ✗
        });
    
      ✗
        auto f5 = std::async(std::launch::async, [&] {
    
      ✗
          std::vector<double> lhs(half * half);
    
      ✗
          AddToBuffer(a11, a_stride, a12, a_stride, lhs.data(), half, 1.0);
    
      ✗
          m5.assign(half * half, 0.0);
    
      ✗
          StrassenSeq(lhs.data(), half, b22, b_stride, m5.data(), half, half);
    
      ✗
        });
    
      ✗
        auto f6 = std::async(std::launch::async, [&] {
    
      ✗
          std::vector<double> lhs(half * half);
    
      ✗
          std::vector<double> rhs(half * half);
    
      ✗
          AddToBuffer(a21, a_stride, a11, a_stride, lhs.data(), half, -1.0);
    
      ✗
          AddToBuffer(b11, b_stride, b12, b_stride, rhs.data(), half, 1.0);
    
      ✗
          m6.assign(half * half, 0.0);
    
      ✗
          StrassenSeq(lhs.data(), half, rhs.data(), half, m6.data(), half, half);
    
      ✗
        });
    
      ✗
        auto f7 = std::async(std::launch::async, [&] {
    
      ✗
          std::vector<double> lhs(half * half);
    
      ✗
          std::vector<double> rhs(half * half);
    
      ✗
          AddToBuffer(a12, a_stride, a22, a_stride, lhs.data(), half, -1.0);
    
      ✗
          AddToBuffer(b21, b_stride, b22, b_stride, rhs.data(), half, 1.0);
    
      ✗
          m7.assign(half * half, 0.0);
    
      ✗
          StrassenSeq(lhs.data(), half, rhs.data(), half, m7.data(), half, half);
    
      ✗
        });
    
      ✗
        f1.get();
    
      ✗
        f2.get();
    
      ✗
        f3.get();
    
      ✗
        f4.get();
    
      ✗
        f5.get();
    
      ✗
        f6.get();
    
      ✗
        f7.get();
    
      ✗
        CombineQuadrants(m1, m2, m3, m4, m5, m6, m7, c, c_stride, half);
    
      }
    
      }  // namespace
    
        1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.

      48
      MuhammadkhonIStressenAlgSTL::MuhammadkhonIStressenAlgSTL(const InType &in) {
    
        SetTypeOfTask(GetStaticTypeOfTask());
    
        1/2✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.

      48
        GetInput() = in;
    
        GetOutput() = {};
    
      48
      }
    
      48
      bool MuhammadkhonIStressenAlgSTL::ValidationImpl() {
    
        const auto &in = GetInput();
    
        2/4✗ Branch 0 not taken.
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 48 times.

      48
        return in.a_rows > 0 && in.a_cols_b_rows > 0 && in.b_cols > 0 &&
    
        2/4✓ Branch 0 taken 48 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 48 times.

      96
               in.a.size() == static_cast<size_t>(in.a_rows * in.a_cols_b_rows) &&
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 48 times.

      48
               in.b.size() == static_cast<size_t>(in.a_cols_b_rows * in.b_cols);
    
      }
    
      48
      bool MuhammadkhonIStressenAlgSTL::PreProcessingImpl() {
    
        GetOutput() = {};
    
        const auto &in = GetInput();
    
      48
        a_rows_ = in.a_rows;
    
      48
        a_cols_b_rows_ = in.a_cols_b_rows;
    
      48
        b_cols_ = in.b_cols;
    
        1/2✓ Branch 0 taken 48 times.
✗ Branch 1 not taken.

      96
        size_t max_dim = std::max({a_rows_, a_cols_b_rows_, b_cols_});
    
      48
        padded_n_ = NextPow2(max_dim);
    
      48
        padded_a_.assign(padded_n_ * padded_n_, 0.0);
    
      48
        padded_b_.assign(padded_n_ * padded_n_, 0.0);
    
        2/2✓ Branch 0 taken 1320 times.
✓ Branch 1 taken 48 times.

      1368
        for (size_t i = 0; i < a_rows_; ++i) {
    
        2/2✓ Branch 0 taken 67368 times.
✓ Branch 1 taken 1320 times.

      68688
          for (size_t j = 0; j < a_cols_b_rows_; ++j) {
    
      67368
            padded_a_[(i * padded_n_) + j] = in.a[(i * a_cols_b_rows_) + j];
    
          }
    
        }
    
        2/2✓ Branch 0 taken 1240 times.
✓ Branch 1 taken 48 times.

      1288
        for (size_t i = 0; i < a_cols_b_rows_; ++i) {
    
        2/2✓ Branch 0 taken 67368 times.
✓ Branch 1 taken 1240 times.

      68608
          for (size_t j = 0; j < b_cols_; ++j) {
    
      67368
            padded_b_[(i * padded_n_) + j] = in.b[(i * b_cols_) + j];
    
          }
    
        }
    
      48
        return true;
    
      }
    
      48
      bool MuhammadkhonIStressenAlgSTL::RunImpl() {
    
      48
        result_c_.assign(padded_n_ * padded_n_, 0.0);
    
      48
        StrassenTopStl(padded_a_.data(), padded_n_, padded_b_.data(), padded_n_, result_c_.data(), padded_n_, padded_n_);
    
        auto &out = GetOutput();
    
      48
        out.assign(a_rows_ * b_cols_, 0.0);
    
        2/2✓ Branch 0 taken 1320 times.
✓ Branch 1 taken 48 times.

      1368
        for (size_t i = 0; i < a_rows_; ++i) {
    
        2/2✓ Branch 0 taken 68568 times.
✓ Branch 1 taken 1320 times.

      69888
          for (size_t j = 0; j < b_cols_; ++j) {
    
      68568
            out[(i * b_cols_) + j] = result_c_[(i * padded_n_) + j];
    
          }
    
        }
    
      48
        return true;
    
      }
    
      48
      bool MuhammadkhonIStressenAlgSTL::PostProcessingImpl() {
    
      48
        return true;
    
      }
    
      }  // namespace muhammadkhon_i_stressen_alg

Line	Branch	Exec	Source
1			#include "muhammadkhon_i_stressen_alg/stl/include/ops_stl.hpp"
2
3			#include <algorithm>
4			#include <cstddef>
5			#include <functional>
6			#include <future>
7			#include <vector>
8
9			#include "muhammadkhon_i_stressen_alg/common/include/common.hpp"
10			#include "util/include/util.hpp"
11
12			namespace muhammadkhon_i_stressen_alg {
13
14			namespace {
15
16			constexpr std::size_t kCutoff = 64;
17			constexpr std::size_t kBlockSize = 64;
18
19			std::size_t NextPow2(std::size_t x) {
20		48	if (x <= 1) {
21			return 1;
22			}
23			std::size_t p = 1;
24	2/2 ✓ Branch 0 taken 192 times. ✓ Branch 1 taken 48 times.	240	while (p < x) {
25		192	p <<= 1;
26			}
27			return p;
28			}
29
30			void ZeroMatrix(double *dst, std::size_t stride, std::size_t n) {
31	2/2 ✓ Branch 0 taken 1344 times. ✓ Branch 1 taken 48 times.	1392	for (std::size_t i = 0; i < n; ++i) {
32		1344	std::fill_n(dst + (i * stride), n, 0.0);
33			}
34			}
35
36		✗	void AddToBuffer(const double a, std::size_t a_stride, const double b, std::size_t b_stride, double *dst,
37			std::size_t n, double b_coeff) {
38		✗	for (std::size_t i = 0; i < n; ++i) {
39		✗	const double a_row = a + (i a_stride);
40		✗	const double b_row = b + (i b_stride);
41		✗	double dst_row = dst + (i n);
42		✗	for (std::size_t j = 0; j < n; ++j) {
43		✗	dst_row[j] = a_row[j] + (b_coeff * b_row[j]);
44			}
45			}
46		✗	}
47
48			void MulMicroBlock(const double a, std::size_t a_stride, const double b, std::size_t b_stride, double *c,
49			std::size_t c_stride, std::size_t i_begin, std::size_t i_end, std::size_t k_begin, std::size_t k_end,
50			std::size_t j_begin, std::size_t j_end) {
51	2/2 ✓ Branch 0 taken 1344 times. ✓ Branch 1 taken 48 times.	1392	for (std::size_t i = i_begin; i < i_end; ++i) {
52		1344	double c_row = c + (i c_stride);
53		1344	const double a_row = a + (i a_stride);
54	2/2 ✓ Branch 0 taken 69888 times. ✓ Branch 1 taken 1344 times.	71232	for (std::size_t k = k_begin; k < k_end; ++k) {
55		69888	const double aik = a_row[k];
56		69888	const double b_row = b + (k b_stride);
57	2/2 ✓ Branch 0 taken 4260864 times. ✓ Branch 1 taken 69888 times.	4330752	for (std::size_t j = j_begin; j < j_end; ++j) {
58		4260864	c_row[j] += aik * b_row[j];
59			}
60			}
61			}
62			}
63
64		48	void MulKBlocks(const double a, std::size_t a_stride, const double b, std::size_t b_stride, double *c,
65			std::size_t c_stride, std::size_t n, std::size_t ii, std::size_t i_end) {
66	2/2 ✓ Branch 0 taken 48 times. ✓ Branch 1 taken 48 times.	96	for (std::size_t kk = 0; kk < n; kk += kBlockSize) {
67		48	const std::size_t k_end = std::min(kk + kBlockSize, n);
68	2/2 ✓ Branch 0 taken 48 times. ✓ Branch 1 taken 48 times.	96	for (std::size_t jj = 0; jj < n; jj += kBlockSize) {
69		48	const std::size_t j_end = std::min(jj + kBlockSize, n);
70			MulMicroBlock(a, a_stride, b, b_stride, c, c_stride, ii, i_end, kk, k_end, jj, j_end);
71			}
72			}
73		48	}
74
75		48	void NaiveMulBlocked(const double a, std::size_t a_stride, const double b, std::size_t b_stride, double *c,
76			std::size_t c_stride, std::size_t n) {
77		48	ZeroMatrix(c, c_stride, n);
78	2/2 ✓ Branch 0 taken 48 times. ✓ Branch 1 taken 48 times.	96	for (std::size_t ii = 0; ii < n; ii += kBlockSize) {
79		48	const std::size_t i_end = std::min(ii + kBlockSize, n);
80		48	MulKBlocks(a, a_stride, b, b_stride, c, c_stride, n, ii, i_end);
81			}
82		48	}
83
84		✗	void CombineQuadrants(const std::vector<double> &m1, const std::vector<double> &m2, const std::vector<double> &m3,
85			const std::vector<double> &m4, const std::vector<double> &m5, const std::vector<double> &m6,
86			const std::vector<double> &m7, double *c, std::size_t c_stride, std::size_t half) {
87		✗	for (std::size_t i = 0; i < half; ++i) {
88		✗	double c11 = c + (i c_stride);
89			double *c12 = c11 + half;
90		✗	double c21 = c + ((i + half) c_stride);
91			double *c22 = c21 + half;
92		✗	const double m1r = m1.data() + (i half);
93			const double m2r = m2.data() + (i half);
94			const double m3r = m3.data() + (i half);
95			const double m4r = m4.data() + (i half);
96			const double m5r = m5.data() + (i half);
97			const double m6r = m6.data() + (i half);
98			const double m7r = m7.data() + (i half);
99		✗	for (std::size_t j = 0; j < half; ++j) {
100		✗	c11[j] = m1r[j] + m4r[j] - m5r[j] + m7r[j];
101		✗	c12[j] = m3r[j] + m5r[j];
102		✗	c21[j] = m2r[j] + m4r[j];
103		✗	c22[j] = m1r[j] - m2r[j] + m3r[j] + m6r[j];
104			}
105			}
106		✗	}
107
108	1/2 ✓ Branch 1 taken 48 times. ✗ Branch 2 not taken.	48	void StrassenSeq(const double a_in, std::size_t a_stride_in, const double b_in, std::size_t b_stride_in, double *c_in,
109			std::size_t c_stride_in, std::size_t n_in) {
110			std::function<void(const double , std::size_t, const double , std::size_t, double *, std::size_t, std::size_t)>
111		48	impl = [&](const double a, std::size_t a_stride, const double b, std::size_t b_stride, double *c,
112			std::size_t c_stride, std::size_t n) {
113	1/2 ✓ Branch 0 taken 48 times. ✗ Branch 1 not taken.	48	if (n <= kCutoff) {
114		48	NaiveMulBlocked(a, a_stride, b, b_stride, c, c_stride, n);
115		48	return;
116			}
117		✗	const std::size_t half = n / 2;
118
119			const double *a11 = a;
120		✗	const double *a12 = a + half;
121		✗	const double a21 = a + (half a_stride);
122		✗	const double *a22 = a21 + half;
123			const double *b11 = b;
124		✗	const double *b12 = b + half;
125		✗	const double b21 = b + (half b_stride);
126		✗	const double *b22 = b21 + half;
127
128		✗	std::vector<double> lhs(half * half);
129		✗	std::vector<double> rhs(half * half);
130		✗	std::vector<double> m1(half * half);
131		✗	std::vector<double> m2(half * half);
132		✗	std::vector<double> m3(half * half);
133		✗	std::vector<double> m4(half * half);
134		✗	std::vector<double> m5(half * half);
135		✗	std::vector<double> m6(half * half);
136		✗	std::vector<double> m7(half * half);
137
138		✗	AddToBuffer(a11, a_stride, a22, a_stride, lhs.data(), half, 1.0);
139		✗	AddToBuffer(b11, b_stride, b22, b_stride, rhs.data(), half, 1.0);
140		✗	impl(lhs.data(), half, rhs.data(), half, m1.data(), half, half);
141
142		✗	AddToBuffer(a21, a_stride, a22, a_stride, lhs.data(), half, 1.0);
143		✗	impl(lhs.data(), half, b11, b_stride, m2.data(), half, half);
144
145		✗	AddToBuffer(b12, b_stride, b22, b_stride, rhs.data(), half, -1.0);
146		✗	impl(a11, a_stride, rhs.data(), half, m3.data(), half, half);
147
148		✗	AddToBuffer(b21, b_stride, b11, b_stride, rhs.data(), half, -1.0);
149		✗	impl(a22, a_stride, rhs.data(), half, m4.data(), half, half);
150
151		✗	AddToBuffer(a11, a_stride, a12, a_stride, lhs.data(), half, 1.0);
152		✗	impl(lhs.data(), half, b22, b_stride, m5.data(), half, half);
153
154		✗	AddToBuffer(a21, a_stride, a11, a_stride, lhs.data(), half, -1.0);
155		✗	AddToBuffer(b11, b_stride, b12, b_stride, rhs.data(), half, 1.0);
156		✗	impl(lhs.data(), half, rhs.data(), half, m6.data(), half, half);
157
158		✗	AddToBuffer(a12, a_stride, a22, a_stride, lhs.data(), half, -1.0);
159		✗	AddToBuffer(b21, b_stride, b22, b_stride, rhs.data(), half, 1.0);
160		✗	impl(lhs.data(), half, rhs.data(), half, m7.data(), half, half);
161
162		✗	CombineQuadrants(m1, m2, m3, m4, m5, m6, m7, c, c_stride, half);
163			};
164
165	1/2 ✓ Branch 0 taken 48 times. ✗ Branch 1 not taken.	48	impl(a_in, a_stride_in, b_in, b_stride_in, c_in, c_stride_in, n_in);
166		48	}
167
168		48	void StrassenTopStl(const double a, std::size_t a_stride, const double b, std::size_t b_stride, double *c,
169			std::size_t c_stride, std::size_t n) {
170	1/4 ✗ Branch 0 not taken. ✓ Branch 1 taken 48 times. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	48	if (n <= kCutoff \|\| ppc::util::GetNumThreads() <= 1) {
171		48	StrassenSeq(a, a_stride, b, b_stride, c, c_stride, n);
172		48	return;
173			}
174
175		✗	const std::size_t half = n / 2;
176
177		✗	const double *a11 = a;
178		✗	const double *a12 = a + half;
179		✗	const double a21 = a + (half a_stride);
180		✗	const double *a22 = a21 + half;
181		✗	const double *b11 = b;
182		✗	const double *b12 = b + half;
183		✗	const double b21 = b + (half b_stride);
184		✗	const double *b22 = b21 + half;
185
186		✗	std::vector<double> m1;
187		✗	std::vector<double> m2;
188		✗	std::vector<double> m3;
189		✗	std::vector<double> m4;
190		✗	std::vector<double> m5;
191		✗	std::vector<double> m6;
192		✗	std::vector<double> m7;
193
194		✗	auto f1 = std::async(std::launch::async, [&] {
195		✗	std::vector<double> lhs(half * half);
196		✗	std::vector<double> rhs(half * half);
197		✗	AddToBuffer(a11, a_stride, a22, a_stride, lhs.data(), half, 1.0);
198		✗	AddToBuffer(b11, b_stride, b22, b_stride, rhs.data(), half, 1.0);
199		✗	m1.assign(half * half, 0.0);
200		✗	StrassenSeq(lhs.data(), half, rhs.data(), half, m1.data(), half, half);
201		✗	});
202		✗	auto f2 = std::async(std::launch::async, [&] {
203		✗	std::vector<double> lhs(half * half);
204		✗	AddToBuffer(a21, a_stride, a22, a_stride, lhs.data(), half, 1.0);
205		✗	m2.assign(half * half, 0.0);
206		✗	StrassenSeq(lhs.data(), half, b11, b_stride, m2.data(), half, half);
207		✗	});
208		✗	auto f3 = std::async(std::launch::async, [&] {
209		✗	std::vector<double> rhs(half * half);
210		✗	AddToBuffer(b12, b_stride, b22, b_stride, rhs.data(), half, -1.0);
211		✗	m3.assign(half * half, 0.0);
212		✗	StrassenSeq(a11, a_stride, rhs.data(), half, m3.data(), half, half);
213		✗	});
214		✗	auto f4 = std::async(std::launch::async, [&] {
215		✗	std::vector<double> rhs(half * half);
216		✗	AddToBuffer(b21, b_stride, b11, b_stride, rhs.data(), half, -1.0);
217		✗	m4.assign(half * half, 0.0);
218		✗	StrassenSeq(a22, a_stride, rhs.data(), half, m4.data(), half, half);
219		✗	});
220		✗	auto f5 = std::async(std::launch::async, [&] {
221		✗	std::vector<double> lhs(half * half);
222		✗	AddToBuffer(a11, a_stride, a12, a_stride, lhs.data(), half, 1.0);
223		✗	m5.assign(half * half, 0.0);
224		✗	StrassenSeq(lhs.data(), half, b22, b_stride, m5.data(), half, half);
225		✗	});
226		✗	auto f6 = std::async(std::launch::async, [&] {
227		✗	std::vector<double> lhs(half * half);
228		✗	std::vector<double> rhs(half * half);
229		✗	AddToBuffer(a21, a_stride, a11, a_stride, lhs.data(), half, -1.0);
230		✗	AddToBuffer(b11, b_stride, b12, b_stride, rhs.data(), half, 1.0);
231		✗	m6.assign(half * half, 0.0);
232		✗	StrassenSeq(lhs.data(), half, rhs.data(), half, m6.data(), half, half);
233		✗	});
234		✗	auto f7 = std::async(std::launch::async, [&] {
235		✗	std::vector<double> lhs(half * half);
236		✗	std::vector<double> rhs(half * half);
237		✗	AddToBuffer(a12, a_stride, a22, a_stride, lhs.data(), half, -1.0);
238		✗	AddToBuffer(b21, b_stride, b22, b_stride, rhs.data(), half, 1.0);
239		✗	m7.assign(half * half, 0.0);
240		✗	StrassenSeq(lhs.data(), half, rhs.data(), half, m7.data(), half, half);
241		✗	});
242
243		✗	f1.get();
244		✗	f2.get();
245		✗	f3.get();
246		✗	f4.get();
247		✗	f5.get();
248		✗	f6.get();
249		✗	f7.get();
250
251		✗	CombineQuadrants(m1, m2, m3, m4, m5, m6, m7, c, c_stride, half);
252			}
253
254			} // namespace
255
256	1/2 ✓ Branch 1 taken 48 times. ✗ Branch 2 not taken.	48	MuhammadkhonIStressenAlgSTL::MuhammadkhonIStressenAlgSTL(const InType &in) {
257			SetTypeOfTask(GetStaticTypeOfTask());
258	1/2 ✓ Branch 1 taken 48 times. ✗ Branch 2 not taken.	48	GetInput() = in;
259			GetOutput() = {};
260		48	}
261
262		48	bool MuhammadkhonIStressenAlgSTL::ValidationImpl() {
263			const auto &in = GetInput();
264	2/4 ✗ Branch 0 not taken. ✓ Branch 1 taken 48 times. ✗ Branch 2 not taken. ✓ Branch 3 taken 48 times.	48	return in.a_rows > 0 && in.a_cols_b_rows > 0 && in.b_cols > 0 &&
265	2/4 ✓ Branch 0 taken 48 times. ✗ Branch 1 not taken. ✗ Branch 2 not taken. ✓ Branch 3 taken 48 times.	96	in.a.size() == static_cast<size_t>(in.a_rows * in.a_cols_b_rows) &&
266	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 48 times.	48	in.b.size() == static_cast<size_t>(in.a_cols_b_rows * in.b_cols);
267			}
268
269		48	bool MuhammadkhonIStressenAlgSTL::PreProcessingImpl() {
270			GetOutput() = {};
271			const auto &in = GetInput();
272		48	a_rows_ = in.a_rows;
273		48	a_cols_b_rows_ = in.a_cols_b_rows;
274		48	b_cols_ = in.b_cols;
275
276	1/2 ✓ Branch 0 taken 48 times. ✗ Branch 1 not taken.	96	size_t max_dim = std::max({a_rows_, a_cols_b_rows_, b_cols_});
277		48	padded_n_ = NextPow2(max_dim);
278
279		48	padded_a_.assign(padded_n_ * padded_n_, 0.0);
280		48	padded_b_.assign(padded_n_ * padded_n_, 0.0);
281
282	2/2 ✓ Branch 0 taken 1320 times. ✓ Branch 1 taken 48 times.	1368	for (size_t i = 0; i < a_rows_; ++i) {
283	2/2 ✓ Branch 0 taken 67368 times. ✓ Branch 1 taken 1320 times.	68688	for (size_t j = 0; j < a_cols_b_rows_; ++j) {
284		67368	padded_a_[(i * padded_n_) + j] = in.a[(i * a_cols_b_rows_) + j];
285			}
286			}
287
288	2/2 ✓ Branch 0 taken 1240 times. ✓ Branch 1 taken 48 times.	1288	for (size_t i = 0; i < a_cols_b_rows_; ++i) {
289	2/2 ✓ Branch 0 taken 67368 times. ✓ Branch 1 taken 1240 times.	68608	for (size_t j = 0; j < b_cols_; ++j) {
290		67368	padded_b_[(i * padded_n_) + j] = in.b[(i * b_cols_) + j];
291			}
292			}
293
294		48	return true;
295			}
296
297		48	bool MuhammadkhonIStressenAlgSTL::RunImpl() {
298		48	result_c_.assign(padded_n_ * padded_n_, 0.0);
299
300		48	StrassenTopStl(padded_a_.data(), padded_n_, padded_b_.data(), padded_n_, result_c_.data(), padded_n_, padded_n_);
301
302			auto &out = GetOutput();
303		48	out.assign(a_rows_ * b_cols_, 0.0);
304	2/2 ✓ Branch 0 taken 1320 times. ✓ Branch 1 taken 48 times.	1368	for (size_t i = 0; i < a_rows_; ++i) {
305	2/2 ✓ Branch 0 taken 68568 times. ✓ Branch 1 taken 1320 times.	69888	for (size_t j = 0; j < b_cols_; ++j) {
306		68568	out[(i * b_cols_) + j] = result_c_[(i * padded_n_) + j];
307			}
308			}
309
310		48	return true;
311			}
312
313		48	bool MuhammadkhonIStressenAlgSTL::PostProcessingImpl() {
314		48	return true;
315			}
316
317			} // namespace muhammadkhon_i_stressen_alg
318