GCC Code Coverage Report

Directory:	./
File:	tasks/muhammadkhon_i_stressen_alg/tbb/src/ops_tbb.cpp
Date:	2026-06-04 20:25:32

	Exec	Total	Coverage
Lines:	69	178	38.8%
Functions:	10	19	52.6%
Branches:	42	170	24.7%

  
      Line
      Branch
      Exec
      Source
    
      #include "muhammadkhon_i_stressen_alg/tbb/include/ops_tbb.hpp"
    
      #include <oneapi/tbb/global_control.h>
    
      #include <oneapi/tbb/parallel_invoke.h>
    
      #include <algorithm>
    
      #include <cstddef>
    
      #include <functional>
    
      #include <vector>
    
      #include "muhammadkhon_i_stressen_alg/common/include/common.hpp"
    
      #include "util/include/util.hpp"
    
      namespace muhammadkhon_i_stressen_alg {
    
      namespace {
    
      constexpr std::size_t kCutoff = 64;
    
      constexpr std::size_t kBlockSize = 64;
    
      std::size_t NextPow2(std::size_t x) {
    
      24
        if (x <= 1) {
    
          return 1;
    
        }
    
        std::size_t p = 1;
    
        2/2✓ Branch 0 taken 96 times.
✓ Branch 1 taken 24 times.

      120
        while (p < x) {
    
      96
          p <<= 1;
    
        }
    
        return p;
    
      }
    
      void ZeroMatrix(double *dst, std::size_t stride, std::size_t n) {
    
        2/2✓ Branch 0 taken 672 times.
✓ Branch 1 taken 24 times.

      696
        for (std::size_t i = 0; i < n; ++i) {
    
      672
          std::fill_n(dst + (i * stride), n, 0.0);
    
        }
    
      }
    
      ✗
      void AddToBuffer(const double *a, std::size_t a_stride, const double *b, std::size_t b_stride, double *dst,
    
                       std::size_t n, double b_coeff) {
    
      ✗
        for (std::size_t i = 0; i < n; ++i) {
    
      ✗
          for (std::size_t j = 0; j < n; ++j) {
    
      ✗
            dst[(i * n) + j] = a[(i * a_stride) + j] + (b_coeff * b[(i * b_stride) + j]);
    
          }
    
        }
    
      ✗
      }
    
      void MulMicroBlock(const double *a, std::size_t a_stride, const double *b, std::size_t b_stride, double *c,
    
                         std::size_t c_stride, std::size_t i_begin, std::size_t i_end, std::size_t k_begin, std::size_t k_end,
    
                         std::size_t j_begin, std::size_t j_end) {
    
        2/2✓ Branch 0 taken 672 times.
✓ Branch 1 taken 24 times.

      696
        for (std::size_t i = i_begin; i < i_end; ++i) {
    
      672
          double *c_row = c + (i * c_stride);
    
      672
          const double *a_row = a + (i * a_stride);
    
        2/2✓ Branch 0 taken 34944 times.
✓ Branch 1 taken 672 times.

      35616
          for (std::size_t k = k_begin; k < k_end; ++k) {
    
      34944
            const double aik = a_row[k];
    
      34944
            const double *b_row = b + (k * b_stride);
    
        2/2✓ Branch 0 taken 2130432 times.
✓ Branch 1 taken 34944 times.

      2165376
            for (std::size_t j = j_begin; j < j_end; ++j) {
    
      2130432
              c_row[j] += aik * b_row[j];
    
            }
    
          }
    
        }
    
      }
    
      24
      void MulKBlocks(const double *a, std::size_t a_stride, const double *b, std::size_t b_stride, double *c,
    
                      std::size_t c_stride, std::size_t n, std::size_t ii, std::size_t i_end) {
    
        2/2✓ Branch 0 taken 24 times.
✓ Branch 1 taken 24 times.

      48
        for (std::size_t kk = 0; kk < n; kk += kBlockSize) {
    
      24
          const std::size_t k_end = std::min(kk + kBlockSize, n);
    
        2/2✓ Branch 0 taken 24 times.
✓ Branch 1 taken 24 times.

      48
          for (std::size_t jj = 0; jj < n; jj += kBlockSize) {
    
      24
            const std::size_t j_end = std::min(jj + kBlockSize, n);
    
            MulMicroBlock(a, a_stride, b, b_stride, c, c_stride, ii, i_end, kk, k_end, jj, j_end);
    
          }
    
        }
    
      24
      }
    
      24
      void NaiveMulBlocked(const double *a, std::size_t a_stride, const double *b, std::size_t b_stride, double *c,
    
                           std::size_t c_stride, std::size_t n) {
    
      24
        ZeroMatrix(c, c_stride, n);
    
        2/2✓ Branch 0 taken 24 times.
✓ Branch 1 taken 24 times.

      48
        for (std::size_t ii = 0; ii < n; ii += kBlockSize) {
    
      24
          const std::size_t i_end = std::min(ii + kBlockSize, n);
    
      24
          MulKBlocks(a, a_stride, b, b_stride, c, c_stride, n, ii, i_end);
    
        }
    
      24
      }
    
      ✗
      void CombineQuadrants(const std::vector<double> &m1, const std::vector<double> &m2, const std::vector<double> &m3,
    
                            const std::vector<double> &m4, const std::vector<double> &m5, const std::vector<double> &m6,
    
                            const std::vector<double> &m7, double *c, std::size_t c_stride, std::size_t half) {
    
      ✗
        for (std::size_t i = 0; i < half; ++i) {
    
      ✗
          double *c11 = c + (i * c_stride);
    
          double *c12 = c11 + half;
    
      ✗
          double *c21 = c + ((i + half) * c_stride);
    
          double *c22 = c21 + half;
    
      ✗
          for (std::size_t j = 0; j < half; ++j) {
    
      ✗
            const std::size_t idx = (i * half) + j;
    
      ✗
            c11[j] = m1[idx] + m4[idx] - m5[idx] + m7[idx];
    
      ✗
            c12[j] = m3[idx] + m5[idx];
    
      ✗
            c21[j] = m2[idx] + m4[idx];
    
      ✗
            c22[j] = m1[idx] - m2[idx] + m3[idx] + m6[idx];
    
          }
    
        }
    
      ✗
      }
    
        1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.

      24
      void StrassenSeq(const double *a_in, std::size_t a_stride_in, const double *b_in, std::size_t b_stride_in, double *c_in,
    
                       std::size_t c_stride_in, std::size_t n_in) {
    
        std::function<void(const double *, std::size_t, const double *, std::size_t, double *, std::size_t, std::size_t)>
    
      24
            impl = [&](const double *a, std::size_t a_stride, const double *b, std::size_t b_stride, double *c,
    
                       std::size_t c_stride, std::size_t n) {
    
        1/2✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.

      24
          if (n <= kCutoff) {
    
      24
            NaiveMulBlocked(a, a_stride, b, b_stride, c, c_stride, n);
    
      24
            return;
    
          }
    
      ✗
          const std::size_t half = n / 2;
    
          const double *a11 = a;
    
      ✗
          const double *a12 = a + half;
    
      ✗
          const double *a21 = a + (half * a_stride);
    
      ✗
          const double *a22 = a21 + half;
    
          const double *b11 = b;
    
      ✗
          const double *b12 = b + half;
    
      ✗
          const double *b21 = b + (half * b_stride);
    
      ✗
          const double *b22 = b21 + half;
    
      ✗
          std::vector<double> lhs(half * half);
    
      ✗
          std::vector<double> rhs(half * half);
    
      ✗
          std::vector<double> m1(half * half);
    
      ✗
          std::vector<double> m2(half * half);
    
      ✗
          std::vector<double> m3(half * half);
    
      ✗
          std::vector<double> m4(half * half);
    
      ✗
          std::vector<double> m5(half * half);
    
      ✗
          std::vector<double> m6(half * half);
    
      ✗
          std::vector<double> m7(half * half);
    
      ✗
          AddToBuffer(a11, a_stride, a22, a_stride, lhs.data(), half, 1.0);
    
      ✗
          AddToBuffer(b11, b_stride, b22, b_stride, rhs.data(), half, 1.0);
    
      ✗
          impl(lhs.data(), half, rhs.data(), half, m1.data(), half, half);
    
      ✗
          AddToBuffer(a21, a_stride, a22, a_stride, lhs.data(), half, 1.0);
    
      ✗
          impl(lhs.data(), half, b11, b_stride, m2.data(), half, half);
    
      ✗
          AddToBuffer(b12, b_stride, b22, b_stride, rhs.data(), half, -1.0);
    
      ✗
          impl(a11, a_stride, rhs.data(), half, m3.data(), half, half);
    
      ✗
          AddToBuffer(b21, b_stride, b11, b_stride, rhs.data(), half, -1.0);
    
      ✗
          impl(a22, a_stride, rhs.data(), half, m4.data(), half, half);
    
      ✗
          AddToBuffer(a11, a_stride, a12, a_stride, lhs.data(), half, 1.0);
    
      ✗
          impl(lhs.data(), half, b22, b_stride, m5.data(), half, half);
    
      ✗
          AddToBuffer(a21, a_stride, a11, a_stride, lhs.data(), half, -1.0);
    
      ✗
          AddToBuffer(b11, b_stride, b12, b_stride, rhs.data(), half, 1.0);
    
      ✗
          impl(lhs.data(), half, rhs.data(), half, m6.data(), half, half);
    
      ✗
          AddToBuffer(a12, a_stride, a22, a_stride, lhs.data(), half, -1.0);
    
      ✗
          AddToBuffer(b21, b_stride, b22, b_stride, rhs.data(), half, 1.0);
    
      ✗
          impl(lhs.data(), half, rhs.data(), half, m7.data(), half, half);
    
      ✗
          CombineQuadrants(m1, m2, m3, m4, m5, m6, m7, c, c_stride, half);
    
        };
    
        1/2✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.

      24
        impl(a_in, a_stride_in, b_in, b_stride_in, c_in, c_stride_in, n_in);
    
      24
      }
    
      24
      void StrassenTopTbb(const double *a, std::size_t a_stride, const double *b, std::size_t b_stride, double *c,
    
                          std::size_t c_stride, std::size_t n) {
    
        1/4✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      24
        if (n <= kCutoff || ppc::util::GetNumThreads() <= 1) {
    
      24
          StrassenSeq(a, a_stride, b, b_stride, c, c_stride, n);
    
      24
          return;
    
        }
    
      ✗
        const std::size_t half = n / 2;
    
      ✗
        const double *a11 = a;
    
      ✗
        const double *a12 = a + half;
    
      ✗
        const double *a21 = a + (half * a_stride);
    
      ✗
        const double *a22 = a21 + half;
    
      ✗
        const double *b11 = b;
    
      ✗
        const double *b12 = b + half;
    
      ✗
        const double *b21 = b + (half * b_stride);
    
      ✗
        const double *b22 = b21 + half;
    
      ✗
        std::vector<double> m1;
    
      ✗
        std::vector<double> m2;
    
      ✗
        std::vector<double> m3;
    
      ✗
        std::vector<double> m4;
    
      ✗
        std::vector<double> m5;
    
      ✗
        std::vector<double> m6;
    
      ✗
        std::vector<double> m7;
    
      ✗
        oneapi::tbb::parallel_invoke([&] {
    
      ✗
          std::vector<double> lhs(half * half);
    
      ✗
          std::vector<double> rhs(half * half);
    
      ✗
          AddToBuffer(a11, a_stride, a22, a_stride, lhs.data(), half, 1.0);
    
      ✗
          AddToBuffer(b11, b_stride, b22, b_stride, rhs.data(), half, 1.0);
    
      ✗
          m1.assign(half * half, 0.0);
    
      ✗
          StrassenSeq(lhs.data(), half, rhs.data(), half, m1.data(), half, half);
    
      ✗
        }, [&] {
    
      ✗
          std::vector<double> lhs(half * half);
    
      ✗
          AddToBuffer(a21, a_stride, a22, a_stride, lhs.data(), half, 1.0);
    
      ✗
          m2.assign(half * half, 0.0);
    
      ✗
          StrassenSeq(lhs.data(), half, b11, b_stride, m2.data(), half, half);
    
      ✗
        }, [&] {
    
      ✗
          std::vector<double> rhs(half * half);
    
      ✗
          AddToBuffer(b12, b_stride, b22, b_stride, rhs.data(), half, -1.0);
    
      ✗
          m3.assign(half * half, 0.0);
    
      ✗
          StrassenSeq(a11, a_stride, rhs.data(), half, m3.data(), half, half);
    
      ✗
        }, [&] {
    
      ✗
          std::vector<double> rhs(half * half);
    
      ✗
          AddToBuffer(b21, b_stride, b11, b_stride, rhs.data(), half, -1.0);
    
      ✗
          m4.assign(half * half, 0.0);
    
      ✗
          StrassenSeq(a22, a_stride, rhs.data(), half, m4.data(), half, half);
    
      ✗
        }, [&] {
    
      ✗
          std::vector<double> lhs(half * half);
    
      ✗
          AddToBuffer(a11, a_stride, a12, a_stride, lhs.data(), half, 1.0);
    
      ✗
          m5.assign(half * half, 0.0);
    
      ✗
          StrassenSeq(lhs.data(), half, b22, b_stride, m5.data(), half, half);
    
      ✗
        }, [&] {
    
      ✗
          std::vector<double> lhs(half * half);
    
      ✗
          std::vector<double> rhs(half * half);
    
      ✗
          AddToBuffer(a21, a_stride, a11, a_stride, lhs.data(), half, -1.0);
    
      ✗
          AddToBuffer(b11, b_stride, b12, b_stride, rhs.data(), half, 1.0);
    
      ✗
          m6.assign(half * half, 0.0);
    
      ✗
          StrassenSeq(lhs.data(), half, rhs.data(), half, m6.data(), half, half);
    
      ✗
        }, [&] {
    
      ✗
          std::vector<double> lhs(half * half);
    
      ✗
          std::vector<double> rhs(half * half);
    
      ✗
          AddToBuffer(a12, a_stride, a22, a_stride, lhs.data(), half, -1.0);
    
      ✗
          AddToBuffer(b21, b_stride, b22, b_stride, rhs.data(), half, 1.0);
    
      ✗
          m7.assign(half * half, 0.0);
    
      ✗
          StrassenSeq(lhs.data(), half, rhs.data(), half, m7.data(), half, half);
    
      ✗
        });
    
      ✗
        CombineQuadrants(m1, m2, m3, m4, m5, m6, m7, c, c_stride, half);
    
      }
    
      }  // namespace
    
        1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.

      24
      MuhammadkhonIStressenAlgTBB::MuhammadkhonIStressenAlgTBB(const InType &in) {
    
        SetTypeOfTask(GetStaticTypeOfTask());
    
        1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.

      24
        GetInput() = in;
    
        GetOutput() = {};
    
      24
      }
    
      24
      bool MuhammadkhonIStressenAlgTBB::ValidationImpl() {
    
        const auto &in = GetInput();
    
        2/4✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 24 times.

      24
        return in.a_rows > 0 && in.a_cols_b_rows > 0 && in.b_cols > 0 &&
    
        2/4✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 24 times.

      48
               in.a.size() == static_cast<size_t>(in.a_rows * in.a_cols_b_rows) &&
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.

      24
               in.b.size() == static_cast<size_t>(in.a_cols_b_rows * in.b_cols);
    
      }
    
      24
      bool MuhammadkhonIStressenAlgTBB::PreProcessingImpl() {
    
        GetOutput() = {};
    
        const auto &in = GetInput();
    
      24
        a_rows_ = in.a_rows;
    
      24
        a_cols_b_rows_ = in.a_cols_b_rows;
    
      24
        b_cols_ = in.b_cols;
    
        1/2✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.

      48
        size_t max_dim = std::max({a_rows_, a_cols_b_rows_, b_cols_});
    
      24
        padded_n_ = NextPow2(max_dim);
    
      24
        padded_a_.assign(padded_n_ * padded_n_, 0.0);
    
      24
        padded_b_.assign(padded_n_ * padded_n_, 0.0);
    
        2/2✓ Branch 0 taken 660 times.
✓ Branch 1 taken 24 times.

      684
        for (size_t i = 0; i < a_rows_; ++i) {
    
        2/2✓ Branch 0 taken 33684 times.
✓ Branch 1 taken 660 times.

      34344
          for (size_t j = 0; j < a_cols_b_rows_; ++j) {
    
      33684
            padded_a_[(i * padded_n_) + j] = in.a[(i * a_cols_b_rows_) + j];
    
          }
    
        }
    
        2/2✓ Branch 0 taken 620 times.
✓ Branch 1 taken 24 times.

      644
        for (size_t i = 0; i < a_cols_b_rows_; ++i) {
    
        2/2✓ Branch 0 taken 33684 times.
✓ Branch 1 taken 620 times.

      34304
          for (size_t j = 0; j < b_cols_; ++j) {
    
      33684
            padded_b_[(i * padded_n_) + j] = in.b[(i * b_cols_) + j];
    
          }
    
        }
    
      24
        return true;
    
      }
    
      24
      bool MuhammadkhonIStressenAlgTBB::RunImpl() {
    
      24
        result_c_.assign(padded_n_ * padded_n_, 0.0);
    
        oneapi::tbb::global_control control(oneapi::tbb::global_control::max_allowed_parallelism,
    
      24
                                            static_cast<std::size_t>(std::max(1, ppc::util::GetNumThreads())));
    
        (void)control;
    
        1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.

      24
        StrassenTopTbb(padded_a_.data(), padded_n_, padded_b_.data(), padded_n_, result_c_.data(), padded_n_, padded_n_);
    
        auto &out = GetOutput();
    
        1/2✓ Branch 1 taken 24 times.
✗ Branch 2 not taken.

      24
        out.assign(a_rows_ * b_cols_, 0.0);
    
        2/2✓ Branch 0 taken 660 times.
✓ Branch 1 taken 24 times.

      684
        for (size_t i = 0; i < a_rows_; ++i) {
    
        2/2✓ Branch 0 taken 34284 times.
✓ Branch 1 taken 660 times.

      34944
          for (size_t j = 0; j < b_cols_; ++j) {
    
      34284
            out[(i * b_cols_) + j] = result_c_[(i * padded_n_) + j];
    
          }
    
        }
    
      24
        return true;
    
      }
    
      24
      bool MuhammadkhonIStressenAlgTBB::PostProcessingImpl() {
    
      24
        return true;
    
      }
    
      }  // namespace muhammadkhon_i_stressen_alg

Line	Branch	Exec	Source
1			#include "muhammadkhon_i_stressen_alg/tbb/include/ops_tbb.hpp"
2
3			#include <oneapi/tbb/global_control.h>
4			#include <oneapi/tbb/parallel_invoke.h>
5
6			#include <algorithm>
7			#include <cstddef>
8			#include <functional>
9			#include <vector>
10
11			#include "muhammadkhon_i_stressen_alg/common/include/common.hpp"
12			#include "util/include/util.hpp"
13
14			namespace muhammadkhon_i_stressen_alg {
15
16			namespace {
17
18			constexpr std::size_t kCutoff = 64;
19			constexpr std::size_t kBlockSize = 64;
20
21			std::size_t NextPow2(std::size_t x) {
22		24	if (x <= 1) {
23			return 1;
24			}
25			std::size_t p = 1;
26	2/2 ✓ Branch 0 taken 96 times. ✓ Branch 1 taken 24 times.	120	while (p < x) {
27		96	p <<= 1;
28			}
29			return p;
30			}
31
32			void ZeroMatrix(double *dst, std::size_t stride, std::size_t n) {
33	2/2 ✓ Branch 0 taken 672 times. ✓ Branch 1 taken 24 times.	696	for (std::size_t i = 0; i < n; ++i) {
34		672	std::fill_n(dst + (i * stride), n, 0.0);
35			}
36			}
37
38		✗	void AddToBuffer(const double a, std::size_t a_stride, const double b, std::size_t b_stride, double *dst,
39			std::size_t n, double b_coeff) {
40		✗	for (std::size_t i = 0; i < n; ++i) {
41		✗	for (std::size_t j = 0; j < n; ++j) {
42		✗	dst[(i * n) + j] = a[(i * a_stride) + j] + (b_coeff * b[(i * b_stride) + j]);
43			}
44			}
45		✗	}
46
47			void MulMicroBlock(const double a, std::size_t a_stride, const double b, std::size_t b_stride, double *c,
48			std::size_t c_stride, std::size_t i_begin, std::size_t i_end, std::size_t k_begin, std::size_t k_end,
49			std::size_t j_begin, std::size_t j_end) {
50	2/2 ✓ Branch 0 taken 672 times. ✓ Branch 1 taken 24 times.	696	for (std::size_t i = i_begin; i < i_end; ++i) {
51		672	double c_row = c + (i c_stride);
52		672	const double a_row = a + (i a_stride);
53	2/2 ✓ Branch 0 taken 34944 times. ✓ Branch 1 taken 672 times.	35616	for (std::size_t k = k_begin; k < k_end; ++k) {
54		34944	const double aik = a_row[k];
55		34944	const double b_row = b + (k b_stride);
56	2/2 ✓ Branch 0 taken 2130432 times. ✓ Branch 1 taken 34944 times.	2165376	for (std::size_t j = j_begin; j < j_end; ++j) {
57		2130432	c_row[j] += aik * b_row[j];
58			}
59			}
60			}
61			}
62
63		24	void MulKBlocks(const double a, std::size_t a_stride, const double b, std::size_t b_stride, double *c,
64			std::size_t c_stride, std::size_t n, std::size_t ii, std::size_t i_end) {
65	2/2 ✓ Branch 0 taken 24 times. ✓ Branch 1 taken 24 times.	48	for (std::size_t kk = 0; kk < n; kk += kBlockSize) {
66		24	const std::size_t k_end = std::min(kk + kBlockSize, n);
67	2/2 ✓ Branch 0 taken 24 times. ✓ Branch 1 taken 24 times.	48	for (std::size_t jj = 0; jj < n; jj += kBlockSize) {
68		24	const std::size_t j_end = std::min(jj + kBlockSize, n);
69			MulMicroBlock(a, a_stride, b, b_stride, c, c_stride, ii, i_end, kk, k_end, jj, j_end);
70			}
71			}
72		24	}
73
74		24	void NaiveMulBlocked(const double a, std::size_t a_stride, const double b, std::size_t b_stride, double *c,
75			std::size_t c_stride, std::size_t n) {
76		24	ZeroMatrix(c, c_stride, n);
77	2/2 ✓ Branch 0 taken 24 times. ✓ Branch 1 taken 24 times.	48	for (std::size_t ii = 0; ii < n; ii += kBlockSize) {
78		24	const std::size_t i_end = std::min(ii + kBlockSize, n);
79		24	MulKBlocks(a, a_stride, b, b_stride, c, c_stride, n, ii, i_end);
80			}
81		24	}
82
83		✗	void CombineQuadrants(const std::vector<double> &m1, const std::vector<double> &m2, const std::vector<double> &m3,
84			const std::vector<double> &m4, const std::vector<double> &m5, const std::vector<double> &m6,
85			const std::vector<double> &m7, double *c, std::size_t c_stride, std::size_t half) {
86		✗	for (std::size_t i = 0; i < half; ++i) {
87		✗	double c11 = c + (i c_stride);
88			double *c12 = c11 + half;
89		✗	double c21 = c + ((i + half) c_stride);
90			double *c22 = c21 + half;
91		✗	for (std::size_t j = 0; j < half; ++j) {
92		✗	const std::size_t idx = (i * half) + j;
93		✗	c11[j] = m1[idx] + m4[idx] - m5[idx] + m7[idx];
94		✗	c12[j] = m3[idx] + m5[idx];
95		✗	c21[j] = m2[idx] + m4[idx];
96		✗	c22[j] = m1[idx] - m2[idx] + m3[idx] + m6[idx];
97			}
98			}
99		✗	}
100
101	1/2 ✓ Branch 1 taken 24 times. ✗ Branch 2 not taken.	24	void StrassenSeq(const double a_in, std::size_t a_stride_in, const double b_in, std::size_t b_stride_in, double *c_in,
102			std::size_t c_stride_in, std::size_t n_in) {
103			std::function<void(const double , std::size_t, const double , std::size_t, double *, std::size_t, std::size_t)>
104		24	impl = [&](const double a, std::size_t a_stride, const double b, std::size_t b_stride, double *c,
105			std::size_t c_stride, std::size_t n) {
106	1/2 ✓ Branch 0 taken 24 times. ✗ Branch 1 not taken.	24	if (n <= kCutoff) {
107		24	NaiveMulBlocked(a, a_stride, b, b_stride, c, c_stride, n);
108		24	return;
109			}
110		✗	const std::size_t half = n / 2;
111
112			const double *a11 = a;
113		✗	const double *a12 = a + half;
114		✗	const double a21 = a + (half a_stride);
115		✗	const double *a22 = a21 + half;
116			const double *b11 = b;
117		✗	const double *b12 = b + half;
118		✗	const double b21 = b + (half b_stride);
119		✗	const double *b22 = b21 + half;
120
121		✗	std::vector<double> lhs(half * half);
122		✗	std::vector<double> rhs(half * half);
123		✗	std::vector<double> m1(half * half);
124		✗	std::vector<double> m2(half * half);
125		✗	std::vector<double> m3(half * half);
126		✗	std::vector<double> m4(half * half);
127		✗	std::vector<double> m5(half * half);
128		✗	std::vector<double> m6(half * half);
129		✗	std::vector<double> m7(half * half);
130
131		✗	AddToBuffer(a11, a_stride, a22, a_stride, lhs.data(), half, 1.0);
132		✗	AddToBuffer(b11, b_stride, b22, b_stride, rhs.data(), half, 1.0);
133		✗	impl(lhs.data(), half, rhs.data(), half, m1.data(), half, half);
134
135		✗	AddToBuffer(a21, a_stride, a22, a_stride, lhs.data(), half, 1.0);
136		✗	impl(lhs.data(), half, b11, b_stride, m2.data(), half, half);
137
138		✗	AddToBuffer(b12, b_stride, b22, b_stride, rhs.data(), half, -1.0);
139		✗	impl(a11, a_stride, rhs.data(), half, m3.data(), half, half);
140
141		✗	AddToBuffer(b21, b_stride, b11, b_stride, rhs.data(), half, -1.0);
142		✗	impl(a22, a_stride, rhs.data(), half, m4.data(), half, half);
143
144		✗	AddToBuffer(a11, a_stride, a12, a_stride, lhs.data(), half, 1.0);
145		✗	impl(lhs.data(), half, b22, b_stride, m5.data(), half, half);
146
147		✗	AddToBuffer(a21, a_stride, a11, a_stride, lhs.data(), half, -1.0);
148		✗	AddToBuffer(b11, b_stride, b12, b_stride, rhs.data(), half, 1.0);
149		✗	impl(lhs.data(), half, rhs.data(), half, m6.data(), half, half);
150
151		✗	AddToBuffer(a12, a_stride, a22, a_stride, lhs.data(), half, -1.0);
152		✗	AddToBuffer(b21, b_stride, b22, b_stride, rhs.data(), half, 1.0);
153		✗	impl(lhs.data(), half, rhs.data(), half, m7.data(), half, half);
154
155		✗	CombineQuadrants(m1, m2, m3, m4, m5, m6, m7, c, c_stride, half);
156			};
157
158	1/2 ✓ Branch 0 taken 24 times. ✗ Branch 1 not taken.	24	impl(a_in, a_stride_in, b_in, b_stride_in, c_in, c_stride_in, n_in);
159		24	}
160
161		24	void StrassenTopTbb(const double a, std::size_t a_stride, const double b, std::size_t b_stride, double *c,
162			std::size_t c_stride, std::size_t n) {
163	1/4 ✗ Branch 0 not taken. ✓ Branch 1 taken 24 times. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	24	if (n <= kCutoff \|\| ppc::util::GetNumThreads() <= 1) {
164		24	StrassenSeq(a, a_stride, b, b_stride, c, c_stride, n);
165		24	return;
166			}
167
168		✗	const std::size_t half = n / 2;
169
170		✗	const double *a11 = a;
171		✗	const double *a12 = a + half;
172		✗	const double a21 = a + (half a_stride);
173		✗	const double *a22 = a21 + half;
174		✗	const double *b11 = b;
175		✗	const double *b12 = b + half;
176		✗	const double b21 = b + (half b_stride);
177		✗	const double *b22 = b21 + half;
178
179		✗	std::vector<double> m1;
180		✗	std::vector<double> m2;
181		✗	std::vector<double> m3;
182		✗	std::vector<double> m4;
183		✗	std::vector<double> m5;
184		✗	std::vector<double> m6;
185		✗	std::vector<double> m7;
186
187		✗	oneapi::tbb::parallel_invoke([&] {
188		✗	std::vector<double> lhs(half * half);
189		✗	std::vector<double> rhs(half * half);
190		✗	AddToBuffer(a11, a_stride, a22, a_stride, lhs.data(), half, 1.0);
191		✗	AddToBuffer(b11, b_stride, b22, b_stride, rhs.data(), half, 1.0);
192		✗	m1.assign(half * half, 0.0);
193		✗	StrassenSeq(lhs.data(), half, rhs.data(), half, m1.data(), half, half);
194		✗	}, [&] {
195		✗	std::vector<double> lhs(half * half);
196		✗	AddToBuffer(a21, a_stride, a22, a_stride, lhs.data(), half, 1.0);
197		✗	m2.assign(half * half, 0.0);
198		✗	StrassenSeq(lhs.data(), half, b11, b_stride, m2.data(), half, half);
199		✗	}, [&] {
200		✗	std::vector<double> rhs(half * half);
201		✗	AddToBuffer(b12, b_stride, b22, b_stride, rhs.data(), half, -1.0);
202		✗	m3.assign(half * half, 0.0);
203		✗	StrassenSeq(a11, a_stride, rhs.data(), half, m3.data(), half, half);
204		✗	}, [&] {
205		✗	std::vector<double> rhs(half * half);
206		✗	AddToBuffer(b21, b_stride, b11, b_stride, rhs.data(), half, -1.0);
207		✗	m4.assign(half * half, 0.0);
208		✗	StrassenSeq(a22, a_stride, rhs.data(), half, m4.data(), half, half);
209		✗	}, [&] {
210		✗	std::vector<double> lhs(half * half);
211		✗	AddToBuffer(a11, a_stride, a12, a_stride, lhs.data(), half, 1.0);
212		✗	m5.assign(half * half, 0.0);
213		✗	StrassenSeq(lhs.data(), half, b22, b_stride, m5.data(), half, half);
214		✗	}, [&] {
215		✗	std::vector<double> lhs(half * half);
216		✗	std::vector<double> rhs(half * half);
217		✗	AddToBuffer(a21, a_stride, a11, a_stride, lhs.data(), half, -1.0);
218		✗	AddToBuffer(b11, b_stride, b12, b_stride, rhs.data(), half, 1.0);
219		✗	m6.assign(half * half, 0.0);
220		✗	StrassenSeq(lhs.data(), half, rhs.data(), half, m6.data(), half, half);
221		✗	}, [&] {
222		✗	std::vector<double> lhs(half * half);
223		✗	std::vector<double> rhs(half * half);
224		✗	AddToBuffer(a12, a_stride, a22, a_stride, lhs.data(), half, -1.0);
225		✗	AddToBuffer(b21, b_stride, b22, b_stride, rhs.data(), half, 1.0);
226		✗	m7.assign(half * half, 0.0);
227		✗	StrassenSeq(lhs.data(), half, rhs.data(), half, m7.data(), half, half);
228		✗	});
229
230		✗	CombineQuadrants(m1, m2, m3, m4, m5, m6, m7, c, c_stride, half);
231			}
232
233			} // namespace
234
235	1/2 ✓ Branch 1 taken 24 times. ✗ Branch 2 not taken.	24	MuhammadkhonIStressenAlgTBB::MuhammadkhonIStressenAlgTBB(const InType &in) {
236			SetTypeOfTask(GetStaticTypeOfTask());
237	1/2 ✓ Branch 1 taken 24 times. ✗ Branch 2 not taken.	24	GetInput() = in;
238			GetOutput() = {};
239		24	}
240
241		24	bool MuhammadkhonIStressenAlgTBB::ValidationImpl() {
242			const auto &in = GetInput();
243	2/4 ✗ Branch 0 not taken. ✓ Branch 1 taken 24 times. ✗ Branch 2 not taken. ✓ Branch 3 taken 24 times.	24	return in.a_rows > 0 && in.a_cols_b_rows > 0 && in.b_cols > 0 &&
244	2/4 ✓ Branch 0 taken 24 times. ✗ Branch 1 not taken. ✗ Branch 2 not taken. ✓ Branch 3 taken 24 times.	48	in.a.size() == static_cast<size_t>(in.a_rows * in.a_cols_b_rows) &&
245	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 24 times.	24	in.b.size() == static_cast<size_t>(in.a_cols_b_rows * in.b_cols);
246			}
247
248		24	bool MuhammadkhonIStressenAlgTBB::PreProcessingImpl() {
249			GetOutput() = {};
250			const auto &in = GetInput();
251		24	a_rows_ = in.a_rows;
252		24	a_cols_b_rows_ = in.a_cols_b_rows;
253		24	b_cols_ = in.b_cols;
254
255	1/2 ✓ Branch 0 taken 24 times. ✗ Branch 1 not taken.	48	size_t max_dim = std::max({a_rows_, a_cols_b_rows_, b_cols_});
256		24	padded_n_ = NextPow2(max_dim);
257
258		24	padded_a_.assign(padded_n_ * padded_n_, 0.0);
259		24	padded_b_.assign(padded_n_ * padded_n_, 0.0);
260
261	2/2 ✓ Branch 0 taken 660 times. ✓ Branch 1 taken 24 times.	684	for (size_t i = 0; i < a_rows_; ++i) {
262	2/2 ✓ Branch 0 taken 33684 times. ✓ Branch 1 taken 660 times.	34344	for (size_t j = 0; j < a_cols_b_rows_; ++j) {
263		33684	padded_a_[(i * padded_n_) + j] = in.a[(i * a_cols_b_rows_) + j];
264			}
265			}
266
267	2/2 ✓ Branch 0 taken 620 times. ✓ Branch 1 taken 24 times.	644	for (size_t i = 0; i < a_cols_b_rows_; ++i) {
268	2/2 ✓ Branch 0 taken 33684 times. ✓ Branch 1 taken 620 times.	34304	for (size_t j = 0; j < b_cols_; ++j) {
269		33684	padded_b_[(i * padded_n_) + j] = in.b[(i * b_cols_) + j];
270			}
271			}
272
273		24	return true;
274			}
275
276		24	bool MuhammadkhonIStressenAlgTBB::RunImpl() {
277		24	result_c_.assign(padded_n_ * padded_n_, 0.0);
278
279			oneapi::tbb::global_control control(oneapi::tbb::global_control::max_allowed_parallelism,
280		24	static_cast<std::size_t>(std::max(1, ppc::util::GetNumThreads())));
281			(void)control;
282
283	1/2 ✓ Branch 1 taken 24 times. ✗ Branch 2 not taken.	24	StrassenTopTbb(padded_a_.data(), padded_n_, padded_b_.data(), padded_n_, result_c_.data(), padded_n_, padded_n_);
284
285			auto &out = GetOutput();
286	1/2 ✓ Branch 1 taken 24 times. ✗ Branch 2 not taken.	24	out.assign(a_rows_ * b_cols_, 0.0);
287	2/2 ✓ Branch 0 taken 660 times. ✓ Branch 1 taken 24 times.	684	for (size_t i = 0; i < a_rows_; ++i) {
288	2/2 ✓ Branch 0 taken 34284 times. ✓ Branch 1 taken 660 times.	34944	for (size_t j = 0; j < b_cols_; ++j) {
289		34284	out[(i * b_cols_) + j] = result_c_[(i * padded_n_) + j];
290			}
291			}
292
293		24	return true;
294			}
295
296		24	bool MuhammadkhonIStressenAlgTBB::PostProcessingImpl() {
297		24	return true;
298			}
299
300			} // namespace muhammadkhon_i_stressen_alg
301