GCC Code Coverage Report

Directory:	./
File:	tasks/zorin_d_strassen_alg_matrix_seq/tbb/src/ops_tbb.cpp
Date:	2026-05-11 08:26:31

	Exec	Total	Coverage
Lines:	59	157	37.6%
Functions:	9	18	50.0%
Branches:	35	142	24.6%

  
      Line
      Branch
      Exec
      Source
    
      #include "zorin_d_strassen_alg_matrix_seq/tbb/include/ops_tbb.hpp"
    
      #include <oneapi/tbb/global_control.h>
    
      #include <oneapi/tbb/parallel_invoke.h>
    
      #include <algorithm>
    
      #include <cstddef>
    
      #include <vector>
    
      #include "util/include/util.hpp"
    
      #include "zorin_d_strassen_alg_matrix_seq/common/include/common.hpp"
    
      namespace zorin_d_strassen_alg_matrix_seq {
    
      namespace {
    
      constexpr std::size_t kCutoff = 256;
    
      constexpr std::size_t kBlockSize = 64;
    
      std::size_t NextPow2(std::size_t x) {
    
      84
        if (x <= 1) {
    
          return 1;
    
        }
    
        std::size_t p = 1;
    
        2/2✓ Branch 0 taken 204 times.
✓ Branch 1 taken 72 times.

      276
        while (p < x) {
    
      204
          p <<= 1;
    
        }
    
        return p;
    
      }
    
      void ZeroMatrix(double *dst, std::size_t stride, std::size_t n) {
    
        2/2✓ Branch 0 taken 660 times.
✓ Branch 1 taken 84 times.

      744
        for (std::size_t i = 0; i < n; ++i) {
    
      660
          std::fill_n(dst + (i * stride), n, 0.0);
    
        }
    
      }
    
      ✗
      void AddToBuffer(const double *a, std::size_t a_stride, const double *b, std::size_t b_stride, double *dst,
    
                       std::size_t n, double b_coeff) {
    
      ✗
        for (std::size_t i = 0; i < n; ++i) {
    
      ✗
          const double *a_row = a + (i * a_stride);
    
      ✗
          const double *b_row = b + (i * b_stride);
    
      ✗
          double *dst_row = dst + (i * n);
    
      ✗
          for (std::size_t j = 0; j < n; ++j) {
    
      ✗
            dst_row[j] = a_row[j] + (b_coeff * b_row[j]);
    
          }
    
        }
    
      ✗
      }
    
      void MulMicroBlock(const double *a, std::size_t a_stride, const double *b, std::size_t b_stride, double *c,
    
                         std::size_t c_stride, std::size_t i_begin, std::size_t i_end, std::size_t k_begin, std::size_t k_end,
    
                         std::size_t j_begin, std::size_t j_end) {
    
        2/2✓ Branch 0 taken 660 times.
✓ Branch 1 taken 84 times.

      744
        for (std::size_t i = i_begin; i < i_end; ++i) {
    
      660
          double *c_row = c + (i * c_stride);
    
      660
          const double *a_row = a + (i * a_stride);
    
        2/2✓ Branch 0 taken 7932 times.
✓ Branch 1 taken 660 times.

      8592
          for (std::size_t k = k_begin; k < k_end; ++k) {
    
      7932
            const double aik = a_row[k];
    
      7932
            const double *b_row = b + (k * b_stride);
    
        2/2✓ Branch 0 taken 111468 times.
✓ Branch 1 taken 7932 times.

      119400
            for (std::size_t j = j_begin; j < j_end; ++j) {
    
      111468
              c_row[j] += aik * b_row[j];
    
            }
    
          }
    
        }
    
      }
    
      84
      void MulJBlocks(const double *a, std::size_t a_stride, const double *b, std::size_t b_stride, double *c,
    
                      std::size_t c_stride, std::size_t n, std::size_t ii, std::size_t i_end, std::size_t kk,
    
                      std::size_t k_end) {
    
        2/2✓ Branch 0 taken 84 times.
✓ Branch 1 taken 84 times.

      168
        for (std::size_t jj = 0; jj < n; jj += kBlockSize) {
    
      84
          const std::size_t j_end = std::min(jj + kBlockSize, n);
    
          MulMicroBlock(a, a_stride, b, b_stride, c, c_stride, ii, i_end, kk, k_end, jj, j_end);
    
        }
    
      84
      }
    
      void MulKBlocks(const double *a, std::size_t a_stride, const double *b, std::size_t b_stride, double *c,
    
                      std::size_t c_stride, std::size_t n, std::size_t ii, std::size_t i_end) {
    
        2/2✓ Branch 0 taken 84 times.
✓ Branch 1 taken 84 times.

      168
        for (std::size_t kk = 0; kk < n; kk += kBlockSize) {
    
      84
          const std::size_t k_end = std::min(kk + kBlockSize, n);
    
      84
          MulJBlocks(a, a_stride, b, b_stride, c, c_stride, n, ii, i_end, kk, k_end);
    
        }
    
      }
    
      84
      void NaiveMulBlocked(const double *a, std::size_t a_stride, const double *b, std::size_t b_stride, double *c,
    
                           std::size_t c_stride, std::size_t n) {
    
      84
        ZeroMatrix(c, c_stride, n);
    
        2/2✓ Branch 0 taken 84 times.
✓ Branch 1 taken 84 times.

      168
        for (std::size_t ii = 0; ii < n; ii += kBlockSize) {
    
      84
          const std::size_t i_end = std::min(ii + kBlockSize, n);
    
          MulKBlocks(a, a_stride, b, b_stride, c, c_stride, n, ii, i_end);
    
        }
    
      84
      }
    
      ✗
      void CombineQuadrants(const std::vector<double> &m1, const std::vector<double> &m2, const std::vector<double> &m3,
    
                            const std::vector<double> &m4, const std::vector<double> &m5, const std::vector<double> &m6,
    
                            const std::vector<double> &m7, double *c, std::size_t c_stride, std::size_t half) {
    
      ✗
        for (std::size_t i = 0; i < half; ++i) {
    
      ✗
          double *c11 = c + (i * c_stride);
    
          double *c12 = c11 + half;
    
      ✗
          double *c21 = c + ((i + half) * c_stride);
    
          double *c22 = c21 + half;
    
      ✗
          const double *m1_row = m1.data() + (i * half);
    
          const double *m2_row = m2.data() + (i * half);
    
          const double *m3_row = m3.data() + (i * half);
    
          const double *m4_row = m4.data() + (i * half);
    
          const double *m5_row = m5.data() + (i * half);
    
          const double *m6_row = m6.data() + (i * half);
    
          const double *m7_row = m7.data() + (i * half);
    
      ✗
          for (std::size_t j = 0; j < half; ++j) {
    
      ✗
            c11[j] = m1_row[j] + m4_row[j] - m5_row[j] + m7_row[j];
    
      ✗
            c12[j] = m3_row[j] + m5_row[j];
    
      ✗
            c21[j] = m2_row[j] + m4_row[j];
    
      ✗
            c22[j] = m1_row[j] - m2_row[j] + m3_row[j] + m6_row[j];
    
          }
    
        }
    
      ✗
      }
    
      using StrassenSeqFn = void (*)(const double *, std::size_t, const double *, std::size_t, double *, std::size_t,
    
                                     std::size_t);
    
      void StrassenSeqImpl(const double *a, std::size_t a_stride, const double *b, std::size_t b_stride, double *c,
    
                           std::size_t c_stride, std::size_t n);
    
      constexpr StrassenSeqFn kStrassenSeqFn = &StrassenSeqImpl;
    
      ✗
      void ComputeProduct(const double *a1, std::size_t a1_stride, const double *a2, std::size_t a2_stride, double a2_coeff,
    
                          const double *b1, std::size_t b1_stride, const double *b2, std::size_t b2_stride, double b2_coeff,
    
                          std::vector<double> &out, std::size_t n) {
    
      ✗
        std::vector<double> lhs(n * n);
    
      ✗
        std::vector<double> rhs(n * n);
    
      ✗
        AddToBuffer(a1, a1_stride, a2, a2_stride, lhs.data(), n, a2_coeff);
    
      ✗
        AddToBuffer(b1, b1_stride, b2, b2_stride, rhs.data(), n, b2_coeff);
    
      ✗
        out.assign(n * n, 0.0);
    
      ✗
        kStrassenSeqFn(lhs.data(), n, rhs.data(), n, out.data(), n, n);
    
      ✗
      }
    
      ✗
      void ComputeProductSingle(const double *a, std::size_t a_stride, const double *b1, std::size_t b1_stride,
    
                                const double *b2, std::size_t b2_stride, double b2_coeff, std::vector<double> &out,
    
                                std::size_t n) {
    
      ✗
        std::vector<double> rhs(n * n);
    
      ✗
        AddToBuffer(b1, b1_stride, b2, b2_stride, rhs.data(), n, b2_coeff);
    
      ✗
        out.assign(n * n, 0.0);
    
      ✗
        kStrassenSeqFn(a, a_stride, rhs.data(), n, out.data(), n, n);
    
      ✗
      }
    
      ✗
      void ComputeProductSingleLeft(const double *a1, std::size_t a1_stride, const double *a2, std::size_t a2_stride,
    
                                    double a2_coeff, const double *b, std::size_t b_stride, std::vector<double> &out,
    
                                    std::size_t n) {
    
      ✗
        std::vector<double> lhs(n * n);
    
      ✗
        AddToBuffer(a1, a1_stride, a2, a2_stride, lhs.data(), n, a2_coeff);
    
      ✗
        out.assign(n * n, 0.0);
    
      ✗
        kStrassenSeqFn(lhs.data(), n, b, b_stride, out.data(), n, n);
    
      ✗
      }
    
      84
      void StrassenTopTbb(const double *a, std::size_t a_stride, const double *b, std::size_t b_stride, double *c,
    
                          std::size_t c_stride, std::size_t n) {
    
        1/4✗ Branch 0 not taken.
✓ Branch 1 taken 84 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      84
        if (n <= kCutoff || ppc::util::GetNumThreads() <= 1) {
    
      84
          kStrassenSeqFn(a, a_stride, b, b_stride, c, c_stride, n);
    
      84
          return;
    
        }
    
      ✗
        const std::size_t half = n / 2;
    
      ✗
        const double *a11 = a;
    
      ✗
        const double *a12 = a + half;
    
      ✗
        const double *a21 = a + (half * a_stride);
    
      ✗
        const double *a22 = a21 + half;
    
      ✗
        const double *b11 = b;
    
      ✗
        const double *b12 = b + half;
    
      ✗
        const double *b21 = b + (half * b_stride);
    
      ✗
        const double *b22 = b21 + half;
    
      ✗
        std::vector<double> m1;
    
      ✗
        std::vector<double> m2;
    
      ✗
        std::vector<double> m3;
    
      ✗
        std::vector<double> m4;
    
      ✗
        std::vector<double> m5;
    
      ✗
        std::vector<double> m6;
    
      ✗
        std::vector<double> m7;
    
        oneapi::tbb::parallel_invoke(
    
      ✗
            [&] { ComputeProduct(a11, a_stride, a22, a_stride, 1.0, b11, b_stride, b22, b_stride, 1.0, m1, half); }, [&] {
    
      ✗
          ComputeProductSingleLeft(a21, a_stride, a22, a_stride, 1.0, b11, b_stride, m2, half);
    
      ✗
        }, [&] { ComputeProductSingle(a11, a_stride, b12, b_stride, b22, b_stride, -1.0, m3, half); }, [&] {
    
      ✗
          ComputeProductSingle(a22, a_stride, b21, b_stride, b11, b_stride, -1.0, m4, half);
    
      ✗
        }, [&] { ComputeProductSingleLeft(a11, a_stride, a12, a_stride, 1.0, b22, b_stride, m5, half); }, [&] {
    
      ✗
          ComputeProduct(a21, a_stride, a11, a_stride, -1.0, b11, b_stride, b12, b_stride, 1.0, m6, half);
    
      ✗
        }, [&] { ComputeProduct(a12, a_stride, a22, a_stride, -1.0, b21, b_stride, b22, b_stride, 1.0, m7, half); });
    
      ✗
        CombineQuadrants(m1, m2, m3, m4, m5, m6, m7, c, c_stride, half);
    
      }
    
      84
      void StrassenSeqImpl(const double *a, std::size_t a_stride, const double *b, std::size_t b_stride, double *c,
    
                           std::size_t c_stride, std::size_t n) {
    
        1/2✓ Branch 0 taken 84 times.
✗ Branch 1 not taken.

      84
        if (n <= kCutoff) {
    
      84
          NaiveMulBlocked(a, a_stride, b, b_stride, c, c_stride, n);
    
      84
          return;
    
        }
    
      ✗
        const std::size_t half = n / 2;
    
        const double *a11 = a;
    
      ✗
        const double *a12 = a + half;
    
      ✗
        const double *a21 = a + (half * a_stride);
    
      ✗
        const double *a22 = a21 + half;
    
        const double *b11 = b;
    
      ✗
        const double *b12 = b + half;
    
      ✗
        const double *b21 = b + (half * b_stride);
    
      ✗
        const double *b22 = b21 + half;
    
      ✗
        std::vector<double> m1(half * half);
    
      ✗
        std::vector<double> m2(half * half);
    
      ✗
        std::vector<double> m3(half * half);
    
      ✗
        std::vector<double> m4(half * half);
    
      ✗
        std::vector<double> m5(half * half);
    
      ✗
        std::vector<double> m6(half * half);
    
      ✗
        std::vector<double> m7(half * half);
    
      ✗
        std::vector<double> lhs(half * half);
    
      ✗
        std::vector<double> rhs(half * half);
    
      ✗
        AddToBuffer(a11, a_stride, a22, a_stride, lhs.data(), half, 1.0);
    
      ✗
        AddToBuffer(b11, b_stride, b22, b_stride, rhs.data(), half, 1.0);
    
      ✗
        kStrassenSeqFn(lhs.data(), half, rhs.data(), half, m1.data(), half, half);
    
      ✗
        AddToBuffer(a21, a_stride, a22, a_stride, lhs.data(), half, 1.0);
    
      ✗
        kStrassenSeqFn(lhs.data(), half, b11, b_stride, m2.data(), half, half);
    
      ✗
        AddToBuffer(b12, b_stride, b22, b_stride, rhs.data(), half, -1.0);
    
      ✗
        kStrassenSeqFn(a11, a_stride, rhs.data(), half, m3.data(), half, half);
    
      ✗
        AddToBuffer(b21, b_stride, b11, b_stride, rhs.data(), half, -1.0);
    
      ✗
        kStrassenSeqFn(a22, a_stride, rhs.data(), half, m4.data(), half, half);
    
      ✗
        AddToBuffer(a11, a_stride, a12, a_stride, lhs.data(), half, 1.0);
    
      ✗
        kStrassenSeqFn(lhs.data(), half, b22, b_stride, m5.data(), half, half);
    
      ✗
        AddToBuffer(a21, a_stride, a11, a_stride, lhs.data(), half, -1.0);
    
      ✗
        AddToBuffer(b11, b_stride, b12, b_stride, rhs.data(), half, 1.0);
    
      ✗
        kStrassenSeqFn(lhs.data(), half, rhs.data(), half, m6.data(), half, half);
    
      ✗
        AddToBuffer(a12, a_stride, a22, a_stride, lhs.data(), half, -1.0);
    
      ✗
        AddToBuffer(b21, b_stride, b22, b_stride, rhs.data(), half, 1.0);
    
      ✗
        kStrassenSeqFn(lhs.data(), half, rhs.data(), half, m7.data(), half, half);
    
      ✗
        CombineQuadrants(m1, m2, m3, m4, m5, m6, m7, c, c_stride, half);
    
      }
    
      }  // namespace
    
        1/2✓ Branch 1 taken 84 times.
✗ Branch 2 not taken.

      84
      ZorinDStrassenAlgMatrixTBB::ZorinDStrassenAlgMatrixTBB(const InType &in) {
    
        SetTypeOfTask(GetStaticTypeOfTask());
    
        GetInput() = in;
    
        GetOutput().clear();
    
      84
      }
    
      84
      bool ZorinDStrassenAlgMatrixTBB::ValidationImpl() {
    
        const auto &in = GetInput();
    
        1/2✓ Branch 0 taken 84 times.
✗ Branch 1 not taken.

      84
        if (in.n == 0) {
    
          return false;
    
        }
    
        1/2✓ Branch 0 taken 84 times.
✗ Branch 1 not taken.

      84
        if (in.a.size() != in.n * in.n) {
    
          return false;
    
        }
    
        1/2✓ Branch 0 taken 84 times.
✗ Branch 1 not taken.

      84
        if (in.b.size() != in.n * in.n) {
    
          return false;
    
        }
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 84 times.

      84
        if (!GetOutput().empty()) {
    
      ✗
          return false;
    
        }
    
        return true;
    
      }
    
      84
      bool ZorinDStrassenAlgMatrixTBB::PreProcessingImpl() {
    
      84
        const auto n = GetInput().n;
    
      84
        GetOutput().assign(n * n, 0.0);
    
      84
        return true;
    
      }
    
      84
      bool ZorinDStrassenAlgMatrixTBB::RunImpl() {
    
        const auto &in = GetInput();
    
        2/2✓ Branch 0 taken 72 times.
✓ Branch 1 taken 12 times.

      84
        const std::size_t n = in.n;
    
        const std::size_t padded = NextPow2(n);
    
      84
        std::vector<double> a_pad(padded * padded, 0.0);
    
        1/4✓ Branch 1 taken 84 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      84
        std::vector<double> b_pad(padded * padded, 0.0);
    
        1/4✓ Branch 1 taken 84 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      84
        std::vector<double> c_pad(padded * padded, 0.0);
    
        2/2✓ Branch 0 taken 528 times.
✓ Branch 1 taken 84 times.

      612
        for (std::size_t i = 0; i < n; ++i) {
    
      528
          std::copy_n(in.a.data() + (i * n), n, a_pad.data() + (i * padded));
    
      528
          std::copy_n(in.b.data() + (i * n), n, b_pad.data() + (i * padded));
    
        }
    
        oneapi::tbb::global_control control(oneapi::tbb::global_control::max_allowed_parallelism,
    
        3/8✓ Branch 1 taken 84 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 84 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 84 times.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.

      84
                                            static_cast<std::size_t>(std::max(1, ppc::util::GetNumThreads())));
    
        (void)control;
    
        1/2✓ Branch 1 taken 84 times.
✗ Branch 2 not taken.

      84
        StrassenTopTbb(a_pad.data(), padded, b_pad.data(), padded, c_pad.data(), padded, padded);
    
        auto &out = GetOutput();
    
        2/2✓ Branch 0 taken 528 times.
✓ Branch 1 taken 84 times.

      612
        for (std::size_t i = 0; i < n; ++i) {
    
      528
          std::copy_n(c_pad.data() + (i * padded), n, out.data() + (i * n));
    
        }
    
      84
        return true;
    
      }
    
      84
      bool ZorinDStrassenAlgMatrixTBB::PostProcessingImpl() {
    
      84
        return true;
    
      }
    
      }  // namespace zorin_d_strassen_alg_matrix_seq

Line	Branch	Exec	Source
1			#include "zorin_d_strassen_alg_matrix_seq/tbb/include/ops_tbb.hpp"
2
3			#include <oneapi/tbb/global_control.h>
4			#include <oneapi/tbb/parallel_invoke.h>
5
6			#include <algorithm>
7			#include <cstddef>
8			#include <vector>
9
10			#include "util/include/util.hpp"
11			#include "zorin_d_strassen_alg_matrix_seq/common/include/common.hpp"
12
13			namespace zorin_d_strassen_alg_matrix_seq {
14
15			namespace {
16
17			constexpr std::size_t kCutoff = 256;
18			constexpr std::size_t kBlockSize = 64;
19
20			std::size_t NextPow2(std::size_t x) {
21		84	if (x <= 1) {
22			return 1;
23			}
24			std::size_t p = 1;
25	2/2 ✓ Branch 0 taken 204 times. ✓ Branch 1 taken 72 times.	276	while (p < x) {
26		204	p <<= 1;
27			}
28			return p;
29			}
30
31			void ZeroMatrix(double *dst, std::size_t stride, std::size_t n) {
32	2/2 ✓ Branch 0 taken 660 times. ✓ Branch 1 taken 84 times.	744	for (std::size_t i = 0; i < n; ++i) {
33		660	std::fill_n(dst + (i * stride), n, 0.0);
34			}
35			}
36
37		✗	void AddToBuffer(const double a, std::size_t a_stride, const double b, std::size_t b_stride, double *dst,
38			std::size_t n, double b_coeff) {
39		✗	for (std::size_t i = 0; i < n; ++i) {
40		✗	const double a_row = a + (i a_stride);
41		✗	const double b_row = b + (i b_stride);
42		✗	double dst_row = dst + (i n);
43		✗	for (std::size_t j = 0; j < n; ++j) {
44		✗	dst_row[j] = a_row[j] + (b_coeff * b_row[j]);
45			}
46			}
47		✗	}
48
49			void MulMicroBlock(const double a, std::size_t a_stride, const double b, std::size_t b_stride, double *c,
50			std::size_t c_stride, std::size_t i_begin, std::size_t i_end, std::size_t k_begin, std::size_t k_end,
51			std::size_t j_begin, std::size_t j_end) {
52	2/2 ✓ Branch 0 taken 660 times. ✓ Branch 1 taken 84 times.	744	for (std::size_t i = i_begin; i < i_end; ++i) {
53		660	double c_row = c + (i c_stride);
54		660	const double a_row = a + (i a_stride);
55	2/2 ✓ Branch 0 taken 7932 times. ✓ Branch 1 taken 660 times.	8592	for (std::size_t k = k_begin; k < k_end; ++k) {
56		7932	const double aik = a_row[k];
57		7932	const double b_row = b + (k b_stride);
58	2/2 ✓ Branch 0 taken 111468 times. ✓ Branch 1 taken 7932 times.	119400	for (std::size_t j = j_begin; j < j_end; ++j) {
59		111468	c_row[j] += aik * b_row[j];
60			}
61			}
62			}
63			}
64
65		84	void MulJBlocks(const double a, std::size_t a_stride, const double b, std::size_t b_stride, double *c,
66			std::size_t c_stride, std::size_t n, std::size_t ii, std::size_t i_end, std::size_t kk,
67			std::size_t k_end) {
68	2/2 ✓ Branch 0 taken 84 times. ✓ Branch 1 taken 84 times.	168	for (std::size_t jj = 0; jj < n; jj += kBlockSize) {
69		84	const std::size_t j_end = std::min(jj + kBlockSize, n);
70			MulMicroBlock(a, a_stride, b, b_stride, c, c_stride, ii, i_end, kk, k_end, jj, j_end);
71			}
72		84	}
73
74			void MulKBlocks(const double a, std::size_t a_stride, const double b, std::size_t b_stride, double *c,
75			std::size_t c_stride, std::size_t n, std::size_t ii, std::size_t i_end) {
76	2/2 ✓ Branch 0 taken 84 times. ✓ Branch 1 taken 84 times.	168	for (std::size_t kk = 0; kk < n; kk += kBlockSize) {
77		84	const std::size_t k_end = std::min(kk + kBlockSize, n);
78		84	MulJBlocks(a, a_stride, b, b_stride, c, c_stride, n, ii, i_end, kk, k_end);
79			}
80			}
81
82		84	void NaiveMulBlocked(const double a, std::size_t a_stride, const double b, std::size_t b_stride, double *c,
83			std::size_t c_stride, std::size_t n) {
84		84	ZeroMatrix(c, c_stride, n);
85
86	2/2 ✓ Branch 0 taken 84 times. ✓ Branch 1 taken 84 times.	168	for (std::size_t ii = 0; ii < n; ii += kBlockSize) {
87		84	const std::size_t i_end = std::min(ii + kBlockSize, n);
88			MulKBlocks(a, a_stride, b, b_stride, c, c_stride, n, ii, i_end);
89			}
90		84	}
91
92		✗	void CombineQuadrants(const std::vector<double> &m1, const std::vector<double> &m2, const std::vector<double> &m3,
93			const std::vector<double> &m4, const std::vector<double> &m5, const std::vector<double> &m6,
94			const std::vector<double> &m7, double *c, std::size_t c_stride, std::size_t half) {
95		✗	for (std::size_t i = 0; i < half; ++i) {
96		✗	double c11 = c + (i c_stride);
97			double *c12 = c11 + half;
98		✗	double c21 = c + ((i + half) c_stride);
99			double *c22 = c21 + half;
100
101		✗	const double m1_row = m1.data() + (i half);
102			const double m2_row = m2.data() + (i half);
103			const double m3_row = m3.data() + (i half);
104			const double m4_row = m4.data() + (i half);
105			const double m5_row = m5.data() + (i half);
106			const double m6_row = m6.data() + (i half);
107			const double m7_row = m7.data() + (i half);
108
109		✗	for (std::size_t j = 0; j < half; ++j) {
110		✗	c11[j] = m1_row[j] + m4_row[j] - m5_row[j] + m7_row[j];
111		✗	c12[j] = m3_row[j] + m5_row[j];
112		✗	c21[j] = m2_row[j] + m4_row[j];
113		✗	c22[j] = m1_row[j] - m2_row[j] + m3_row[j] + m6_row[j];
114			}
115			}
116		✗	}
117
118			using StrassenSeqFn = void ()(const double , std::size_t, const double , std::size_t, double , std::size_t,
119			std::size_t);
120
121			void StrassenSeqImpl(const double a, std::size_t a_stride, const double b, std::size_t b_stride, double *c,
122			std::size_t c_stride, std::size_t n);
123
124			constexpr StrassenSeqFn kStrassenSeqFn = &StrassenSeqImpl;
125
126		✗	void ComputeProduct(const double a1, std::size_t a1_stride, const double a2, std::size_t a2_stride, double a2_coeff,
127			const double b1, std::size_t b1_stride, const double b2, std::size_t b2_stride, double b2_coeff,
128			std::vector<double> &out, std::size_t n) {
129		✗	std::vector<double> lhs(n * n);
130		✗	std::vector<double> rhs(n * n);
131		✗	AddToBuffer(a1, a1_stride, a2, a2_stride, lhs.data(), n, a2_coeff);
132		✗	AddToBuffer(b1, b1_stride, b2, b2_stride, rhs.data(), n, b2_coeff);
133		✗	out.assign(n * n, 0.0);
134		✗	kStrassenSeqFn(lhs.data(), n, rhs.data(), n, out.data(), n, n);
135		✗	}
136
137		✗	void ComputeProductSingle(const double a, std::size_t a_stride, const double b1, std::size_t b1_stride,
138			const double *b2, std::size_t b2_stride, double b2_coeff, std::vector<double> &out,
139			std::size_t n) {
140		✗	std::vector<double> rhs(n * n);
141		✗	AddToBuffer(b1, b1_stride, b2, b2_stride, rhs.data(), n, b2_coeff);
142		✗	out.assign(n * n, 0.0);
143		✗	kStrassenSeqFn(a, a_stride, rhs.data(), n, out.data(), n, n);
144		✗	}
145
146		✗	void ComputeProductSingleLeft(const double a1, std::size_t a1_stride, const double a2, std::size_t a2_stride,
147			double a2_coeff, const double *b, std::size_t b_stride, std::vector<double> &out,
148			std::size_t n) {
149		✗	std::vector<double> lhs(n * n);
150		✗	AddToBuffer(a1, a1_stride, a2, a2_stride, lhs.data(), n, a2_coeff);
151		✗	out.assign(n * n, 0.0);
152		✗	kStrassenSeqFn(lhs.data(), n, b, b_stride, out.data(), n, n);
153		✗	}
154
155		84	void StrassenTopTbb(const double a, std::size_t a_stride, const double b, std::size_t b_stride, double *c,
156			std::size_t c_stride, std::size_t n) {
157	1/4 ✗ Branch 0 not taken. ✓ Branch 1 taken 84 times. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	84	if (n <= kCutoff \|\| ppc::util::GetNumThreads() <= 1) {
158		84	kStrassenSeqFn(a, a_stride, b, b_stride, c, c_stride, n);
159		84	return;
160			}
161
162		✗	const std::size_t half = n / 2;
163
164		✗	const double *a11 = a;
165		✗	const double *a12 = a + half;
166		✗	const double a21 = a + (half a_stride);
167		✗	const double *a22 = a21 + half;
168
169		✗	const double *b11 = b;
170		✗	const double *b12 = b + half;
171		✗	const double b21 = b + (half b_stride);
172		✗	const double *b22 = b21 + half;
173
174		✗	std::vector<double> m1;
175		✗	std::vector<double> m2;
176		✗	std::vector<double> m3;
177		✗	std::vector<double> m4;
178		✗	std::vector<double> m5;
179		✗	std::vector<double> m6;
180		✗	std::vector<double> m7;
181
182			oneapi::tbb::parallel_invoke(
183		✗	[&] { ComputeProduct(a11, a_stride, a22, a_stride, 1.0, b11, b_stride, b22, b_stride, 1.0, m1, half); }, [&] {
184		✗	ComputeProductSingleLeft(a21, a_stride, a22, a_stride, 1.0, b11, b_stride, m2, half);
185		✗	}, [&] { ComputeProductSingle(a11, a_stride, b12, b_stride, b22, b_stride, -1.0, m3, half); }, [&] {
186		✗	ComputeProductSingle(a22, a_stride, b21, b_stride, b11, b_stride, -1.0, m4, half);
187		✗	}, [&] { ComputeProductSingleLeft(a11, a_stride, a12, a_stride, 1.0, b22, b_stride, m5, half); }, [&] {
188		✗	ComputeProduct(a21, a_stride, a11, a_stride, -1.0, b11, b_stride, b12, b_stride, 1.0, m6, half);
189		✗	}, [&] { ComputeProduct(a12, a_stride, a22, a_stride, -1.0, b21, b_stride, b22, b_stride, 1.0, m7, half); });
190
191		✗	CombineQuadrants(m1, m2, m3, m4, m5, m6, m7, c, c_stride, half);
192			}
193
194		84	void StrassenSeqImpl(const double a, std::size_t a_stride, const double b, std::size_t b_stride, double *c,
195			std::size_t c_stride, std::size_t n) {
196	1/2 ✓ Branch 0 taken 84 times. ✗ Branch 1 not taken.	84	if (n <= kCutoff) {
197		84	NaiveMulBlocked(a, a_stride, b, b_stride, c, c_stride, n);
198		84	return;
199			}
200
201		✗	const std::size_t half = n / 2;
202
203			const double *a11 = a;
204		✗	const double *a12 = a + half;
205		✗	const double a21 = a + (half a_stride);
206		✗	const double *a22 = a21 + half;
207
208			const double *b11 = b;
209		✗	const double *b12 = b + half;
210		✗	const double b21 = b + (half b_stride);
211		✗	const double *b22 = b21 + half;
212
213		✗	std::vector<double> m1(half * half);
214		✗	std::vector<double> m2(half * half);
215		✗	std::vector<double> m3(half * half);
216		✗	std::vector<double> m4(half * half);
217		✗	std::vector<double> m5(half * half);
218		✗	std::vector<double> m6(half * half);
219		✗	std::vector<double> m7(half * half);
220		✗	std::vector<double> lhs(half * half);
221		✗	std::vector<double> rhs(half * half);
222
223		✗	AddToBuffer(a11, a_stride, a22, a_stride, lhs.data(), half, 1.0);
224		✗	AddToBuffer(b11, b_stride, b22, b_stride, rhs.data(), half, 1.0);
225		✗	kStrassenSeqFn(lhs.data(), half, rhs.data(), half, m1.data(), half, half);
226
227		✗	AddToBuffer(a21, a_stride, a22, a_stride, lhs.data(), half, 1.0);
228		✗	kStrassenSeqFn(lhs.data(), half, b11, b_stride, m2.data(), half, half);
229
230		✗	AddToBuffer(b12, b_stride, b22, b_stride, rhs.data(), half, -1.0);
231		✗	kStrassenSeqFn(a11, a_stride, rhs.data(), half, m3.data(), half, half);
232
233		✗	AddToBuffer(b21, b_stride, b11, b_stride, rhs.data(), half, -1.0);
234		✗	kStrassenSeqFn(a22, a_stride, rhs.data(), half, m4.data(), half, half);
235
236		✗	AddToBuffer(a11, a_stride, a12, a_stride, lhs.data(), half, 1.0);
237		✗	kStrassenSeqFn(lhs.data(), half, b22, b_stride, m5.data(), half, half);
238
239		✗	AddToBuffer(a21, a_stride, a11, a_stride, lhs.data(), half, -1.0);
240		✗	AddToBuffer(b11, b_stride, b12, b_stride, rhs.data(), half, 1.0);
241		✗	kStrassenSeqFn(lhs.data(), half, rhs.data(), half, m6.data(), half, half);
242
243		✗	AddToBuffer(a12, a_stride, a22, a_stride, lhs.data(), half, -1.0);
244		✗	AddToBuffer(b21, b_stride, b22, b_stride, rhs.data(), half, 1.0);
245		✗	kStrassenSeqFn(lhs.data(), half, rhs.data(), half, m7.data(), half, half);
246
247		✗	CombineQuadrants(m1, m2, m3, m4, m5, m6, m7, c, c_stride, half);
248			}
249
250			} // namespace
251
252	1/2 ✓ Branch 1 taken 84 times. ✗ Branch 2 not taken.	84	ZorinDStrassenAlgMatrixTBB::ZorinDStrassenAlgMatrixTBB(const InType &in) {
253			SetTypeOfTask(GetStaticTypeOfTask());
254			GetInput() = in;
255			GetOutput().clear();
256		84	}
257
258		84	bool ZorinDStrassenAlgMatrixTBB::ValidationImpl() {
259			const auto &in = GetInput();
260	1/2 ✓ Branch 0 taken 84 times. ✗ Branch 1 not taken.	84	if (in.n == 0) {
261			return false;
262			}
263	1/2 ✓ Branch 0 taken 84 times. ✗ Branch 1 not taken.	84	if (in.a.size() != in.n * in.n) {
264			return false;
265			}
266	1/2 ✓ Branch 0 taken 84 times. ✗ Branch 1 not taken.	84	if (in.b.size() != in.n * in.n) {
267			return false;
268			}
269	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 84 times.	84	if (!GetOutput().empty()) {
270		✗	return false;
271			}
272			return true;
273			}
274
275		84	bool ZorinDStrassenAlgMatrixTBB::PreProcessingImpl() {
276		84	const auto n = GetInput().n;
277		84	GetOutput().assign(n * n, 0.0);
278		84	return true;
279			}
280
281		84	bool ZorinDStrassenAlgMatrixTBB::RunImpl() {
282			const auto &in = GetInput();
283	2/2 ✓ Branch 0 taken 72 times. ✓ Branch 1 taken 12 times.	84	const std::size_t n = in.n;
284			const std::size_t padded = NextPow2(n);
285
286		84	std::vector<double> a_pad(padded * padded, 0.0);
287	1/4 ✓ Branch 1 taken 84 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	84	std::vector<double> b_pad(padded * padded, 0.0);
288	1/4 ✓ Branch 1 taken 84 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	84	std::vector<double> c_pad(padded * padded, 0.0);
289
290	2/2 ✓ Branch 0 taken 528 times. ✓ Branch 1 taken 84 times.	612	for (std::size_t i = 0; i < n; ++i) {
291		528	std::copy_n(in.a.data() + (i * n), n, a_pad.data() + (i * padded));
292		528	std::copy_n(in.b.data() + (i * n), n, b_pad.data() + (i * padded));
293			}
294
295			oneapi::tbb::global_control control(oneapi::tbb::global_control::max_allowed_parallelism,
296	3/8 ✓ Branch 1 taken 84 times. ✗ Branch 2 not taken. ✓ Branch 4 taken 84 times. ✗ Branch 5 not taken. ✓ Branch 7 taken 84 times. ✗ Branch 8 not taken. ✗ Branch 9 not taken. ✗ Branch 10 not taken.	84	static_cast<std::size_t>(std::max(1, ppc::util::GetNumThreads())));
297			(void)control;
298	1/2 ✓ Branch 1 taken 84 times. ✗ Branch 2 not taken.	84	StrassenTopTbb(a_pad.data(), padded, b_pad.data(), padded, c_pad.data(), padded, padded);
299
300			auto &out = GetOutput();
301	2/2 ✓ Branch 0 taken 528 times. ✓ Branch 1 taken 84 times.	612	for (std::size_t i = 0; i < n; ++i) {
302		528	std::copy_n(c_pad.data() + (i * padded), n, out.data() + (i * n));
303			}
304
305		84	return true;
306			}
307
308		84	bool ZorinDStrassenAlgMatrixTBB::PostProcessingImpl() {
309		84	return true;
310			}
311
312			} // namespace zorin_d_strassen_alg_matrix_seq
313