GCC Code Coverage Report

Directory:	./
File:	tasks/akhmetov_daniil_strassen_dense_double/omp/src/ops_omp.cpp
Date:	2026-06-04 20:25:32

	Exec	Total	Coverage
Lines:	142	144	98.6%
Functions:	13	13	100.0%
Branches:	42	78	53.8%

  
      Line
      Branch
      Exec
      Source
    
      #include "akhmetov_daniil_strassen_dense_double/omp/include/ops_omp.hpp"
    
      #include <cstddef>
    
      #include <utility>
    
      #include <vector>
    
      #include "akhmetov_daniil_strassen_dense_double/common/include/common.hpp"
    
      namespace akhmetov_daniil_strassen_dense_double {
    
        1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.

      12
      AkhmetovDStrassenDenseDoubleOMP::AkhmetovDStrassenDenseDoubleOMP(const InType &in) {
    
        SetTypeOfTask(GetStaticTypeOfTask());
    
        1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.

      12
        GetInput() = in;
    
      12
      }
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.

      12
      bool AkhmetovDStrassenDenseDoubleOMP::ValidationImpl() {
    
        const auto &input = GetInput();
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.

      12
        if (input.empty()) {
    
          return false;
    
        }
    
      12
        const size_t n = format::GetN(input);
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.

      12
        if (n == 0) {
    
          return false;
    
        }
    
      12
        const size_t expected_size = 1 + (2 * n * n);
    
      12
        return input.size() == expected_size;
    
      }
    
      12
      bool AkhmetovDStrassenDenseDoubleOMP::PreProcessingImpl() {
    
        const auto &input = GetInput();
    
      12
        const size_t n = format::GetN(input);
    
      12
        GetOutput().assign(n * n, 0.0);
    
      12
        return true;
    
      }
    
      namespace {
    
      constexpr size_t kThreshold = 64;
    
      [[maybe_unused]] constexpr size_t kParallelThreshold = 256;
    
      inline size_t NextPow2(size_t n) {
    
        size_t p = 1;
    
        2/2✓ Branch 0 taken 60 times.
✓ Branch 1 taken 8 times.

      68
        while (p < n) {
    
      60
          p <<= 1;
    
        }
    
        return p;
    
      }
    
      228
      Matrix StandardMultiply(const Matrix &a, const Matrix &b, size_t size) {
    
      228
        Matrix c(size * size, 0.0);
    
      228
      #pragma omp parallel for default(none) shared(a, b, c, size) schedule(static) if (size >= kParallelThreshold)
    
        for (size_t i = 0; i < size; ++i) {
    
          for (size_t k = 0; k < size; ++k) {
    
            const double aik = a.at((i * size) + k);
    
            const size_t bk = k * size;
    
            const size_t ci = i * size;
    
            for (size_t j = 0; j < size; ++j) {
    
              c.at(ci + j) += aik * b.at(bk + j);
    
            }
    
          }
    
        }
    
      228
        return c;
    
      }
    
      72
      void Split(const Matrix &src, Matrix &a11, Matrix &a12, Matrix &a21, Matrix &a22, size_t size) {
    
      72
        const size_t half = size / 2;
    
      72
        const size_t block = half * half;
    
      72
        a11.assign(block, 0.0);
    
      72
        a12.assign(block, 0.0);
    
      72
        a21.assign(block, 0.0);
    
      72
        a22.assign(block, 0.0);
    
      72
      #pragma omp parallel for default(none) shared(src, a11, a12, a21, a22, size, half) \
    
          schedule(static) if (size >= kParallelThreshold)
    
        for (size_t i = 0; i < half; ++i) {
    
          const size_t is = i * size;
    
          const size_t ih = i * half;
    
          const size_t is2 = (i + half) * size;
    
          for (size_t j = 0; j < half; ++j) {
    
            a11.at(ih + j) = src.at(is + j);
    
            a12.at(ih + j) = src.at(is + j + half);
    
            a21.at(ih + j) = src.at(is2 + j);
    
            a22.at(ih + j) = src.at(is2 + j + half);
    
          }
    
        }
    
      72
      }
    
      void Merge(Matrix &dst, const Matrix &c11, const Matrix &c12, const Matrix &c21, const Matrix &c22, size_t size) {
    
      36
        const size_t half = size / 2;
    
      36
      #pragma omp parallel for default(none) shared(dst, c11, c12, c21, c22, size, half) \
    
          schedule(static) if (size >= kParallelThreshold)
    
        for (size_t i = 0; i < half; ++i) {
    
          const size_t is = i * size;
    
          const size_t ih = i * half;
    
          const size_t is2 = (i + half) * size;
    
          for (size_t j = 0; j < half; ++j) {
    
            dst.at(is + j) = c11.at(ih + j);
    
            dst.at(is + j + half) = c12.at(ih + j);
    
            dst.at(is2 + j) = c21.at(ih + j);
    
            dst.at(is2 + j + half) = c22.at(ih + j);
    
          }
    
        }
    
      }
    
      216
      inline void AddInto(const Matrix &a, const Matrix &b, Matrix &c) {
    
        const size_t n = a.size();
    
      216
      #pragma omp parallel for default(none) shared(a, b, c, n) \
    
          schedule(static) if (n >= (kParallelThreshold * kParallelThreshold))
    
        for (size_t i = 0; i < n; ++i) {
    
          c.at(i) = a.at(i) + b.at(i);
    
        }
    
      216
      }
    
      144
      inline void SubInto(const Matrix &a, const Matrix &b, Matrix &c) {
    
        const size_t n = a.size();
    
      144
      #pragma omp parallel for default(none) shared(a, b, c, n) \
    
          schedule(static) if (n >= (kParallelThreshold * kParallelThreshold))
    
        for (size_t i = 0; i < n; ++i) {
    
          c.at(i) = a.at(i) - b.at(i);
    
        }
    
      144
      }
    
      struct Frame {
    
        Matrix a;
    
        Matrix b;
    
        Matrix result;
    
        size_t size;
    
        size_t stage{0};
    
        Matrix a11;
    
        Matrix a12;
    
        Matrix a21;
    
        Matrix a22;
    
        Matrix b11;
    
        Matrix b12;
    
        Matrix b21;
    
        Matrix b22;
    
        Matrix m1;
    
        Matrix m2;
    
        Matrix m3;
    
        Matrix m4;
    
        Matrix m5;
    
        Matrix m6;
    
        Matrix m7;
    
        Matrix temp_a;
    
        Matrix temp_b;
    
      260
        Frame(Matrix aa, Matrix bb, size_t s) : a(std::move(aa)), b(std::move(bb)), size(s) {}
    
      };
    
        1/2✓ Branch 0 taken 548 times.
✗ Branch 1 not taken.

      548
      void ProcessTopFrame(std::vector<Frame> &stack, Matrix &final_result) {
    
        1/2✓ Branch 0 taken 548 times.
✗ Branch 1 not taken.

      548
        if (stack.empty()) {
    
          return;
    
        }
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 548 times.

      548
        const size_t current_index = stack.size() - 1;
    
        Frame &frame = stack.at(current_index);
    
        2/2✓ Branch 0 taken 224 times.
✓ Branch 1 taken 324 times.

      548
        if (frame.size <= kThreshold) {
    
      224
          Matrix base = StandardMultiply(frame.a, frame.b, frame.size);
    
          stack.pop_back();
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 224 times.

      224
          if (stack.empty()) {
    
            final_result = std::move(base);
    
          } else {
    
      224
            stack.back().temp_a = std::move(base);
    
          }
    
          return;
    
        }
    
      324
        const size_t half = frame.size / 2;
    
      324
        const size_t block_size = half * half;
    
        9/10✓ Branch 0 taken 36 times.
✓ Branch 1 taken 36 times.
✓ Branch 2 taken 36 times.
✓ Branch 3 taken 36 times.
✓ Branch 4 taken 36 times.
✓ Branch 5 taken 36 times.
✓ Branch 6 taken 36 times.
✓ Branch 7 taken 36 times.
✓ Branch 8 taken 36 times.
✗ Branch 9 not taken.

      324
        switch (frame.stage) {
    
      36
          case 0: {
    
      36
            Split(frame.a, frame.a11, frame.a12, frame.a21, frame.a22, frame.size);
    
      36
            Split(frame.b, frame.b11, frame.b12, frame.b21, frame.b22, frame.size);
    
      36
            frame.temp_a.assign(block_size, 0.0);
    
      36
            frame.temp_b.assign(block_size, 0.0);
    
      36
            frame.m1.assign(block_size, 0.0);
    
      36
            frame.m2.assign(block_size, 0.0);
    
      36
            frame.m3.assign(block_size, 0.0);
    
      36
            frame.m4.assign(block_size, 0.0);
    
      36
            frame.m5.assign(block_size, 0.0);
    
      36
            frame.m6.assign(block_size, 0.0);
    
      36
            frame.m7.assign(block_size, 0.0);
    
      36
            frame.stage = 1;
    
      36
            return;
    
          }
    
      36
          case 1: {
    
      36
            AddInto(frame.a11, frame.a22, frame.temp_a);
    
      36
            AddInto(frame.b11, frame.b22, frame.temp_b);
    
      36
            stack.emplace_back(frame.temp_a, frame.temp_b, half);
    
      36
            stack.at(current_index).stage = 2;
    
      36
            return;
    
          }
    
      36
          case 2: {
    
      36
            frame.m1 = frame.temp_a;
    
      36
            AddInto(frame.a21, frame.a22, frame.temp_a);
    
      36
            stack.emplace_back(frame.temp_a, frame.b11, half);
    
      36
            stack.at(current_index).stage = 3;
    
      36
            return;
    
          }
    
      36
          case 3: {
    
      36
            frame.m2 = frame.temp_a;
    
      36
            SubInto(frame.b12, frame.b22, frame.temp_b);
    
      36
            stack.emplace_back(frame.a11, frame.temp_b, half);
    
      36
            stack.at(current_index).stage = 4;
    
      36
            return;
    
          }
    
      36
          case 4: {
    
      36
            frame.m3 = frame.temp_a;
    
      36
            SubInto(frame.b21, frame.b11, frame.temp_b);
    
      36
            stack.emplace_back(frame.a22, frame.temp_b, half);
    
      36
            stack.at(current_index).stage = 5;
    
      36
            return;
    
          }
    
      36
          case 5: {
    
      36
            frame.m4 = frame.temp_a;
    
      36
            AddInto(frame.a11, frame.a12, frame.temp_a);
    
      36
            stack.emplace_back(frame.temp_a, frame.b22, half);
    
      36
            stack.at(current_index).stage = 6;
    
      36
            return;
    
          }
    
      36
          case 6: {
    
      36
            frame.m5 = frame.temp_a;
    
      36
            SubInto(frame.a21, frame.a11, frame.temp_a);
    
      36
            AddInto(frame.b11, frame.b12, frame.temp_b);
    
      36
            stack.emplace_back(frame.temp_a, frame.temp_b, half);
    
      36
            stack.at(current_index).stage = 7;
    
      36
            return;
    
          }
    
      36
          case 7: {
    
      36
            frame.m6 = frame.temp_a;
    
      36
            SubInto(frame.a12, frame.a22, frame.temp_a);
    
      36
            AddInto(frame.b21, frame.b22, frame.temp_b);
    
      36
            stack.emplace_back(frame.temp_a, frame.temp_b, half);
    
      36
            stack.at(current_index).stage = 8;
    
      36
            return;
    
          }
    
      36
          case 8: {
    
      36
            frame.m7 = frame.temp_a;
    
      36
            Matrix c11(block_size);
    
        1/4✓ Branch 1 taken 36 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      36
            Matrix c12(block_size);
    
        1/4✓ Branch 1 taken 36 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      36
            Matrix c21(block_size);
    
        1/4✓ Branch 1 taken 36 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      36
            Matrix c22(block_size);
    
      36
      #pragma omp parallel for default(none) shared(c11, c12, c21, c22, frame, block_size) \
    
          schedule(static) if (block_size >= (kParallelThreshold * kParallelThreshold))
    
            for (size_t i = 0; i < block_size; ++i) {
    
              c11.at(i) = frame.m1.at(i) + frame.m4.at(i) - frame.m5.at(i) + frame.m7.at(i);
    
              c12.at(i) = frame.m3.at(i) + frame.m5.at(i);
    
              c21.at(i) = frame.m2.at(i) + frame.m4.at(i);
    
              c22.at(i) = frame.m1.at(i) - frame.m2.at(i) + frame.m3.at(i) + frame.m6.at(i);
    
            }
    
        1/4✓ Branch 1 taken 36 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      36
            Matrix merged(frame.size * frame.size, 0.0);
    
      36
            Merge(merged, c11, c12, c21, c22, frame.size);
    
            stack.pop_back();
    
        2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 28 times.

      36
            if (stack.empty()) {
    
              final_result = std::move(merged);
    
            } else {
    
      28
              stack.back().temp_a = std::move(merged);
    
            }
    
            return;
    
          }
    
          default:
    
            return;
    
        }
    
      }
    
      8
      Matrix StrassenMultiply(const Matrix &a_init, const Matrix &b_init, size_t size_init) {
    
      8
        std::vector<Frame> stack;
    
        1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.

      8
        stack.emplace_back(a_init, b_init, size_init);
    
      8
        Matrix result;
    
        2/2✓ Branch 0 taken 548 times.
✓ Branch 1 taken 8 times.

      556
        while (!stack.empty()) {
    
        1/2✓ Branch 1 taken 548 times.
✗ Branch 2 not taken.

      548
          ProcessTopFrame(stack, result);
    
        }
    
      8
        return result;
    
      8
      }
    
      16
      Matrix PadTo(const Matrix &src, size_t n, size_t new_n) {
    
        1/2✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.

      16
        if (new_n == n) {
    
      16
          return src;
    
        }
    
      ✗
        Matrix padded(new_n * new_n, 0.0);
    
      ✗
      #pragma omp parallel for default(none) shared(padded, src, n, new_n) schedule(static) if (new_n >= kParallelThreshold)
    
        for (size_t i = 0; i < n; ++i) {
    
          const size_t is = i * n;
    
          const size_t id = i * new_n;
    
          for (size_t j = 0; j < n; ++j) {
    
            padded.at(id + j) = src.at(is + j);
    
          }
    
        }
    
        return padded;
    
      }
    
      }  // namespace
    
      12
      bool AkhmetovDStrassenDenseDoubleOMP::RunImpl() {
    
        const auto &input = GetInput();
    
        auto &output = GetOutput();
    
      12
        const size_t n = format::GetN(input);
    
      12
        const Matrix a = format::GetA(input);
    
        1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.

      12
        const Matrix b = format::GetB(input);
    
        2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 8 times.

      12
        if (n <= kThreshold) {
    
        1/4✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      4
          output = StandardMultiply(a, b, n);
    
      4
          return true;
    
        }
    
        const size_t new_n = NextPow2(n);
    
        1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.

      8
        const Matrix a_padded = PadTo(a, n, new_n);
    
        1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.

      8
        const Matrix b_padded = PadTo(b, n, new_n);
    
        1/2✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.

      8
        const Matrix result_padded = StrassenMultiply(a_padded, b_padded, new_n);
    
        1/4✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      8
        output.assign(n * n, 0.0);
    
        1/2✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.

      8
      #pragma omp parallel for default(none) shared(output, result_padded, n, new_n) \
    
          schedule(static) if (n >= kParallelThreshold)
    
        for (size_t i = 0; i < n; ++i) {
    
          const size_t is = i * new_n;
    
          const size_t id = i * n;
    
          for (size_t j = 0; j < n; ++j) {
    
            output.at(id + j) = result_padded.at(is + j);
    
          }
    
        }
    
        return true;
    
      }
    
      12
      bool AkhmetovDStrassenDenseDoubleOMP::PostProcessingImpl() {
    
        const auto &input = GetInput();
    
      12
        const size_t n = format::GetN(input);
    
      12
        return GetOutput().size() == n * n;
    
      }
    
      }  // namespace akhmetov_daniil_strassen_dense_double

Line	Branch	Exec	Source
1			#include "akhmetov_daniil_strassen_dense_double/omp/include/ops_omp.hpp"
2
3			#include <cstddef>
4			#include <utility>
5			#include <vector>
6
7			#include "akhmetov_daniil_strassen_dense_double/common/include/common.hpp"
8
9			namespace akhmetov_daniil_strassen_dense_double {
10
11	1/2 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken.	12	AkhmetovDStrassenDenseDoubleOMP::AkhmetovDStrassenDenseDoubleOMP(const InType &in) {
12			SetTypeOfTask(GetStaticTypeOfTask());
13	1/2 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken.	12	GetInput() = in;
14		12	}
15
16	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 12 times.	12	bool AkhmetovDStrassenDenseDoubleOMP::ValidationImpl() {
17			const auto &input = GetInput();
18	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 12 times.	12	if (input.empty()) {
19			return false;
20			}
21		12	const size_t n = format::GetN(input);
22	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 12 times.	12	if (n == 0) {
23			return false;
24			}
25		12	const size_t expected_size = 1 + (2 * n * n);
26		12	return input.size() == expected_size;
27			}
28
29		12	bool AkhmetovDStrassenDenseDoubleOMP::PreProcessingImpl() {
30			const auto &input = GetInput();
31		12	const size_t n = format::GetN(input);
32		12	GetOutput().assign(n * n, 0.0);
33		12	return true;
34			}
35
36			namespace {
37
38			constexpr size_t kThreshold = 64;
39			[[maybe_unused]] constexpr size_t kParallelThreshold = 256;
40
41			inline size_t NextPow2(size_t n) {
42			size_t p = 1;
43	2/2 ✓ Branch 0 taken 60 times. ✓ Branch 1 taken 8 times.	68	while (p < n) {
44		60	p <<= 1;
45			}
46			return p;
47			}
48
49		228	Matrix StandardMultiply(const Matrix &a, const Matrix &b, size_t size) {
50		228	Matrix c(size * size, 0.0);
51		228	#pragma omp parallel for default(none) shared(a, b, c, size) schedule(static) if (size >= kParallelThreshold)
52			for (size_t i = 0; i < size; ++i) {
53			for (size_t k = 0; k < size; ++k) {
54			const double aik = a.at((i * size) + k);
55			const size_t bk = k * size;
56			const size_t ci = i * size;
57			for (size_t j = 0; j < size; ++j) {
58			c.at(ci + j) += aik * b.at(bk + j);
59			}
60			}
61			}
62		228	return c;
63			}
64
65		72	void Split(const Matrix &src, Matrix &a11, Matrix &a12, Matrix &a21, Matrix &a22, size_t size) {
66		72	const size_t half = size / 2;
67		72	const size_t block = half * half;
68		72	a11.assign(block, 0.0);
69		72	a12.assign(block, 0.0);
70		72	a21.assign(block, 0.0);
71		72	a22.assign(block, 0.0);
72
73		72	#pragma omp parallel for default(none) shared(src, a11, a12, a21, a22, size, half) \
74			schedule(static) if (size >= kParallelThreshold)
75			for (size_t i = 0; i < half; ++i) {
76			const size_t is = i * size;
77			const size_t ih = i * half;
78			const size_t is2 = (i + half) * size;
79			for (size_t j = 0; j < half; ++j) {
80			a11.at(ih + j) = src.at(is + j);
81			a12.at(ih + j) = src.at(is + j + half);
82			a21.at(ih + j) = src.at(is2 + j);
83			a22.at(ih + j) = src.at(is2 + j + half);
84			}
85			}
86		72	}
87
88			void Merge(Matrix &dst, const Matrix &c11, const Matrix &c12, const Matrix &c21, const Matrix &c22, size_t size) {
89		36	const size_t half = size / 2;
90		36	#pragma omp parallel for default(none) shared(dst, c11, c12, c21, c22, size, half) \
91			schedule(static) if (size >= kParallelThreshold)
92			for (size_t i = 0; i < half; ++i) {
93			const size_t is = i * size;
94			const size_t ih = i * half;
95			const size_t is2 = (i + half) * size;
96			for (size_t j = 0; j < half; ++j) {
97			dst.at(is + j) = c11.at(ih + j);
98			dst.at(is + j + half) = c12.at(ih + j);
99			dst.at(is2 + j) = c21.at(ih + j);
100			dst.at(is2 + j + half) = c22.at(ih + j);
101			}
102			}
103			}
104
105		216	inline void AddInto(const Matrix &a, const Matrix &b, Matrix &c) {
106			const size_t n = a.size();
107		216	#pragma omp parallel for default(none) shared(a, b, c, n) \
108			schedule(static) if (n >= (kParallelThreshold * kParallelThreshold))
109			for (size_t i = 0; i < n; ++i) {
110			c.at(i) = a.at(i) + b.at(i);
111			}
112		216	}
113
114		144	inline void SubInto(const Matrix &a, const Matrix &b, Matrix &c) {
115			const size_t n = a.size();
116		144	#pragma omp parallel for default(none) shared(a, b, c, n) \
117			schedule(static) if (n >= (kParallelThreshold * kParallelThreshold))
118			for (size_t i = 0; i < n; ++i) {
119			c.at(i) = a.at(i) - b.at(i);
120			}
121		144	}
122
123			struct Frame {
124			Matrix a;
125			Matrix b;
126			Matrix result;
127			size_t size;
128			size_t stage{0};
129
130			Matrix a11;
131			Matrix a12;
132			Matrix a21;
133			Matrix a22;
134			Matrix b11;
135			Matrix b12;
136			Matrix b21;
137			Matrix b22;
138
139			Matrix m1;
140			Matrix m2;
141			Matrix m3;
142			Matrix m4;
143			Matrix m5;
144			Matrix m6;
145			Matrix m7;
146
147			Matrix temp_a;
148			Matrix temp_b;
149
150		260	Frame(Matrix aa, Matrix bb, size_t s) : a(std::move(aa)), b(std::move(bb)), size(s) {}
151			};
152
153	1/2 ✓ Branch 0 taken 548 times. ✗ Branch 1 not taken.	548	void ProcessTopFrame(std::vector<Frame> &stack, Matrix &final_result) {
154	1/2 ✓ Branch 0 taken 548 times. ✗ Branch 1 not taken.	548	if (stack.empty()) {
155			return;
156			}
157
158	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 548 times.	548	const size_t current_index = stack.size() - 1;
159			Frame &frame = stack.at(current_index);
160
161	2/2 ✓ Branch 0 taken 224 times. ✓ Branch 1 taken 324 times.	548	if (frame.size <= kThreshold) {
162		224	Matrix base = StandardMultiply(frame.a, frame.b, frame.size);
163			stack.pop_back();
164	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 224 times.	224	if (stack.empty()) {
165			final_result = std::move(base);
166			} else {
167		224	stack.back().temp_a = std::move(base);
168			}
169			return;
170			}
171
172		324	const size_t half = frame.size / 2;
173		324	const size_t block_size = half * half;
174
175	9/10 ✓ Branch 0 taken 36 times. ✓ Branch 1 taken 36 times. ✓ Branch 2 taken 36 times. ✓ Branch 3 taken 36 times. ✓ Branch 4 taken 36 times. ✓ Branch 5 taken 36 times. ✓ Branch 6 taken 36 times. ✓ Branch 7 taken 36 times. ✓ Branch 8 taken 36 times. ✗ Branch 9 not taken.	324	switch (frame.stage) {
176		36	case 0: {
177		36	Split(frame.a, frame.a11, frame.a12, frame.a21, frame.a22, frame.size);
178		36	Split(frame.b, frame.b11, frame.b12, frame.b21, frame.b22, frame.size);
179
180		36	frame.temp_a.assign(block_size, 0.0);
181		36	frame.temp_b.assign(block_size, 0.0);
182		36	frame.m1.assign(block_size, 0.0);
183		36	frame.m2.assign(block_size, 0.0);
184		36	frame.m3.assign(block_size, 0.0);
185		36	frame.m4.assign(block_size, 0.0);
186		36	frame.m5.assign(block_size, 0.0);
187		36	frame.m6.assign(block_size, 0.0);
188		36	frame.m7.assign(block_size, 0.0);
189
190		36	frame.stage = 1;
191		36	return;
192			}
193
194		36	case 1: {
195		36	AddInto(frame.a11, frame.a22, frame.temp_a);
196		36	AddInto(frame.b11, frame.b22, frame.temp_b);
197
198		36	stack.emplace_back(frame.temp_a, frame.temp_b, half);
199		36	stack.at(current_index).stage = 2;
200		36	return;
201			}
202
203		36	case 2: {
204		36	frame.m1 = frame.temp_a;
205		36	AddInto(frame.a21, frame.a22, frame.temp_a);
206
207		36	stack.emplace_back(frame.temp_a, frame.b11, half);
208		36	stack.at(current_index).stage = 3;
209		36	return;
210			}
211
212		36	case 3: {
213		36	frame.m2 = frame.temp_a;
214		36	SubInto(frame.b12, frame.b22, frame.temp_b);
215
216		36	stack.emplace_back(frame.a11, frame.temp_b, half);
217		36	stack.at(current_index).stage = 4;
218		36	return;
219			}
220
221		36	case 4: {
222		36	frame.m3 = frame.temp_a;
223		36	SubInto(frame.b21, frame.b11, frame.temp_b);
224
225		36	stack.emplace_back(frame.a22, frame.temp_b, half);
226		36	stack.at(current_index).stage = 5;
227		36	return;
228			}
229
230		36	case 5: {
231		36	frame.m4 = frame.temp_a;
232		36	AddInto(frame.a11, frame.a12, frame.temp_a);
233
234		36	stack.emplace_back(frame.temp_a, frame.b22, half);
235		36	stack.at(current_index).stage = 6;
236		36	return;
237			}
238
239		36	case 6: {
240		36	frame.m5 = frame.temp_a;
241		36	SubInto(frame.a21, frame.a11, frame.temp_a);
242		36	AddInto(frame.b11, frame.b12, frame.temp_b);
243
244		36	stack.emplace_back(frame.temp_a, frame.temp_b, half);
245		36	stack.at(current_index).stage = 7;
246		36	return;
247			}
248
249		36	case 7: {
250		36	frame.m6 = frame.temp_a;
251		36	SubInto(frame.a12, frame.a22, frame.temp_a);
252		36	AddInto(frame.b21, frame.b22, frame.temp_b);
253
254		36	stack.emplace_back(frame.temp_a, frame.temp_b, half);
255		36	stack.at(current_index).stage = 8;
256		36	return;
257			}
258
259		36	case 8: {
260		36	frame.m7 = frame.temp_a;
261
262		36	Matrix c11(block_size);
263	1/4 ✓ Branch 1 taken 36 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	36	Matrix c12(block_size);
264	1/4 ✓ Branch 1 taken 36 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	36	Matrix c21(block_size);
265	1/4 ✓ Branch 1 taken 36 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	36	Matrix c22(block_size);
266
267		36	#pragma omp parallel for default(none) shared(c11, c12, c21, c22, frame, block_size) \
268			schedule(static) if (block_size >= (kParallelThreshold * kParallelThreshold))
269			for (size_t i = 0; i < block_size; ++i) {
270			c11.at(i) = frame.m1.at(i) + frame.m4.at(i) - frame.m5.at(i) + frame.m7.at(i);
271			c12.at(i) = frame.m3.at(i) + frame.m5.at(i);
272			c21.at(i) = frame.m2.at(i) + frame.m4.at(i);
273			c22.at(i) = frame.m1.at(i) - frame.m2.at(i) + frame.m3.at(i) + frame.m6.at(i);
274			}
275
276	1/4 ✓ Branch 1 taken 36 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	36	Matrix merged(frame.size * frame.size, 0.0);
277		36	Merge(merged, c11, c12, c21, c22, frame.size);
278
279			stack.pop_back();
280	2/2 ✓ Branch 0 taken 8 times. ✓ Branch 1 taken 28 times.	36	if (stack.empty()) {
281			final_result = std::move(merged);
282			} else {
283		28	stack.back().temp_a = std::move(merged);
284			}
285			return;
286			}
287
288			default:
289			return;
290			}
291			}
292
293		8	Matrix StrassenMultiply(const Matrix &a_init, const Matrix &b_init, size_t size_init) {
294		8	std::vector<Frame> stack;
295	1/2 ✓ Branch 1 taken 8 times. ✗ Branch 2 not taken.	8	stack.emplace_back(a_init, b_init, size_init);
296
297		8	Matrix result;
298	2/2 ✓ Branch 0 taken 548 times. ✓ Branch 1 taken 8 times.	556	while (!stack.empty()) {
299	1/2 ✓ Branch 1 taken 548 times. ✗ Branch 2 not taken.	548	ProcessTopFrame(stack, result);
300			}
301		8	return result;
302		8	}
303
304		16	Matrix PadTo(const Matrix &src, size_t n, size_t new_n) {
305	1/2 ✓ Branch 0 taken 16 times. ✗ Branch 1 not taken.	16	if (new_n == n) {
306		16	return src;
307			}
308		✗	Matrix padded(new_n * new_n, 0.0);
309		✗	#pragma omp parallel for default(none) shared(padded, src, n, new_n) schedule(static) if (new_n >= kParallelThreshold)
310			for (size_t i = 0; i < n; ++i) {
311			const size_t is = i * n;
312			const size_t id = i * new_n;
313			for (size_t j = 0; j < n; ++j) {
314			padded.at(id + j) = src.at(is + j);
315			}
316			}
317			return padded;
318			}
319
320			} // namespace
321
322		12	bool AkhmetovDStrassenDenseDoubleOMP::RunImpl() {
323			const auto &input = GetInput();
324			auto &output = GetOutput();
325
326		12	const size_t n = format::GetN(input);
327		12	const Matrix a = format::GetA(input);
328	1/2 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken.	12	const Matrix b = format::GetB(input);
329
330	2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 8 times.	12	if (n <= kThreshold) {
331	1/4 ✓ Branch 1 taken 4 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	4	output = StandardMultiply(a, b, n);
332		4	return true;
333			}
334
335			const size_t new_n = NextPow2(n);
336	1/2 ✓ Branch 1 taken 8 times. ✗ Branch 2 not taken.	8	const Matrix a_padded = PadTo(a, n, new_n);
337	1/2 ✓ Branch 1 taken 8 times. ✗ Branch 2 not taken.	8	const Matrix b_padded = PadTo(b, n, new_n);
338
339	1/2 ✓ Branch 1 taken 8 times. ✗ Branch 2 not taken.	8	const Matrix result_padded = StrassenMultiply(a_padded, b_padded, new_n);
340
341	1/4 ✓ Branch 1 taken 8 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	8	output.assign(n * n, 0.0);
342	1/2 ✓ Branch 0 taken 8 times. ✗ Branch 1 not taken.	8	#pragma omp parallel for default(none) shared(output, result_padded, n, new_n) \
343			schedule(static) if (n >= kParallelThreshold)
344			for (size_t i = 0; i < n; ++i) {
345			const size_t is = i * new_n;
346			const size_t id = i * n;
347			for (size_t j = 0; j < n; ++j) {
348			output.at(id + j) = result_padded.at(is + j);
349			}
350			}
351
352			return true;
353			}
354
355		12	bool AkhmetovDStrassenDenseDoubleOMP::PostProcessingImpl() {
356			const auto &input = GetInput();
357		12	const size_t n = format::GetN(input);
358		12	return GetOutput().size() == n * n;
359			}
360
361			} // namespace akhmetov_daniil_strassen_dense_double
362