GCC Code Coverage Report

Directory:	./
File:	tasks/tabalaev_a_matrix_mul_strassen/all/src/ops_all.cpp
Date:	2026-06-04 20:25:32

	Exec	Total	Coverage
Lines:	177	209	84.7%
Functions:	15	15	100.0%
Branches:	94	260	36.2%

  
      Line
      Branch
      Exec
      Source
    
      #include "tabalaev_a_matrix_mul_strassen/all/include/ops_all.hpp"
    
      #include <mpi.h>
    
      #include <omp.h>
    
      #include <algorithm>
    
      #include <array>
    
      #include <cmath>
    
      #include <cstddef>
    
      #include <cstdint>
    
      #include <stack>
    
      #include <utility>
    
      #include <vector>
    
      #include "tabalaev_a_matrix_mul_strassen/common/include/common.hpp"
    
      #include "util/include/util.hpp"
    
      namespace tabalaev_a_matrix_mul_strassen {
    
      static constexpr size_t kBaseCaseSize = 128;
    
        1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.

      12
      TabalaevAMatrixMulStrassenALL::TabalaevAMatrixMulStrassenALL(const InType &in) {
    
        SetTypeOfTask(GetStaticTypeOfTask());
    
        1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.

      12
        GetInput() = in;
    
        GetOutput() = {};
    
      12
      }
    
      12
      bool TabalaevAMatrixMulStrassenALL::ValidationImpl() {
    
      12
        int rank = 0;
    
      12
        int size = 1;
    
      12
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    
      12
        MPI_Comm_size(MPI_COMM_WORLD, &size);
    
      12
        omp_set_num_threads(ppc::util::GetNumThreads());
    
      12
        int is_valid = 0;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.

      12
        if (rank == 0) {
    
          const auto &in = GetInput();
    
        2/4✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 6 times.

      6
          bool valid = in.a_rows > 0 && in.a_cols_b_rows > 0 && in.b_cols > 0 &&
    
        2/4✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 6 times.

      12
                       in.a.size() == static_cast<size_t>(in.a_rows * in.a_cols_b_rows) &&
    
        1/2✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.

      6
                       in.b.size() == static_cast<size_t>(in.a_cols_b_rows * in.b_cols);
    
      6
          is_valid = valid ? 1 : 0;
    
        }
    
      12
        MPI_Bcast(&is_valid, 1, MPI_INT, 0, MPI_COMM_WORLD);
    
      12
        return is_valid == 1;
    
      }
    
      12
      bool TabalaevAMatrixMulStrassenALL::PreProcessingImpl() {
    
        GetOutput() = {};
    
      12
        int rank = 0;
    
      12
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.

      12
        if (rank == 0) {
    
          const auto &in = GetInput();
    
      6
          a_rows_ = in.a_rows;
    
      6
          a_cols_b_rows_ = in.a_cols_b_rows;
    
      6
          b_cols_ = in.b_cols;
    
      6
          size_t max_dim = std::max({a_rows_, a_cols_b_rows_, b_cols_});
    
      6
          padded_n_ = 1;
    
        2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 6 times.

      34
          while (padded_n_ < max_dim) {
    
      28
            padded_n_ *= 2;
    
          }
    
      6
          padded_a_.assign(padded_n_ * padded_n_, 0.0);
    
      6
          padded_b_.assign(padded_n_ * padded_n_, 0.0);
    
          auto &padded_a = padded_a_;
    
          auto &padded_b = padded_b_;
    
      6
          size_t a_rows = a_rows_;
    
      6
          size_t a_cols_b_rows = a_cols_b_rows_;
    
      6
          size_t b_cols = b_cols_;
    
      6
          size_t padded_n = padded_n_;
    
      6
      #pragma omp parallel default(none) shared(in, padded_a, padded_b, a_rows, a_cols_b_rows, b_cols, padded_n)
    
          {
    
      #pragma omp for nowait
    
            for (size_t i = 0; i < a_rows; ++i) {
    
              for (size_t j = 0; j < a_cols_b_rows; ++j) {
    
                padded_a[(i * padded_n) + j] = in.a[(i * a_cols_b_rows) + j];
    
              }
    
            }
    
      #pragma omp for
    
            for (size_t i = 0; i < a_cols_b_rows; ++i) {
    
              for (size_t j = 0; j < b_cols; ++j) {
    
                padded_b[(i * padded_n) + j] = in.b[(i * b_cols) + j];
    
              }
    
            }
    
          }
    
        }
    
      12
        return true;
    
      }
    
      12
      bool TabalaevAMatrixMulStrassenALL::RunImpl() {
    
      12
        int rank = 0;
    
      12
        int size = 0;
    
      12
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    
      12
        MPI_Comm_size(MPI_COMM_WORLD, &size);
    
      12
        int procs = omp_get_num_procs();
    
      12
        int threads_per_process = std::max(1, procs / size);
    
      12
        omp_set_num_threads(threads_per_process);
    
      12
        std::array<uint64_t, 3> dims = {0, 0, 0};
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.

      12
        if (rank == 0) {
    
      6
          dims[0] = static_cast<uint64_t>(a_rows_);
    
      6
          dims[1] = static_cast<uint64_t>(b_cols_);
    
      6
          dims[2] = static_cast<uint64_t>(padded_n_);
    
        }
    
      12
        MPI_Bcast(dims.data(), 3, MPI_UINT64_T, 0, MPI_COMM_WORLD);
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.

      12
        if (rank != 0) {
    
      6
          a_rows_ = static_cast<size_t>(dims[0]);
    
      6
          b_cols_ = static_cast<size_t>(dims[1]);
    
      6
          padded_n_ = static_cast<size_t>(dims[2]);
    
        }
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.

      12
        if (rank == 0) {
    
      6
          RunMaster(size);
    
        } else {
    
      6
          RunWorker();
    
        }
    
        auto &out = GetOutput();
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.

      12
        if (rank != 0) {
    
      6
          out.assign(a_rows_ * b_cols_, 0.0);
    
        }
    
      12
        MPI_Bcast(out.data(), static_cast<int>(out.size()), MPI_DOUBLE, 0, MPI_COMM_WORLD);
    
      12
        return true;
    
      }
    
      6
      void TabalaevAMatrixMulStrassenALL::RunMaster(int size) {
    
        3/4✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 2 times.

      6
        if (size == 1 || padded_n_ <= kBaseCaseSize) {
    
      4
          MasterBase(size);
    
        } else {
    
      2
          MasterAll(size);
    
        }
    
        auto &out = GetOutput();
    
      6
        out.assign(a_rows_ * b_cols_, 0.0);
    
      6
        const auto &result_c = result_c_;
    
      6
        size_t a_rows = a_rows_;
    
      6
        size_t b_cols = b_cols_;
    
      6
        size_t padded_n = padded_n_;
    
      6
      #pragma omp parallel for default(none) shared(out, result_c, a_rows, b_cols, padded_n)
    
        for (size_t i = 0; i < a_rows; ++i) {
    
          for (size_t j = 0; j < b_cols; ++j) {
    
            out[(i * b_cols) + j] = result_c[(i * padded_n) + j];
    
          }
    
        }
    
      6
      }
    
      4
      void TabalaevAMatrixMulStrassenALL::MasterBase(int size) {
    
      4
        result_c_ = StrassenMultiply(padded_a_, padded_b_, padded_n_);
    
      4
        uint64_t terminate_signal = 0;
    
        2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.

      8
        for (int dest = 1; dest < size; ++dest) {
    
      4
          MPI_Send(&terminate_signal, 1, MPI_UINT64_T, dest, 0, MPI_COMM_WORLD);
    
        }
    
      4
      }
    
      2
      void TabalaevAMatrixMulStrassenALL::MasterAll(int size) {
    
      2
        size_t h = padded_n_ / 2;
    
      2
        std::vector<double> a11;
    
      2
        std::vector<double> a12;
    
      2
        std::vector<double> a21;
    
      2
        std::vector<double> a22;
    
      2
        std::vector<double> b11;
    
      2
        std::vector<double> b12;
    
      2
        std::vector<double> b21;
    
      2
        std::vector<double> b22;
    
        1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.

      2
        SplitMatrix(padded_a_, padded_n_, a11, a12, a21, a22);
    
        1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.

      2
        SplitMatrix(padded_b_, padded_n_, b11, b12, b21, b22);
    
        std::vector<std::vector<double>> task_a = {Add(a11, a22),      Add(a21, a22),     a11, a22, Add(a11, a12),
    
        4/12✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 14 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 14 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.

      16
                                                   Subtract(a21, a11), Subtract(a12, a22)};
    
        std::vector<std::vector<double>> task_b = {Add(b11, b22), b11,           Subtract(b12, b22), Subtract(b21, b11),
    
        4/10✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 14 times.
✓ Branch 4 taken 2 times.
✓ Branch 5 taken 14 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.

      16
                                                   b22,           Add(b11, b12), Add(b21, b22)};
    
        2/4✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.

      2
        std::vector<std::vector<double>> p(7, std::vector<double>(h * h));
    
        int num_tasks = 7;
    
      2
        int tasks_sent = 0;
    
        int tasks_completed = 0;
    
      2
        int h_squared = static_cast<int>(h * h);
    
        3/4✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.

      4
        for (int dest = 1; dest < size && tasks_sent < num_tasks; ++dest) {
    
      2
          auto h_msg = static_cast<uint64_t>(h);
    
        1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.

      2
          MPI_Send(&h_msg, 1, MPI_UINT64_T, dest, 0, MPI_COMM_WORLD);
    
        1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.

      2
          MPI_Send(&tasks_sent, 1, MPI_INT, dest, 1, MPI_COMM_WORLD);
    
        1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.

      2
          MPI_Send(task_a[tasks_sent].data(), h_squared, MPI_DOUBLE, dest, 2, MPI_COMM_WORLD);
    
        1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.

      2
          MPI_Send(task_b[tasks_sent].data(), h_squared, MPI_DOUBLE, dest, 3, MPI_COMM_WORLD);
    
      2
          tasks_sent++;
    
        }
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
        while (tasks_completed < num_tasks) {
    
          MPI_Status status;
    
      14
          int task_id = 0;
    
        1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.

      14
          MPI_Recv(&task_id, 1, MPI_INT, MPI_ANY_SOURCE, 4, MPI_COMM_WORLD, &status);
    
      14
          int worker = status.MPI_SOURCE;
    
        1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.

      14
          MPI_Recv(p[task_id].data(), h_squared, MPI_DOUBLE, worker, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    
      14
          tasks_completed++;
    
        2/2✓ Branch 0 taken 12 times.
✓ Branch 1 taken 2 times.

      14
          if (tasks_sent < num_tasks) {
    
      12
            auto h_msg = static_cast<uint64_t>(h);
    
        1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.

      12
            MPI_Send(&h_msg, 1, MPI_UINT64_T, worker, 0, MPI_COMM_WORLD);
    
        1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.

      12
            MPI_Send(&tasks_sent, 1, MPI_INT, worker, 1, MPI_COMM_WORLD);
    
        1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.

      12
            MPI_Send(task_a[tasks_sent].data(), h_squared, MPI_DOUBLE, worker, 2, MPI_COMM_WORLD);
    
        1/2✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.

      12
            MPI_Send(task_b[tasks_sent].data(), h_squared, MPI_DOUBLE, worker, 3, MPI_COMM_WORLD);
    
      12
            tasks_sent++;
    
          }
    
        }
    
      2
        uint64_t terminate_signal = 0;
    
        2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.

      4
        for (int dest = 1; dest < size; ++dest) {
    
        1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.

      2
          MPI_Send(&terminate_signal, 1, MPI_UINT64_T, dest, 0, MPI_COMM_WORLD);
    
        }
    
        1/2✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.

      2
        std::vector<double> c11(h * h);
    
        1/4✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      2
        std::vector<double> c12(h * h);
    
        1/4✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      2
        std::vector<double> c21(h * h);
    
        1/4✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.

      2
        std::vector<double> c22(h * h);
    
      2
      #pragma omp parallel for default(none) shared(p, c11, c12, c21, c22, h)
    
        for (size_t i = 0; i < h * h; ++i) {
    
          c11[i] = p[0][i] + p[3][i] - p[4][i] + p[6][i];
    
          c12[i] = p[2][i] + p[4][i];
    
          c21[i] = p[1][i] + p[3][i];
    
          c22[i] = p[0][i] - p[1][i] + p[2][i] + p[5][i];
    
        }
    
        2/6✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.

      4
        result_c_ = CombineMatrix(c11, c12, c21, c22, padded_n_);
    
        14/36✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 2 times.
✗ Branch 8 not taken.
✓ Branch 10 taken 2 times.
✗ Branch 11 not taken.
✓ Branch 13 taken 2 times.
✗ Branch 14 not taken.
✓ Branch 16 taken 2 times.
✗ Branch 17 not taken.
✓ Branch 19 taken 2 times.
✗ Branch 20 not taken.
✓ Branch 22 taken 2 times.
✗ Branch 23 not taken.
✓ Branch 25 taken 2 times.
✗ Branch 26 not taken.
✓ Branch 28 taken 2 times.
✗ Branch 29 not taken.
✓ Branch 31 taken 2 times.
✗ Branch 32 not taken.
✓ Branch 34 taken 2 times.
✗ Branch 35 not taken.
✓ Branch 37 taken 2 times.
✗ Branch 38 not taken.
✓ Branch 40 taken 2 times.
✗ Branch 41 not taken.
✗ Branch 45 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 48 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 51 not taken.
✗ Branch 52 not taken.

      8
      }
    
      6
      void TabalaevAMatrixMulStrassenALL::RunWorker() {
    
        while (true) {
    
      20
          uint64_t h_msg = 0;
    
          MPI_Status status;
    
      20
          MPI_Recv(&h_msg, 1, MPI_UINT64_T, 0, 0, MPI_COMM_WORLD, &status);
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 6 times.

      20
          if (h_msg == 0) {
    
            break;
    
          }
    
          auto h = static_cast<size_t>(h_msg);
    
      14
          int h_squared = static_cast<int>(h * h);
    
      14
          int task_id = 0;
    
      14
          MPI_Recv(&task_id, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    
      14
          std::vector<double> a_sub(h * h);
    
        2/6✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 14 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.

      14
          std::vector<double> b_sub(h * h);
    
        1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.

      14
          MPI_Recv(a_sub.data(), h_squared, MPI_DOUBLE, 0, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    
        1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.

      14
          MPI_Recv(b_sub.data(), h_squared, MPI_DOUBLE, 0, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    
        1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.

      14
          std::vector<double> res = StrassenMultiply(a_sub, b_sub, h);
    
        1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.

      14
          MPI_Send(&task_id, 1, MPI_INT, 0, 4, MPI_COMM_WORLD);
    
        1/2✓ Branch 1 taken 14 times.
✗ Branch 2 not taken.

      14
          MPI_Send(res.data(), h_squared, MPI_DOUBLE, 0, 5, MPI_COMM_WORLD);
    
      14
        }
    
      6
      }
    
      12
      bool TabalaevAMatrixMulStrassenALL::PostProcessingImpl() {
    
      12
        return true;
    
      }
    
      12
      std::vector<double> TabalaevAMatrixMulStrassenALL::Add(const std::vector<double> &mat_a,
    
                                                             const std::vector<double> &mat_b) {
    
        const size_t n = mat_a.size();
    
      12
        std::vector<double> res(n);
    
      12
      #pragma omp parallel for default(none) shared(mat_a, mat_b, res, n)
    
        for (size_t i = 0; i < n; ++i) {
    
          res[i] = mat_a[i] + mat_b[i];
    
        }
    
      12
        return res;
    
      }
    
      8
      std::vector<double> TabalaevAMatrixMulStrassenALL::Subtract(const std::vector<double> &mat_a,
    
                                                                  const std::vector<double> &mat_b) {
    
        const size_t n = mat_a.size();
    
      8
        std::vector<double> res(n);
    
      8
      #pragma omp parallel for default(none) shared(mat_a, mat_b, res, n)
    
        for (size_t i = 0; i < n; ++i) {
    
          res[i] = mat_a[i] - mat_b[i];
    
        }
    
      8
        return res;
    
      }
    
      18
      std::vector<double> TabalaevAMatrixMulStrassenALL::BaseMultiply(const std::vector<double> &mat_a,
    
                                                                      const std::vector<double> &mat_b, size_t n) {
    
      18
        std::vector<double> res(n * n, 0.0);
    
      18
      #pragma omp parallel for default(none) shared(mat_a, mat_b, res, n)
    
        for (size_t i = 0; i < n; ++i) {
    
          for (size_t k = 0; k < n; ++k) {
    
            double temp = mat_a[(i * n) + k];
    
            if (temp == 0.0) {
    
              continue;
    
            }
    
            for (size_t j = 0; j < n; ++j) {
    
              res[(i * n) + j] += temp * mat_b[(k * n) + j];
    
            }
    
          }
    
        }
    
      18
        return res;
    
      }
    
      4
      void TabalaevAMatrixMulStrassenALL::SplitMatrix(const std::vector<double> &src, size_t n, std::vector<double> &c11,
    
                                                      std::vector<double> &c12, std::vector<double> &c21,
    
                                                      std::vector<double> &c22) {
    
      4
        size_t h = n / 2;
    
      4
        size_t sz = h * h;
    
      4
        c11.resize(sz);
    
      4
        c12.resize(sz);
    
      4
        c21.resize(sz);
    
      4
        c22.resize(sz);
    
      4
      #pragma omp parallel for collapse(2) default(none) shared(src, c11, c12, c21, c22, h, n)
    
        for (size_t i = 0; i < h; ++i) {
    
          for (size_t j = 0; j < h; ++j) {
    
            size_t src_idx = (i * n) + j;
    
            size_t dst_idx = (i * h) + j;
    
            c11[dst_idx] = src[src_idx];
    
            c12[dst_idx] = src[src_idx + h];
    
            c21[dst_idx] = src[src_idx + (h * n)];
    
            c22[dst_idx] = src[src_idx + (h * n) + h];
    
          }
    
        }
    
      4
      }
    
      2
      std::vector<double> TabalaevAMatrixMulStrassenALL::CombineMatrix(const std::vector<double> &c11,
    
                                                                       const std::vector<double> &c12,
    
                                                                       const std::vector<double> &c21,
    
                                                                       const std::vector<double> &c22, size_t n) {
    
      2
        size_t h = n / 2;
    
      2
        std::vector<double> res(n * n);
    
      2
      #pragma omp parallel for collapse(2) default(none) shared(res, c11, c12, c21, c22, h, n)
    
        for (size_t i = 0; i < h; ++i) {
    
          for (size_t j = 0; j < h; ++j) {
    
            size_t src_idx = (i * h) + j;
    
            res[(i * n) + j] = c11[src_idx];
    
            res[(i * n) + j + h] = c12[src_idx];
    
            res[((i + h) * n) + j] = c21[src_idx];
    
            res[((i + h) * n) + j + h] = c22[src_idx];
    
          }
    
        }
    
      2
        return res;
    
      }
    
      18
      std::vector<double> TabalaevAMatrixMulStrassenALL::StrassenMultiply(const std::vector<double> &mat_a,
    
                                                                          const std::vector<double> &mat_b, size_t n) {
    
        std::stack<StrassenFrameALL> frames;
    
        std::stack<std::vector<double>> results;
    
      18
        frames.push({mat_a, mat_b, n, 0});
    
        2/2✓ Branch 0 taken 18 times.
✓ Branch 1 taken 18 times.

      36
        while (!frames.empty()) {
    
          StrassenFrameALL current = std::move(frames.top());
    
          frames.pop();
    
        1/2✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.

      18
          if (current.n <= kBaseCaseSize) {
    
        1/2✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.

      18
            results.push(BaseMultiply(current.mat_a, current.mat_b, current.n));
    
            continue;
    
          }
    
      ✗
          if (current.stage == 8) {
    
      ✗
            std::vector<std::vector<double>> p(7);
    
      ✗
            for (int i = 6; i >= 0; --i) {
    
      ✗
              p[i] = std::move(results.top());
    
              results.pop();
    
            }
    
      ✗
            size_t h = current.n / 2;
    
      ✗
            size_t sz = h * h;
    
      ✗
            std::vector<double> c11(sz);
    
      ✗
            std::vector<double> c12(sz);
    
      ✗
            std::vector<double> c21(sz);
    
      ✗
            std::vector<double> c22(sz);
    
      ✗
      #pragma omp parallel for default(none) shared(p, c11, c12, c21, c22, sz)
    
            for (size_t i = 0; i < sz; ++i) {
    
              c11[i] = p[0][i] + p[3][i] - p[4][i] + p[6][i];
    
              c12[i] = p[2][i] + p[4][i];
    
              c21[i] = p[1][i] + p[3][i];
    
              c22[i] = p[0][i] - p[1][i] + p[2][i] + p[5][i];
    
            }
    
      ✗
            results.push(CombineMatrix(c11, c12, c21, c22, current.n));
    
      ✗
          } else {
    
      ✗
            size_t h = current.n / 2;
    
      ✗
            std::vector<double> a11;
    
      ✗
            std::vector<double> a12;
    
      ✗
            std::vector<double> a21;
    
      ✗
            std::vector<double> a22;
    
      ✗
            std::vector<double> b11;
    
      ✗
            std::vector<double> b12;
    
      ✗
            std::vector<double> b21;
    
      ✗
            std::vector<double> b22;
    
      ✗
            SplitMatrix(current.mat_a, current.n, a11, a12, a21, a22);
    
      ✗
            SplitMatrix(current.mat_b, current.n, b11, b12, b21, b22);
    
      ✗
            frames.push({{}, {}, current.n, 8});
    
      ✗
            frames.push({Subtract(a12, a22), Add(b21, b22), h, 0});
    
      ✗
            frames.push({Subtract(a21, a11), Add(b11, b12), h, 0});
    
      ✗
            frames.push({Add(a11, a12), b22, h, 0});
    
      ✗
            frames.push({a22, Subtract(b21, b11), h, 0});
    
      ✗
            frames.push({a11, Subtract(b12, b22), h, 0});
    
      ✗
            frames.push({Add(a21, a22), b11, h, 0});
    
      ✗
            frames.push({Add(a11, a22), Add(b11, b22), h, 0});
    
          }
    
      18
        }
    
      18
        return std::move(results.top());
    
        3/48✓ Branch 1 taken 18 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 18 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 18 times.
✗ Branch 8 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 22 not taken.
✗ Branch 23 not taken.
✗ Branch 25 not taken.
✗ Branch 26 not taken.
✗ Branch 28 not taken.
✗ Branch 29 not taken.
✗ Branch 31 not taken.
✗ Branch 32 not taken.
✗ Branch 34 not taken.
✗ Branch 35 not taken.
✗ Branch 37 not taken.
✗ Branch 38 not taken.
✗ Branch 40 not taken.
✗ Branch 41 not taken.
✗ Branch 43 not taken.
✗ Branch 44 not taken.
✗ Branch 46 not taken.
✗ Branch 47 not taken.
✗ Branch 49 not taken.
✗ Branch 50 not taken.
✗ Branch 52 not taken.
✗ Branch 53 not taken.
✗ Branch 55 not taken.
✗ Branch 56 not taken.
✗ Branch 58 not taken.
✗ Branch 59 not taken.
✗ Branch 61 not taken.
✗ Branch 62 not taken.
✗ Branch 64 not taken.
✗ Branch 65 not taken.
✗ Branch 67 not taken.
✗ Branch 68 not taken.
✗ Branch 70 not taken.
✗ Branch 71 not taken.

      18
      }
    
      }  // namespace tabalaev_a_matrix_mul_strassen

Line	Branch	Exec	Source
1			#include "tabalaev_a_matrix_mul_strassen/all/include/ops_all.hpp"
2
3			#include <mpi.h>
4			#include <omp.h>
5
6			#include <algorithm>
7			#include <array>
8			#include <cmath>
9			#include <cstddef>
10			#include <cstdint>
11			#include <stack>
12			#include <utility>
13			#include <vector>
14
15			#include "tabalaev_a_matrix_mul_strassen/common/include/common.hpp"
16			#include "util/include/util.hpp"
17
18			namespace tabalaev_a_matrix_mul_strassen {
19
20			static constexpr size_t kBaseCaseSize = 128;
21
22	1/2 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken.	12	TabalaevAMatrixMulStrassenALL::TabalaevAMatrixMulStrassenALL(const InType &in) {
23			SetTypeOfTask(GetStaticTypeOfTask());
24	1/2 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken.	12	GetInput() = in;
25			GetOutput() = {};
26		12	}
27
28		12	bool TabalaevAMatrixMulStrassenALL::ValidationImpl() {
29		12	int rank = 0;
30		12	int size = 1;
31
32		12	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
33		12	MPI_Comm_size(MPI_COMM_WORLD, &size);
34
35		12	omp_set_num_threads(ppc::util::GetNumThreads());
36
37		12	int is_valid = 0;
38	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 6 times.	12	if (rank == 0) {
39			const auto &in = GetInput();
40	2/4 ✗ Branch 0 not taken. ✓ Branch 1 taken 6 times. ✗ Branch 2 not taken. ✓ Branch 3 taken 6 times.	6	bool valid = in.a_rows > 0 && in.a_cols_b_rows > 0 && in.b_cols > 0 &&
41	2/4 ✓ Branch 0 taken 6 times. ✗ Branch 1 not taken. ✗ Branch 2 not taken. ✓ Branch 3 taken 6 times.	12	in.a.size() == static_cast<size_t>(in.a_rows * in.a_cols_b_rows) &&
42	1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 6 times.	6	in.b.size() == static_cast<size_t>(in.a_cols_b_rows * in.b_cols);
43		6	is_valid = valid ? 1 : 0;
44			}
45		12	MPI_Bcast(&is_valid, 1, MPI_INT, 0, MPI_COMM_WORLD);
46		12	return is_valid == 1;
47			}
48
49		12	bool TabalaevAMatrixMulStrassenALL::PreProcessingImpl() {
50			GetOutput() = {};
51
52		12	int rank = 0;
53		12	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
54
55	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 6 times.	12	if (rank == 0) {
56			const auto &in = GetInput();
57
58		6	a_rows_ = in.a_rows;
59		6	a_cols_b_rows_ = in.a_cols_b_rows;
60		6	b_cols_ = in.b_cols;
61
62		6	size_t max_dim = std::max({a_rows_, a_cols_b_rows_, b_cols_});
63		6	padded_n_ = 1;
64	2/2 ✓ Branch 0 taken 28 times. ✓ Branch 1 taken 6 times.	34	while (padded_n_ < max_dim) {
65		28	padded_n_ *= 2;
66			}
67
68		6	padded_a_.assign(padded_n_ * padded_n_, 0.0);
69		6	padded_b_.assign(padded_n_ * padded_n_, 0.0);
70
71			auto &padded_a = padded_a_;
72			auto &padded_b = padded_b_;
73		6	size_t a_rows = a_rows_;
74		6	size_t a_cols_b_rows = a_cols_b_rows_;
75		6	size_t b_cols = b_cols_;
76		6	size_t padded_n = padded_n_;
77
78		6	#pragma omp parallel default(none) shared(in, padded_a, padded_b, a_rows, a_cols_b_rows, b_cols, padded_n)
79			{
80			#pragma omp for nowait
81			for (size_t i = 0; i < a_rows; ++i) {
82			for (size_t j = 0; j < a_cols_b_rows; ++j) {
83			padded_a[(i * padded_n) + j] = in.a[(i * a_cols_b_rows) + j];
84			}
85			}
86			#pragma omp for
87			for (size_t i = 0; i < a_cols_b_rows; ++i) {
88			for (size_t j = 0; j < b_cols; ++j) {
89			padded_b[(i * padded_n) + j] = in.b[(i * b_cols) + j];
90			}
91			}
92			}
93			}
94
95		12	return true;
96			}
97
98		12	bool TabalaevAMatrixMulStrassenALL::RunImpl() {
99		12	int rank = 0;
100		12	int size = 0;
101		12	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
102		12	MPI_Comm_size(MPI_COMM_WORLD, &size);
103
104		12	int procs = omp_get_num_procs();
105		12	int threads_per_process = std::max(1, procs / size);
106		12	omp_set_num_threads(threads_per_process);
107
108		12	std::array<uint64_t, 3> dims = {0, 0, 0};
109	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 6 times.	12	if (rank == 0) {
110		6	dims[0] = static_cast<uint64_t>(a_rows_);
111		6	dims[1] = static_cast<uint64_t>(b_cols_);
112		6	dims[2] = static_cast<uint64_t>(padded_n_);
113			}
114		12	MPI_Bcast(dims.data(), 3, MPI_UINT64_T, 0, MPI_COMM_WORLD);
115
116	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 6 times.	12	if (rank != 0) {
117		6	a_rows_ = static_cast<size_t>(dims[0]);
118		6	b_cols_ = static_cast<size_t>(dims[1]);
119		6	padded_n_ = static_cast<size_t>(dims[2]);
120			}
121
122	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 6 times.	12	if (rank == 0) {
123		6	RunMaster(size);
124			} else {
125		6	RunWorker();
126			}
127
128			auto &out = GetOutput();
129	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 6 times.	12	if (rank != 0) {
130		6	out.assign(a_rows_ * b_cols_, 0.0);
131			}
132
133		12	MPI_Bcast(out.data(), static_cast<int>(out.size()), MPI_DOUBLE, 0, MPI_COMM_WORLD);
134
135		12	return true;
136			}
137
138		6	void TabalaevAMatrixMulStrassenALL::RunMaster(int size) {
139	3/4 ✓ Branch 0 taken 6 times. ✗ Branch 1 not taken. ✓ Branch 2 taken 4 times. ✓ Branch 3 taken 2 times.	6	if (size == 1 \|\| padded_n_ <= kBaseCaseSize) {
140		4	MasterBase(size);
141			} else {
142		2	MasterAll(size);
143			}
144
145			auto &out = GetOutput();
146		6	out.assign(a_rows_ * b_cols_, 0.0);
147
148		6	const auto &result_c = result_c_;
149		6	size_t a_rows = a_rows_;
150		6	size_t b_cols = b_cols_;
151		6	size_t padded_n = padded_n_;
152
153		6	#pragma omp parallel for default(none) shared(out, result_c, a_rows, b_cols, padded_n)
154			for (size_t i = 0; i < a_rows; ++i) {
155			for (size_t j = 0; j < b_cols; ++j) {
156			out[(i * b_cols) + j] = result_c[(i * padded_n) + j];
157			}
158			}
159		6	}
160
161		4	void TabalaevAMatrixMulStrassenALL::MasterBase(int size) {
162		4	result_c_ = StrassenMultiply(padded_a_, padded_b_, padded_n_);
163
164		4	uint64_t terminate_signal = 0;
165	2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 4 times.	8	for (int dest = 1; dest < size; ++dest) {
166		4	MPI_Send(&terminate_signal, 1, MPI_UINT64_T, dest, 0, MPI_COMM_WORLD);
167			}
168		4	}
169
// Distributed path: performs one level of Strassen splitting on the padded
// inputs, hands the seven half-size products to ranks 1..size-1, collects
// the results and assembles result_c_.
//
// Message tags used here (mirrored by RunWorker):
//   0: half size h as uint64 (0 means "terminate")   1: task index
//   2: left operand (h*h doubles)                    3: right operand
//   4: task index of a finished product              5: product (h*h doubles)
//
// @param size  ranks 1..size-1 are used as workers. RunMaster only calls this
//              when size != 1, so at least one worker exists on that path.
170		2	void TabalaevAMatrixMulStrassenALL::MasterAll(int size) {
171		2	size_t h = padded_n_ / 2;
172
173		2	std::vector<double> a11;
174		2	std::vector<double> a12;
175		2	std::vector<double> a21;
176		2	std::vector<double> a22;
177
178		2	std::vector<double> b11;
179		2	std::vector<double> b12;
180		2	std::vector<double> b21;
181		2	std::vector<double> b22;
182
183	1/2 ✓ Branch 1 taken 2 times. ✗ Branch 2 not taken.	2	SplitMatrix(padded_a_, padded_n_, a11, a12, a21, a22);
184	1/2 ✓ Branch 1 taken 2 times. ✗ Branch 2 not taken.	2	SplitMatrix(padded_b_, padded_n_, b11, b12, b21, b22);
185
			// Operand tables: task k multiplies task_a[k] by task_b[k].
			//   0: (a11 + a22) * (b11 + b22)    1: (a21 + a22) * b11
			//   2: a11 * (b12 - b22)            3: a22 * (b21 - b11)
			//   4: (a11 + a12) * b22            5: (a21 - a11) * (b11 + b12)
			//   6: (a12 - a22) * (b21 + b22)
186			std::vector<std::vector<double>> task_a = {Add(a11, a22), Add(a21, a22), a11, a22, Add(a11, a12),
187	4/12 ✗ Branch 1 not taken. ✓ Branch 2 taken 2 times. ✓ Branch 3 taken 14 times. ✓ Branch 4 taken 2 times. ✓ Branch 5 taken 14 times. ✗ Branch 6 not taken. ✗ Branch 7 not taken. ✗ Branch 8 not taken. ✗ Branch 9 not taken. ✗ Branch 10 not taken. ✗ Branch 11 not taken. ✗ Branch 12 not taken.	16	Subtract(a21, a11), Subtract(a12, a22)};
188			std::vector<std::vector<double>> task_b = {Add(b11, b22), b11, Subtract(b12, b22), Subtract(b21, b11),
189	4/10 ✗ Branch 1 not taken. ✓ Branch 2 taken 2 times. ✓ Branch 3 taken 14 times. ✓ Branch 4 taken 2 times. ✓ Branch 5 taken 14 times. ✗ Branch 6 not taken. ✗ Branch 7 not taken. ✗ Branch 8 not taken. ✗ Branch 9 not taken. ✗ Branch 10 not taken.	16	b22, Add(b11, b12), Add(b21, b22)};
190
			// p[k] receives the product of task k.
191	2/4 ✓ Branch 1 taken 2 times. ✗ Branch 2 not taken. ✓ Branch 4 taken 2 times. ✗ Branch 5 not taken.	2	std::vector<std::vector<double>> p(7, std::vector<double>(h * h));
192			int num_tasks = 7;
193		2	int tasks_sent = 0;
194			int tasks_completed = 0;
195
			// NOTE(review): h * h is narrowed to int for the MPI element counts;
			// this would not fit for h > 46340 - confirm the supported sizes.
196		2	int h_squared = static_cast<int>(h * h);
197
			// Initial round: one task per worker, stopping early once all seven
			// tasks are out. Ranks beyond that get no task and only receive the
			// terminate signal sent further down.
198	3/4 ✓ Branch 0 taken 2 times. ✓ Branch 1 taken 2 times. ✓ Branch 2 taken 2 times. ✗ Branch 3 not taken.	4	for (int dest = 1; dest < size && tasks_sent < num_tasks; ++dest) {
199		2	auto h_msg = static_cast<uint64_t>(h);
200	1/2 ✓ Branch 1 taken 2 times. ✗ Branch 2 not taken.	2	MPI_Send(&h_msg, 1, MPI_UINT64_T, dest, 0, MPI_COMM_WORLD);
201	1/2 ✓ Branch 1 taken 2 times. ✗ Branch 2 not taken.	2	MPI_Send(&tasks_sent, 1, MPI_INT, dest, 1, MPI_COMM_WORLD);
202	1/2 ✓ Branch 1 taken 2 times. ✗ Branch 2 not taken.	2	MPI_Send(task_a[tasks_sent].data(), h_squared, MPI_DOUBLE, dest, 2, MPI_COMM_WORLD);
203	1/2 ✓ Branch 1 taken 2 times. ✗ Branch 2 not taken.	2	MPI_Send(task_b[tasks_sent].data(), h_squared, MPI_DOUBLE, dest, 3, MPI_COMM_WORLD);
204		2	tasks_sent++;
205			}
206
			// Collect results from whichever worker finishes first and, while
			// tasks remain, immediately give that same worker the next one.
207	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	while (tasks_completed < num_tasks) {
208			MPI_Status status;
209		14	int task_id = 0;
210	1/2 ✓ Branch 1 taken 14 times. ✗ Branch 2 not taken.	14	MPI_Recv(&task_id, 1, MPI_INT, MPI_ANY_SOURCE, 4, MPI_COMM_WORLD, &status);
211
			// The payload is received from the rank that sent the task id, so
			// id and data always come from the same worker.
212		14	int worker = status.MPI_SOURCE;
213	1/2 ✓ Branch 1 taken 14 times. ✗ Branch 2 not taken.	14	MPI_Recv(p[task_id].data(), h_squared, MPI_DOUBLE, worker, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
214		14	tasks_completed++;
215
216	2/2 ✓ Branch 0 taken 12 times. ✓ Branch 1 taken 2 times.	14	if (tasks_sent < num_tasks) {
217		12	auto h_msg = static_cast<uint64_t>(h);
218	1/2 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken.	12	MPI_Send(&h_msg, 1, MPI_UINT64_T, worker, 0, MPI_COMM_WORLD);
219	1/2 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken.	12	MPI_Send(&tasks_sent, 1, MPI_INT, worker, 1, MPI_COMM_WORLD);
220	1/2 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken.	12	MPI_Send(task_a[tasks_sent].data(), h_squared, MPI_DOUBLE, worker, 2, MPI_COMM_WORLD);
221	1/2 ✓ Branch 1 taken 12 times. ✗ Branch 2 not taken.	12	MPI_Send(task_b[tasks_sent].data(), h_squared, MPI_DOUBLE, worker, 3, MPI_COMM_WORLD);
222		12	tasks_sent++;
223			}
224			}
225
			// All products are in: a zero on tag 0 tells every worker to stop.
226		2	uint64_t terminate_signal = 0;
227	2/2 ✓ Branch 0 taken 2 times. ✓ Branch 1 taken 2 times.	4	for (int dest = 1; dest < size; ++dest) {
228	1/2 ✓ Branch 1 taken 2 times. ✗ Branch 2 not taken.	2	MPI_Send(&terminate_signal, 1, MPI_UINT64_T, dest, 0, MPI_COMM_WORLD);
229			}
230
231	1/2 ✓ Branch 1 taken 2 times. ✗ Branch 2 not taken.	2	std::vector<double> c11(h * h);
232	1/4 ✓ Branch 1 taken 2 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	2	std::vector<double> c12(h * h);
233	1/4 ✓ Branch 1 taken 2 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	2	std::vector<double> c21(h * h);
234	1/4 ✓ Branch 1 taken 2 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken.	2	std::vector<double> c22(h * h);
235
			// Element-wise recombination of the seven products into the four
			// quadrants of the result.
236		2	#pragma omp parallel for default(none) shared(p, c11, c12, c21, c22, h)
237			for (size_t i = 0; i < h * h; ++i) {
238			c11[i] = p[0][i] + p[3][i] - p[4][i] + p[6][i];
239			c12[i] = p[2][i] + p[4][i];
240			c21[i] = p[1][i] + p[3][i];
241			c22[i] = p[0][i] - p[1][i] + p[2][i] + p[5][i];
242			}
243
244	2/6 ✓ Branch 1 taken 2 times. ✗ Branch 2 not taken. ✓ Branch 4 taken 2 times. ✗ Branch 5 not taken. ✗ Branch 6 not taken. ✗ Branch 7 not taken.	4	result_c_ = CombineMatrix(c11, c12, c21, c22, padded_n_);
245	14/36 ✓ Branch 1 taken 2 times. ✗ Branch 2 not taken. ✓ Branch 4 taken 2 times. ✗ Branch 5 not taken. ✓ Branch 7 taken 2 times. ✗ Branch 8 not taken. ✓ Branch 10 taken 2 times. ✗ Branch 11 not taken. ✓ Branch 13 taken 2 times. ✗ Branch 14 not taken. ✓ Branch 16 taken 2 times. ✗ Branch 17 not taken. ✓ Branch 19 taken 2 times. ✗ Branch 20 not taken. ✓ Branch 22 taken 2 times. ✗ Branch 23 not taken. ✓ Branch 25 taken 2 times. ✗ Branch 26 not taken. ✓ Branch 28 taken 2 times. ✗ Branch 29 not taken. ✓ Branch 31 taken 2 times. ✗ Branch 32 not taken. ✓ Branch 34 taken 2 times. ✗ Branch 35 not taken. ✓ Branch 37 taken 2 times. ✗ Branch 38 not taken. ✓ Branch 40 taken 2 times. ✗ Branch 41 not taken. ✗ Branch 45 not taken. ✗ Branch 46 not taken. ✗ Branch 47 not taken. ✗ Branch 48 not taken. ✗ Branch 49 not taken. ✗ Branch 50 not taken. ✗ Branch 51 not taken. ✗ Branch 52 not taken.	8	}
246
// Worker loop: repeatedly receives one multiplication task from rank 0,
// computes it with StrassenMultiply and sends the product back, until a
// zero size arrives.
//
// Per task it receives tag 0 (size h as uint64), tag 1 (task id), tag 2 and
// tag 3 (the two h*h operands), and answers with tag 4 (task id) followed by
// tag 5 (the h*h product).
247		6	void TabalaevAMatrixMulStrassenALL::RunWorker() {
248			while (true) {
249		20	uint64_t h_msg = 0;
			// NOTE(review): status is filled by MPI_Recv but never read in this
			// function.
250			MPI_Status status;
251		20	MPI_Recv(&h_msg, 1, MPI_UINT64_T, 0, 0, MPI_COMM_WORLD, &status);
252
			// A size of zero is the terminate signal.
253	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 6 times.	20	if (h_msg == 0) {
254			break;
255			}
256			auto h = static_cast<size_t>(h_msg);
			// NOTE(review): same int narrowing of h * h as on the sending side;
			// confirm h stays small enough for the product to fit in int.
257		14	int h_squared = static_cast<int>(h * h);
258
259		14	int task_id = 0;
260		14	MPI_Recv(&task_id, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
261
262		14	std::vector<double> a_sub(h * h);
263	2/6 ✓ Branch 1 taken 14 times. ✗ Branch 2 not taken. ✓ Branch 4 taken 14 times. ✗ Branch 5 not taken. ✗ Branch 6 not taken. ✗ Branch 7 not taken.	14	std::vector<double> b_sub(h * h);
264	1/2 ✓ Branch 1 taken 14 times. ✗ Branch 2 not taken.	14	MPI_Recv(a_sub.data(), h_squared, MPI_DOUBLE, 0, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
265	1/2 ✓ Branch 1 taken 14 times. ✗ Branch 2 not taken.	14	MPI_Recv(b_sub.data(), h_squared, MPI_DOUBLE, 0, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
266
267	1/2 ✓ Branch 1 taken 14 times. ✗ Branch 2 not taken.	14	std::vector<double> res = StrassenMultiply(a_sub, b_sub, h);
268
			// The task id is echoed back first so the receiver knows which
			// product the following payload belongs to.
269	1/2 ✓ Branch 1 taken 14 times. ✗ Branch 2 not taken.	14	MPI_Send(&task_id, 1, MPI_INT, 0, 4, MPI_COMM_WORLD);
270	1/2 ✓ Branch 1 taken 14 times. ✗ Branch 2 not taken.	14	MPI_Send(res.data(), h_squared, MPI_DOUBLE, 0, 5, MPI_COMM_WORLD);
271		14	}
272		6	}
273
// Post-processing step: performs no work and always reports success.
// @return true.
274		12	bool TabalaevAMatrixMulStrassenALL::PostProcessingImpl() {
275		12	return true;
276			}
277
// Element-wise sum of two flat arrays.
// @param mat_a  left operand; its size determines the size of the result.
// @param mat_b  right operand; indexed up to mat_a.size() - 1, so it must
//               hold at least as many elements as mat_a.
// @return a new vector with res[i] = mat_a[i] + mat_b[i].
278		12	std::vector<double> TabalaevAMatrixMulStrassenALL::Add(const std::vector<double> &mat_a,
279			const std::vector<double> &mat_b) {
280			const size_t n = mat_a.size();
281		12	std::vector<double> res(n);
282
			// Each iteration writes a distinct res[i].
283		12	#pragma omp parallel for default(none) shared(mat_a, mat_b, res, n)
284			for (size_t i = 0; i < n; ++i) {
285			res[i] = mat_a[i] + mat_b[i];
286			}
287
288		12	return res;
289			}
290
// Element-wise difference of two flat arrays.
// @param mat_a  minuend; its size determines the size of the result.
// @param mat_b  subtrahend; indexed up to mat_a.size() - 1, so it must hold
//               at least as many elements as mat_a.
// @return a new vector with res[i] = mat_a[i] - mat_b[i].
291		8	std::vector<double> TabalaevAMatrixMulStrassenALL::Subtract(const std::vector<double> &mat_a,
292			const std::vector<double> &mat_b) {
293			const size_t n = mat_a.size();
294		8	std::vector<double> res(n);
295
			// Each iteration writes a distinct res[i].
296		8	#pragma omp parallel for default(none) shared(mat_a, mat_b, res, n)
297			for (size_t i = 0; i < n; ++i) {
298			res[i] = mat_a[i] - mat_b[i];
299			}
300
301		8	return res;
302			}
303
// Direct (non-recursive) product of two n x n matrices stored row-major in
// flat vectors.
// @param mat_a  left factor, read at index i * n + k.
// @param mat_b  right factor, read at index k * n + j.
// @param n      matrix dimension.
// @return a new n * n vector holding mat_a * mat_b.
304		18	std::vector<double> TabalaevAMatrixMulStrassenALL::BaseMultiply(const std::vector<double> &mat_a,
305			const std::vector<double> &mat_b, size_t n) {
306		18	std::vector<double> res(n * n, 0.0);
307
			// Parallel over rows i: every thread only updates entries of its
			// own result rows. The i-k-j order makes the innermost loop walk
			// one row of mat_b and one row of res with consecutive indices.
308		18	#pragma omp parallel for default(none) shared(mat_a, mat_b, res, n)
309			for (size_t i = 0; i < n; ++i) {
310			for (size_t k = 0; k < n; ++k) {
311			double temp = mat_a[(i * n) + k];
			// Zero entries of mat_a are skipped, so they add nothing to the row.
			// NOTE(review): this also skips 0 * inf / 0 * NaN terms, which
			// would otherwise yield NaN - confirm that is acceptable.
312			if (temp == 0.0) {
313			continue;
314			}
315			for (size_t j = 0; j < n; ++j) {
316			res[(i * n) + j] += temp * mat_b[(k * n) + j];
317			}
318			}
319			}
320
321		18	return res;
322			}
323
// Copies the four (n/2) x (n/2) quadrants of a row-major n x n matrix into
// separate row-major buffers.
// @param src  source matrix, read with row stride n.
// @param n    dimension of src; h = n / 2 uses integer division.
// @param c11  out: top-left quadrant     (rows 0..h-1,  cols 0..h-1)
// @param c12  out: top-right quadrant    (rows 0..h-1,  cols h..2h-1)
// @param c21  out: bottom-left quadrant  (rows h..2h-1, cols 0..h-1)
// @param c22  out: bottom-right quadrant (rows h..2h-1, cols h..2h-1)
// All four outputs are resized to h * h before being filled.
324		4	void TabalaevAMatrixMulStrassenALL::SplitMatrix(const std::vector<double> &src, size_t n, std::vector<double> &c11,
325			std::vector<double> &c12, std::vector<double> &c21,
326			std::vector<double> &c22) {
327		4	size_t h = n / 2;
328		4	size_t sz = h * h;
329		4	c11.resize(sz);
330		4	c12.resize(sz);
331		4	c21.resize(sz);
332		4	c22.resize(sz);
333
			// src_idx addresses element (i, j) of the top-left quadrant; adding
			// h moves h columns right and adding h * n moves h rows down.
334		4	#pragma omp parallel for collapse(2) default(none) shared(src, c11, c12, c21, c22, h, n)
335			for (size_t i = 0; i < h; ++i) {
336			for (size_t j = 0; j < h; ++j) {
337			size_t src_idx = (i * n) + j;
338			size_t dst_idx = (i * h) + j;
339			c11[dst_idx] = src[src_idx];
340			c12[dst_idx] = src[src_idx + h];
341			c21[dst_idx] = src[src_idx + (h * n)];
342			c22[dst_idx] = src[src_idx + (h * n) + h];
343			}
344			}
345		4	}
346
// Assembles an n x n row-major matrix from four (n/2) x (n/2) quadrants; the
// placement mirrors the layout SplitMatrix reads from.
// @param c11  top-left quadrant.
// @param c12  top-right quadrant.
// @param c21  bottom-left quadrant.
// @param c22  bottom-right quadrant.
// @param n    dimension of the assembled matrix; h = n / 2 uses integer
//             division.
// @return a new n * n vector containing the combined matrix.
347		2	std::vector<double> TabalaevAMatrixMulStrassenALL::CombineMatrix(const std::vector<double> &c11,
348			const std::vector<double> &c12,
349			const std::vector<double> &c21,
350			const std::vector<double> &c22, size_t n) {
351		2	size_t h = n / 2;
352		2	std::vector<double> res(n * n);
353
			// Quadrant element (i, j) goes to row i or i + h and column j or
			// j + h of the result, depending on the quadrant.
354		2	#pragma omp parallel for collapse(2) default(none) shared(res, c11, c12, c21, c22, h, n)
355			for (size_t i = 0; i < h; ++i) {
356			for (size_t j = 0; j < h; ++j) {
357			size_t src_idx = (i * h) + j;
358			res[(i * n) + j] = c11[src_idx];
359			res[(i * n) + j + h] = c12[src_idx];
360			res[((i + h) * n) + j] = c21[src_idx];
361			res[((i + h) * n) + j + h] = c22[src_idx];
362			}
363			}
364		2	return res;
365			}
366
// Strassen multiplication of two n x n row-major matrices, written with an
// explicit stack of frames instead of recursive calls.
//
// Two stacks are used:
//   frames  - pending work. A frame with stage 0 is a product still to be
//             computed; a frame with stage 8 marks "the seven sub-products of
//             a size-n problem are ready, combine them".
//   results - finished products, consumed by the stage-8 frames.
//
// @param mat_a  left factor.
// @param mat_b  right factor.
// @param n      matrix dimension; problems with n <= kBaseCaseSize go
//               straight to BaseMultiply.
// @return the product, taken from the top of the results stack.
// NOTE(review): StrassenFrameALL is declared outside this view; the field
// order {mat_a, mat_b, n, stage} is inferred from the initializers and member
// accesses below.
367		18	std::vector<double> TabalaevAMatrixMulStrassenALL::StrassenMultiply(const std::vector<double> &mat_a,
368			const std::vector<double> &mat_b, size_t n) {
369			std::stack<StrassenFrameALL> frames;
370			std::stack<std::vector<double>> results;
371
372		18	frames.push({mat_a, mat_b, n, 0});
373
374	2/2 ✓ Branch 0 taken 18 times. ✓ Branch 1 taken 18 times.	36	while (!frames.empty()) {
375			StrassenFrameALL current = std::move(frames.top());
376			frames.pop();
377
			// Stage-8 frames are only pushed for sizes that already failed this
			// check, so a combine frame never takes the base-case branch.
378	1/2 ✓ Branch 0 taken 18 times. ✗ Branch 1 not taken.	18	if (current.n <= kBaseCaseSize) {
379	1/2 ✓ Branch 1 taken 18 times. ✗ Branch 2 not taken.	18	results.push(BaseMultiply(current.mat_a, current.mat_b, current.n));
380			continue;
381			}
382
383		✗	if (current.stage == 8) {
384		✗	std::vector<std::vector<double>> p(7);
385
			// The seven products were pushed onto results in order 1..7, so the
			// top of the stack is the seventh product and is stored in p[6].
386		✗	for (int i = 6; i >= 0; --i) {
387		✗	p[i] = std::move(results.top());
388			results.pop();
389			}
390
391		✗	size_t h = current.n / 2;
392		✗	size_t sz = h * h;
393		✗	std::vector<double> c11(sz);
394		✗	std::vector<double> c12(sz);
395		✗	std::vector<double> c21(sz);
396		✗	std::vector<double> c22(sz);
397
			// Element-wise recombination of the seven products into quadrants.
398		✗	#pragma omp parallel for default(none) shared(p, c11, c12, c21, c22, sz)
399			for (size_t i = 0; i < sz; ++i) {
400			c11[i] = p[0][i] + p[3][i] - p[4][i] + p[6][i];
401			c12[i] = p[2][i] + p[4][i];
402			c21[i] = p[1][i] + p[3][i];
403			c22[i] = p[0][i] - p[1][i] + p[2][i] + p[5][i];
404			}
405
406		✗	results.push(CombineMatrix(c11, c12, c21, c22, current.n));
407		✗	} else {
408		✗	size_t h = current.n / 2;
409		✗	std::vector<double> a11;
410		✗	std::vector<double> a12;
411		✗	std::vector<double> a21;
412		✗	std::vector<double> a22;
413		✗	std::vector<double> b11;
414		✗	std::vector<double> b12;
415		✗	std::vector<double> b21;
416		✗	std::vector<double> b22;
417
418		✗	SplitMatrix(current.mat_a, current.n, a11, a12, a21, a22);
419		✗	SplitMatrix(current.mat_b, current.n, b11, b12, b21, b22);
420
			// The combine marker goes in first so it is popped only after all
			// seven sub-problems (and anything they push) have been processed.
421		✗	frames.push({{}, {}, current.n, 8});
422
			// Sub-problems are pushed seventh-to-first: the stack then yields
			// them first-to-seventh, which is the order the combine step
			// expects to find on the results stack.
423		✗	frames.push({Subtract(a12, a22), Add(b21, b22), h, 0});
424		✗	frames.push({Subtract(a21, a11), Add(b11, b12), h, 0});
425		✗	frames.push({Add(a11, a12), b22, h, 0});
426		✗	frames.push({a22, Subtract(b21, b11), h, 0});
427		✗	frames.push({a11, Subtract(b12, b22), h, 0});
428		✗	frames.push({Add(a21, a22), b11, h, 0});
429		✗	frames.push({Add(a11, a22), Add(b11, b22), h, 0});
430			}
431		18	}
432
			// results.top() is a stack element, not a named local, so the
			// explicit std::move is what turns this return into a move.
433		18	return std::move(results.top());
434	3/48 ✓ Branch 1 taken 18 times. ✗ Branch 2 not taken. ✓ Branch 4 taken 18 times. ✗ Branch 5 not taken. ✓ Branch 7 taken 18 times. ✗ Branch 8 not taken. ✗ Branch 10 not taken. ✗ Branch 11 not taken. ✗ Branch 13 not taken. ✗ Branch 14 not taken. ✗ Branch 16 not taken. ✗ Branch 17 not taken. ✗ Branch 19 not taken. ✗ Branch 20 not taken. ✗ Branch 22 not taken. ✗ Branch 23 not taken. ✗ Branch 25 not taken. ✗ Branch 26 not taken. ✗ Branch 28 not taken. ✗ Branch 29 not taken. ✗ Branch 31 not taken. ✗ Branch 32 not taken. ✗ Branch 34 not taken. ✗ Branch 35 not taken. ✗ Branch 37 not taken. ✗ Branch 38 not taken. ✗ Branch 40 not taken. ✗ Branch 41 not taken. ✗ Branch 43 not taken. ✗ Branch 44 not taken. ✗ Branch 46 not taken. ✗ Branch 47 not taken. ✗ Branch 49 not taken. ✗ Branch 50 not taken. ✗ Branch 52 not taken. ✗ Branch 53 not taken. ✗ Branch 55 not taken. ✗ Branch 56 not taken. ✗ Branch 58 not taken. ✗ Branch 59 not taken. ✗ Branch 61 not taken. ✗ Branch 62 not taken. ✗ Branch 64 not taken. ✗ Branch 65 not taken. ✗ Branch 67 not taken. ✗ Branch 68 not taken. ✗ Branch 70 not taken. ✗ Branch 71 not taken.	18	}
435
436			} // namespace tabalaev_a_matrix_mul_strassen
437