| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "telnov_a_integral_rectangle/all/include/ops_all.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | #include <omp.h> | ||
| 5 | |||
| 6 | #include <algorithm> | ||
| 7 | #include <cstdint> | ||
| 8 | #include <functional> | ||
| 9 | #include <thread> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "oneapi/tbb/blocked_range.h" | ||
| 13 | #include "oneapi/tbb/parallel_reduce.h" | ||
| 14 | #include "telnov_a_integral_rectangle/common/include/common.hpp" | ||
| 15 | #include "util/include/util.hpp" | ||
| 16 | |||
| 17 | namespace telnov_a_integral_rectangle { | ||
| 18 | |||
| 19 | namespace { | ||
| 20 | |||
/// Raises `base` to the non-negative integer power `degree` using 64-bit arithmetic.
///
/// @param base   Value to multiply repeatedly.
/// @param degree Number of multiplications; any value <= 0 yields 1.
/// @return base^degree as int64_t. No overflow detection is performed here, so
///         callers must keep the result within the int64_t range.
int64_t PowInt(int base, int degree) {
  int64_t power = 1;
  int remaining = degree;

  while (remaining > 0) {
    power *= base;
    --remaining;
  }

  return power;
}
| 28 | |||
/// Evaluates the summed midpoint coordinates of one grid cell.
///
/// The flat `index` is decoded as a base-`n` number: each successive digit is
/// the cell position along one axis. For every axis the midpoint coordinate
/// `(digit + 0.5) * h` is computed and the coordinates are added together.
///
/// @param index      Flat cell index (taken by value and consumed while decoding).
/// @param n          Number of cells per axis (the radix used for decoding).
/// @param dimensions Number of axes to decode.
/// @param h          Cell edge length.
/// @return Sum of the midpoint coordinates over all `dimensions` axes
///         (0.0 when `dimensions` <= 0).
double CalculatePointValue(int64_t index, int n, int dimensions, double h) {
  double coordinate_sum = 0.0;
  int64_t undecoded = index;

  int axis = 0;
  while (axis < dimensions) {
    // Lowest base-n digit is the cell position along the current axis.
    const auto cell = static_cast<int>(undecoded % n);
    undecoded /= n;

    coordinate_sum += (static_cast<double>(cell) + 0.5) * h;
    ++axis;
  }

  return coordinate_sum;
}
| 42 | |||
| 43 | 30 | double CalculateRange(int64_t begin, int64_t end, int n, int dimensions, double h) { | |
| 44 | return oneapi::tbb::parallel_reduce( | ||
| 45 | 60 | oneapi::tbb::blocked_range<int64_t>(begin, end), 0.0, | |
| 46 | 30 | [n, dimensions, h](const oneapi::tbb::blocked_range<int64_t> &range, double local_sum) { | |
| 47 |
2/2✓ Branch 0 taken 37669 times.
✓ Branch 1 taken 2148 times.
|
39817 | for (int64_t index = range.begin(); index != range.end(); ++index) { |
| 48 | 75338 | local_sum += CalculatePointValue(index, n, dimensions, h); | |
| 49 | } | ||
| 50 | return local_sum; | ||
| 51 | 30 | }, std::plus<>()); | |
| 52 | } | ||
| 53 | |||
| 54 | } // namespace | ||
| 55 | |||
| 56 | 16 | TelnovAIntegralRectangleALL::TelnovAIntegralRectangleALL(const InType &in) { | |
| 57 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 58 | GetInput() = in; | ||
| 59 | GetOutput() = 0.0; | ||
| 60 | 16 | } | |
| 61 | |||
| 62 | 16 | bool TelnovAIntegralRectangleALL::ValidationImpl() { | |
| 63 |
2/4✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 16 times.
|
16 | return GetInput().first > 0 && GetInput().second > 0; |
| 64 | } | ||
| 65 | |||
| 66 | 16 | bool TelnovAIntegralRectangleALL::PreProcessingImpl() { | |
| 67 | 16 | GetOutput() = 0.0; | |
| 68 | 16 | return true; | |
| 69 | } | ||
| 70 | |||
| 71 | 16 | bool TelnovAIntegralRectangleALL::RunImpl() { | |
| 72 | 16 | const int n = GetInput().first; | |
| 73 | 16 | const int dimensions = GetInput().second; | |
| 74 | |||
| 75 | 16 | const double h = 1.0 / static_cast<double>(n); | |
| 76 | const int64_t total_points = PowInt(n, dimensions); | ||
| 77 | |||
| 78 | 16 | int rank = 0; | |
| 79 | 16 | int size = 1; | |
| 80 | |||
| 81 | 16 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); | |
| 82 | 16 | MPI_Comm_size(MPI_COMM_WORLD, &size); | |
| 83 | |||
| 84 | 16 | const int64_t base_block = total_points / size; | |
| 85 | 16 | const int64_t remainder = total_points % size; | |
| 86 | |||
| 87 |
2/2✓ Branch 0 taken 13 times.
✓ Branch 1 taken 3 times.
|
16 | const int64_t rank_begin = (static_cast<int64_t>(rank) * base_block) + std::min<int64_t>(rank, remainder); |
| 88 |
2/2✓ Branch 0 taken 13 times.
✓ Branch 1 taken 3 times.
|
16 | const int64_t rank_size = base_block + (rank < remainder ? 1 : 0); |
| 89 | |||
| 90 |
2/2✓ Branch 1 taken 2 times.
✓ Branch 2 taken 14 times.
|
16 | int thread_count = ppc::util::GetNumThreads(); |
| 91 |
4/4✓ Branch 0 taken 2 times.
✓ Branch 1 taken 14 times.
✓ Branch 2 taken 14 times.
✓ Branch 3 taken 2 times.
|
18 | thread_count = std::max(1, std::min(thread_count, static_cast<int>(std::max<int64_t>(1, rank_size)))); |
| 92 | |||
| 93 | 16 | std::vector<std::thread> threads(thread_count); | |
| 94 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | std::vector<double> thread_sums(thread_count, 0.0); |
| 95 | |||
| 96 | 16 | const int64_t thread_block = rank_size / thread_count; | |
| 97 | 16 | const int64_t thread_remainder = rank_size % thread_count; | |
| 98 | |||
| 99 | int64_t current_begin = rank_begin; | ||
| 100 | |||
| 101 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 16 times.
|
46 | for (int thread_id = 0; thread_id < thread_count; ++thread_id) { |
| 102 |
2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 2 times.
|
30 | const int64_t current_block = thread_block + (thread_id < thread_remainder ? 1 : 0); |
| 103 | 30 | const int64_t current_end = current_begin + current_block; | |
| 104 | |||
| 105 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
30 | threads[thread_id] = std::thread([&, thread_id, current_begin, current_end]() { |
| 106 | 30 | thread_sums[thread_id] = CalculateRange(current_begin, current_end, n, dimensions, h); | |
| 107 |
1/2✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.
|
30 | }); |
| 108 | |||
| 109 | current_begin = current_end; | ||
| 110 | } | ||
| 111 | |||
| 112 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 16 times.
|
46 | for (auto &thread : threads) { |
| 113 |
1/2✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.
|
30 | thread.join(); |
| 114 | } | ||
| 115 | |||
| 116 | 16 | double local_sum = 0.0; | |
| 117 | |||
| 118 | 16 | #pragma omp parallel for default(none) shared(thread_sums, thread_count) reduction(+ : local_sum) \ | |
| 119 | num_threads(thread_count) | ||
| 120 | for (int i = 0; i < thread_count; ++i) { | ||
| 121 | local_sum += thread_sums[i]; | ||
| 122 | } | ||
| 123 | |||
| 124 | 16 | double global_sum = 0.0; | |
| 125 |
1/2✓ Branch 1 taken 16 times.
✗ Branch 2 not taken.
|
16 | MPI_Allreduce(&local_sum, &global_sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); |
| 126 | |||
| 127 | 16 | GetOutput() = global_sum; | |
| 128 | |||
| 129 |
2/2✓ Branch 0 taken 36 times.
✓ Branch 1 taken 16 times.
|
52 | for (int i = 0; i < dimensions; ++i) { |
| 130 | 36 | GetOutput() *= h; | |
| 131 | } | ||
| 132 | |||
| 133 | 16 | return true; | |
| 134 | 16 | } | |
| 135 | |||
| 136 | 16 | bool TelnovAIntegralRectangleALL::PostProcessingImpl() { | |
| 137 | 16 | return true; | |
| 138 | } | ||
| 139 | |||
| 140 | } // namespace telnov_a_integral_rectangle | ||
| 141 |