| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #include "smetanin_d_sent_num/mpi/include/ops_mpi.hpp" | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cstddef> | ||
| 7 | #include <cstdint> | ||
| 8 | #include <string> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "smetanin_d_sent_num/common/include/common.hpp" | ||
| 12 | |||
| 13 | namespace smetanin_d_sent_num { | ||
| 14 | |||
| 15 | namespace { | ||
// Splits `text_length` characters into `proc_count` contiguous segments.
//
// The first `text_length % proc_count` segments receive one extra character, so
// segment lengths differ by at most one. Segment `p` covers the half-open range
// [starts[p], starts[p] + sizes[p]).
//
// text_length - total number of characters to distribute.
// proc_count  - number of segments to produce; when zero nothing is written.
// starts      - receives the first index of every segment.
// sizes       - receives the length of every segment.
//
// Output vectors shorter than `proc_count` are grown to fit; longer vectors keep
// their trailing elements untouched.
void ComputeSegments(std::size_t text_length, std::size_t proc_count, std::vector<std::size_t> &starts,
                     std::vector<std::size_t> &sizes) {
  if (proc_count == 0) {
    // Nothing to split into; returning here also avoids dividing by zero below.
    return;
  }
  if (starts.size() < proc_count) {
    starts.resize(proc_count);
  }
  if (sizes.size() < proc_count) {
    sizes.resize(proc_count);
  }

  const std::size_t base_chunk = text_length / proc_count;
  const std::size_t remainder = text_length % proc_count;

  std::size_t cur = 0;
  for (std::size_t proc = 0; proc < proc_count; ++proc) {
    // The leading `remainder` segments absorb the characters that do not divide evenly.
    const std::size_t length = base_chunk + ((proc < remainder) ? 1U : 0U);
    starts[proc] = cur;
    sizes[proc] = length;
    cur += length;
  }
}
| 28 | |||
// Converts per-process segments into the count/displacement arrays used for scattering.
//
// Every non-empty segment that belongs to a process other than the first and does not
// start at index 0 is widened by one character to the left, so the receiver also gets
// the character that precedes its segment. Empty segments get a count of 0 and keep
// their start as displacement.
//
// starts     - first index of each segment; its length defines the process count.
// sizes      - length of each segment; entries missing from a shorter vector are
//              treated as empty segments.
// sendcounts - receives the number of characters to send to each process.
// displs     - receives the offset of the first character to send to each process.
//
// Output vectors shorter than `starts` are grown to fit.
// NOTE(review): values are narrowed to `int`; inputs whose offsets or lengths exceed
// the range of `int` are not representable here - confirm the callers' size limits.
void ComputeSendCounts(const std::vector<std::size_t> &starts, const std::vector<std::size_t> &sizes,
                       std::vector<int> &sendcounts, std::vector<int> &displs) {
  const std::size_t proc_count = starts.size();
  if (sendcounts.size() < proc_count) {
    sendcounts.resize(proc_count, 0);
  }
  if (displs.size() < proc_count) {
    displs.resize(proc_count, 0);
  }

  for (std::size_t proc = 0; proc < proc_count; ++proc) {
    const std::size_t real_start = starts[proc];
    const std::size_t real_size = (proc < sizes.size()) ? sizes[proc] : 0;

    // One extra leading character is sent only when there is something to the left
    // of a non-empty segment owned by a non-root process.
    const bool with_left_neighbour = real_size != 0 && proc != 0 && real_start > 0;
    const std::size_t send_start = with_left_neighbour ? real_start - 1 : real_start;
    const std::size_t send_size = with_left_neighbour ? real_size + 1U : real_size;

    sendcounts[proc] = static_cast<int>(send_size);
    displs[proc] = static_cast<int>(send_start);
  }
}
| 53 | |||
// Counts sentence endings inside one process' slice of the text.
//
// A sentence ending is a '.', '!' or '?' that is not immediately preceded by another
// such character, so a run like "..." or "?!" is counted once.
//
// local_text           - buffer held by this process; it may begin with extra
//                        characters that precede the owned segment.
// local_start_offset   - index in `local_text` where the owned segment begins;
//                        negative values are treated as 0.
// segment_start_global - index of the owned segment's first character in the full text.
// segment_size_global  - number of owned characters; scanning never runs past the end
//                        of `local_text`.
//
// Returns the number of sentence endings found in the owned range.
std::size_t CountLocalSentences(const std::string &local_text, int local_start_offset, std::size_t segment_start_global,
                                std::size_t segment_size_global) {
  const std::size_t text_size = local_text.size();
  const std::size_t offset = (local_start_offset > 0) ? static_cast<std::size_t>(local_start_offset) : 0;

  // Work in std::size_t and clamp both ends so neither narrowing nor overflow can occur.
  const std::size_t begin = std::min(offset, text_size);
  const std::size_t end = begin + std::min(segment_size_global, text_size - begin);

  const auto is_terminator = [](char symbol) { return symbol == '.' || symbol == '!' || symbol == '?'; };

  std::size_t local_sentence_count = 0;
  for (std::size_t idx = begin; idx < end; ++idx) {
    if (!is_terminator(local_text[idx])) {
      continue;
    }

    const std::size_t global_pos = segment_start_global + (idx - begin);

    // The previous character is inspected only when it exists both in the full text
    // and in the local buffer; `idx > 0` keeps `idx - 1` from wrapping around.
    const bool has_previous = global_pos > 0 && idx > 0;
    if (has_previous && is_terminator(local_text[idx - 1])) {
      continue;
    }

    ++local_sentence_count;
  }
  return local_sentence_count;
}
| 81 | |||
| 82 | } // namespace | ||
| 83 | |||
| 84 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | SmetaninDSentNumMPI::SmetaninDSentNumMPI(const InType &in) { |
| 85 | SetTypeOfTask(GetStaticTypeOfTask()); | ||
| 86 | GetInput() = in; | ||
| 87 | 10 | GetOutput() = 0; | |
| 88 | 10 | } | |
| 89 | |||
| 90 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
10 | bool SmetaninDSentNumMPI::ValidationImpl() { |
| 91 | const InType &source_data = GetInput(); | ||
| 92 | const OutType ¤t_output = GetOutput(); | ||
| 93 | |||
| 94 |
2/4✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 10 times.
|
10 | return !source_data.empty() && current_output == 0; |
| 95 | } | ||
| 96 | |||
| 97 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
|
10 | bool SmetaninDSentNumMPI::PreProcessingImpl() { |
| 98 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
|
10 | if (GetInput()[0] == '.' || GetInput()[0] == '!' || GetInput()[0] == '?') { |
| 99 | 2 | GetInput()[0] = ' '; | |
| 100 | } | ||
| 101 | 10 | return true; | |
| 102 | } | ||
| 103 | |||
| 104 | 10 | bool SmetaninDSentNumMPI::RunImpl() { | |
| 105 | 10 | int process_count = 1; | |
| 106 | 10 | int process_rank = 0; | |
| 107 | |||
| 108 | 10 | MPI_Comm_size(MPI_COMM_WORLD, &process_count); | |
| 109 | 10 | MPI_Comm_rank(MPI_COMM_WORLD, &process_rank); | |
| 110 | |||
| 111 | std::size_t text_length = 0; | ||
| 112 | const InType *full_text_ptr = nullptr; | ||
| 113 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | if (process_rank == 0) { |
| 114 | full_text_ptr = &GetInput(); | ||
| 115 | text_length = full_text_ptr->length(); | ||
| 116 | } | ||
| 117 | |||
| 118 | 10 | auto text_length_u = static_cast<std::uint64_t>(text_length); | |
| 119 | 10 | MPI_Bcast(&text_length_u, 1, MPI_UINT64_T, 0, MPI_COMM_WORLD); | |
| 120 | 10 | text_length = static_cast<std::size_t>(text_length_u); | |
| 121 | |||
| 122 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
10 | if (text_length == 0) { |
| 123 | std::size_t local_sentence_count = 0; | ||
| 124 | std::size_t global_sentence_count = 0; | ||
| 125 | |||
| 126 | ✗ | auto local_u = static_cast<std::uint64_t>(local_sentence_count); | |
| 127 | ✗ | auto global_u = static_cast<std::uint64_t>(0); | |
| 128 | ✗ | MPI_Reduce(&local_u, &global_u, 1, MPI_UINT64_T, MPI_SUM, 0, MPI_COMM_WORLD); | |
| 129 | ✗ | MPI_Bcast(&global_u, 1, MPI_UINT64_T, 0, MPI_COMM_WORLD); | |
| 130 | |||
| 131 | ✗ | global_sentence_count = static_cast<std::size_t>(global_u); | |
| 132 | ✗ | GetOutput() = static_cast<OutType>(global_sentence_count); | |
| 133 | return true; | ||
| 134 | } | ||
| 135 | |||
| 136 | 10 | const auto proc_count = static_cast<std::size_t>(process_count); | |
| 137 | 10 | std::vector<std::size_t> segment_starts(proc_count); | |
| 138 |
1/4✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
10 | std::vector<std::size_t> segment_sizes(proc_count); |
| 139 | |||
| 140 | ComputeSegments(text_length, proc_count, segment_starts, segment_sizes); | ||
| 141 | |||
| 142 |
1/4✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
10 | std::vector<int> sendcounts(proc_count, 0); |
| 143 |
1/4✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
10 | std::vector<int> displs(proc_count, 0); |
| 144 | 10 | ComputeSendCounts(segment_starts, segment_sizes, sendcounts, displs); | |
| 145 | |||
| 146 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | const int local_buffer_size = sendcounts[process_rank]; |
| 147 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | std::string local_text(static_cast<std::size_t>(local_buffer_size), ' '); |
| 148 | |||
| 149 | const char *sendbuf = nullptr; | ||
| 150 |
3/4✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✗ Branch 3 not taken.
|
10 | if (process_rank == 0 && full_text_ptr != nullptr) { |
| 151 | sendbuf = full_text_ptr->data(); | ||
| 152 | } | ||
| 153 | |||
| 154 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
10 | char *recvbuf = local_buffer_size > 0 ? local_text.data() : nullptr; |
| 155 | |||
| 156 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Scatterv(sendbuf, sendcounts.data(), displs.data(), MPI_CHAR, recvbuf, local_buffer_size, MPI_CHAR, 0, |
| 157 | MPI_COMM_WORLD); | ||
| 158 | |||
| 159 | std::size_t local_sentence_count = 0; | ||
| 160 | |||
| 161 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
10 | if (local_buffer_size > 0) { |
| 162 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | const auto segment_start_global = segment_starts[process_rank]; |
| 163 | 10 | const auto segment_size_global = segment_sizes[process_rank]; | |
| 164 | |||
| 165 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
|
10 | const int local_start_offset = (process_rank == 0 || segment_start_global == 0 || segment_size_global == 0) ? 0 : 1; |
| 166 | |||
| 167 | local_sentence_count = | ||
| 168 | 10 | CountLocalSentences(local_text, local_start_offset, segment_start_global, segment_size_global); | |
| 169 | } | ||
| 170 | |||
| 171 | std::size_t global_sentence_count = 0; | ||
| 172 | 10 | auto local_u = static_cast<std::uint64_t>(local_sentence_count); | |
| 173 | 10 | auto global_u = static_cast<std::uint64_t>(0); | |
| 174 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Reduce(&local_u, &global_u, 1, MPI_UINT64_T, MPI_SUM, 0, MPI_COMM_WORLD); |
| 175 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Bcast(&global_u, 1, MPI_UINT64_T, 0, MPI_COMM_WORLD); |
| 176 | |||
| 177 | 10 | global_sentence_count = static_cast<std::size_t>(global_u); | |
| 178 | 10 | GetOutput() = static_cast<OutType>(global_sentence_count); | |
| 179 | |||
| 180 | return true; | ||
| 181 | } | ||
| 182 | |||
| 183 | 10 | bool SmetaninDSentNumMPI::PostProcessingImpl() { | |
| 184 | 10 | return true; | |
| 185 | } | ||
| 186 | |||
| 187 | } // namespace smetanin_d_sent_num | ||
| 188 |