| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | #pragma once | ||
| 2 | |||
| 3 | #include <mpi.h> | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cstdint> | ||
| 7 | #include <cstdlib> | ||
| 8 | #include <cstring> | ||
| 9 | #include <vector> | ||
| 10 | |||
| 11 | #include "petrov_e_allreduce/common/include/common.hpp" | ||
| 12 | #include "task/include/task.hpp" | ||
| 13 | |||
| 14 | namespace petrov_e_allreduce { | ||
| 15 | |||
| 16 | template <typename T> | ||
| 17 | 20 | void ApplyOperation(T *dest, T *src, int count, MPI_Op op) { | |
| 18 | int flag = 0; | ||
| 19 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
20 | if (op == MPI_SUM) { |
| 20 | flag = 1; | ||
| 21 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
20 | } else if (op == MPI_PROD) { |
| 22 | flag = 2; | ||
| 23 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
20 | } else if (op == MPI_MAX) { |
| 24 | flag = 3; | ||
| 25 | ✗ | } else if (op == MPI_MIN) { | |
| 26 | flag = 4; | ||
| 27 | } | ||
| 28 |
1/4✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
20 | switch (flag) { |
| 29 | case 1: | ||
| 30 | ✗ | for (int i = 0; i < count; i++) { | |
| 31 | ✗ | dest[i] += src[i]; | |
| 32 | } | ||
| 33 | break; | ||
| 34 | case 2: | ||
| 35 | ✗ | for (int i = 0; i < count; i++) { | |
| 36 | ✗ | dest[i] *= src[i]; | |
| 37 | } | ||
| 38 | break; | ||
| 39 | case 3: | ||
| 40 |
2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 10 times.
|
76 | for (int i = 0; i < count; i++) { |
| 41 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 17 times.
|
78 | dest[i] = std::max(dest[i], src[i]); |
| 42 | } | ||
| 43 | break; | ||
| 44 | case 4: | ||
| 45 | ✗ | for (int i = 0; i < count; i++) { | |
| 46 | ✗ | dest[i] = std::min(dest[i], src[i]); | |
| 47 | } | ||
| 48 | break; | ||
| 49 | default: | ||
| 50 | break; | ||
| 51 | } | ||
| 52 | 20 | } | |
| 53 | |||
| 54 | 10 | inline void Operation(void *dest, void *src, int count, MPI_Datatype datatype, MPI_Op op) { | |
| 55 | int flag = 0; | ||
| 56 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
10 | if (datatype == MPI_UNSIGNED_CHAR) { |
| 57 | flag = 1; | ||
| 58 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
10 | } else if (datatype == MPI_CHAR) { |
| 59 | flag = 2; | ||
| 60 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
10 | } else if (datatype == MPI_SHORT) { |
| 61 | flag = 3; | ||
| 62 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
10 | } else if (datatype == MPI_INT) { |
| 63 | flag = 4; | ||
| 64 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
10 | } else if (datatype == MPI_LONG) { |
| 65 | flag = 5; | ||
| 66 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
10 | } else if (datatype == MPI_FLOAT) { |
| 67 | flag = 6; | ||
| 68 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
10 | } else if (datatype == MPI_DOUBLE) { |
| 69 | flag = 7; | ||
| 70 | ✗ | } else if (datatype == MPI_LONG_DOUBLE) { | |
| 71 | flag = 8; | ||
| 72 | } | ||
| 73 | switch (flag) { | ||
| 74 | case 1: | ||
| 75 | ✗ | ApplyOperation(static_cast<unsigned char *>(dest), static_cast<unsigned char *>(src), count, op); | |
| 76 | ✗ | break; | |
| 77 | case 2: | ||
| 78 | ✗ | ApplyOperation(static_cast<char *>(dest), static_cast<char *>(src), count, op); | |
| 79 | ✗ | break; | |
| 80 | case 3: | ||
| 81 | ✗ | ApplyOperation(static_cast<int16_t *>(dest), static_cast<int16_t *>(src), count, op); | |
| 82 | ✗ | break; | |
| 83 | case 4: | ||
| 84 | ✗ | ApplyOperation(static_cast<int *>(dest), static_cast<int *>(src), count, op); | |
| 85 | ✗ | break; | |
| 86 | case 5: | ||
| 87 | ✗ | ApplyOperation(static_cast<int64_t *>(dest), static_cast<int64_t *>(src), count, op); | |
| 88 | ✗ | break; | |
| 89 | case 6: | ||
| 90 | ✗ | ApplyOperation(static_cast<float *>(dest), static_cast<float *>(src), count, op); | |
| 91 | ✗ | break; | |
| 92 | case 7: | ||
| 93 | 10 | ApplyOperation(static_cast<double *>(dest), static_cast<double *>(src), count, op); | |
| 94 | 10 | break; | |
| 95 | case 8: | ||
| 96 | ✗ | ApplyOperation(static_cast<long double *>(dest), static_cast<long double *>(src), count, op); | |
| 97 | ✗ | break; | |
| 98 | default: | ||
| 99 | break; | ||
| 100 | } | ||
| 101 | 10 | } | |
| 102 | |||
| 103 | 20 | inline void GetSizeOf2(MPI_Datatype type, int &size) { | |
| 104 | int res = 4; | ||
| 105 |
1/2✓ Branch 0 taken 20 times.
✗ Branch 1 not taken.
|
20 | if (type == MPI_CHAR) { |
| 106 | res = sizeof(char); | ||
| 107 |
1/2✓ Branch 0 taken 20 times.
✗ Branch 1 not taken.
|
20 | } else if (type == MPI_UNSIGNED_CHAR) { |
| 108 | res = sizeof(unsigned char); | ||
| 109 |
1/2✓ Branch 0 taken 20 times.
✗ Branch 1 not taken.
|
20 | } else if (type == MPI_SHORT) { |
| 110 | res = sizeof(int16_t); | ||
| 111 |
1/2✓ Branch 0 taken 20 times.
✗ Branch 1 not taken.
|
20 | } else if (type == MPI_UNSIGNED_SHORT) { |
| 112 | res = sizeof(uint16_t); | ||
| 113 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
|
20 | } else if (type == MPI_INT) { |
| 114 | res = sizeof(int); | ||
| 115 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
|
20 | } else if (type == MPI_UNSIGNED) { |
| 116 | res = sizeof(unsigned); | ||
| 117 |
1/2✓ Branch 0 taken 20 times.
✗ Branch 1 not taken.
|
20 | } else if (type == MPI_LONG) { |
| 118 | res = sizeof(int64_t); | ||
| 119 |
1/2✓ Branch 0 taken 20 times.
✗ Branch 1 not taken.
|
20 | } else if (type == MPI_UNSIGNED_LONG) { |
| 120 | res = sizeof(uint64_t); | ||
| 121 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
|
20 | } else if (type == MPI_FLOAT) { |
| 122 | res = sizeof(float); | ||
| 123 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
|
20 | } else if (type == MPI_DOUBLE) { |
| 124 | res = sizeof(double); | ||
| 125 | ✗ | } else if (type == MPI_LONG_DOUBLE) { | |
| 126 | res = sizeof(long double); | ||
| 127 | } | ||
| 128 | 20 | size = res; | |
| 129 | 20 | } | |
| 130 | |||
| 131 | template <typename MatrixElemType> | ||
| 132 | MPI_Datatype GetMPIDatatype() { | ||
| 133 | MPI_Datatype res = MPI_DATATYPE_NULL; | ||
| 134 | if (std::is_same_v<MatrixElemType, char>) { | ||
| 135 | res = MPI_CHAR; | ||
| 136 | } else if (std::is_same_v<MatrixElemType, unsigned char>) { | ||
| 137 | res = MPI_UNSIGNED_CHAR; | ||
| 138 | } else if (std::is_same_v<MatrixElemType, int16_t>) { | ||
| 139 | res = MPI_SHORT; | ||
| 140 | } else if (std::is_same_v<MatrixElemType, uint16_t>) { | ||
| 141 | res = MPI_UNSIGNED_SHORT; | ||
| 142 | } else if (std::is_same_v<MatrixElemType, int>) { | ||
| 143 | res = MPI_INT; | ||
| 144 | } else if (std::is_same_v<MatrixElemType, unsigned>) { | ||
| 145 | res = MPI_UNSIGNED; | ||
| 146 | } else if (std::is_same_v<MatrixElemType, int64_t>) { | ||
| 147 | res = MPI_LONG; | ||
| 148 | } else if (std::is_same_v<MatrixElemType, uint64_t>) { | ||
| 149 | res = MPI_UNSIGNED_LONG; | ||
| 150 | } else if (std::is_same_v<MatrixElemType, float>) { | ||
| 151 | res = MPI_FLOAT; | ||
| 152 | } else if (std::is_same_v<MatrixElemType, double>) { | ||
| 153 | res = MPI_DOUBLE; | ||
| 154 | } else if (std::is_same_v<MatrixElemType, long double>) { | ||
| 155 | res = MPI_LONG_DOUBLE; | ||
| 156 | } | ||
| 157 | return res; | ||
| 158 | } | ||
| 159 | |||
| 160 | 20 | inline int MpiMyAllreduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, | |
| 161 | MPI_Comm comm) { | ||
| 162 | 20 | int proc_num = 0; | |
| 163 | 20 | int proc_rank = 0; | |
| 164 | 20 | MPI_Comm_size(comm, &proc_num); | |
| 165 | 20 | MPI_Comm_rank(comm, &proc_rank); | |
| 166 | 20 | int type_size = 0; | |
| 167 | 20 | GetSizeOf2(datatype, type_size); | |
| 168 | 20 | int data_size = count * type_size; | |
| 169 | MPI_Status status; | ||
| 170 | const char *src = static_cast<const char *>(sendbuf); | ||
| 171 | char *dst = static_cast<char *>(recvbuf); | ||
| 172 |
2/2✓ Branch 0 taken 448 times.
✓ Branch 1 taken 20 times.
|
468 | for (int i = 0; i < data_size; i++) { |
| 173 | 448 | dst[i] = src[i]; | |
| 174 | } | ||
| 175 |
2/2✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
|
20 | std::vector<char> tempbufvec(data_size); |
| 176 | void *tempbuf = tempbufvec.data(); | ||
| 177 | |||
| 178 | 20 | int parent = (proc_rank - 1) / 2; | |
| 179 | 20 | int left = (2 * proc_rank) + 1; | |
| 180 | 20 | int right = (2 * proc_rank) + 2; | |
| 181 | |||
| 182 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | if (left < proc_num) { |
| 183 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Recv(tempbuf, count, datatype, left, 0, comm, &status); |
| 184 | 10 | Operation(recvbuf, tempbuf, count, datatype, op); | |
| 185 | } | ||
| 186 | |||
| 187 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
|
20 | if (right < proc_num) { |
| 188 | ✗ | MPI_Recv(tempbuf, count, datatype, right, 0, comm, &status); | |
| 189 | ✗ | Operation(recvbuf, tempbuf, count, datatype, op); | |
| 190 | } | ||
| 191 | |||
| 192 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | if (proc_rank != 0) { |
| 193 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Send(recvbuf, count, datatype, parent, 0, comm); |
| 194 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Recv(recvbuf, count, datatype, parent, 1, comm, &status); |
| 195 | } | ||
| 196 | |||
| 197 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | if (left < proc_num) { |
| 198 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | MPI_Send(recvbuf, count, datatype, left, 1, comm); |
| 199 | } | ||
| 200 | |||
| 201 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
|
20 | if (right < proc_num) { |
| 202 | ✗ | MPI_Send(recvbuf, count, datatype, right, 1, comm); | |
| 203 | } | ||
| 204 | |||
| 205 | 20 | return MPI_SUCCESS; | |
| 206 | } | ||
| 207 | |||
| 208 | class PetrovEMyAllreduceMPI : public BaseTask { | ||
| 209 | public: | ||
| 210 | static constexpr ppc::task::TypeOfTask GetStaticTypeOfTask() { | ||
| 211 | return ppc::task::TypeOfTask::kMPI; | ||
| 212 | } | ||
| 213 | explicit PetrovEMyAllreduceMPI(const InType &in); | ||
| 214 | |||
| 215 | private: | ||
| 216 | bool ValidationImpl() override; | ||
| 217 | bool PreProcessingImpl() override; | ||
| 218 | bool RunImpl() override; | ||
| 219 | bool PostProcessingImpl() override; | ||
| 220 | }; | ||
| 221 | |||
| 222 | } // namespace petrov_e_allreduce | ||
| 223 |