[164] | 1 | SUBROUTINE transpose_xy( f_in, work, f_out ) |
---|
[1] | 2 | |
---|
| 3 | !------------------------------------------------------------------------------! |
---|
[484] | 4 | ! Current revisions: |
---|
[1] | 5 | ! ----------------- |
---|
[684] | 6 | ! |
---|
[198] | 7 | ! |
---|
| 8 | ! Former revisions: |
---|
| 9 | ! ----------------- |
---|
| 10 | ! $Id: transpose.f90 684 2011-02-09 14:49:31Z maronga $ |
---|
| 11 | ! |
---|
[684] | 12 | ! 683 2011-02-09 14:25:15Z raasch |
---|
| 13 | ! openMP parallelization of transpositions for 2d-domain-decomposition |
---|
| 14 | ! |
---|
[623] | 15 | ! 622 2010-12-10 08:08:13Z raasch |
---|
| 16 | ! optional barriers included in order to speed up collective operations |
---|
| 17 | ! |
---|
[198] | 18 | ! 164 2008-05-15 08:46:15Z raasch |
---|
[164] | 19 | ! f_inv changed from subroutine argument to automatic array in order to do |
---|
| 20 | ! re-ordering from f_in to f_inv in one step, one array work is needed instead |
---|
| 21 | ! of work1 and work2 |
---|
[1] | 22 | ! |
---|
[198] | 23 | ! February 2007 |
---|
[3] | 24 | ! RCS Log replace by Id keyword, revision history cleaned up |
---|
| 25 | ! |
---|
[1] | 26 | ! Revision 1.2 2004/04/30 13:12:17 raasch |
---|
| 27 | ! Switched from mpi_alltoallv to the simpler mpi_alltoall, |
---|
| 28 | ! all former transpose-routine files collected in this file, enlarged |
---|
| 29 | ! transposition arrays introduced |
---|
| 30 | ! |
---|
| 31 | ! Revision 1.1 2004/04/30 13:08:16 raasch |
---|
| 32 | ! Initial revision (collection of former routines transpose_xy, transpose_xz, |
---|
| 33 | ! transpose_yx, transpose_yz, transpose_zx, transpose_zy) |
---|
| 34 | ! |
---|
| 35 | ! Revision 1.1 1997/07/24 11:25:18 raasch |
---|
| 36 | ! Initial revision |
---|
| 37 | ! |
---|
| 38 | ! |
---|
| 39 | ! Description: |
---|
| 40 | ! ------------ |
---|
| 41 | ! Transposition of input array (f_in) from x to y. For the input array, all |
---|
| 42 | ! elements along x reside on the same PE, while after transposition, all |
---|
| 43 | ! elements along y reside on the same PE. |
---|
| 44 | !------------------------------------------------------------------------------! |
---|
| 45 | |
---|
| 46 | USE cpulog |
---|
| 47 | USE indices |
---|
| 48 | USE interfaces |
---|
| 49 | USE pegrid |
---|
| 50 | USE transpose_indices |
---|
| 51 | |
---|
| 52 | IMPLICIT NONE |
---|
| 53 | |
---|
| 54 | INTEGER :: i, j, k, l, m, ys |
---|
| 55 | |
---|
| 56 | REAL :: f_in(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), & |
---|
| 57 | f_inv(nys_x:nyn_xa,nzb_x:nzt_xa,0:nxa), & |
---|
| 58 | f_out(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya), & |
---|
[164] | 59 | work(nnx*nny*nnz) |
---|
[1] | 60 | |
---|
| 61 | #if defined( __parallel ) |
---|
| 62 | |
---|
| 63 | ! |
---|
| 64 | !-- Rearrange indices of input array in order to make data to be send |
---|
| 65 | !-- by MPI contiguous |
---|
[683] | 66 | !$OMP PARALLEL PRIVATE ( i, j, k ) |
---|
| 67 | !$OMP DO |
---|
[1] | 68 | DO i = 0, nxa |
---|
| 69 | DO k = nzb_x, nzt_xa |
---|
| 70 | DO j = nys_x, nyn_xa |
---|
[164] | 71 | f_inv(j,k,i) = f_in(i,j,k) |
---|
[1] | 72 | ENDDO |
---|
| 73 | ENDDO |
---|
| 74 | ENDDO |
---|
[683] | 75 | !$OMP END PARALLEL |
---|
[1] | 76 | |
---|
| 77 | ! |
---|
| 78 | !-- Transpose array |
---|
| 79 | CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) |
---|
[622] | 80 | IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) |
---|
[1] | 81 | CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, & |
---|
[164] | 82 | work(1), sendrecvcount_xy, MPI_REAL, & |
---|
[1] | 83 | comm1dy, ierr ) |
---|
| 84 | CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) |
---|
| 85 | |
---|
| 86 | ! |
---|
| 87 | !-- Reorder transposed array |
---|
[683] | 88 | !$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys ) |
---|
| 89 | !$OMP DO |
---|
[1] | 90 | DO l = 0, pdims(2) - 1 |
---|
[683] | 91 | m = l * ( nxr_ya - nxl_y + 1 ) * ( nzt_ya - nzb_y + 1 ) * & |
---|
| 92 | ( nyn_xa - nys_x + 1 ) |
---|
[1] | 93 | ys = 0 + l * ( nyn_xa - nys_x + 1 ) |
---|
| 94 | DO i = nxl_y, nxr_ya |
---|
| 95 | DO k = nzb_y, nzt_ya |
---|
| 96 | DO j = ys, ys + nyn_xa - nys_x |
---|
| 97 | m = m + 1 |
---|
[164] | 98 | f_out(j,i,k) = work(m) |
---|
[1] | 99 | ENDDO |
---|
| 100 | ENDDO |
---|
| 101 | ENDDO |
---|
| 102 | ENDDO |
---|
[683] | 103 | !$OMP END PARALLEL |
---|
[1] | 104 | |
---|
| 105 | #endif |
---|
| 106 | |
---|
| 107 | END SUBROUTINE transpose_xy |
---|
| 108 | |
---|
| 109 | |
---|
[164] | 110 | SUBROUTINE transpose_xz( f_in, work, f_out ) |
---|
[1] | 111 | |
---|
| 112 | !------------------------------------------------------------------------------! |
---|
| 113 | ! Description: |
---|
| 114 | ! ------------ |
---|
| 115 | ! Transposition of input array (f_in) from x to z. For the input array, all |
---|
| 116 | ! elements along x reside on the same PE, while after transposition, all |
---|
| 117 | ! elements along z reside on the same PE. |
---|
| 118 | !------------------------------------------------------------------------------! |
---|
| 119 | |
---|
| 120 | USE cpulog |
---|
| 121 | USE indices |
---|
| 122 | USE interfaces |
---|
| 123 | USE pegrid |
---|
| 124 | USE transpose_indices |
---|
| 125 | |
---|
| 126 | IMPLICIT NONE |
---|
| 127 | |
---|
| 128 | INTEGER :: i, j, k, l, m, xs |
---|
| 129 | |
---|
| 130 | REAL :: f_in(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), & |
---|
[164] | 131 | f_inv(nys:nyna,nxl:nxra,1:nza), & |
---|
[1] | 132 | f_out(1:nza,nys:nyna,nxl:nxra), & |
---|
[164] | 133 | work(nnx*nny*nnz) |
---|
[1] | 134 | |
---|
| 135 | #if defined( __parallel ) |
---|
| 136 | |
---|
| 137 | ! |
---|
| 138 | !-- If the PE grid is one-dimensional along y, the array has only to be |
---|
| 139 | !-- reordered locally and therefore no transposition has to be done. |
---|
| 140 | IF ( pdims(1) /= 1 ) THEN |
---|
| 141 | ! |
---|
| 142 | !-- Reorder input array for transposition |
---|
[683] | 143 | !$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs ) |
---|
| 144 | !$OMP DO |
---|
[1] | 145 | DO l = 0, pdims(1) - 1 |
---|
[683] | 146 | m = l * ( nzt_xa - nzb_x + 1 ) * nnx * ( nyn_xa - nys_x + 1 ) |
---|
[1] | 147 | xs = 0 + l * nnx |
---|
| 148 | DO k = nzb_x, nzt_xa |
---|
[164] | 149 | DO i = xs, xs + nnx - 1 |
---|
| 150 | DO j = nys_x, nyn_xa |
---|
[1] | 151 | m = m + 1 |
---|
[164] | 152 | work(m) = f_in(i,j,k) |
---|
[1] | 153 | ENDDO |
---|
| 154 | ENDDO |
---|
| 155 | ENDDO |
---|
| 156 | ENDDO |
---|
[683] | 157 | !$OMP END PARALLEL |
---|
[1] | 158 | |
---|
| 159 | ! |
---|
| 160 | !-- Transpose array |
---|
| 161 | CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) |
---|
[622] | 162 | IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) |
---|
[164] | 163 | CALL MPI_ALLTOALL( work(1), sendrecvcount_zx, MPI_REAL, & |
---|
| 164 | f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, & |
---|
[1] | 165 | comm1dx, ierr ) |
---|
| 166 | CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) |
---|
| 167 | |
---|
| 168 | ! |
---|
| 169 | !-- Reorder transposed array in a way that the z index is in first position |
---|
[683] | 170 | !$OMP PARALLEL PRIVATE ( i, j, k ) |
---|
| 171 | !$OMP DO |
---|
[164] | 172 | DO k = 1, nza |
---|
| 173 | DO i = nxl, nxra |
---|
| 174 | DO j = nys, nyna |
---|
| 175 | f_out(k,j,i) = f_inv(j,i,k) |
---|
[1] | 176 | ENDDO |
---|
| 177 | ENDDO |
---|
| 178 | ENDDO |
---|
[683] | 179 | !$OMP END PARALLEL |
---|
[1] | 180 | ELSE |
---|
| 181 | ! |
---|
| 182 | !-- Reorder the array in a way that the z index is in first position |
---|
[683] | 183 | !$OMP PARALLEL PRIVATE ( i, j, k ) |
---|
| 184 | !$OMP DO |
---|
[1] | 185 | DO i = nxl, nxra |
---|
| 186 | DO j = nys, nyna |
---|
| 187 | DO k = 1, nza |
---|
[164] | 188 | f_inv(j,i,k) = f_in(i,j,k) |
---|
[1] | 189 | ENDDO |
---|
| 190 | ENDDO |
---|
| 191 | ENDDO |
---|
[683] | 192 | !$OMP END PARALLEL |
---|
[1] | 193 | |
---|
[683] | 194 | !$OMP PARALLEL PRIVATE ( i, j, k ) |
---|
| 195 | !$OMP DO |
---|
[164] | 196 | DO k = 1, nza |
---|
| 197 | DO i = nxl, nxra |
---|
| 198 | DO j = nys, nyna |
---|
| 199 | f_out(k,j,i) = f_inv(j,i,k) |
---|
| 200 | ENDDO |
---|
[1] | 201 | ENDDO |
---|
| 202 | ENDDO |
---|
[683] | 203 | !$OMP END PARALLEL |
---|
[1] | 204 | |
---|
[164] | 205 | ENDIF |
---|
| 206 | |
---|
| 207 | |
---|
[1] | 208 | #endif |
---|
| 209 | |
---|
| 210 | END SUBROUTINE transpose_xz |
---|
| 211 | |
---|
| 212 | |
---|
[164] | 213 | SUBROUTINE transpose_yx( f_in, work, f_out ) |
---|
[1] | 214 | |
---|
| 215 | !------------------------------------------------------------------------------! |
---|
| 216 | ! Description: |
---|
| 217 | ! ------------ |
---|
| 218 | ! Transposition of input array (f_in) from y to x. For the input array, all |
---|
| 219 | ! elements along y reside on the same PE, while after transposition, all |
---|
| 220 | ! elements along x reside on the same PE. |
---|
| 221 | !------------------------------------------------------------------------------! |
---|
| 222 | |
---|
| 223 | USE cpulog |
---|
| 224 | USE indices |
---|
| 225 | USE interfaces |
---|
| 226 | USE pegrid |
---|
| 227 | USE transpose_indices |
---|
| 228 | |
---|
| 229 | IMPLICIT NONE |
---|
| 230 | |
---|
| 231 | INTEGER :: i, j, k, l, m, ys |
---|
| 232 | |
---|
| 233 | REAL :: f_in(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya), & |
---|
| 234 | f_inv(nys_x:nyn_xa,nzb_x:nzt_xa,0:nxa), & |
---|
| 235 | f_out(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), & |
---|
[164] | 236 | work(nnx*nny*nnz) |
---|
[1] | 237 | |
---|
| 238 | #if defined( __parallel ) |
---|
| 239 | |
---|
| 240 | ! |
---|
| 241 | !-- Reorder input array for transposition |
---|
[683] | 242 | !$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys ) |
---|
| 243 | !$OMP DO |
---|
[1] | 244 | DO l = 0, pdims(2) - 1 |
---|
[683] | 245 | m = l * ( nxr_ya - nxl_y + 1 ) * ( nzt_ya - nzb_y + 1 ) * & |
---|
| 246 | ( nyn_xa - nys_x + 1 ) |
---|
[1] | 247 | ys = 0 + l * ( nyn_xa - nys_x + 1 ) |
---|
| 248 | DO i = nxl_y, nxr_ya |
---|
| 249 | DO k = nzb_y, nzt_ya |
---|
| 250 | DO j = ys, ys + nyn_xa - nys_x |
---|
| 251 | m = m + 1 |
---|
[164] | 252 | work(m) = f_in(j,i,k) |
---|
[1] | 253 | ENDDO |
---|
| 254 | ENDDO |
---|
| 255 | ENDDO |
---|
| 256 | ENDDO |
---|
[683] | 257 | !$OMP END PARALLEL |
---|
[1] | 258 | |
---|
| 259 | ! |
---|
| 260 | !-- Transpose array |
---|
| 261 | CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) |
---|
[622] | 262 | IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) |
---|
[164] | 263 | CALL MPI_ALLTOALL( work(1), sendrecvcount_xy, MPI_REAL, & |
---|
[1] | 264 | f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, & |
---|
| 265 | comm1dy, ierr ) |
---|
| 266 | CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) |
---|
| 267 | |
---|
| 268 | ! |
---|
| 269 | !-- Reorder transposed array in a way that the x index is in first position |
---|
[683] | 270 | !$OMP PARALLEL PRIVATE ( i, j, k ) |
---|
| 271 | !$OMP DO |
---|
[1] | 272 | DO i = 0, nxa |
---|
| 273 | DO k = nzb_x, nzt_xa |
---|
| 274 | DO j = nys_x, nyn_xa |
---|
[164] | 275 | f_out(i,j,k) = f_inv(j,k,i) |
---|
[1] | 276 | ENDDO |
---|
| 277 | ENDDO |
---|
| 278 | ENDDO |
---|
[683] | 279 | !$OMP END PARALLEL |
---|
[1] | 280 | |
---|
| 281 | #endif |
---|
| 282 | |
---|
| 283 | END SUBROUTINE transpose_yx |
---|
| 284 | |
---|
| 285 | |
---|
[164] | 286 | SUBROUTINE transpose_yxd( f_in, work, f_out ) |
---|
[1] | 287 | |
---|
| 288 | !------------------------------------------------------------------------------! |
---|
| 289 | ! Description: |
---|
| 290 | ! ------------ |
---|
| 291 | ! Transposition of input array (f_in) from y to x. For the input array, all |
---|
| 292 | ! elements along y reside on the same PE, while after transposition, all |
---|
| 293 | ! elements along x reside on the same PE. |
---|
| 294 | ! This is a direct transposition for arrays with indices in regular order |
---|
| 295 | ! (k,j,i) (cf. transpose_yx). |
---|
| 296 | !------------------------------------------------------------------------------! |
---|
| 297 | |
---|
| 298 | USE cpulog |
---|
| 299 | USE indices |
---|
| 300 | USE interfaces |
---|
| 301 | USE pegrid |
---|
| 302 | USE transpose_indices |
---|
| 303 | |
---|
| 304 | IMPLICIT NONE |
---|
| 305 | |
---|
| 306 | INTEGER :: i, j, k, l, m, recvcount_yx, sendcount_yx, xs |
---|
| 307 | |
---|
| 308 | REAL :: f_in(1:nza,nys:nyna,nxl:nxra), f_inv(nxl:nxra,1:nza,nys:nyna), & |
---|
| 309 | f_out(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), & |
---|
[164] | 310 | work(nnx*nny*nnz) |
---|
[1] | 311 | |
---|
| 312 | #if defined( __parallel ) |
---|
| 313 | |
---|
| 314 | ! |
---|
| 315 | !-- Rearrange indices of input array in order to make data to be send |
---|
| 316 | !-- by MPI contiguous |
---|
| 317 | DO k = 1, nza |
---|
| 318 | DO j = nys, nyna |
---|
| 319 | DO i = nxl, nxra |
---|
[164] | 320 | f_inv(i,k,j) = f_in(k,j,i) |
---|
[1] | 321 | ENDDO |
---|
| 322 | ENDDO |
---|
| 323 | ENDDO |
---|
| 324 | |
---|
| 325 | ! |
---|
| 326 | !-- Transpose array |
---|
| 327 | CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) |
---|
[622] | 328 | IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) |
---|
[1] | 329 | CALL MPI_ALLTOALL( f_inv(nxl,1,nys), sendrecvcount_xy, MPI_REAL, & |
---|
[164] | 330 | work(1), sendrecvcount_xy, MPI_REAL, & |
---|
[1] | 331 | comm1dx, ierr ) |
---|
| 332 | CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) |
---|
| 333 | |
---|
| 334 | ! |
---|
| 335 | !-- Reorder transposed array |
---|
| 336 | m = 0 |
---|
| 337 | DO l = 0, pdims(1) - 1 |
---|
| 338 | xs = 0 + l * nnx |
---|
| 339 | DO j = nys_x, nyn_xa |
---|
| 340 | DO k = 1, nza |
---|
| 341 | DO i = xs, xs + nnx - 1 |
---|
| 342 | m = m + 1 |
---|
[164] | 343 | f_out(i,j,k) = work(m) |
---|
[1] | 344 | ENDDO |
---|
| 345 | ENDDO |
---|
| 346 | ENDDO |
---|
| 347 | ENDDO |
---|
| 348 | |
---|
| 349 | #endif |
---|
| 350 | |
---|
| 351 | END SUBROUTINE transpose_yxd |
---|
| 352 | |
---|
| 353 | |
---|
[164] | 354 | SUBROUTINE transpose_yz( f_in, work, f_out ) |
---|
[1] | 355 | |
---|
| 356 | !------------------------------------------------------------------------------! |
---|
| 357 | ! Description: |
---|
| 358 | ! ------------ |
---|
| 359 | ! Transposition of input array (f_in) from y to z. For the input array, all |
---|
| 360 | ! elements along y reside on the same PE, while after transposition, all |
---|
| 361 | ! elements along z reside on the same PE. |
---|
| 362 | !------------------------------------------------------------------------------! |
---|
| 363 | |
---|
| 364 | USE cpulog |
---|
| 365 | USE indices |
---|
| 366 | USE interfaces |
---|
| 367 | USE pegrid |
---|
| 368 | USE transpose_indices |
---|
| 369 | |
---|
| 370 | IMPLICIT NONE |
---|
| 371 | |
---|
| 372 | INTEGER :: i, j, k, l, m, zs |
---|
| 373 | |
---|
| 374 | REAL :: f_in(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya), & |
---|
| 375 | f_inv(nxl_y:nxr_ya,nzb_y:nzt_ya,0:nya), & |
---|
| 376 | f_out(nxl_z:nxr_za,nys_z:nyn_za,1:nza), & |
---|
[164] | 377 | work(nnx*nny*nnz) |
---|
[1] | 378 | |
---|
| 379 | #if defined( __parallel ) |
---|
| 380 | |
---|
| 381 | ! |
---|
| 382 | !-- Rearrange indices of input array in order to make data to be send |
---|
| 383 | !-- by MPI contiguous |
---|
[683] | 384 | !$OMP PARALLEL PRIVATE ( i, j, k ) |
---|
| 385 | !$OMP DO |
---|
[164] | 386 | DO j = 0, nya |
---|
| 387 | DO k = nzb_y, nzt_ya |
---|
| 388 | DO i = nxl_y, nxr_ya |
---|
| 389 | f_inv(i,k,j) = f_in(j,i,k) |
---|
[1] | 390 | ENDDO |
---|
| 391 | ENDDO |
---|
| 392 | ENDDO |
---|
[683] | 393 | !$OMP END PARALLEL |
---|
[1] | 394 | |
---|
| 395 | ! |
---|
| 396 | !-- Move data to different array, because memory location of work1 is |
---|
| 397 | !-- needed further below (work1 = work2). |
---|
| 398 | !-- If the PE grid is one-dimensional along y, only local reordering |
---|
| 399 | !-- of the data is necessary and no transposition has to be done. |
---|
| 400 | IF ( pdims(1) == 1 ) THEN |
---|
[683] | 401 | !$OMP PARALLEL PRIVATE ( i, j, k ) |
---|
| 402 | !$OMP DO |
---|
[1] | 403 | DO j = 0, nya |
---|
| 404 | DO k = nzb_y, nzt_ya |
---|
| 405 | DO i = nxl_y, nxr_ya |
---|
[164] | 406 | f_out(i,j,k) = f_inv(i,k,j) |
---|
[1] | 407 | ENDDO |
---|
| 408 | ENDDO |
---|
| 409 | ENDDO |
---|
[683] | 410 | !$OMP END PARALLEL |
---|
[1] | 411 | RETURN |
---|
| 412 | ENDIF |
---|
| 413 | |
---|
| 414 | ! |
---|
| 415 | !-- Transpose array |
---|
| 416 | CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) |
---|
[622] | 417 | IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) |
---|
[1] | 418 | CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, & |
---|
[164] | 419 | work(1), sendrecvcount_yz, MPI_REAL, & |
---|
[1] | 420 | comm1dx, ierr ) |
---|
| 421 | CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) |
---|
| 422 | |
---|
| 423 | ! |
---|
| 424 | !-- Reorder transposed array |
---|
[683] | 425 | !$OMP PARALLEL PRIVATE ( i, j, k, l, m, zs ) |
---|
| 426 | !$OMP DO |
---|
[1] | 427 | DO l = 0, pdims(1) - 1 |
---|
[683] | 428 | m = l * ( nyn_za - nys_z + 1 ) * ( nzt_ya - nzb_y + 1 ) * & |
---|
| 429 | ( nxr_za - nxl_z + 1 ) |
---|
[1] | 430 | zs = 1 + l * ( nzt_ya - nzb_y + 1 ) |
---|
| 431 | DO j = nys_z, nyn_za |
---|
| 432 | DO k = zs, zs + nzt_ya - nzb_y |
---|
| 433 | DO i = nxl_z, nxr_za |
---|
| 434 | m = m + 1 |
---|
[164] | 435 | f_out(i,j,k) = work(m) |
---|
[1] | 436 | ENDDO |
---|
| 437 | ENDDO |
---|
| 438 | ENDDO |
---|
| 439 | ENDDO |
---|
[683] | 440 | !$OMP END PARALLEL |
---|
[1] | 441 | |
---|
| 442 | #endif |
---|
| 443 | |
---|
| 444 | END SUBROUTINE transpose_yz |
---|
| 445 | |
---|
| 446 | |
---|
[164] | 447 | SUBROUTINE transpose_zx( f_in, work, f_out ) |
---|
[1] | 448 | |
---|
| 449 | !------------------------------------------------------------------------------! |
---|
| 450 | ! Description: |
---|
| 451 | ! ------------ |
---|
| 452 | ! Transposition of input array (f_in) from z to x. For the input array, all |
---|
| 453 | ! elements along z reside on the same PE, while after transposition, all |
---|
| 454 | ! elements along x reside on the same PE. |
---|
| 455 | !------------------------------------------------------------------------------! |
---|
| 456 | |
---|
| 457 | USE cpulog |
---|
| 458 | USE indices |
---|
| 459 | USE interfaces |
---|
| 460 | USE pegrid |
---|
| 461 | USE transpose_indices |
---|
| 462 | |
---|
| 463 | IMPLICIT NONE |
---|
| 464 | |
---|
| 465 | INTEGER :: i, j, k, l, m, xs |
---|
| 466 | |
---|
[164] | 467 | REAL :: f_in(1:nza,nys:nyna,nxl:nxra), f_inv(nys:nyna,nxl:nxra,1:nza), & |
---|
[1] | 468 | f_out(0:nxa,nys_x:nyn_xa,nzb_x:nzt_xa), & |
---|
[164] | 469 | work(nnx*nny*nnz) |
---|
[1] | 470 | |
---|
| 471 | #if defined( __parallel ) |
---|
| 472 | |
---|
| 473 | ! |
---|
| 474 | !-- Rearrange indices of input array in order to make data to be send |
---|
| 475 | !-- by MPI contiguous |
---|
[683] | 476 | !$OMP PARALLEL PRIVATE ( i, j, k ) |
---|
| 477 | !$OMP DO |
---|
[164] | 478 | DO k = 1,nza |
---|
| 479 | DO i = nxl, nxra |
---|
| 480 | DO j = nys, nyna |
---|
| 481 | f_inv(j,i,k) = f_in(k,j,i) |
---|
[1] | 482 | ENDDO |
---|
| 483 | ENDDO |
---|
| 484 | ENDDO |
---|
[683] | 485 | !$OMP END PARALLEL |
---|
[1] | 486 | |
---|
| 487 | ! |
---|
| 488 | !-- Move data to different array, because memory location of work1 is |
---|
| 489 | !-- needed further below (work1 = work2). |
---|
| 490 | !-- If the PE grid is one-dimensional along y, only local reordering |
---|
| 491 | !-- of the data is necessary and no transposition has to be done. |
---|
| 492 | IF ( pdims(1) == 1 ) THEN |
---|
[683] | 493 | !$OMP PARALLEL PRIVATE ( i, j, k ) |
---|
| 494 | !$OMP DO |
---|
[1] | 495 | DO k = 1, nza |
---|
[164] | 496 | DO i = nxl, nxra |
---|
| 497 | DO j = nys, nyna |
---|
| 498 | f_out(i,j,k) = f_inv(j,i,k) |
---|
[1] | 499 | ENDDO |
---|
| 500 | ENDDO |
---|
| 501 | ENDDO |
---|
[683] | 502 | !$OMP END PARALLEL |
---|
[1] | 503 | RETURN |
---|
| 504 | ENDIF |
---|
| 505 | |
---|
| 506 | ! |
---|
| 507 | !-- Transpose array |
---|
| 508 | CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) |
---|
[622] | 509 | IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) |
---|
[164] | 510 | CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, & |
---|
| 511 | work(1), sendrecvcount_zx, MPI_REAL, & |
---|
[1] | 512 | comm1dx, ierr ) |
---|
| 513 | CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) |
---|
| 514 | |
---|
| 515 | ! |
---|
| 516 | !-- Reorder transposed array |
---|
[683] | 517 | !$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs ) |
---|
| 518 | !$OMP DO |
---|
[1] | 519 | DO l = 0, pdims(1) - 1 |
---|
[683] | 520 | m = l * ( nzt_xa - nzb_x + 1 ) * nnx * ( nyn_xa - nys_x + 1 ) |
---|
[1] | 521 | xs = 0 + l * nnx |
---|
| 522 | DO k = nzb_x, nzt_xa |
---|
[164] | 523 | DO i = xs, xs + nnx - 1 |
---|
| 524 | DO j = nys_x, nyn_xa |
---|
[1] | 525 | m = m + 1 |
---|
[164] | 526 | f_out(i,j,k) = work(m) |
---|
[1] | 527 | ENDDO |
---|
| 528 | ENDDO |
---|
| 529 | ENDDO |
---|
| 530 | ENDDO |
---|
[683] | 531 | !$OMP END PARALLEL |
---|
[1] | 532 | |
---|
| 533 | #endif |
---|
| 534 | |
---|
| 535 | END SUBROUTINE transpose_zx |
---|
| 536 | |
---|
| 537 | |
---|
[164] | 538 | SUBROUTINE transpose_zy( f_in, work, f_out ) |
---|
[1] | 539 | |
---|
| 540 | !------------------------------------------------------------------------------! |
---|
| 541 | ! Description: |
---|
| 542 | ! ------------ |
---|
| 543 | ! Transposition of input array (f_in) from z to y. For the input array, all |
---|
| 544 | ! elements along z reside on the same PE, while after transposition, all |
---|
| 545 | ! elements along y reside on the same PE. |
---|
| 546 | !------------------------------------------------------------------------------! |
---|
| 547 | |
---|
| 548 | USE cpulog |
---|
| 549 | USE indices |
---|
| 550 | USE interfaces |
---|
| 551 | USE pegrid |
---|
| 552 | USE transpose_indices |
---|
| 553 | |
---|
| 554 | IMPLICIT NONE |
---|
| 555 | |
---|
| 556 | INTEGER :: i, j, k, l, m, zs |
---|
| 557 | |
---|
| 558 | REAL :: f_in(nxl_z:nxr_za,nys_z:nyn_za,1:nza), & |
---|
| 559 | f_inv(nxl_y:nxr_ya,nzb_y:nzt_ya,0:nya), & |
---|
| 560 | f_out(0:nya,nxl_y:nxr_ya,nzb_y:nzt_ya), & |
---|
[164] | 561 | work(nnx*nny*nnz) |
---|
[1] | 562 | |
---|
| 563 | #if defined( __parallel ) |
---|
| 564 | |
---|
| 565 | ! |
---|
| 566 | !-- If the PE grid is one-dimensional along y, the array has only to be |
---|
| 567 | !-- reordered locally and therefore no transposition has to be done. |
---|
| 568 | IF ( pdims(1) /= 1 ) THEN |
---|
| 569 | ! |
---|
| 570 | !-- Reorder input array for transposition |
---|
[683] | 571 | !$OMP PARALLEL PRIVATE ( i, j, k, l, m, zs ) |
---|
| 572 | !$OMP DO |
---|
[1] | 573 | DO l = 0, pdims(1) - 1 |
---|
[683] | 574 | m = l * ( nyn_za - nys_z + 1 ) * ( nzt_ya - nzb_y + 1 ) * & |
---|
| 575 | ( nxr_za - nxl_z + 1 ) |
---|
[1] | 576 | zs = 1 + l * ( nzt_ya - nzb_y + 1 ) |
---|
| 577 | DO j = nys_z, nyn_za |
---|
| 578 | DO k = zs, zs + nzt_ya - nzb_y |
---|
| 579 | DO i = nxl_z, nxr_za |
---|
| 580 | m = m + 1 |
---|
[164] | 581 | work(m) = f_in(i,j,k) |
---|
[1] | 582 | ENDDO |
---|
| 583 | ENDDO |
---|
| 584 | ENDDO |
---|
| 585 | ENDDO |
---|
[683] | 586 | !$OMP END PARALLEL |
---|
[1] | 587 | |
---|
| 588 | ! |
---|
| 589 | !-- Transpose array |
---|
| 590 | CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) |
---|
[622] | 591 | IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) |
---|
[164] | 592 | CALL MPI_ALLTOALL( work(1), sendrecvcount_yz, MPI_REAL, & |
---|
[1] | 593 | f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, & |
---|
| 594 | comm1dx, ierr ) |
---|
| 595 | CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) |
---|
| 596 | |
---|
| 597 | ! |
---|
| 598 | !-- Reorder transposed array in a way that the y index is in first position |
---|
[683] | 599 | !$OMP PARALLEL PRIVATE ( i, j, k ) |
---|
| 600 | !$OMP DO |
---|
[164] | 601 | DO j = 0, nya |
---|
| 602 | DO k = nzb_y, nzt_ya |
---|
| 603 | DO i = nxl_y, nxr_ya |
---|
| 604 | f_out(j,i,k) = f_inv(i,k,j) |
---|
[1] | 605 | ENDDO |
---|
| 606 | ENDDO |
---|
| 607 | ENDDO |
---|
[683] | 608 | !$OMP END PARALLEL |
---|
[1] | 609 | ELSE |
---|
| 610 | ! |
---|
| 611 | !-- Reorder the array in a way that the y index is in first position |
---|
[683] | 612 | !$OMP PARALLEL PRIVATE ( i, j, k ) |
---|
| 613 | !$OMP DO |
---|
[1] | 614 | DO k = nzb_y, nzt_ya |
---|
[164] | 615 | DO j = 0, nya |
---|
| 616 | DO i = nxl_y, nxr_ya |
---|
| 617 | f_inv(i,k,j) = f_in(i,j,k) |
---|
| 618 | ENDDO |
---|
| 619 | ENDDO |
---|
| 620 | ENDDO |
---|
[683] | 621 | !$OMP END PARALLEL |
---|
[164] | 622 | ! |
---|
| 623 | !-- Move data to output array |
---|
[683] | 624 | !$OMP PARALLEL PRIVATE ( i, j, k ) |
---|
| 625 | !$OMP DO |
---|
[164] | 626 | DO k = nzb_y, nzt_ya |
---|
[1] | 627 | DO i = nxl_y, nxr_ya |
---|
| 628 | DO j = 0, nya |
---|
[164] | 629 | f_out(j,i,k) = f_inv(i,k,j) |
---|
[1] | 630 | ENDDO |
---|
| 631 | ENDDO |
---|
| 632 | ENDDO |
---|
[683] | 633 | !$OMP END PARALLEL |
---|
[164] | 634 | |
---|
[1] | 635 | ENDIF |
---|
| 636 | |
---|
| 637 | #endif |
---|
| 638 | |
---|
| 639 | END SUBROUTINE transpose_zy |
---|
| 640 | |
---|
| 641 | |
---|
[164] | 642 | SUBROUTINE transpose_zyd( f_in, work, f_out ) |
---|
[1] | 643 | |
---|
| 644 | !------------------------------------------------------------------------------! |
---|
| 645 | ! Description: |
---|
| 646 | ! ------------ |
---|
| 647 | ! Transposition of input array (f_in) from z to y. For the input array, all |
---|
| 648 | ! elements along z reside on the same PE, while after transposition, all |
---|
| 649 | ! elements along y reside on the same PE. |
---|
| 650 | ! This is a direct transposition for arrays with indices in regular order |
---|
| 651 | ! (k,j,i) (cf. transpose_zy). |
---|
| 652 | !------------------------------------------------------------------------------! |
---|
| 653 | |
---|
| 654 | USE cpulog |
---|
| 655 | USE indices |
---|
| 656 | USE interfaces |
---|
| 657 | USE pegrid |
---|
| 658 | USE transpose_indices |
---|
| 659 | |
---|
| 660 | IMPLICIT NONE |
---|
| 661 | |
---|
| 662 | INTEGER :: i, j, k, l, m, ys |
---|
| 663 | |
---|
| 664 | REAL :: f_in(1:nza,nys:nyna,nxl:nxra), f_inv(nys:nyna,nxl:nxra,1:nza), & |
---|
| 665 | f_out(0:nya,nxl_yd:nxr_yda,nzb_yd:nzt_yda), & |
---|
[164] | 666 | work(nnx*nny*nnz) |
---|
[1] | 667 | |
---|
| 668 | #if defined( __parallel ) |
---|
| 669 | |
---|
| 670 | ! |
---|
| 671 | !-- Rearrange indices of input array in order to make data to be send |
---|
| 672 | !-- by MPI contiguous |
---|
| 673 | DO i = nxl, nxra |
---|
| 674 | DO j = nys, nyna |
---|
| 675 | DO k = 1, nza |
---|
[164] | 676 | f_inv(j,i,k) = f_in(k,j,i) |
---|
[1] | 677 | ENDDO |
---|
| 678 | ENDDO |
---|
| 679 | ENDDO |
---|
| 680 | |
---|
| 681 | ! |
---|
| 682 | !-- Move data to different array, because memory location of work1 is |
---|
| 683 | !-- needed further below (work1 = work2). |
---|
| 684 | !-- If the PE grid is one-dimensional along x, only local reordering |
---|
| 685 | !-- of the data is necessary and no transposition has to be done. |
---|
| 686 | IF ( pdims(2) == 1 ) THEN |
---|
| 687 | DO k = 1, nza |
---|
| 688 | DO i = nxl, nxra |
---|
| 689 | DO j = nys, nyna |
---|
[164] | 690 | f_out(j,i,k) = f_inv(j,i,k) |
---|
[1] | 691 | ENDDO |
---|
| 692 | ENDDO |
---|
| 693 | ENDDO |
---|
| 694 | RETURN |
---|
| 695 | ENDIF |
---|
| 696 | |
---|
| 697 | ! |
---|
| 698 | !-- Transpose array |
---|
| 699 | CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) |
---|
[622] | 700 | IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) |
---|
[1] | 701 | CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zyd, MPI_REAL, & |
---|
[164] | 702 | work(1), sendrecvcount_zyd, MPI_REAL, & |
---|
[1] | 703 | comm1dy, ierr ) |
---|
| 704 | CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) |
---|
| 705 | |
---|
| 706 | ! |
---|
| 707 | !-- Reorder transposed array |
---|
| 708 | m = 0 |
---|
| 709 | DO l = 0, pdims(2) - 1 |
---|
| 710 | ys = 0 + l * nny |
---|
| 711 | DO k = nzb_yd, nzt_yda |
---|
| 712 | DO i = nxl_yd, nxr_yda |
---|
| 713 | DO j = ys, ys + nny - 1 |
---|
| 714 | m = m + 1 |
---|
[164] | 715 | f_out(j,i,k) = work(m) |
---|
[1] | 716 | ENDDO |
---|
| 717 | ENDDO |
---|
| 718 | ENDDO |
---|
| 719 | ENDDO |
---|
| 720 | |
---|
| 721 | #endif |
---|
| 722 | |
---|
| 723 | END SUBROUTINE transpose_zyd |
---|