Changeset 1106 for palm/trunk/SOURCE/transpose.f90
- Timestamp:
- Mar 4, 2013 5:31:38 AM (11 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
palm/trunk/SOURCE/transpose.f90
r1093 r1106 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! preprocessor lines rearranged so that routines can also be used in serial 23 ! (non-parallel) mode 23 24 ! 24 25 ! Former revisions: … … 84 85 work(nnx*nny*nnz) 85 86 86 #if defined( __parallel )87 88 87 ! 89 88 !-- Rearrange indices of input array in order to make data to be send … … 100 99 !$OMP END PARALLEL 101 100 102 ! 103 !-- Transpose array 104 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 105 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 106 CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, & 107 work(1), sendrecvcount_xy, MPI_REAL, & 108 comm1dy, ierr ) 109 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 110 111 ! 112 !-- Reorder transposed array 101 IF ( numprocs /= 1 ) THEN 102 103 #if defined( __parallel ) 104 ! 105 !-- Transpose array 106 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 107 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 108 CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, & 109 work(1), sendrecvcount_xy, MPI_REAL, & 110 comm1dy, ierr ) 111 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 112 113 ! 114 !-- Reorder transposed array 113 115 !$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys ) 114 116 !$OMP DO 115 DO l = 0, pdims(2) - 1 116 m = l * ( nxr_y - nxl_y + 1 ) * ( nzt_y - nzb_y + 1 ) * & 117 ( nyn_x - nys_x + 1 ) 118 ys = 0 + l * ( nyn_x - nys_x + 1 ) 119 DO i = nxl_y, nxr_y 120 DO k = nzb_y, nzt_y 121 DO j = ys, ys + nyn_x - nys_x 122 m = m + 1 123 f_out(j,i,k) = work(m) 124 ENDDO 125 ENDDO 126 ENDDO 127 ENDDO 128 !$OMP END PARALLEL 129 117 DO l = 0, pdims(2) - 1 118 m = l * ( nxr_y - nxl_y + 1 ) * ( nzt_y - nzb_y + 1 ) * & 119 ( nyn_x - nys_x + 1 ) 120 ys = 0 + l * ( nyn_x - nys_x + 1 ) 121 DO i = nxl_y, nxr_y 122 DO k = nzb_y, nzt_y 123 DO j = ys, ys + nyn_x - nys_x 124 m = m + 1 125 f_out(j,i,k) = work(m) 126 ENDDO 127 ENDDO 128 ENDDO 129 ENDDO 130 !$OMP END PARALLEL 130 131 #endif 132 133 ELSE 134 135 ! 136 !-- Reorder transposed array 137 !$OMP PARALLEL PRIVATE ( i, j, k ) 138 !$OMP DO 139 DO k = nzb_y, nzt_y 140 DO i = nxl_y, nxr_y 141 DO j = 0, ny 142 f_out(j,i,k) = f_inv(j,k,i) 143 ENDDO 144 ENDDO 145 ENDDO 146 !$OMP END PARALLEL 147 148 ENDIF 131 149 132 150 END SUBROUTINE transpose_xy … … 158 176 work(nnx*nny*nnz) 159 177 160 #if defined( __parallel )161 178 162 179 ! … … 164 181 !-- reordered locally and therefore no transposition has to be done. 165 182 IF ( pdims(1) /= 1 ) THEN 183 184 #if defined( __parallel ) 166 185 ! 167 186 !-- Reorder input array for transposition … … 203 222 ENDDO 204 223 !$OMP END PARALLEL 224 #endif 225 205 226 ELSE 227 206 228 ! 207 229 !-- Reorder the array in a way that the z index is in first position … … 229 251 230 252 ENDIF 231 232 233 #endif234 253 235 254 END SUBROUTINE transpose_xz … … 261 280 work(nnx*nny*nnz) 262 281 282 IF ( numprocs /= 1 ) THEN 283 263 284 #if defined( __parallel ) 264 265 ! 266 !-- Reorder input array for transposition 285 ! 286 !-- Reorder input array for transposition 267 287 !$OMP PARALLEL PRIVATE ( i, j, k, l, m, ys ) 268 288 !$OMP DO 269 DO l = 0, pdims(2) - 1 270 m = l * ( nxr_y - nxl_y + 1 ) * ( nzt_y - nzb_y + 1 ) * & 271 ( nyn_x - nys_x + 1 ) 272 ys = 0 + l * ( nyn_x - nys_x + 1 ) 289 DO l = 0, pdims(2) - 1 290 m = l * ( nxr_y - nxl_y + 1 ) * ( nzt_y - nzb_y + 1 ) * & 291 ( nyn_x - nys_x + 1 ) 292 ys = 0 + l * ( nyn_x - nys_x + 1 ) 293 DO i = nxl_y, nxr_y 294 DO k = nzb_y, nzt_y 295 DO j = ys, ys + nyn_x - nys_x 296 m = m + 1 297 work(m) = f_in(j,i,k) 298 ENDDO 299 ENDDO 300 ENDDO 301 ENDDO 302 !$OMP END PARALLEL 303 304 ! 305 !-- Transpose array 306 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 307 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 308 CALL MPI_ALLTOALL( work(1), sendrecvcount_xy, MPI_REAL, & 309 f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, & 310 comm1dy, ierr ) 311 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 312 #endif 313 314 ELSE 315 316 ! 317 !-- Reorder array f_in the same way as ALLTOALL did it 318 !$OMP PARALLEL PRIVATE ( i, j, k ) 319 !$OMP DO 273 320 DO i = nxl_y, nxr_y 274 321 DO k = nzb_y, nzt_y 275 DO j = ys, ys + nyn_x - nys_x 276 m = m + 1 277 work(m) = f_in(j,i,k) 278 ENDDO 279 ENDDO 280 ENDDO 281 ENDDO 282 !$OMP END PARALLEL 283 284 ! 285 !-- Transpose array 286 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 287 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 288 CALL MPI_ALLTOALL( work(1), sendrecvcount_xy, MPI_REAL, & 289 f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, & 290 comm1dy, ierr ) 291 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 322 DO j = 0, ny 323 f_inv(j,k,i) = f_in(j,i,k) 324 ENDDO 325 ENDDO 326 ENDDO 327 !$OMP END PARALLEL 328 329 ENDIF 292 330 293 331 ! … … 303 341 ENDDO 304 342 !$OMP END PARALLEL 305 306 #endif307 343 308 344 END SUBROUTINE transpose_yx … … 402 438 work(nnx*nny*nnz) 403 439 404 #if defined( __parallel )405 406 440 ! 407 441 !-- Rearrange indices of input array in order to make data to be send … … 424 458 !-- of the data is necessary and no transposition has to be done. 425 459 IF ( pdims(1) == 1 ) THEN 460 426 461 !$OMP PARALLEL PRIVATE ( i, j, k ) 427 462 !$OMP DO … … 434 469 ENDDO 435 470 !$OMP END PARALLEL 436 RETURN 437 ENDIF 438 439 ! 440 !-- Transpose array 441 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 442 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 443 CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, & 444 work(1), sendrecvcount_yz, MPI_REAL, & 445 comm1dx, ierr ) 446 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 447 448 ! 449 !-- Reorder transposed array 471 472 ELSE 473 474 #if defined( __parallel ) 475 ! 476 !-- Transpose array 477 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 478 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 479 CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, & 480 work(1), sendrecvcount_yz, MPI_REAL, & 481 comm1dx, ierr ) 482 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 483 484 ! 485 !-- Reorder transposed array 450 486 !$OMP PARALLEL PRIVATE ( i, j, k, l, m, zs ) 451 487 !$OMP DO 452 DO l = 0, pdims(1) - 1 453 m = l * ( nyn_z - nys_z + 1 ) * ( nzt_y - nzb_y + 1 ) * & 454 ( nxr_z - nxl_z + 1 ) 455 zs = 1 + l * ( nzt_y - nzb_y + 1 ) 456 DO j = nys_z, nyn_z 457 DO k = zs, zs + nzt_y - nzb_y 458 DO i = nxl_z, nxr_z 459 m = m + 1 460 f_out(i,j,k) = work(m) 461 ENDDO 462 ENDDO 463 ENDDO 464 ENDDO 465 !$OMP END PARALLEL 466 488 DO l = 0, pdims(1) - 1 489 m = l * ( nyn_z - nys_z + 1 ) * ( nzt_y - nzb_y + 1 ) * & 490 ( nxr_z - nxl_z + 1 ) 491 zs = 1 + l * ( nzt_y - nzb_y + 1 ) 492 DO j = nys_z, nyn_z 493 DO k = zs, zs + nzt_y - nzb_y 494 DO i = nxl_z, nxr_z 495 m = m + 1 496 f_out(i,j,k) = work(m) 497 ENDDO 498 ENDDO 499 ENDDO 500 ENDDO 501 !$OMP END PARALLEL 467 502 #endif 503 504 ENDIF 468 505 469 506 END SUBROUTINE transpose_yz … … 490 527 INTEGER :: i, j, k, l, m, xs 491 528 492 REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_inv(nys:nyn,nxl:nxr,1:nz), & 493 f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), & 529 REAL :: f_in(1:nz,nys:nyn,nxl:nxr), f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x), & 494 530 work(nnx*nny*nnz) 495 531 496 #if defined( __parallel ) 532 !$acc declare create ( f_inv ) 533 REAL :: f_inv(nys:nyn,nxl:nxr,1:nz) 534 497 535 498 536 ! … … 501 539 !$OMP PARALLEL PRIVATE ( i, j, k ) 502 540 !$OMP DO 541 !$acc kernels present( f_in ) 542 !$acc loop 503 543 DO k = 1,nz 504 544 DO i = nxl, nxr 545 !$acc loop vector( 32 ) 505 546 DO j = nys, nyn 506 547 f_inv(j,i,k) = f_in(k,j,i) … … 516 557 !-- of the data is necessary and no transposition has to be done. 517 558 IF ( pdims(1) == 1 ) THEN 518 !$OMP PARALLEL PRIVATE ( i, j, k ) 519 !$OMP DO 559 560 !$OMP PARALLEL PRIVATE ( i, j, k ) 561 !$OMP DO 562 !$acc kernels present( f_out ) 563 !$acc loop 520 564 DO k = 1, nz 521 565 DO i = nxl, nxr 566 !$acc loop vector( 32 ) 522 567 DO j = nys, nyn 523 568 f_out(i,j,k) = f_inv(j,i,k) … … 526 571 ENDDO 527 572 !$OMP END PARALLEL 528 RETURN 573 574 ELSE 575 576 #if defined( __parallel ) 577 ! 578 !-- Transpose array 579 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 580 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 581 CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, & 582 work(1), sendrecvcount_zx, MPI_REAL, & 583 comm1dx, ierr ) 584 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 585 586 ! 587 !-- Reorder transposed array 588 !$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs ) 589 !$OMP DO 590 DO l = 0, pdims(1) - 1 591 m = l * ( nzt_x - nzb_x + 1 ) * nnx * ( nyn_x - nys_x + 1 ) 592 xs = 0 + l * nnx 593 DO k = nzb_x, nzt_x 594 DO i = xs, xs + nnx - 1 595 DO j = nys_x, nyn_x 596 m = m + 1 597 f_out(i,j,k) = work(m) 598 ENDDO 599 ENDDO 600 ENDDO 601 ENDDO 602 !$OMP END PARALLEL 603 #endif 604 529 605 ENDIF 530 531 !532 !-- Transpose array533 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' )534 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )535 CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, &536 work(1), sendrecvcount_zx, MPI_REAL, &537 comm1dx, ierr )538 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )539 540 !541 !-- Reorder transposed array542 !$OMP PARALLEL PRIVATE ( i, j, k, l, m, xs )543 !$OMP DO544 DO l = 0, pdims(1) - 1545 m = l * ( nzt_x - nzb_x + 1 ) * nnx * ( nyn_x - nys_x + 1 )546 xs = 0 + l * nnx547 DO k = nzb_x, nzt_x548 DO i = xs, xs + nnx - 1549 DO j = nys_x, nyn_x550 m = m + 1551 f_out(i,j,k) = work(m)552 ENDDO553 ENDDO554 ENDDO555 ENDDO556 !$OMP END PARALLEL557 558 #endif559 606 560 607 END SUBROUTINE transpose_zx … … 586 633 work(nnx*nny*nnz) 587 634 588 #if defined( __parallel )589 590 635 ! 591 636 !-- If the PE grid is one-dimensional along y, the array has only to be 592 637 !-- reordered locally and therefore no transposition has to be done. 593 638 IF ( pdims(1) /= 1 ) THEN 639 640 #if defined( __parallel ) 594 641 ! 595 642 !-- Reorder input array for transposition … … 619 666 comm1dx, ierr ) 620 667 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 621 622 ! 623 !-- Reorder transposed array in a way that the y index is in first position 624 !$OMP PARALLEL PRIVATE ( i, j, k ) 625 !$OMP DO 626 DO j = 0, ny 627 DO k = nzb_y, nzt_y 628 DO i = nxl_y, nxr_y 629 f_out(j,i,k) = f_inv(i,k,j) 630 ENDDO 631 ENDDO 632 ENDDO 633 !$OMP END PARALLEL 668 #endif 669 634 670 ELSE 635 671 ! 636 !-- Reorder the array in a way that the y index is in first position672 !-- Reorder the array in the same way like ALLTOALL did it 637 673 !$OMP PARALLEL PRIVATE ( i, j, k ) 638 674 !$OMP DO … … 645 681 ENDDO 646 682 !$OMP END PARALLEL 647 !648 !-- Move data to output array649 !$OMP PARALLEL PRIVATE ( i, j, k )650 !$OMP DO651 DO k = nzb_y, nzt_y652 DO i = nxl_y, nxr_y653 DO j = 0, ny654 f_out(j,i,k) = f_inv(i,k,j)655 ENDDO656 ENDDO657 ENDDO658 !$OMP END PARALLEL659 683 660 684 ENDIF 661 685 662 #endif 686 ! 687 !-- Reorder transposed array in a way that the y index is in first position 688 !$OMP PARALLEL PRIVATE ( i, j, k ) 689 !$OMP DO 690 DO k = nzb_y, nzt_y 691 DO i = nxl_y, nxr_y 692 DO j = 0, ny 693 f_out(j,i,k) = f_inv(i,k,j) 694 ENDDO 695 ENDDO 696 ENDDO 697 !$OMP END PARALLEL 663 698 664 699 END SUBROUTINE transpose_zy
Note: See TracChangeset
for help on using the changeset viewer.