Changeset 1216 for palm/trunk/SOURCE/transpose.f90
- Timestamp: Aug 26, 2013 9:31:42 AM (11 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
palm/trunk/SOURCE/transpose.f90
r1112 r1216 1 SUBROUTINE transpose_xy( f_in, work, f_out)1 SUBROUTINE resort_for_xy( f_in, f_inv ) 2 2 3 3 !--------------------------------------------------------------------------------! … … 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! re-sorting of the transposed / to be transposed arrays moved to separate 23 ! routines resort_for_... 23 24 ! 24 25 ! Former revisions: … … 69 70 ! Initial revision 70 71 ! 71 ! 72 !------------------------------------------------------------------------------! 73 ! Description: 74 ! ------------ 75 ! Resorting data for the transposition from x to y. The transposition itself 76 ! is carried out in transpose_xy 77 !------------------------------------------------------------------------------! 78 79 USE indices 80 USE transpose_indices 81 82 IMPLICIT NONE 83 84 REAL :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x) 85 REAL :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) 86 87 88 INTEGER :: i, j, k 89 90 ! 91 !-- Rearrange indices of input array in order to make data to be send 92 !-- by MPI contiguous 93 !$OMP PARALLEL PRIVATE ( i, j, k ) 94 !$OMP DO 95 !$acc kernels present( f_in, f_inv ) 96 !$acc loop 97 DO i = 0, nx 98 DO k = nzb_x, nzt_x 99 !$acc loop vector( 32 ) 100 DO j = nys_x, nyn_x 101 f_inv(j,k,i) = f_in(i,j,k) 102 ENDDO 103 ENDDO 104 ENDDO 105 !$acc end kernels 106 !$OMP END PARALLEL 107 108 END SUBROUTINE resort_for_xy 109 110 111 SUBROUTINE transpose_xy( f_inv, f_out ) 112 113 !------------------------------------------------------------------------------! 72 114 ! Description: 73 115 ! 
------------ … … 87 129 INTEGER :: i, j, k, l, ys 88 130 89 REAL :: f_in (0:nx,nys_x:nyn_x,nzb_x:nzt_x), f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y)131 REAL :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx), f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) 90 132 91 133 REAL, DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work 92 134 93 !$acc declare create( f_inv )94 REAL :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx)95 96 97 !98 !-- Rearrange indices of input array in order to make data to be send99 !-- by MPI contiguous100 !$OMP PARALLEL PRIVATE ( i, j, k )101 !$OMP DO102 !$acc kernels present( f_in )103 !$acc loop104 DO i = 0, nx105 DO k = nzb_x, nzt_x106 !$acc loop vector( 32 )107 DO j = nys_x, nyn_x108 f_inv(j,k,i) = f_in(i,j,k)109 ENDDO110 ENDDO111 ENDDO112 !$acc end kernels113 !$OMP END PARALLEL114 135 115 136 IF ( numprocs /= 1 ) THEN … … 124 145 work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, & 125 146 comm1dy, ierr ) 126 !$acc update device( work )127 147 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 128 148 … … 131 151 !$OMP PARALLEL PRIVATE ( i, j, k, l, ys ) 132 152 !$OMP DO 153 !$acc data copyin( work ) 133 154 DO l = 0, pdims(2) - 1 134 155 ys = 0 + l * ( nyn_x - nys_x + 1 ) … … 145 166 !$acc end kernels 146 167 ENDDO 168 !$acc end data 147 169 !$OMP END PARALLEL 148 170 #endif … … 154 176 !$OMP PARALLEL PRIVATE ( i, j, k ) 155 177 !$OMP DO 156 !$acc kernels present( f_ out )178 !$acc kernels present( f_inv, f_out ) 157 179 !$acc loop 158 180 DO k = nzb_y, nzt_y … … 172 194 173 195 174 SUBROUTINE transpose_xz( f_in, work, f_out ) 196 SUBROUTINE resort_for_xz( f_inv, f_out ) 197 198 !------------------------------------------------------------------------------! 199 ! Description: 200 ! ------------ 201 ! Resorting data after the transposition from x to z. The transposition itself 202 ! is carried out in transpose_xz 203 !------------------------------------------------------------------------------! 
204 205 USE indices 206 USE transpose_indices 207 208 IMPLICIT NONE 209 210 REAL :: f_inv(nys:nyn,nxl:nxr,1:nz) 211 REAL :: f_out(1:nz,nys:nyn,nxl:nxr) 212 213 214 INTEGER :: i, j, k 215 216 ! 217 !-- Rearrange indices of input array in order to make data to be send 218 !-- by MPI contiguous. 219 !-- In case of parallel fft/transposition, scattered store is faster in 220 !-- backward direction!!! 221 !$OMP PARALLEL PRIVATE ( i, j, k ) 222 !$OMP DO 223 !$acc kernels present( f_inv, f_out ) 224 !$acc loop 225 DO k = 1, nz 226 DO i = nxl, nxr 227 !$acc loop vector( 32 ) 228 DO j = nys, nyn 229 f_out(k,j,i) = f_inv(j,i,k) 230 ENDDO 231 ENDDO 232 ENDDO 233 !$acc end kernels 234 !$OMP END PARALLEL 235 236 END SUBROUTINE resort_for_xz 237 238 239 SUBROUTINE transpose_xz( f_in, f_inv ) 175 240 176 241 !------------------------------------------------------------------------------! … … 192 257 INTEGER :: i, j, k, l, xs 193 258 194 REAL :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x), f_ out(1:nz,nys:nyn,nxl:nxr)259 REAL :: f_in(0:nx,nys_x:nyn_x,nzb_x:nzt_x), f_inv(nys:nyn,nxl:nxr,1:nz) 195 260 196 261 REAL, DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work 197 198 !$acc declare create( f_inv )199 REAL :: f_inv(nys:nyn,nxl:nxr,1:nz)200 262 201 263 … … 210 272 !$OMP PARALLEL PRIVATE ( i, j, k, l, xs ) 211 273 !$OMP DO 274 !$acc data copyout( work ) 212 275 DO l = 0, pdims(1) - 1 213 276 xs = 0 + l * nnx … … 224 287 !$acc end kernels 225 288 ENDDO 289 !$acc end data 226 290 !$OMP END PARALLEL 227 291 … … 230 294 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 231 295 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 232 !$acc update host( work )233 296 CALL MPI_ALLTOALL( work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, & 234 297 f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, & … … 236 299 !$acc update device( f_inv ) 237 300 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 238 239 !240 !-- Reorder transposed array in a way that the z index is in first 
position241 !$OMP PARALLEL PRIVATE ( i, j, k )242 !$OMP DO243 !$acc kernels present( f_out )244 !$acc loop245 DO k = 1, nz246 DO i = nxl, nxr247 !$acc loop vector( 32 )248 DO j = nys, nyn249 f_out(k,j,i) = f_inv(j,i,k)250 ENDDO251 ENDDO252 ENDDO253 !$acc end kernels254 !$OMP END PARALLEL255 301 #endif 256 302 … … 261 307 !$OMP PARALLEL PRIVATE ( i, j, k ) 262 308 !$OMP DO 263 !$acc kernels present( f_in )309 !$acc kernels present( f_in, f_inv ) 264 310 !$acc loop 265 311 DO i = nxl, nxr … … 274 320 !$OMP END PARALLEL 275 321 276 !$OMP PARALLEL PRIVATE ( i, j, k )277 !$OMP DO278 !$acc kernels present( f_out )279 !$acc loop280 DO k = 1, nz281 DO i = nxl, nxr282 !$acc loop vector( 32 )283 DO j = nys, nyn284 f_out(k,j,i) = f_inv(j,i,k)285 ENDDO286 ENDDO287 ENDDO288 !$acc end kernels289 !$OMP END PARALLEL290 291 322 ENDIF 292 323 … … 294 325 295 326 296 SUBROUTINE transpose_yx( f_in, work, f_out ) 327 SUBROUTINE resort_for_yx( f_inv, f_out ) 328 329 !------------------------------------------------------------------------------! 330 ! Description: 331 ! ------------ 332 ! Resorting data after the transposition from y to x. The transposition itself 333 ! is carried out in transpose_yx 334 !------------------------------------------------------------------------------! 335 336 USE indices 337 USE transpose_indices 338 339 IMPLICIT NONE 340 341 REAL :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) 342 REAL :: f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) 343 344 345 INTEGER :: i, j, k 346 347 ! 
348 !-- Rearrange indices of input array in order to make data to be send 349 !-- by MPI contiguous 350 !$OMP PARALLEL PRIVATE ( i, j, k ) 351 !$OMP DO 352 !$acc kernels present( f_inv, f_out ) 353 !$acc loop 354 DO i = 0, nx 355 DO k = nzb_x, nzt_x 356 !$acc loop vector( 32 ) 357 DO j = nys_x, nyn_x 358 f_out(i,j,k) = f_inv(j,k,i) 359 ENDDO 360 ENDDO 361 ENDDO 362 !$acc end kernels 363 !$OMP END PARALLEL 364 365 END SUBROUTINE resort_for_yx 366 367 368 SUBROUTINE transpose_yx( f_in, f_inv ) 297 369 298 370 !------------------------------------------------------------------------------! … … 314 386 INTEGER :: i, j, k, l, ys 315 387 316 REAL :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), f_ out(0:nx,nys_x:nyn_x,nzb_x:nzt_x)388 REAL :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y), f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx) 317 389 318 390 REAL, DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work 319 320 !$acc declare create( f_inv )321 REAL :: f_inv(nys_x:nyn_x,nzb_x:nzt_x,0:nx)322 391 323 392 … … 329 398 !$OMP PARALLEL PRIVATE ( i, j, k, l, ys ) 330 399 !$OMP DO 400 !$acc data copyout( work ) 331 401 DO l = 0, pdims(2) - 1 332 402 ys = 0 + l * ( nyn_x - nys_x + 1 ) … … 343 413 !$acc end kernels 344 414 ENDDO 415 !$acc end data 345 416 !$OMP END PARALLEL 346 417 … … 349 420 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 350 421 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 351 !$acc update host( work )352 422 CALL MPI_ALLTOALL( work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, & 353 423 f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, & … … 363 433 !$OMP PARALLEL PRIVATE ( i, j, k ) 364 434 !$OMP DO 365 !$acc kernels present( f_in )435 !$acc kernels present( f_in, f_inv ) 366 436 !$acc loop 367 437 DO i = nxl_y, nxr_y … … 378 448 ENDIF 379 449 380 !381 !-- Reorder transposed array in a way that the x index is in first position382 !$OMP PARALLEL PRIVATE ( i, j, k )383 !$OMP DO384 !$acc kernels present( f_out )385 !$acc loop386 DO i = 0, nx387 DO k = 
nzb_x, nzt_x388 !$acc loop vector( 32 )389 DO j = nys_x, nyn_x390 f_out(i,j,k) = f_inv(j,k,i)391 ENDDO392 ENDDO393 ENDDO394 !$acc end kernels395 !$OMP END PARALLEL396 397 450 END SUBROUTINE transpose_yx 398 451 399 452 400 SUBROUTINE transpose_yxd( f_in, work,f_out )453 SUBROUTINE transpose_yxd( f_in, f_out ) 401 454 402 455 !------------------------------------------------------------------------------! … … 466 519 467 520 468 SUBROUTINE transpose_yz( f_in, work, f_out ) 521 SUBROUTINE resort_for_yz( f_in, f_inv ) 522 523 !------------------------------------------------------------------------------! 524 ! Description: 525 ! ------------ 526 ! Resorting data for the transposition from y to z. The transposition itself 527 ! is carried out in transpose_yz 528 !------------------------------------------------------------------------------! 529 530 USE indices 531 USE transpose_indices 532 533 IMPLICIT NONE 534 535 REAL :: f_in(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) 536 REAL :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) 537 538 539 INTEGER :: i, j, k 540 541 ! 542 !-- Rearrange indices of input array in order to make data to be send 543 !-- by MPI contiguous 544 !$OMP PARALLEL PRIVATE ( i, j, k ) 545 !$OMP DO 546 !$acc kernels present( f_in, f_inv ) 547 !$acc loop 548 DO j = 0, ny 549 DO k = nzb_y, nzt_y 550 !$acc loop vector( 32 ) 551 DO i = nxl_y, nxr_y 552 f_inv(i,k,j) = f_in(j,i,k) 553 ENDDO 554 ENDDO 555 ENDDO 556 !$acc end kernels 557 !$OMP END PARALLEL 558 559 END SUBROUTINE resort_for_yz 560 561 562 SUBROUTINE transpose_yz( f_inv, f_out ) 469 563 470 564 !------------------------------------------------------------------------------! 
… … 486 580 INTEGER :: i, j, k, l, zs 487 581 488 REAL :: f_in (0:ny,nxl_y:nxr_y,nzb_y:nzt_y), f_out(nxl_z:nxr_z,nys_z:nyn_z,1:nz)582 REAL :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny), f_out(nxl_z:nxr_z,nys_z:nyn_z,1:nz) 489 583 490 584 REAL, DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work 491 585 492 !$acc declare create( f_inv ) 493 REAL :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) 494 495 496 ! 497 !-- Rearrange indices of input array in order to make data to be send 498 !-- by MPI contiguous 499 !$OMP PARALLEL PRIVATE ( i, j, k ) 500 !$OMP DO 501 !$acc kernels present( f_in ) 502 !$acc loop 503 DO j = 0, ny 504 DO k = nzb_y, nzt_y 505 !$acc loop vector( 32 ) 506 DO i = nxl_y, nxr_y 507 f_inv(i,k,j) = f_in(j,i,k) 508 ENDDO 509 ENDDO 510 ENDDO 511 !$acc end kernels 512 !$OMP END PARALLEL 513 514 ! 515 !-- Move data to different array, because memory location of work1 is 516 !-- needed further below (work1 = work2). 586 587 ! 517 588 !-- If the PE grid is one-dimensional along y, only local reordering 518 589 !-- of the data is necessary and no transposition has to be done. 
… … 521 592 !$OMP PARALLEL PRIVATE ( i, j, k ) 522 593 !$OMP DO 523 !$acc kernels present( f_ out )594 !$acc kernels present( f_inv, f_out ) 524 595 !$acc loop 525 596 DO j = 0, ny … … 545 616 work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, & 546 617 comm1dx, ierr ) 547 !$acc update device( work )548 618 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 549 619 … … 552 622 !$OMP PARALLEL PRIVATE ( i, j, k, l, zs ) 553 623 !$OMP DO 624 !$acc data copyin( work ) 554 625 DO l = 0, pdims(1) - 1 555 626 zs = 1 + l * ( nzt_y - nzb_y + 1 ) 556 !$acc kernels present( f_out , work)627 !$acc kernels present( f_out ) 557 628 !$acc loop 558 629 DO j = nys_z, nyn_z … … 566 637 !$acc end kernels 567 638 ENDDO 639 !$acc end data 568 640 !$OMP END PARALLEL 569 641 #endif … … 574 646 575 647 576 SUBROUTINE transpose_zx( f_in, work, f_out ) 648 SUBROUTINE resort_for_zx( f_in, f_inv ) 649 650 !------------------------------------------------------------------------------! 651 ! Description: 652 ! ------------ 653 ! Resorting data for the transposition from z to x. The transposition itself 654 ! is carried out in transpose_zx 655 !------------------------------------------------------------------------------! 656 657 USE indices 658 USE transpose_indices 659 660 IMPLICIT NONE 661 662 REAL :: f_in(1:nz,nys:nyn,nxl:nxr) 663 REAL :: f_inv(nys:nyn,nxl:nxr,1:nz) 664 665 666 INTEGER :: i, j, k 667 668 ! 669 !-- Rearrange indices of input array in order to make data to be send 670 !-- by MPI contiguous 671 !$OMP PARALLEL PRIVATE ( i, j, k ) 672 !$OMP DO 673 !$acc kernels present( f_in, f_inv ) 674 !$acc loop 675 DO k = 1,nz 676 DO i = nxl, nxr 677 !$acc loop vector( 32 ) 678 DO j = nys, nyn 679 f_inv(j,i,k) = f_in(k,j,i) 680 ENDDO 681 ENDDO 682 ENDDO 683 !$acc end kernels 684 !$OMP END PARALLEL 685 686 END SUBROUTINE resort_for_zx 687 688 689 SUBROUTINE transpose_zx( f_inv, f_out ) 577 690 578 691 !------------------------------------------------------------------------------! 
… … 594 707 INTEGER :: i, j, k, l, xs 595 708 596 REAL :: f_in (1:nz,nys:nyn,nxl:nxr), f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x)709 REAL :: f_inv(nys:nyn,nxl:nxr,1:nz), f_out(0:nx,nys_x:nyn_x,nzb_x:nzt_x) 597 710 598 711 REAL, DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work 599 712 600 !$acc declare create( f_inv ) 601 REAL :: f_inv(nys:nyn,nxl:nxr,1:nz) 602 603 604 ! 605 !-- Rearrange indices of input array in order to make data to be send 606 !-- by MPI contiguous 607 !$OMP PARALLEL PRIVATE ( i, j, k ) 608 !$OMP DO 609 !$acc kernels present( f_in ) 610 !$acc loop 611 DO k = 1,nz 612 DO i = nxl, nxr 613 !$acc loop vector( 32 ) 614 DO j = nys, nyn 615 f_inv(j,i,k) = f_in(k,j,i) 616 ENDDO 617 ENDDO 618 ENDDO 619 !$acc end kernels 620 !$OMP END PARALLEL 621 622 ! 623 !-- Move data to different array, because memory location of work1 is 624 !-- needed further below (work1 = work2). 713 714 ! 625 715 !-- If the PE grid is one-dimensional along y, only local reordering 626 716 !-- of the data is necessary and no transposition has to be done. 
… … 629 719 !$OMP PARALLEL PRIVATE ( i, j, k ) 630 720 !$OMP DO 631 !$acc kernels present( f_ out )721 !$acc kernels present( f_inv, f_out ) 632 722 !$acc loop 633 723 DO k = 1, nz … … 653 743 work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, & 654 744 comm1dx, ierr ) 655 !$acc update device( work )656 745 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 657 746 … … 660 749 !$OMP PARALLEL PRIVATE ( i, j, k, l, xs ) 661 750 !$OMP DO 751 !$acc data copyin( work ) 662 752 DO l = 0, pdims(1) - 1 663 753 xs = 0 + l * nnx 664 !$acc kernels present( f_out , work)754 !$acc kernels present( f_out ) 665 755 !$acc loop 666 756 DO k = nzb_x, nzt_x … … 674 764 !$acc end kernels 675 765 ENDDO 766 !$acc end data 676 767 !$OMP END PARALLEL 677 768 #endif … … 682 773 683 774 684 SUBROUTINE transpose_zy( f_in, work, f_out ) 775 SUBROUTINE resort_for_zy( f_inv, f_out ) 776 777 !------------------------------------------------------------------------------! 778 ! Description: 779 ! ------------ 780 ! Resorting data after the transposition from z to y. The transposition itself 781 ! is carried out in transpose_zy 782 !------------------------------------------------------------------------------! 783 784 USE indices 785 USE transpose_indices 786 787 IMPLICIT NONE 788 789 REAL :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) 790 REAL :: f_out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y) 791 792 793 INTEGER :: i, j, k 794 795 ! 
796 !-- Rearrange indices of input array in order to make data to be send 797 !-- by MPI contiguous 798 !$OMP PARALLEL PRIVATE ( i, j, k ) 799 !$OMP DO 800 !$acc kernels present( f_inv, f_out ) 801 !$acc loop 802 DO k = nzb_y, nzt_y 803 DO j = 0, ny 804 !$acc loop vector( 32 ) 805 DO i = nxl_y, nxr_y 806 f_out(j,i,k) = f_inv(i,k,j) 807 ENDDO 808 ENDDO 809 ENDDO 810 !$acc end kernels 811 !$OMP END PARALLEL 812 813 END SUBROUTINE resort_for_zy 814 815 816 SUBROUTINE transpose_zy( f_in, f_inv ) 685 817 686 818 !------------------------------------------------------------------------------! … … 702 834 INTEGER :: i, j, k, l, zs 703 835 704 REAL :: f_in(nxl_z:nxr_z,nys_z:nyn_z,1:nz), f_ out(0:ny,nxl_y:nxr_y,nzb_y:nzt_y)836 REAL :: f_in(nxl_z:nxr_z,nys_z:nyn_z,1:nz), f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny) 705 837 706 838 REAL, DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work 707 708 !$acc declare create( f_inv )709 REAL :: f_inv(nxl_y:nxr_y,nzb_y:nzt_y,0:ny)710 839 711 840 … … 720 849 !$OMP PARALLEL PRIVATE ( i, j, k, l, zs ) 721 850 !$OMP DO 851 !$acc data copyout( work ) 722 852 DO l = 0, pdims(1) - 1 723 853 zs = 1 + l * ( nzt_y - nzb_y + 1 ) … … 734 864 !$acc end kernels 735 865 ENDDO 866 !$acc end data 736 867 !$OMP END PARALLEL 737 868 … … 740 871 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start' ) 741 872 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 742 !$acc update host( work )743 873 CALL MPI_ALLTOALL( work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, & 744 874 f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, & … … 753 883 !$OMP PARALLEL PRIVATE ( i, j, k ) 754 884 !$OMP DO 755 !$acc kernels present( f_in )885 !$acc kernels present( f_in, f_inv ) 756 886 !$acc loop 757 887 DO k = nzb_y, nzt_y … … 768 898 ENDIF 769 899 770 !771 !-- Reorder transposed array in a way that the y index is in first position772 !$OMP PARALLEL PRIVATE ( i, j, k )773 !$OMP DO774 !$acc kernels present( f_out )775 !$acc loop776 DO k = nzb_y, nzt_y777 DO 
i = nxl_y, nxr_y778 !$acc loop vector( 32 )779 DO j = 0, ny780 f_out(j,i,k) = f_inv(i,k,j)781 ENDDO782 ENDDO783 ENDDO784 !$acc end kernels785 !$OMP END PARALLEL786 787 900 END SUBROUTINE transpose_zy 788 901 789 902 790 SUBROUTINE transpose_zyd( f_in, work,f_out )903 SUBROUTINE transpose_zyd( f_in, f_out ) 791 904 792 905 !------------------------------------------------------------------------------!
Note: See TracChangeset for help on using the changeset viewer.