Changeset 3634 for palm/trunk/SOURCE/transpose.f90
 Timestamp:
 Dec 18, 2018 12:31:28 PM (5 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

palm/trunk/SOURCE/transpose.f90
r3241 r3634 25 25 !  26 26 ! $Id$ 27 ! OpenACC port for SPEC 28 ! 29 ! 3241 2018-09-12 15:02:00Z raasch 27 30 ! unused variables removed 28 31 ! … … 119 122 !$OMP PARALLEL PRIVATE ( i, j, k ) 120 123 !$OMP DO 124 !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) & 125 !$ACC PRESENT(f_inv, f_in) 121 126 DO i = 0, nx 122 127 DO k = nzb_x, nzt_x … … 166 171 167 172 REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work !< 173 !$ACC DECLARE CREATE(work) 168 174 169 175 … … 174 180 ! Transpose array 175 181 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait ) 182 !$ACC UPDATE HOST(f_inv) 176 183 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 177 184 CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, & 178 185 work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, & 179 186 comm1dy, ierr ) 187 !$ACC UPDATE DEVICE(work) 180 188 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 181 189 … … 186 194 DO l = 0, pdims(2) - 1 187 195 ys = 0 + l * ( nyn_x - nys_x + 1 ) 196 !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) & 197 !$ACC PRESENT(f_out, work) 188 198 DO i = nxl_y, nxr_y 189 199 DO k = nzb_y, nzt_y … … 203 213 !$OMP PARALLEL PRIVATE ( i, j, k ) 204 214 !$OMP DO 215 !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) & 216 !$ACC PRESENT(f_out, f_inv) 205 217 DO k = nzb_y, nzt_y 206 218 DO i = nxl_y, nxr_y … … 246 258 !$OMP PARALLEL PRIVATE ( i, j, k ) 247 259 !$OMP DO 260 !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) & 261 !$ACC PRESENT(f_out, f_inv) 248 262 DO k = 1, nz 249 263 DO i = nxl, nxr … … 293 307 294 308 REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work !< 309 !$ACC DECLARE CREATE(work) 295 310 296 311 … … 307 322 DO l = 0, pdims(1) - 1 308 323 xs = 0 + l * nnx 324 !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) & 325 !$ACC PRESENT(work, f_in) 309 326 DO k = nzb_x, nzt_x 310 327 DO i = xs, xs + nnx - 1 … … 320 337 !
Transpose array 321 338 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait ) 339 !$ACC UPDATE HOST(work) 322 340 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 323 341 CALL MPI_ALLTOALL( work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, & 324 342 f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, & 325 343 comm1dx, ierr ) 344 !$ACC UPDATE DEVICE(f_inv) 326 345 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 327 346 #endif … … 333 352 !$OMP PARALLEL PRIVATE ( i, j, k ) 334 353 !$OMP DO 354 !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) & 355 !$ACC PRESENT(f_inv, f_in) 335 356 DO i = nxl, nxr 336 357 DO j = nys, nyn … … 378 399 !$OMP PARALLEL PRIVATE ( i, j, k ) 379 400 !$OMP DO 401 !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) & 402 !$ACC PRESENT(f_out, f_inv) 380 403 DO i = 0, nx 381 404 DO k = nzb_x, nzt_x … … 425 448 426 449 REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) :: work !< 450 !$ACC DECLARE CREATE(work) 427 451 428 452 … … 436 460 DO l = 0, pdims(2) - 1 437 461 ys = 0 + l * ( nyn_x - nys_x + 1 ) 462 !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) & 463 !$ACC PRESENT(work, f_in) 438 464 DO i = nxl_y, nxr_y 439 465 DO k = nzb_y, nzt_y … … 449 475 !
Transpose array 450 476 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait ) 477 !$ACC UPDATE HOST(work) 451 478 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 452 479 CALL MPI_ALLTOALL( work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, & 453 480 f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, & 454 481 comm1dy, ierr ) 482 !$ACC UPDATE DEVICE(f_inv) 455 483 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 456 484 #endif … … 462 490 !$OMP PARALLEL PRIVATE ( i, j, k ) 463 491 !$OMP DO 492 !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) & 493 !$ACC PRESENT(f_inv, f_in) 464 494 DO i = nxl_y, nxr_y 465 495 DO k = nzb_y, nzt_y … … 587 617 !$OMP PARALLEL PRIVATE ( i, j, k ) 588 618 !$OMP DO 619 !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) & 620 !$ACC PRESENT(f_inv, f_in) 589 621 DO j = 0, ny 590 622 DO k = nzb_y, nzt_y … … 634 666 635 667 REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work !< 668 !$ACC DECLARE CREATE(work) 636 669 637 670 … … 643 676 !$OMP PARALLEL PRIVATE ( i, j, k ) 644 677 !$OMP DO 678 !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) & 679 !$ACC PRESENT(f_out, f_inv) 645 680 DO j = 0, ny 646 681 DO k = nzb_y, nzt_y … … 658 693 !
Transpose array 659 694 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait ) 695 !$ACC UPDATE HOST(f_inv) 660 696 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 661 697 CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, & 662 698 work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, & 663 699 comm1dx, ierr ) 700 !$ACC UPDATE DEVICE(work) 664 701 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 665 702 … … 670 707 DO l = 0, pdims(1) - 1 671 708 zs = 1 + l * ( nzt_y - nzb_y + 1 ) 709 !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) & 710 !$ACC PRESENT(f_out, work) 672 711 DO j = nys_z, nyn_z 673 712 DO k = zs, zs + nzt_y - nzb_y … … 714 753 !$OMP PARALLEL PRIVATE ( i, j, k ) 715 754 !$OMP DO 755 !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) & 756 !$ACC PRESENT(f_in, f_inv) 716 757 DO k = 1,nz 717 758 DO i = nxl, nxr … … 761 802 762 803 REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) :: work !< 804 !$ACC DECLARE CREATE(work) 763 805 764 806 … … 770 812 !$OMP PARALLEL PRIVATE ( i, j, k ) 771 813 !$OMP DO 814 !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) & 815 !$ACC PRESENT(f_out, f_inv) 772 816 DO k = 1, nz 773 817 DO i = nxl, nxr … … 785 829 !
Transpose array 786 830 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait ) 831 !$ACC UPDATE HOST(f_inv) 787 832 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 788 833 CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, & 789 834 work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, & 790 835 comm1dx, ierr ) 836 !$ACC UPDATE DEVICE(work) 791 837 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 792 838 … … 797 843 DO l = 0, pdims(1) - 1 798 844 xs = 0 + l * nnx 845 !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) & 846 !$ACC PRESENT(f_out, work) 799 847 DO k = nzb_x, nzt_x 800 848 DO i = xs, xs + nnx - 1 … … 845 893 !$OMP PARALLEL PRIVATE ( i, j, k ) 846 894 !$OMP DO 895 !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) & 896 !$ACC PRESENT(f_out, f_inv) 847 897 DO k = nzb_y, nzt_y 848 898 DO j = 0, ny … … 892 942 893 943 REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) :: work !< 944 !$ACC DECLARE CREATE(work) 894 945 895 946 ! … … 905 956 DO l = 0, pdims(1) - 1 906 957 zs = 1 + l * ( nzt_y - nzb_y + 1 ) 958 !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) & 959 !$ACC PRESENT(work, f_in) 907 960 DO j = nys_z, nyn_z 908 961 DO k = zs, zs + nzt_y - nzb_y … … 918 971 ! Transpose array 919 972 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait ) 973 !$ACC UPDATE HOST(work) 920 974 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 921 975 CALL MPI_ALLTOALL( work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, & 922 976 f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, & 923 977 comm1dx, ierr ) 978 !$ACC UPDATE DEVICE(f_inv) 924 979 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 925 980 #endif … … 930 985 !$OMP PARALLEL PRIVATE ( i, j, k ) 931 986 !$OMP DO 987 !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) & 988 !$ACC PRESENT(f_inv, f_in) 932 989 DO k = nzb_y, nzt_y 933 990 DO j = 0, ny
Note: See TracChangeset
for help on using the changeset viewer.