- Timestamp:
- Jan 7, 2019 8:14:18 PM (6 years ago)
- Location:
- palm/trunk/SOURCE
- Files:
- 2 edited
Legend:
- Unmodified
- Added
- Removed
palm/trunk/SOURCE/exchange_horiz.f90
r3655 r3657 132 132 133 133 #ifdef _OPENACC 134 !$ACC UPDATE IF_PRESENT &134 !$ACC UPDATE IF_PRESENT ASYNC(1) & 135 135 !$ACC HOST(ar(:,:,nxr-nbgp_local+1:nxr)) & 136 136 !$ACC HOST(ar(:,:,nxl:nxl+nbgp_local-1)) 137 DO i = nxl-nbgp_local, nxr+nbgp_local 138 !$ACC UPDATE IF_PRESENT & 137 138 ! 139 !-- Wait for first UPDATE to complete before starting the others. 140 !$ACC WAIT(1) ASYNC(2) 141 ! ar(:,:,nxl-nbgp_local:nxl-1) is overwritten by first part below 142 ! ar(:,:,nxl:nxl+nbgp_local-1) has been transferred above 143 DO i = nxl+nbgp_local, nxr-nbgp_local 144 !$ACC UPDATE IF_PRESENT ASYNC(2) & 139 145 !$ACC HOST(ar(:,nyn-nbgp_local+1:nyn,i)) & 140 146 !$ACC HOST(ar(:,nys:nys+nbgp_local-1,i)) 141 147 ENDDO 148 ! ar(:,:,nxr-nbgp_local+1:nxr) has been transferred above 149 ! ar(:,:,nxr+1:nxr+nbgp_local) is overwritten by first part below 150 151 ! 152 !-- Wait for first UPDATE to complete before starting MPI. 153 !$ACC WAIT(1) 142 154 #endif 143 155 … … 203 215 ENDIF 204 216 217 !$ACC UPDATE IF_PRESENT ASYNC(1) & 218 !$ACC DEVICE(ar(:,:,nxl-nbgp_local:nxl-1)) & 219 !$ACC DEVICE(ar(:,:,nxr+1:nxr+nbgp_local)) 220 221 ! 222 !-- Wait for UPDATES above to complete before starting MPI. 223 !$ACC WAIT(2) 205 224 206 225 IF ( pdims(2) == 1 .OR. mg_switch_to_pe0 ) THEN … … 274 293 ENDIF 275 294 295 !$ACC UPDATE IF_PRESENT ASYNC(1) & 296 !$ACC DEVICE(ar(:,:,nxl-nbgp_local:nxl-1)) & 297 !$ACC DEVICE(ar(:,:,nxr+1:nxr+nbgp_local)) 298 299 ! 300 !-- Wait for UPDATES above to complete before starting MPI. 
301 !$ACC WAIT(2) 302 276 303 IF ( bc_ns_cyc ) THEN 277 304 ar(:,nys-nbgp_local:nys-1,:) = ar(:,nyn-nbgp_local+1:nyn,:) … … 282 309 283 310 #ifdef _OPENACC 284 !$ACC UPDATE IF_PRESENT &285 !$ACC DEVICE(ar(:,:,nxl-nbgp_local:nxl-1)) &286 !$ACC DEVICE(ar(:,:,nxr+1:nxr+nbgp_local))287 311 DO i = nxl-nbgp_local, nxr+nbgp_local 288 !$ACC UPDATE IF_PRESENT &312 !$ACC UPDATE IF_PRESENT ASYNC(2) & 289 313 !$ACC DEVICE(ar(:,nys-nbgp_local:nys-1,i)) & 290 314 !$ACC DEVICE(ar(:,nyn+1:nyn+nbgp_local,i)) 291 315 ENDDO 316 317 ! 318 !-- Wait for all UPDATEs to finish. 319 !$ACC WAIT 292 320 #endif 293 321 -
palm/trunk/SOURCE/transpose.f90
r3655 r3657 180 180 !-- Transpose array 181 181 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait ) 182 #ifndef __cuda_aware_mpi 182 183 !$ACC UPDATE HOST(f_inv) 184 #else 185 !$ACC HOST_DATA USE_DEVICE(work, f_inv) 186 #endif 183 187 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 184 188 CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, & 185 189 work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, & 186 190 comm1dy, ierr ) 191 #ifndef __cuda_aware_mpi 187 192 !$ACC UPDATE DEVICE(work) 193 #else 194 !$ACC END HOST_DATA 195 #endif 188 196 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 189 197 … … 337 345 !-- Transpose array 338 346 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait ) 347 #ifndef __cuda_aware_mpi 339 348 !$ACC UPDATE HOST(work) 349 #else 350 !$ACC HOST_DATA USE_DEVICE(work, f_inv) 351 #endif 340 352 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 341 353 CALL MPI_ALLTOALL( work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, & 342 354 f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, & 343 355 comm1dx, ierr ) 356 #ifndef __cuda_aware_mpi 344 357 !$ACC UPDATE DEVICE(f_inv) 358 #else 359 !$ACC END HOST_DATA 360 #endif 345 361 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 346 362 #endif … … 475 491 !-- Transpose array 476 492 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait ) 493 #ifndef __cuda_aware_mpi 477 494 !$ACC UPDATE HOST(work) 495 #else 496 !$ACC HOST_DATA USE_DEVICE(work, f_inv) 497 #endif 478 498 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 479 499 CALL MPI_ALLTOALL( work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, & 480 500 f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, & 481 501 comm1dy, ierr ) 502 #ifndef __cuda_aware_mpi 482 503 !$ACC UPDATE DEVICE(f_inv) 504 #else 505 !$ACC END HOST_DATA 506 #endif 483 507 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 484 508 #endif … … 693 717 !-- Transpose array 694 
718 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait ) 719 #ifndef __cuda_aware_mpi 695 720 !$ACC UPDATE HOST(f_inv) 721 #else 722 !$ACC HOST_DATA USE_DEVICE(work, f_inv) 723 #endif 696 724 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 697 725 CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, & 698 726 work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, & 699 727 comm1dx, ierr ) 728 #ifndef __cuda_aware_mpi 700 729 !$ACC UPDATE DEVICE(work) 730 #else 731 !$ACC END HOST_DATA 732 #endif 701 733 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 702 734 … … 829 861 !-- Transpose array 830 862 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait ) 863 #ifndef __cuda_aware_mpi 831 864 !$ACC UPDATE HOST(f_inv) 865 #else 866 !$ACC HOST_DATA USE_DEVICE(work, f_inv) 867 #endif 832 868 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 833 869 CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, & 834 870 work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, & 835 871 comm1dx, ierr ) 872 #ifndef __cuda_aware_mpi 836 873 !$ACC UPDATE DEVICE(work) 874 #else 875 !$ACC END HOST_DATA 876 #endif 837 877 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 838 878 … … 971 1011 !-- Transpose array 972 1012 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait ) 1013 #ifndef __cuda_aware_mpi 973 1014 !$ACC UPDATE HOST(work) 1015 #else 1016 !$ACC HOST_DATA USE_DEVICE(work, f_inv) 1017 #endif 974 1018 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 975 1019 CALL MPI_ALLTOALL( work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, & 976 1020 f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, & 977 1021 comm1dx, ierr ) 1022 #ifndef __cuda_aware_mpi 978 1023 !$ACC UPDATE DEVICE(f_inv) 1024 #else 1025 !$ACC END HOST_DATA 1026 #endif 979 1027 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 980 1028 #endif
Note: See TracChangeset for help on using the changeset viewer.