Ignore:
Timestamp:
Jan 7, 2019 8:14:18 PM (2 years ago)
Author:
knoop
Message:

OpenACC: added CUDA-aware MPI in transpose and asynchronous acc update in exchange_horiz

File:
1 edited

Legend:

Unmodified
Added
Removed
  • palm/trunk/SOURCE/transpose.f90

    r3655 r3657  
     180  180 !--    Transpose array
     181  181        CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
          182 #ifndef __cuda_aware_mpi
     182  183        !$ACC UPDATE HOST(f_inv)
          184 #else
          185        !$ACC HOST_DATA USE_DEVICE(work, f_inv)
          186 #endif
     183  187        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
     184  188        CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0),  sendrecvcount_xy, MPI_REAL, &
     185  189                           work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
     186  190                           comm1dy, ierr )
          191 #ifndef __cuda_aware_mpi
     187  192        !$ACC UPDATE DEVICE(work)
          193 #else
          194        !$ACC END HOST_DATA
          195 #endif
     188  196        CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
     189  197
     
     337  345 !--    Transpose array
     338  346        CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
          347 #ifndef __cuda_aware_mpi
     339  348        !$ACC UPDATE HOST(work)
          349 #else
          350        !$ACC HOST_DATA USE_DEVICE(work, f_inv)
          351 #endif
     340  352        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
     341  353        CALL MPI_ALLTOALL( work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
     342  354                           f_inv(nys,nxl,1),      sendrecvcount_zx, MPI_REAL, &
     343  355                           comm1dx, ierr )
          356 #ifndef __cuda_aware_mpi
     344  357        !$ACC UPDATE DEVICE(f_inv)
          358 #else
          359        !$ACC END HOST_DATA
          360 #endif
     345  361        CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
     346  362 #endif
     
     475  491 !--    Transpose array
     476  492        CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
          493 #ifndef __cuda_aware_mpi
     477  494        !$ACC UPDATE HOST(work)
          495 #else
          496        !$ACC HOST_DATA USE_DEVICE(work, f_inv)
          497 #endif
     478  498        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
     479  499        CALL MPI_ALLTOALL( work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
     480  500                           f_inv(nys_x,nzb_x,0),  sendrecvcount_xy, MPI_REAL, &
     481  501                           comm1dy, ierr )
          502 #ifndef __cuda_aware_mpi
     482  503        !$ACC UPDATE DEVICE(f_inv)
          504 #else
          505        !$ACC END HOST_DATA
          506 #endif
     483  507        CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
     484  508 #endif
     
     693  717 !--    Transpose array
     694  718        CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
          719 #ifndef __cuda_aware_mpi
     695  720        !$ACC UPDATE HOST(f_inv)
          721 #else
          722        !$ACC HOST_DATA USE_DEVICE(work, f_inv)
          723 #endif
     696  724        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
     697  725        CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0),  sendrecvcount_yz, MPI_REAL, &
     698  726                           work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
     699  727                           comm1dx, ierr )
          728 #ifndef __cuda_aware_mpi
     700  729        !$ACC UPDATE DEVICE(work)
          730 #else
          731        !$ACC END HOST_DATA
          732 #endif
     701  733        CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
     702  734
     
     829  861 !--    Transpose array
     830  862        CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
          863 #ifndef __cuda_aware_mpi
     831  864        !$ACC UPDATE HOST(f_inv)
          865 #else
          866        !$ACC HOST_DATA USE_DEVICE(work, f_inv)
          867 #endif
     832  868        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
     833  869        CALL MPI_ALLTOALL( f_inv(nys,nxl,1),      sendrecvcount_zx, MPI_REAL, &
     834  870                           work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
     835  871                           comm1dx, ierr )
          872 #ifndef __cuda_aware_mpi
     836  873        !$ACC UPDATE DEVICE(work)
          874 #else
          875        !$ACC END HOST_DATA
          876 #endif
     837  877        CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
     838  878
     
     971 1011 !--    Transpose array
     972 1012        CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
         1013 #ifndef __cuda_aware_mpi
     973 1014        !$ACC UPDATE HOST(work)
         1015 #else
         1016        !$ACC HOST_DATA USE_DEVICE(work, f_inv)
         1017 #endif
     974 1018        IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
     975 1019        CALL MPI_ALLTOALL( work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
     976 1020                           f_inv(nxl_y,nzb_y,0),  sendrecvcount_yz, MPI_REAL, &
     977 1021                           comm1dx, ierr )
         1022 #ifndef __cuda_aware_mpi
     978 1023        !$ACC UPDATE DEVICE(f_inv)
         1024 #else
         1025        !$ACC END HOST_DATA
         1026 #endif
     979 1027        CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
     980 1028 #endif
Note: See TracChangeset for help on using the changeset viewer.