Ignore:
Timestamp:
Dec 18, 2018 12:31:28 PM (3 years ago)
Author:
knoop
Message:

OpenACC port for SPEC

File:
1 edited

Legend:

Unmodified
Added
Removed
  • palm/trunk/SOURCE/transpose.f90

    r3241 r3634  
    2525! -----------------
    2626! $Id$
     27! OpenACC port for SPEC
     28!
     29! 3241 2018-09-12 15:02:00Z raasch
    2730! unused variables removed
    2831!
     
    119122    !$OMP  PARALLEL PRIVATE ( i, j, k )
    120123    !$OMP  DO
     124     !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     125     !$ACC PRESENT(f_inv, f_in)
    121126     DO  i = 0, nx
    122127         DO  k = nzb_x, nzt_x
     
    166171
    167172    REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) ::  work !<
     173    !$ACC DECLARE CREATE(work)
    168174
    169175
     
    174180!--    Transpose array
    175181       CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
     182       !$ACC UPDATE HOST(f_inv)
    176183       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    177184       CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0),  sendrecvcount_xy, MPI_REAL, &
    178185                          work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
    179186                          comm1dy, ierr )
     187       !$ACC UPDATE DEVICE(work)
    180188       CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
    181189
     
    186194       DO  l = 0, pdims(2) - 1
    187195          ys = 0 + l * ( nyn_x - nys_x + 1 )
     196          !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     197          !$ACC PRESENT(f_out, work)
    188198          DO  i = nxl_y, nxr_y
    189199             DO  k = nzb_y, nzt_y
     
    203213!$OMP  PARALLEL PRIVATE ( i, j, k )
    204214!$OMP  DO
     215       !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     216       !$ACC PRESENT(f_out, f_inv)
    205217       DO  k = nzb_y, nzt_y
    206218          DO  i = nxl_y, nxr_y
     
    246258    !$OMP  PARALLEL PRIVATE ( i, j, k )
    247259    !$OMP  DO
     260     !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     261     !$ACC PRESENT(f_out, f_inv)
    248262     DO  k = 1, nz
    249263         DO  i = nxl, nxr
     
    293307
    294308    REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) ::  work !<
     309    !$ACC DECLARE CREATE(work)
    295310
    296311
     
    307322       DO  l = 0, pdims(1) - 1
    308323          xs = 0 + l * nnx
     324          !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     325          !$ACC PRESENT(work, f_in)
    309326          DO  k = nzb_x, nzt_x
    310327             DO  i = xs, xs + nnx - 1
     
    320337!--    Transpose array
    321338       CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
     339       !$ACC UPDATE HOST(work)
    322340       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    323341       CALL MPI_ALLTOALL( work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
    324342                          f_inv(nys,nxl,1),      sendrecvcount_zx, MPI_REAL, &
    325343                          comm1dx, ierr )
     344       !$ACC UPDATE DEVICE(f_inv)
    326345       CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
    327346#endif
     
    333352!$OMP  PARALLEL PRIVATE ( i, j, k )
    334353!$OMP  DO
     354       !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     355       !$ACC PRESENT(f_inv, f_in)
    335356       DO  i = nxl, nxr
    336357          DO  j = nys, nyn
     
    378399    !$OMP  PARALLEL PRIVATE ( i, j, k )
    379400    !$OMP  DO
     401     !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     402     !$ACC PRESENT(f_out, f_inv)
    380403     DO  i = 0, nx
    381404         DO  k = nzb_x, nzt_x
     
    425448
    426449    REAL(wp), DIMENSION(nyn_x-nys_x+1,nzb_y:nzt_y,nxl_y:nxr_y,0:pdims(2)-1) ::  work !<
     450    !$ACC DECLARE CREATE(work)
    427451
    428452
     
    436460       DO  l = 0, pdims(2) - 1
    437461          ys = 0 + l * ( nyn_x - nys_x + 1 )
     462          !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     463          !$ACC PRESENT(work, f_in)
    438464          DO  i = nxl_y, nxr_y
    439465             DO  k = nzb_y, nzt_y
     
    449475!--    Transpose array
    450476       CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
     477       !$ACC UPDATE HOST(work)
    451478       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    452479       CALL MPI_ALLTOALL( work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, &
    453480                          f_inv(nys_x,nzb_x,0),  sendrecvcount_xy, MPI_REAL, &
    454481                          comm1dy, ierr )
     482       !$ACC UPDATE DEVICE(f_inv)
    455483       CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
    456484#endif
     
    462490!$OMP  PARALLEL PRIVATE ( i, j, k )
    463491!$OMP  DO
     492       !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     493       !$ACC PRESENT(f_inv, f_in)
    464494       DO  i = nxl_y, nxr_y
    465495          DO  k = nzb_y, nzt_y
     
    587617    !$OMP  PARALLEL PRIVATE ( i, j, k )
    588618    !$OMP  DO
     619     !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     620     !$ACC PRESENT(f_inv, f_in)
    589621     DO  j = 0, ny
    590622         DO  k = nzb_y, nzt_y
     
    634666
    635667    REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) ::  work !<
     668    !$ACC DECLARE CREATE(work)
    636669
    637670
     
    643676!$OMP  PARALLEL PRIVATE ( i, j, k )
    644677!$OMP  DO
     678       !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     679       !$ACC PRESENT(f_out, f_inv)
    645680       DO  j = 0, ny
    646681          DO  k = nzb_y, nzt_y
     
    658693!--    Transpose array
    659694       CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
     695       !$ACC UPDATE HOST(f_inv)
    660696       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    661697       CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0),  sendrecvcount_yz, MPI_REAL, &
    662698                          work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
    663699                          comm1dx, ierr )
     700       !$ACC UPDATE DEVICE(work)
    664701       CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
    665702
     
    670707       DO  l = 0, pdims(1) - 1
    671708          zs = 1 + l * ( nzt_y - nzb_y + 1 )
     709          !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     710          !$ACC PRESENT(f_out, work)
    672711          DO  j = nys_z, nyn_z
    673712             DO  k = zs, zs + nzt_y - nzb_y
     
    714753    !$OMP  PARALLEL PRIVATE ( i, j, k )
    715754    !$OMP  DO
     755    !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     756    !$ACC PRESENT(f_in, f_inv)
    716757     DO  k = 1,nz
    717758         DO  i = nxl, nxr
     
    761802
    762803    REAL(wp), DIMENSION(nys_x:nyn_x,nnx,nzb_x:nzt_x,0:pdims(1)-1) ::  work !<
     804    !$ACC DECLARE CREATE(work)
    763805
    764806
     
    770812!$OMP  PARALLEL PRIVATE ( i, j, k )
    771813!$OMP  DO
     814       !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     815       !$ACC PRESENT(f_out, f_inv)
    772816       DO  k = 1, nz
    773817          DO  i = nxl, nxr
     
    785829!--    Transpose array
    786830       CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
     831       !$ACC UPDATE HOST(f_inv)
    787832       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    788833       CALL MPI_ALLTOALL( f_inv(nys,nxl,1),      sendrecvcount_zx, MPI_REAL, &
    789834                          work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, &
    790835                          comm1dx, ierr )
     836       !$ACC UPDATE DEVICE(work)
    791837       CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
    792838
     
    797843       DO  l = 0, pdims(1) - 1
    798844          xs = 0 + l * nnx
     845          !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     846          !$ACC PRESENT(f_out, work)
    799847          DO  k = nzb_x, nzt_x
    800848             DO  i = xs, xs + nnx - 1
     
    845893    !$OMP  PARALLEL PRIVATE ( i, j, k )
    846894    !$OMP  DO
     895    !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     896    !$ACC PRESENT(f_out, f_inv)
    847897     DO  k = nzb_y, nzt_y
    848898         DO  j = 0, ny
     
    892942
    893943    REAL(wp), DIMENSION(nxl_z:nxr_z,nzt_y-nzb_y+1,nys_z:nyn_z,0:pdims(1)-1) ::  work !<
     944    !$ACC DECLARE CREATE(work)
    894945
    895946!
     
    905956       DO  l = 0, pdims(1) - 1
    906957          zs = 1 + l * ( nzt_y - nzb_y + 1 )
     958          !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     959          !$ACC PRESENT(work, f_in)
    907960          DO  j = nys_z, nyn_z
    908961             DO  k = zs, zs + nzt_y - nzb_y
     
    918971!--    Transpose array
    919972       CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait )
     973       !$ACC UPDATE HOST(work)
    920974       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
    921975       CALL MPI_ALLTOALL( work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, &
    922976                          f_inv(nxl_y,nzb_y,0),  sendrecvcount_yz, MPI_REAL, &
    923977                          comm1dx, ierr )
     978       !$ACC UPDATE DEVICE(f_inv)
    924979       CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' )
    925980#endif
     
    930985!$OMP  PARALLEL PRIVATE ( i, j, k )
    931986!$OMP  DO
     987       !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     988       !$ACC PRESENT(f_inv, f_in)
    932989       DO  k = nzb_y, nzt_y
    933990          DO  j = 0, ny
Note: See TracChangeset for help on using the changeset viewer.