Ignore:
Timestamp:
Sep 30, 2020 10:27:40 PM (4 years ago)
Author:
pavelkrc
Message:

Fixes and optimizations of OpenMP parallelization, formatting of OpenMP directives

File:
1 edited

Legend:

Unmodified
Added
Removed
  • palm/trunk/SOURCE/transpose.f90

    r4540 r4717  
    2020! Current revisions:
    2121! -----------------
    22 ! 
    23 ! 
     22!
     23!
    2424! Former revisions:
    2525! -----------------
    2626! $Id$
     27! Formatting of OpenMP directives (J. Resler)
     28!
     29! 4540 2020-05-18 15:23:29Z raasch
    2730! File re-formatted to follow the PALM coding standard
    28 !
    2931!
    3032! 4429 2020-02-27 15:24:30Z raasch
     
    197199!
    198200!--    Reorder transposed array
    199 !$OMP  PARALLEL PRIVATE ( i, j, k, l, ys )
     201       !$OMP  PARALLEL PRIVATE ( i, j, k, l, ys )
    200202       DO  l = 0, pdims(2) - 1
    201203          ys = 0 + l * ( nyn_x - nys_x + 1 )
     
    214216          !$OMP END DO NOWAIT
    215217       ENDDO
    216 !$OMP  END PARALLEL
     218       !$OMP  END PARALLEL
    217219#endif
    218220
     
    221223!
    222224!--    Reorder transposed array
    223 !$OMP  PARALLEL PRIVATE ( i, j, k )
    224 !$OMP  DO
     225       !$OMP  PARALLEL PRIVATE ( i, j, k )
     226       !$OMP  DO
    225227#if __acc_fft_device
    226228       !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     
    234236          ENDDO
    235237       ENDDO
    236 !$OMP  END PARALLEL
     238       !$OMP  END PARALLEL
    237239
    238240    ENDIF
     
    271273!-- Rearrange indices of input array in order to make data to be sent by MPI contiguous.
    272274!-- In case of parallel fft/transposition, scattered store is faster in backward direction!!!
    273    !$OMP  PARALLEL PRIVATE ( i, j, k )
    274    !$OMP  DO
    275 #if __acc_fft_device
    276    !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
    277    !$ACC PRESENT(f_out, f_inv)
     275    !$OMP  PARALLEL PRIVATE ( i, j, k )
     276    !$OMP  DO
     277#if __acc_fft_device
     278    !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     279    !$ACC PRESENT(f_out, f_inv)
    278280#endif
    279281    DO  i = nxl, nxr
     
    430432!
    431433!--    Reorder the array in a way that the z index is in first position
    432 !$OMP PARALLEL PRIVATE ( i, j, k )
    433 !$OMP DO
     434       !$OMP PARALLEL PRIVATE ( i, j, k )
     435       !$OMP DO
    434436#if __acc_fft_device
    435437       !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     
    443445          ENDDO
    444446       ENDDO
    445 !$OMP END PARALLEL
     447       !$OMP END PARALLEL
    446448
    447449    ENDIF
     
    481483!
    482484!-- Rearrange indices of input array in order to make data to be sent by MPI contiguous.
    483    !$OMP  PARALLEL PRIVATE ( i, j, k )
    484    !$OMP  DO
     485   !$OMP PARALLEL PRIVATE ( i, j, k )
     486   !$OMP DO
    485487#if __acc_fft_device
    486488   !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     
    494496        ENDDO
    495497    ENDDO
    496     !$OMP  END PARALLEL
     498    !$OMP END PARALLEL
    497499
    498500 END SUBROUTINE resort_for_yx
     
    560562!
    561563!--    Reorder input array for transposition
    562 !$OMP PARALLEL PRIVATE ( i, j, k, l, ys )
     564       !$OMP PARALLEL PRIVATE ( i, j, k, l, ys )
    563565       DO  l = 0, pdims(2) - 1
    564566          ys = 0 + l * ( nyn_x - nys_x + 1 )
     
    577579          !$OMP END DO NOWAIT
    578580       ENDDO
    579 !$OMP END PARALLEL
     581       !$OMP END PARALLEL
    580582
    581583!
     
    610612!
    611613!--    Reorder array f_in the same way as ALLTOALL did it.
    612 !$OMP PARALLEL PRIVATE ( i, j, k )
    613 !$OMP DO
     614       !$OMP PARALLEL PRIVATE ( i, j, k )
     615       !$OMP DO
    614616#if __acc_fft_device
    615617       !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     
    623625          ENDDO
    624626       ENDDO
    625 !$OMP END PARALLEL
     627       !$OMP END PARALLEL
    626628
    627629    ENDIF
     
    748750!
    748750!-- Rearrange indices of input array in order to make data to be sent by MPI contiguous.
    750    !$OMP PARALLEL PRIVATE ( i, j, k )
    751    !$OMP DO
     752    !$OMP PARALLEL PRIVATE ( i, j, k )
     753    !$OMP DO
    752754#if __acc_fft_device
    753755    !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     
    761763        ENDDO
    762764    ENDDO
    763     !$OMP  END PARALLEL
     765    !$OMP END PARALLEL
    764766
    765767 END SUBROUTINE resort_for_yz
     
    827829    IF ( pdims(1) == 1 )  THEN
    828830
    829 !$OMP PARALLEL PRIVATE ( i, j, k )
    830 !$OMP DO
     831       !$OMP PARALLEL PRIVATE ( i, j, k )
     832       !$OMP DO
    831833#if __acc_fft_device
    832834       !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     
    840842          ENDDO
    841843       ENDDO
    842 !$OMP END PARALLEL
     844       !$OMP END PARALLEL
    843845
    844846    ELSE
     
    873875!
    874876!--    Reorder transposed array
    875 !$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
     877       !$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
    876878       DO  l = 0, pdims(1) - 1
    877879          zs = 1 + l * ( nzt_y - nzb_y + 1 )
     
    890892          !$OMP END DO NOWAIT
    891893       ENDDO
    892 !$OMP END PARALLEL
     894       !$OMP END PARALLEL
    893895#endif
    894896
     
    927929!
    928930!-- Rearrange indices of input array in order to make data to be sent by MPI contiguous.
    929    !$OMP PARALLEL PRIVATE ( i, j, k )
    930    !$OMP DO
    931 #if __acc_fft_device
    932    !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
    933    !$ACC PRESENT(f_in, f_inv)
     931    !$OMP PARALLEL PRIVATE ( i, j, k )
     932    !$OMP DO
     933#if __acc_fft_device
     934    !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     935    !$ACC PRESENT(f_in, f_inv)
    934936#endif
    935937    DO  i = nxl, nxr
     
    940942        ENDDO
    941943    ENDDO
    942     !$OMP  END PARALLEL
     944    !$OMP END PARALLEL
    943945
    944946 END SUBROUTINE resort_for_zx
     
    10161018    IF ( pdims(1) == 1 )  THEN
    10171019
    1018 !$OMP PARALLEL PRIVATE ( i, j, k )
    1019 !$OMP DO
     1020       !$OMP PARALLEL PRIVATE ( i, j, k )
     1021       !$OMP DO
    10201022#if __acc_fft_device
    10211023       !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     
    10291031          ENDDO
    10301032       ENDDO
    1031 !$OMP END PARALLEL
     1033       !$OMP END PARALLEL
    10321034
    10331035    ELSE
     
    10801082       ELSE
    10811083
    1082           !$OMP  PARALLEL PRIVATE ( i, j, k, l, xs )
     1084          !$OMP PARALLEL PRIVATE ( i, j, k, l, xs )
    10831085          DO  l = 0, pdims(1) - 1
    10841086             xs = 0 + l * nnx
     
    10971099             !$OMP END DO NOWAIT
    10981100          ENDDO
    1099           !$OMP  END PARALLEL
     1101          !$OMP END PARALLEL
    11001102
    11011103       ENDIF
     
    11391141!
    11391141!-- Rearrange indices of input array in order to make data to be sent by MPI contiguous.
    1141     !$OMP  PARALLEL PRIVATE ( i, j, k )
    1142     !$OMP  DO
     1143    !$OMP PARALLEL PRIVATE ( i, j, k )
     1144    !$OMP DO
    11431145#if __acc_fft_device
    11441146    !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     
    11521154        ENDDO
    11531155    ENDDO
    1154     !$OMP  END PARALLEL
     1156    !$OMP END PARALLEL
    11551157
    11561158 END SUBROUTINE resort_for_zy
     
    12201222!
    12211223!--    Reorder input array for transposition
    1222 !$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
     1224       !$OMP PARALLEL PRIVATE ( i, j, k, l, zs )
    12231225       DO  l = 0, pdims(1) - 1
    12241226          zs = 1 + l * ( nzt_y - nzb_y + 1 )
     
    12371239          !$OMP END DO NOWAIT
    12381240       ENDDO
    1239 !$OMP END PARALLEL
     1241       !$OMP END PARALLEL
    12401242
    12411243!
     
    12691271!
    12701272!--    Reorder the array in the same way as ALLTOALL did it
    1271 !$OMP PARALLEL PRIVATE ( i, j, k )
    1272 !$OMP DO
     1273       !$OMP PARALLEL PRIVATE ( i, j, k )
     1274       !$OMP DO
    12731275#if __acc_fft_device
    12741276       !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
     
    12821284          ENDDO
    12831285       ENDDO
    1284 !$OMP END PARALLEL
     1286       !$OMP END PARALLEL
    12851287
    12861288    ENDIF
Note: See TracChangeset for help on using the changeset viewer.