Ignore:
Timestamp:
Sep 30, 2020 10:27:40 PM (4 years ago)
Author:
pavelkrc
Message:

Fixes and optimizations of OpenMP parallelization, formatting of OpenMP directives

File:
1 edited

Legend:

Unmodified
Added
Removed
  • palm/trunk/SOURCE/pres.f90

    r4651 r4717  
    2525! -----------------
    2626! $Id$
     27! Fixes and optimizations of OpenMP parallelization, formatting of OpenMP
     28! directives (J. Resler)
     29!
     30! 4651 2020-08-27 07:17:45Z raasch
    2731! preprocessor branch for ibm removed
    2832!
    2933! 4649 2020-08-25 12:11:17Z raasch
    3034! File re-formatted to follow the PALM coding standard
    31 !
    3235!
    3336! 4457 2020-03-11 14:20:43Z raasch
     
    171174    REAL(wp) ::  ddt_3d            !<
    172175    REAL(wp) ::  d_weight_pres     !<
    173     REAL(wp) ::  localsum          !<
    174176    REAL(wp) ::  threadsum         !<
    175177    REAL(wp) ::  weight_pres_l     !<
     
    363365
    364366    IF ( psolver(1:9) == 'multigrid' )  THEN
    365        !$OMP PARALLEL DO SCHEDULE( STATIC ) PRIVATE (i,j,k)
     367       !$OMP PARALLEL DO PRIVATE (i,j,k) SCHEDULE( STATIC )
    366368       DO  i = nxl-1, nxr+1
    367369          DO  j = nys-1, nyn+1
     
    372374       ENDDO
    373375    ELSE
    374        !$OMP PARALLEL DO SCHEDULE( STATIC ) PRIVATE (i,j,k)
     376       !$OMP PARALLEL DO PRIVATE (i,j,k) SCHEDULE( STATIC )
    375377       !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i, j, k) &
    376378       !$ACC PRESENT(d)
     
    384386    ENDIF
    385387
    386     localsum  = 0.0_wp
    387     threadsum = 0.0_wp
    388 
    389     !$OMP PARALLEL PRIVATE (i,j,k)
    390     !$OMP DO SCHEDULE( STATIC )
     388    !$OMP PARALLEL DO PRIVATE (i,j,k) SCHEDULE( STATIC )
    391389    !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i, j, k) &
    392390    !$ACC PRESENT(u, v, w, rho_air, rho_air_zw, ddzw, wall_flags_total_0) &
     
    403401       ENDDO
    404402    ENDDO
    405     !$OMP END PARALLEL
    406403
    407404!
     
    410407    IF ( intermediate_timestep_count == intermediate_timestep_count_max  .OR.                      &
    411408         intermediate_timestep_count == 0 )  THEN
    412        !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
    413        !$OMP DO SCHEDULE( STATIC )
     409       threadsum = 0.0_wp
     410       !$OMP PARALLEL DO PRIVATE (i,j,k) REDUCTION(+:threadsum) SCHEDULE( STATIC )
    414411       !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i,j,k) &
    415412       !$ACC REDUCTION(+:threadsum) COPY(threadsum) &
     
    422419          ENDDO
    423420       ENDDO
    424        localsum = localsum + threadsum * dt_3d * weight_pres_l
    425        !$OMP END PARALLEL
     421       threadsum = threadsum + threadsum * dt_3d * weight_pres_l
    426422    ENDIF
    427423
     
    430426    IF ( intermediate_timestep_count == intermediate_timestep_count_max  .OR.                      &
    431427         intermediate_timestep_count == 0 )  THEN
    432        sums_divold_l(0:statistic_regions) = localsum
     428       sums_divold_l(0:statistic_regions) = threadsum
    433429    ENDIF
    434430
     
    445441!
    446442!--    Store computed perturbation pressure and set boundary condition in z-direction
    447        !$OMP PARALLEL DO PRIVATE (i,j,k)
     443       !$OMP PARALLEL DO PRIVATE (i,j,k) SCHEDULE( STATIC )
    448444       !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i, j, k) &
    449445       !$ACC PRESENT(d, tend)
     
    465461!--       natural and urban surfaces
    466462!--       Upward facing
    467           !$OMP PARALLEL DO PRIVATE( i, j, k )
     463          !$OMP PARALLEL DO PRIVATE( m, i, j, k ) SCHEDULE( STATIC )
    468464          !$ACC PARALLEL LOOP PRIVATE(i, j, k) &
    469465          !$ACC PRESENT(bc_h, tend)
     
    476472!
    477473!--       Downward facing
    478           !$OMP PARALLEL DO PRIVATE( i, j, k )
     474          !$OMP PARALLEL DO PRIVATE( m, i, j, k ) SCHEDULE( STATIC )
    479475          !$ACC PARALLEL LOOP PRIVATE(i, j, k) &
    480476          !$ACC PRESENT(bc_h, tend)
     
    491487!--       urban surfaces
    492488!--       Upward facing
    493           !$OMP PARALLEL DO PRIVATE( i, j, k )
     489          !$OMP PARALLEL DO PRIVATE( m, i, j, k ) SCHEDULE( STATIC )
    494490          DO  m = 1, bc_h(0)%ns
    495491             i = bc_h(0)%i(m)
     
    500496!
    501497!--       Downward facing
    502           !$OMP PARALLEL DO PRIVATE( i, j, k )
     498          !$OMP PARALLEL DO PRIVATE( m, i, j, k ) SCHEDULE( STATIC )
    503499          DO  m = 1, bc_h(1)%ns
    504500             i = bc_h(1)%i(m)
     
    515511!
    516512!--       Neumann
    517           !$OMP PARALLEL DO PRIVATE (i,j,k)
     513          !$OMP PARALLEL DO PRIVATE (i,j) SCHEDULE( STATIC )
    518514          DO  i = nxlg, nxrg
    519515             DO  j = nysg, nyng
     
    525521!
    526522!--       Dirichlet
    527           !$OMP PARALLEL DO PRIVATE (i,j,k)
     523          !$OMP PARALLEL DO PRIVATE (i,j) SCHEDULE( STATIC )
    528524          !$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(i, j) &
    529525          !$ACC PRESENT(tend)
     
    590586!-- Ghost layers are added in the output routines (except sor-method: see below)
    591587    IF ( intermediate_timestep_count <= 1 )  THEN
    592        !$OMP PARALLEL PRIVATE (i,j,k)
    593        !$OMP DO
     588       !$OMP PARALLEL DO PRIVATE (i,j,k) SCHEDULE( STATIC )
    594589       !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i, j, k) &
    595590       !$ACC PRESENT(p, tend)
     
    601596          ENDDO
    602597       ENDDO
    603        !$OMP END PARALLEL
    604598
    605599    ELSEIF ( intermediate_timestep_count > 1 )  THEN
    606        !$OMP PARALLEL PRIVATE (i,j,k)
    607        !$OMP DO
     600       !$OMP PARALLEL DO PRIVATE (i,j,k) SCHEDULE( STATIC )
    608601       !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i, j, k) &
    609602       !$ACC PRESENT(p, tend)
     
    615608          ENDDO
    616609       ENDDO
    617        !$OMP END PARALLEL
    618610
    619611    ENDIF
     
    635627!-- the velocities, zero-gradient conditions for the pressure are set, so that no modification is
    636628!-- imposed at the boundaries.
    637     !$OMP PARALLEL PRIVATE (i,j,k)
    638     !$OMP DO
     629    !$OMP PARALLEL DO PRIVATE (i,j,k) SCHEDULE( STATIC )
    639630    !$ACC PARALLEL LOOP COLLAPSE(2) PRIVATE(i, j, k) &
    640631    !$ACC PRESENT(u, v, w, tend, ddzu, wall_flags_total_0)
     
    660651       ENDDO
    661652    ENDDO
    662     !$OMP END PARALLEL
    663653
    664654!
     
    675665    IF ( conserve_volume_flow  .AND.  bc_lr_cyc  .AND.  bc_ns_cyc  .AND.  nxr == nx )  THEN
    676666
    677        !$OMP PARALLEL PRIVATE (j,k)
    678        !$OMP DO
     667       !$OMP PARALLEL DO PRIVATE (j,k) REDUCTION (volume_flow_l(1))
    679668       DO  j = nys, nyn
    680           !$OMP CRITICAL
    681669          DO  k = nzb+1, nzt
    682670             volume_flow_l(1) = volume_flow_l(1) + u(k,j,nxr) * dzw(k)                             &
    683671                                * MERGE( 1.0_wp, 0.0_wp, BTEST( wall_flags_total_0(k,j,nxr), 1 ) )
    684672          ENDDO
    685           !$OMP END CRITICAL
    686        ENDDO
    687        !$OMP END PARALLEL
     673       ENDDO
    688674
    689675    ENDIF
     
    691677    IF ( conserve_volume_flow  .AND.  bc_ns_cyc  .AND.  bc_lr_cyc  .AND. nyn == ny )  THEN
    692678
    693        !$OMP PARALLEL PRIVATE (i,k)
    694        !$OMP DO
    695        DO  i = nxl, nxr
    696           !$OMP CRITICAL
     679       !$OMP PARALLEL DO PRIVATE (i,k) REDUCTION (volume_flow_l(2))
     680       DO  i = nxl, nxr
    697681          DO  k = nzb+1, nzt
    698682             volume_flow_l(2) = volume_flow_l(2) + v(k,nyn,i) * dzw(k)                             &
    699683                                * MERGE( 1.0_wp, 0.0_wp, BTEST( wall_flags_total_0(k,nyn,i), 2 ) )
    700684           ENDDO
    701           !$OMP END CRITICAL
    702        ENDDO
    703        !$OMP END PARALLEL
     685       ENDDO
    704686
    705687    ENDIF
     
    719701                                 / volume_flow_area(1:2)
    720702
    721        !$OMP PARALLEL PRIVATE (i,j,k)
    722        !$OMP DO
     703       !$OMP PARALLEL DO PRIVATE (i,j,k) SCHEDULE( STATIC )
    723704       DO  i = nxl, nxr
    724705          DO  j = nys, nyn
     
    733714          ENDDO
    734715       ENDDO
    735 
    736        !$OMP END PARALLEL
    737716
    738717    ENDIF
     
    757736       IF ( topography /= 'flat' )  d = 0.0_wp
    758737
    759        localsum  = 0.0_wp
    760        threadsum = 0.0_wp
    761 
    762        !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
    763        !$OMP DO SCHEDULE( STATIC )
     738       !$OMP PARALLEL DO PRIVATE (i,j,k) SCHEDULE( STATIC )
    764739       !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i, j, k) &
    765740       !$ACC PRESENT(u, v, w, rho_air, rho_air_zw, ddzw, wall_flags_total_0) &
     
    778753!
    779754!--    Compute possible PE-sum of divergences for flow_statistics
    780        !$OMP DO SCHEDULE( STATIC )
     755       threadsum = 0.0_wp
     756       !$OMP PARALLEL DO PRIVATE (i,j,k) REDUCTION(+:threadsum) SCHEDULE( STATIC )
    781757       !$ACC PARALLEL LOOP COLLAPSE(3) PRIVATE(i, j, k) &
    782758       !$ACC REDUCTION(+:threadsum) COPY(threadsum) &
     
    790766       ENDDO
    791767
    792        localsum = localsum + threadsum
    793        !$OMP END PARALLEL
    794 
    795768!
    796769!--    For completeness, set the divergence sum of all statistic regions to those of the total
    797770!--    domain
    798        sums_divnew_l(0:statistic_regions) = localsum
     771       sums_divnew_l(0:statistic_regions) = threadsum
    799772
    800773       CALL cpu_log( log_point_s(1), 'divergence', 'stop' )
Note: See TracChangeset for help on using the changeset viewer.