Ignore:
Timestamp:
Sep 10, 2013 8:59:13 AM (8 years ago)
Author:
raasch
Message:

New:


openACC porting of reduction operations
additional 3D-flag arrays for replacing the 2D-index arrays nzb_s_inner and nzb_diff_s_inner
(flow_statistics, init_grid, init_3d_model, modules, palm, pres, time_integration)

Changed:


for PGI/openACC performance reasons set default compile options for openACC to "-ta=nocache",
and set environment variable PGI_ACC_SYNCHRONOUS=1
(MAKE.inc.pgi.openacc, palm_simple_run)

wall_flags_0 changed to 32bit INTEGER, additional array wall_flags_00 introduced to hold
bits 32-63
(advec_ws, init_grid, modules, palm)

Errors:


dummy argument tri in 1d-routines replaced by tri_for_1d because of name
conflict with array tri in module arrays_3d
(tridia_solver)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • palm/trunk/SOURCE/pres.f90

    r1213 r1221  
    2020! Current revisions:
    2121! ------------------
    22 !
     22! openACC porting of reduction operations, loops for calculating d are
     23! using the rflags_s_inner multiply flag instead of the nzb_s_inner loop index
    2324!
    2425! Former revisions:
     
    360361    !$OMP PARALLEL PRIVATE (i,j,k)
    361362    !$OMP DO SCHEDULE( STATIC )
    362     !$acc kernels present( d, ddzw, nzb_s_inner, u, v, w )
    363     !$acc loop
     363    !$acc kernels present( d, ddzw, rflags_s_inner, u, v, w )
     364    !$acc loop collapse( 3 )
    364365    DO  i = nxl, nxr
    365366       DO  j = nys, nyn
    366           !$acc loop vector(32)
    367367          DO  k = 1, nzt
    368              IF ( k > nzb_s_inner(j,i) )  THEN
    369                 d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
    370                            ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
    371                            ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) ) * ddt_3d      &
    372                            * d_weight_pres
    373              ENDIF
     368             d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx +             &
     369                        ( v(k,j+1,i) - v(k,j,i) ) * ddy +               &
     370                        ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) ) * ddt_3d  &
     371                        * d_weight_pres * rflags_s_inner(k,j,i)
    374372          ENDDO
    375373       ENDDO
     
    382380    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
    383381    !$OMP DO SCHEDULE( STATIC )
     382    !$acc parallel loop collapse(3) present( d ) reduction(+:threadsum)
    384383    DO  i = nxl, nxr
    385384       DO  j = nys, nyn
     
    389388       ENDDO
    390389    ENDDO
     390    !$acc end parallel
    391391    localsum = localsum + threadsum * dt_3d * &
    392392                          weight_pres(intermediate_timestep_count)
     
    594594!-- Correction of the provisional velocities with the current perturbation
    595595!-- pressure just computed
    596     !$acc update host( u, v, w )
    597596    IF ( conserve_volume_flow  .AND.  ( bc_lr_cyc .OR. bc_ns_cyc ) )  THEN
    598597       volume_flow_l(1) = 0.0
     
    748747    ENDDO
    749748#else
    750     !$acc kernels present( d, ddzw, nzb_s_inner, u, v, w )
    751     !$acc loop
     749    !$acc kernels present( d, ddzw, rflags_s_inner, u, v, w )
     750    !$acc loop collapse( 3 )
    752751    DO  i = nxl, nxr
    753752       DO  j = nys, nyn
    754           !$acc loop vector( 32 )
    755753          DO  k = 1, nzt
    756              IF ( k > nzb_s_inner(j,i) )  THEN
    757                 d(k,j,i) = ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
    758                            ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
    759                            ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k)
    760              ENDIF
     754                d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx +   &
     755                             ( v(k,j+1,i) - v(k,j,i) ) * ddy +   &
     756                             ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) &
     757                           ) * rflags_s_inner(k,j,i)
    761758          ENDDO
    762759       ENDDO
     
    765762!
    766763!-- Compute possible PE-sum of divergences for flow_statistics
     764    !$acc parallel loop collapse(3) present( d ) reduction(+:threadsum)
    767765    DO  i = nxl, nxr
    768766       DO  j = nys, nyn
    769           DO  k = nzb_s_inner(j,i)+1, nzt
     767          DO  k = nzb+1, nzt
    770768             threadsum = threadsum + ABS( d(k,j,i) )
    771769          ENDDO
    772770       ENDDO
    773771    ENDDO
     772    !$acc end parallel
    774773#endif
    775774
Note: See TracChangeset for help on using the changeset viewer.