source: palm/trunk/SOURCE/pres.f90 @ 1276

Last change on this file since 1276 was 1258, checked in by raasch, 11 years ago

last commit documented

  • Property svn:keywords set to Id
File size: 24.2 KB
RevLine 
[1]1 SUBROUTINE pres
2
[1036]3!--------------------------------------------------------------------------------!
4! This file is part of PALM.
5!
6! PALM is free software: you can redistribute it and/or modify it under the terms
7! of the GNU General Public License as published by the Free Software Foundation,
8! either version 3 of the License, or (at your option) any later version.
9!
10! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12! A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
13!
14! You should have received a copy of the GNU General Public License along with
15! PALM. If not, see <http://www.gnu.org/licenses/>.
16!
17! Copyright 1997-2012  Leibniz University Hannover
18!--------------------------------------------------------------------------------!
19!
[484]20! Current revisions:
[1212]21! ------------------
[708]22!
[1258]23!
[708]24! Former revisions:
25! -----------------
26! $Id: pres.f90 1258 2013-11-08 16:09:09Z heinze $
27!
[1258]28! 1257 2013-11-08 15:18:40Z raasch
29! openacc loop and loop vector clauses removed, independent clauses added,
30! end parallel replaced by end parallel loop
31!
[1222]32! 1221 2013-09-10 08:59:13Z raasch
33! openACC porting of reduction operations, loops for calculating d are
34! using the rflags_s_inner multiply flag instead of the nzb_s_inner loop index
35!
[1213]36! 1212 2013-08-15 08:46:27Z raasch
37! call of poisfft_hybrid removed
38!
[1118]39! 1117 2013-03-27 11:15:36Z suehring
40! Bugfix in OpenMP parallelization.
41!
[1114]42! 1113 2013-03-10 02:48:14Z raasch
43! GPU-porting of several loops, some loops rearranged
44!
[1112]45! 1111 2013-03-08 23:54:10Z
46! openACC statements added,
47! ibc_p_b = 2 removed
48!
[1093]49! 1092 2013-02-02 11:24:22Z raasch
50! unused variables removed
51!
[1037]52! 1036 2012-10-22 13:43:42Z raasch
53! code put under GPL (PALM 3.9)
54!
[1004]55! 1003 2012-09-14 14:35:53Z raasch
56! adjustment of array tend for cases with unequal subdomain sizes removed
57!
[779]58! 778 2011-11-07 14:18:25Z fricke
59! New allocation of tend when multigrid is used and the collected field on PE0
60! has more grid points than the subdomain of an PE.
61!
[720]62! 719 2011-04-06 13:05:23Z gryschka
63! Bugfix in volume flow control for double cyclic boundary conditions
64!
[710]65! 709 2011-03-30 09:31:40Z raasch
66! formatting adjustments
67!
[708]68! 707 2011-03-29 11:39:40Z raasch
[707]69! Calculation of weighted average of p is now handled in the same way
70! regardless of the number of ghost layers (advection scheme),
71! multigrid and sor method are using p_loc for iterative advancements of
72! pressure,
73! localsum calculation modified for proper OpenMP reduction,
74! bc_lr/ns replaced by bc_lr/ns_cyc
[674]75!
[694]76! 693 2011-03-08 09:..:..Z raasch
[695]77! bugfix: weighting coefficient added to ibm branch
[694]78!
79! 680 2011-02-04 23:16:06Z gryschka
[681]80! bugfix: collective_wait
[668]81!
[676]82! 675 2011-01-19 10:56:55Z suehring
83! Removed bugfix while copying tend.
84!
[674]85! 673 2011-01-18 16:19:48Z suehring
86! Weighting coefficients added for right computation of the pressure during
87! Runge-Kutta substeps.
88!
[668]89! 667 2010-12-23 12:06:00Z suehring/gryschka
[667]90! New allocation of tend when ws-scheme and multigrid is used. This is due to
91! reasons of perforance of the data_exchange. The same is done with p after
92! poismg is called.
93! nxl-1, nxr+1, nys-1, nyn+1 replaced by nxlg, nxrg, nysg, nyng when no
94! multigrid is used. Calls of exchange_horiz are modified.
95! bugfix: After pressure correction no volume flow correction in case of
96! non-cyclic boundary conditions
97! (has to be done only before pressure correction)
98! Call of SOR routine is referenced with ddzu_pres.
99!
[623]100! 622 2010-12-10 08:08:13Z raasch
101! optional barriers included in order to speed up collective operations
102!
[198]103! 151 2008-03-07 13:42:18Z raasch
104! Bugfix in volume flow control for non-cyclic boundary conditions
105!
[110]106! 106 2007-08-16 14:30:26Z raasch
107! Volume flow conservation added for the remaining three outflow boundaries
108!
[90]109! 85 2007-05-11 09:35:14Z raasch
110! Division through dt_3d replaced by multiplication of the inverse.
111! For performance optimisation, this is done in the loop calculating the
112! divergence instead of using a seperate loop.
113!
[77]114! 75 2007-03-22 09:54:05Z raasch
[75]115! Volume flow control for non-cyclic boundary conditions added (currently only
[76]116! for the north boundary!!), 2nd+3rd argument removed from exchange horiz,
117! mean vertical velocity is removed in case of Neumann boundary conditions
118! both at the bottom and the top
[1]119!
[3]120! RCS Log replace by Id keyword, revision history cleaned up
121!
[1]122! Revision 1.25  2006/04/26 13:26:12  raasch
123! OpenMP optimization (+localsum, threadsum)
124!
125! Revision 1.1  1997/07/24 11:24:44  raasch
126! Initial revision
127!
128!
129! Description:
130! ------------
131! Compute the divergence of the provisional velocity field. Solve the Poisson
132! equation for the perturbation pressure. Compute the final velocities using
133! this perturbation pressure. Compute the remaining divergence.
134!------------------------------------------------------------------------------!
135
136    USE arrays_3d
137    USE constants
138    USE control_parameters
139    USE cpulog
140    USE grid_variables
141    USE indices
142    USE interfaces
143    USE pegrid
144    USE poisfft_mod
145    USE statistics
146
147    IMPLICIT NONE
148
[1092]149    INTEGER ::  i, j, k
[1]150
[673]151    REAL    ::  ddt_3d, localsum, threadsum, d_weight_pres
[1]152
153    REAL, DIMENSION(1:2) ::  volume_flow_l, volume_flow_offset
[76]154    REAL, DIMENSION(1:nzt) ::  w_l, w_l_l
[1]155
156
157    CALL cpu_log( log_point(8), 'pres', 'start' )
158
[85]159
160    ddt_3d = 1.0 / dt_3d
[709]161    d_weight_pres = 1.0 / weight_pres(intermediate_timestep_count)
[85]162
[1]163!
[707]164!-- Multigrid method expects array d to have one ghost layer.
165!--
[1]166    IF ( psolver == 'multigrid' )  THEN
[667]167     
[1]168       DEALLOCATE( d )
[667]169       ALLOCATE( d(nzb:nzt+1,nys-1:nyn+1,nxl-1:nxr+1) ) 
[707]170
171!
172!--    Since p is later used to hold the weighted average of the substeps, it
173!--    cannot be used in the iterative solver. Therefore, its initial value is
174!--    stored on p_loc, which is then iteratively advanced in every substep.
175       IF ( intermediate_timestep_count == 1 )  THEN
176          DO  i = nxl-1, nxr+1
177             DO  j = nys-1, nyn+1
178                DO  k = nzb, nzt+1
179                   p_loc(k,j,i) = p(k,j,i)
180                ENDDO
181             ENDDO
182          ENDDO
[667]183       ENDIF
184       
[707]185    ELSEIF ( psolver == 'sor'  .AND.  intermediate_timestep_count == 1 )  THEN
186
187!
188!--    Since p is later used to hold the weighted average of the substeps, it
189!--    cannot be used in the iterative solver. Therefore, its initial value is
190!--    stored on p_loc, which is then iteratively advanced in every substep.
191       p_loc = p
192
[1]193    ENDIF
194
195!
[75]196!-- Conserve the volume flow at the outflow in case of non-cyclic lateral
197!-- boundary conditions
[106]198!-- WARNING: so far, this conservation does not work at the left/south
199!--          boundary if the topography at the inflow differs from that at the
200!--          outflow! For this case, volume_flow_area needs adjustment!
201!
202!-- Left/right
[709]203    IF ( conserve_volume_flow  .AND.  ( outflow_l .OR. outflow_r ) )  THEN
[680]204
[106]205       volume_flow(1)   = 0.0
206       volume_flow_l(1) = 0.0
207
208       IF ( outflow_l )  THEN
209          i = 0
210       ELSEIF ( outflow_r )  THEN
211          i = nx+1
212       ENDIF
213
214       DO  j = nys, nyn
215!
216!--       Sum up the volume flow through the south/north boundary
[709]217          DO  k = nzb_2d(j,i)+1, nzt
[667]218             volume_flow_l(1) = volume_flow_l(1) + u(k,j,i) * dzw(k)
[106]219          ENDDO
220       ENDDO
221
222#if defined( __parallel )   
[680]223       IF ( collective_wait )  CALL MPI_BARRIER( comm1dy, ierr )
[106]224       CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 1, MPI_REAL, &
225                           MPI_SUM, comm1dy, ierr )   
226#else
227       volume_flow = volume_flow_l 
228#endif
[709]229       volume_flow_offset(1) = ( volume_flow_initial(1) - volume_flow(1) ) &
[106]230                               / volume_flow_area(1)
231
[667]232       DO  j = nysg, nyng
[709]233          DO  k = nzb_2d(j,i)+1, nzt
[106]234             u(k,j,i) = u(k,j,i) + volume_flow_offset(1)
235          ENDDO
236       ENDDO
237
238    ENDIF
239
240!
241!-- South/north
[709]242    IF ( conserve_volume_flow  .AND.  ( outflow_n .OR. outflow_s ) )  THEN
[106]243
[75]244       volume_flow(2)   = 0.0
245       volume_flow_l(2) = 0.0
246
[106]247       IF ( outflow_s )  THEN
248          j = 0
249       ELSEIF ( outflow_n )  THEN
[75]250          j = ny+1
[106]251       ENDIF
252
253       DO  i = nxl, nxr
[75]254!
[106]255!--       Sum up the volume flow through the south/north boundary
[709]256          DO  k = nzb_2d(j,i)+1, nzt
[667]257             volume_flow_l(2) = volume_flow_l(2) + v(k,j,i) * dzw(k)
[75]258          ENDDO
[106]259       ENDDO
260
[75]261#if defined( __parallel )   
[680]262       IF ( collective_wait )  CALL MPI_BARRIER( comm1dx, ierr )
[75]263       CALL MPI_ALLREDUCE( volume_flow_l(2), volume_flow(2), 1, MPI_REAL, &
264                           MPI_SUM, comm1dx, ierr )   
265#else
266       volume_flow = volume_flow_l 
267#endif
268       volume_flow_offset(2) = ( volume_flow_initial(2) - volume_flow(2) )    &
[106]269                               / volume_flow_area(2)
[75]270
[667]271       DO  i = nxlg, nxrg
[709]272          DO  k = nzb_v_inner(j,i)+1, nzt
[106]273             v(k,j,i) = v(k,j,i) + volume_flow_offset(2)
[75]274          ENDDO
[106]275       ENDDO
[75]276
277    ENDIF
278
[76]279!
280!-- Remove mean vertical velocity
281    IF ( ibc_p_b == 1  .AND.  ibc_p_t == 1 )  THEN
[709]282       IF ( simulated_time > 0.0 )  THEN ! otherwise nzb_w_inner not yet known
[76]283          w_l = 0.0;  w_l_l = 0.0
284          DO  i = nxl, nxr
285             DO  j = nys, nyn
286                DO  k = nzb_w_inner(j,i)+1, nzt
287                   w_l_l(k) = w_l_l(k) + w(k,j,i)
288                ENDDO
289             ENDDO
290          ENDDO
291#if defined( __parallel )   
[622]292          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
[709]293          CALL MPI_ALLREDUCE( w_l_l(1), w_l(1), nzt, MPI_REAL, MPI_SUM, &
294                              comm2d, ierr )
[76]295#else
296          w_l = w_l_l 
297#endif
298          DO  k = 1, nzt
299             w_l(k) = w_l(k) / ngp_2dh_outer(k,0)
300          ENDDO
[667]301          DO  i = nxlg, nxrg
302             DO  j = nysg, nyng
[76]303                DO  k = nzb_w_inner(j,i)+1, nzt
304                   w(k,j,i) = w(k,j,i) - w_l(k)
305                ENDDO
306             ENDDO
307          ENDDO
308       ENDIF
309    ENDIF
[75]310
311!
[1]312!-- Compute the divergence of the provisional velocity field.
313    CALL cpu_log( log_point_s(1), 'divergence', 'start' )
314
315    IF ( psolver == 'multigrid' )  THEN
316       !$OMP PARALLEL DO SCHEDULE( STATIC )
317       DO  i = nxl-1, nxr+1
318          DO  j = nys-1, nyn+1
319             DO  k = nzb, nzt+1
320                d(k,j,i) = 0.0
321             ENDDO
322          ENDDO
323       ENDDO
324    ELSE
325       !$OMP PARALLEL DO SCHEDULE( STATIC )
[1111]326       !$acc kernels present( d )
[1003]327       DO  i = nxl, nxr
328          DO  j = nys, nyn
329             DO  k = nzb+1, nzt
[1]330                d(k,j,i) = 0.0
331             ENDDO
332          ENDDO
333       ENDDO
[1111]334       !$acc end kernels
[1]335    ENDIF
336
337    localsum  = 0.0
338    threadsum = 0.0
339
340#if defined( __ibm )
341    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
342    !$OMP DO SCHEDULE( STATIC )
343    DO  i = nxl, nxr
344       DO  j = nys, nyn
345          DO  k = nzb_s_inner(j,i)+1, nzt
[85]346             d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
347                          ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
[673]348                          ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) ) * ddt_3d      &
349                                                                * d_weight_pres 
[1]350          ENDDO
351!
352!--       Compute possible PE-sum of divergences for flow_statistics
353          DO  k = nzb_s_inner(j,i)+1, nzt
354             threadsum = threadsum + ABS( d(k,j,i) )
355          ENDDO
356
357       ENDDO
358    ENDDO
359
[707]360    localsum = localsum + threadsum * dt_3d * &
361                          weight_pres(intermediate_timestep_count)
[693]362
[1]363    !$OMP END PARALLEL
364#else
365
[1111]366    !$OMP PARALLEL PRIVATE (i,j,k)
367    !$OMP DO SCHEDULE( STATIC )
[1221]368    !$acc kernels present( d, ddzw, rflags_s_inner, u, v, w )
369    !$acc loop collapse( 3 )
[1111]370    DO  i = nxl, nxr
371       DO  j = nys, nyn
372          DO  k = 1, nzt
[1221]373             d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx +             &
374                        ( v(k,j+1,i) - v(k,j,i) ) * ddy +               &
375                        ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) ) * ddt_3d  &
376                        * d_weight_pres * rflags_s_inner(k,j,i)
[1]377          ENDDO
378       ENDDO
[1111]379    ENDDO
380    !$acc end kernels
381    !$OMP END PARALLEL
[1]382
383!
384!-- Compute possible PE-sum of divergences for flow_statistics
385    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
386    !$OMP DO SCHEDULE( STATIC )
[1221]387    !$acc parallel loop collapse(3) present( d ) reduction(+:threadsum)
[1]388    DO  i = nxl, nxr
389       DO  j = nys, nyn
390          DO  k = nzb+1, nzt
391             threadsum = threadsum + ABS( d(k,j,i) )
392          ENDDO
393       ENDDO
394    ENDDO
[1257]395    !$acc end parallel loop
[707]396    localsum = localsum + threadsum * dt_3d * &
397                          weight_pres(intermediate_timestep_count)
[1]398    !$OMP END PARALLEL
399#endif
400
401!
402!-- For completeness, set the divergence sum of all statistic regions to those
403!-- of the total domain
404    sums_divold_l(0:statistic_regions) = localsum
405
406    CALL cpu_log( log_point_s(1), 'divergence', 'stop' )
407
408!
409!-- Compute the pressure perturbation solving the Poisson equation
410    IF ( psolver(1:7) == 'poisfft' )  THEN
411
412!
413!--    Solve Poisson equation via FFT and solution of tridiagonal matrices
414       IF ( psolver == 'poisfft' )  THEN
[1212]415
[1]416          CALL poisfft( d, tend )
[1113]417
[1]418       ENDIF
419
420!
421!--    Store computed perturbation pressure and set boundary condition in
422!--    z-direction
423       !$OMP PARALLEL DO
[1113]424       !$acc kernels present( d, tend )
[1]425       DO  i = nxl, nxr
426          DO  j = nys, nyn
427             DO  k = nzb+1, nzt
428                tend(k,j,i) = d(k,j,i)
429             ENDDO
430          ENDDO
431       ENDDO
[1113]432       !$acc end kernels
[1]433
434!
435!--    Bottom boundary:
436!--    This condition is only required for internal output. The pressure
437!--    gradient (dp(nzb+1)-dp(nzb))/dz is not used anywhere else.
438       IF ( ibc_p_b == 1 )  THEN
439!
440!--       Neumann (dp/dz = 0)
441          !$OMP PARALLEL DO
[1113]442          !$acc kernels present( nzb_s_inner, tend )
[667]443          DO  i = nxlg, nxrg
444             DO  j = nysg, nyng
[1]445                tend(nzb_s_inner(j,i),j,i) = tend(nzb_s_inner(j,i)+1,j,i)
446             ENDDO
447          ENDDO
[1113]448          !$acc end kernels
[1]449
450       ELSE
451!
452!--       Dirichlet
453          !$OMP PARALLEL DO
[1113]454          !$acc kernels present( tend )
[667]455          DO  i = nxlg, nxrg
456             DO  j = nysg, nyng
[1]457                tend(nzb_s_inner(j,i),j,i) = 0.0
458             ENDDO
459          ENDDO
[1113]460          !$acc end kernels
[1]461
462       ENDIF
463
464!
465!--    Top boundary
466       IF ( ibc_p_t == 1 )  THEN
467!
468!--       Neumann
469          !$OMP PARALLEL DO
[1113]470          !$acc kernels present( tend )
[667]471          DO  i = nxlg, nxrg
472             DO  j = nysg, nyng
[1]473                tend(nzt+1,j,i) = tend(nzt,j,i)
474             ENDDO
475          ENDDO
[1113]476          !$acc end kernels
[1]477
478       ELSE
479!
480!--       Dirichlet
481          !$OMP PARALLEL DO
[1113]482          !$acc kernels present( tend )
[667]483          DO  i = nxlg, nxrg
484             DO  j = nysg, nyng
[1]485                tend(nzt+1,j,i) = 0.0
486             ENDDO
487          ENDDO
[1113]488          !$acc end kernels
[1]489
490       ENDIF
491
492!
493!--    Exchange boundaries for p
[1113]494       IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
495          on_device = .TRUE.         ! to be removed after complete porting
496       ELSE                          ! of ghost point exchange
497          !$acc update host( tend )
498       ENDIF
[667]499       CALL exchange_horiz( tend, nbgp )
[1113]500       IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
501          on_device = .FALSE.        ! to be removed after complete porting
502       ELSE                          ! of ghost point exchange
503          !$acc update device( tend )
504       ENDIF
[1]505     
506    ELSEIF ( psolver == 'sor' )  THEN
507
508!
509!--    Solve Poisson equation for perturbation pressure using SOR-Red/Black
510!--    scheme
[707]511       CALL sor( d, ddzu_pres, ddzw, p_loc )
512       tend = p_loc
[1]513
514    ELSEIF ( psolver == 'multigrid' )  THEN
515
516!
517!--    Solve Poisson equation for perturbation pressure using Multigrid scheme,
[667]518!--    array tend is used to store the residuals, logical exchange_mg is used
519!--    to discern data exchange in multigrid ( 1 ghostpoint ) and normal grid
520!--    ( nbgp ghost points ).
[778]521
522!--    If the number of grid points of the gathered grid, which is collected
523!--    on PE0, is larger than the number of grid points of an PE, than array
524!--    tend will be enlarged.
525       IF ( gathered_size > subdomain_size )  THEN
526          DEALLOCATE( tend )
527          ALLOCATE( tend(nzb:nzt_mg(mg_switch_to_pe0_level)+1,nys_mg(          &
528                    mg_switch_to_pe0_level)-1:nyn_mg(mg_switch_to_pe0_level)+1,&
529                    nxl_mg(mg_switch_to_pe0_level)-1:nxr_mg(                   &
530                    mg_switch_to_pe0_level)+1) )
531       ENDIF
532
[1]533       CALL poismg( tend )
[707]534
[778]535       IF ( gathered_size > subdomain_size )  THEN
536          DEALLOCATE( tend )
537          ALLOCATE( tend(nzb:nzt+1,nysg:nyng,nxlg:nxrg) )
538       ENDIF
539
[1]540!
541!--    Restore perturbation pressure on tend because this array is used
542!--    further below to correct the velocity fields
[707]543       DO  i = nxl-1, nxr+1
544          DO  j = nys-1, nyn+1
545             DO  k = nzb, nzt+1
546                tend(k,j,i) = p_loc(k,j,i)
547             ENDDO
548          ENDDO
549       ENDDO
[667]550
[1]551    ENDIF
552
553!
[707]554!-- Store perturbation pressure on array p, used for pressure data output.
555!-- Ghost layers are added in the output routines (except sor-method: see below)
556    IF ( intermediate_timestep_count == 1 )  THEN
557       !$OMP PARALLEL PRIVATE (i,j,k)
558       !$OMP DO
[1113]559       !$acc kernels present( p, tend, weight_substep )
[1257]560       !$acc loop independent
[707]561       DO  i = nxl-1, nxr+1
[1257]562          !$acc loop independent
[707]563          DO  j = nys-1, nyn+1
[1257]564             !$acc loop independent
[707]565             DO  k = nzb, nzt+1
566                p(k,j,i) = tend(k,j,i) * &
567                           weight_substep(intermediate_timestep_count)
[673]568             ENDDO
[1]569          ENDDO
[707]570       ENDDO
[1113]571       !$acc end kernels
[707]572       !$OMP END PARALLEL
573
574    ELSE 
575       !$OMP PARALLEL PRIVATE (i,j,k)
576       !$OMP DO
[1113]577       !$acc kernels present( p, tend, weight_substep )
[1257]578       !$acc loop independent
[707]579       DO  i = nxl-1, nxr+1
[1257]580          !$acc loop independent
[707]581          DO  j = nys-1, nyn+1
[1257]582             !$acc loop independent
[707]583             DO  k = nzb, nzt+1
584                p(k,j,i) = p(k,j,i) + tend(k,j,i) * &
585                           weight_substep(intermediate_timestep_count)
[673]586             ENDDO
587          ENDDO
[707]588       ENDDO
[1113]589       !$acc end kernels
[707]590       !$OMP END PARALLEL
591
592    ENDIF
[673]593       
[707]594!
595!-- SOR-method needs ghost layers for the next timestep
596    IF ( psolver == 'sor' )  CALL exchange_horiz( p, nbgp )
[682]597
[1]598!
599!-- Correction of the provisional velocities with the current perturbation
600!-- pressure just computed
[709]601    IF ( conserve_volume_flow  .AND.  ( bc_lr_cyc .OR. bc_ns_cyc ) )  THEN
[1]602       volume_flow_l(1) = 0.0
603       volume_flow_l(2) = 0.0
604    ENDIF
[707]605
[1]606    !$OMP PARALLEL PRIVATE (i,j,k)
607    !$OMP DO
[1113]608    !$acc kernels present( ddzu, nzb_u_inner, nzb_v_inner, nzb_w_inner, tend, u, v, w, weight_pres )
[1257]609    !$acc loop independent
[673]610    DO  i = nxl, nxr   
[1257]611       !$acc loop independent
[1]612       DO  j = nys, nyn
[1257]613          !$acc loop independent
[1113]614          DO  k = 1, nzt
615             IF ( k > nzb_w_inner(j,i) )  THEN
616                w(k,j,i) = w(k,j,i) - dt_3d *                                 &
617                           ( tend(k+1,j,i) - tend(k,j,i) ) * ddzu(k+1) *      &
618                           weight_pres(intermediate_timestep_count)
619             ENDIF
[1]620          ENDDO
[1257]621          !$acc loop independent
[1113]622          DO  k = 1, nzt
623             IF ( k > nzb_u_inner(j,i) )  THEN
624                u(k,j,i) = u(k,j,i) - dt_3d *                                 &
625                           ( tend(k,j,i) - tend(k,j,i-1) ) * ddx *            &
626                           weight_pres(intermediate_timestep_count)
627             ENDIF
[1]628          ENDDO
[1257]629          !$acc loop independent
[1113]630          DO  k = 1, nzt
631             IF ( k > nzb_v_inner(j,i) )  THEN
632                v(k,j,i) = v(k,j,i) - dt_3d *                                 &
633                           ( tend(k,j,i) - tend(k,j-1,i) ) * ddy *            &
634                           weight_pres(intermediate_timestep_count)
635             ENDIF
[673]636          ENDDO                                                         
[1]637
638       ENDDO
639    ENDDO
[1113]640    !$acc end kernels
[1]641    !$OMP END PARALLEL
[1113]642
643!
644!-- Sum up the volume flow through the right and north boundary
645    IF ( conserve_volume_flow  .AND.  bc_lr_cyc  .AND.  bc_ns_cyc  .AND.  &
646         nxr == nx )  THEN
647
648       !$OMP PARALLEL PRIVATE (j,k)
649       !$OMP DO
650       DO  j = nys, nyn
651          !$OMP CRITICAL
652          DO  k = nzb_2d(j,nx) + 1, nzt
653             volume_flow_l(1) = volume_flow_l(1) + u(k,j,nx) * dzw(k)
654          ENDDO
655          !$OMP END CRITICAL
656       ENDDO
657       !$OMP END PARALLEL
658
659    ENDIF
660
661    IF ( conserve_volume_flow  .AND.  bc_ns_cyc  .AND.  bc_lr_cyc  .AND.  &
662         nyn == ny )  THEN
663
664       !$OMP PARALLEL PRIVATE (i,k)
665       !$OMP DO
666       DO  i = nxl, nxr
667          !$OMP CRITICAL
668          DO  k = nzb_2d(ny,i) + 1, nzt
669             volume_flow_l(2) = volume_flow_l(2) + v(k,ny,i) * dzw(k)
670           ENDDO
671          !$OMP END CRITICAL
672       ENDDO
673       !$OMP END PARALLEL
674
675    ENDIF
[673]676   
[1]677!
678!-- Conserve the volume flow
[707]679    IF ( conserve_volume_flow  .AND.  ( bc_lr_cyc  .AND.  bc_ns_cyc ) )  THEN
[1]680
681#if defined( __parallel )   
[622]682       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
[1]683       CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 2, MPI_REAL, &
684                           MPI_SUM, comm2d, ierr ) 
685#else
686       volume_flow = volume_flow_l 
687#endif   
688
689       volume_flow_offset = ( volume_flow_initial - volume_flow ) / &
690                            volume_flow_area
691
692       !$OMP PARALLEL PRIVATE (i,j,k)
693       !$OMP DO
694       DO  i = nxl, nxr
695          DO  j = nys, nyn
[667]696             DO  k = nzb_u_inner(j,i) + 1, nzt
697                u(k,j,i) = u(k,j,i) + volume_flow_offset(1)
[719]698             ENDDO
699             DO k = nzb_v_inner(j,i) + 1, nzt
[667]700                v(k,j,i) = v(k,j,i) + volume_flow_offset(2)
701             ENDDO
[1]702          ENDDO
703       ENDDO
[667]704
[1]705       !$OMP END PARALLEL
706
707    ENDIF
708
709!
710!-- Exchange of boundaries for the velocities
[1113]711    IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
712       on_device = .TRUE.         ! to be removed after complete porting
713    ELSE                          ! of ghost point exchange
714       !$acc update host( u, v, w )
715    ENDIF
[667]716    CALL exchange_horiz( u, nbgp )
717    CALL exchange_horiz( v, nbgp )
718    CALL exchange_horiz( w, nbgp )
[1113]719    IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
720       on_device = .FALSE.        ! to be removed after complete porting
721    ELSE                          ! of ghost point exchange
722       !$acc update device( u, v, w )
723    ENDIF
[1]724
725!
726!-- Compute the divergence of the corrected velocity field,
727!-- a possible PE-sum is computed in flow_statistics
728    CALL cpu_log( log_point_s(1), 'divergence', 'start' )
729    sums_divnew_l = 0.0
730
731!
732!-- d must be reset to zero because it can contain nonzero values below the
733!-- topography
734    IF ( topography /= 'flat' )  d = 0.0
735
736    localsum  = 0.0
737    threadsum = 0.0
738
739    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
740    !$OMP DO SCHEDULE( STATIC )
741#if defined( __ibm )
742    DO  i = nxl, nxr
743       DO  j = nys, nyn
744          DO  k = nzb_s_inner(j,i)+1, nzt
745             d(k,j,i) = ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
746                        ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
747                        ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k)
748          ENDDO
749          DO  k = nzb+1, nzt
750             threadsum = threadsum + ABS( d(k,j,i) )
751          ENDDO
752       ENDDO
753    ENDDO
754#else
[1221]755    !$acc kernels present( d, ddzw, rflags_s_inner, u, v, w )
756    !$acc loop collapse( 3 )
[1]757    DO  i = nxl, nxr
758       DO  j = nys, nyn
[1113]759          DO  k = 1, nzt
[1221]760                d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx +   &
761                             ( v(k,j+1,i) - v(k,j,i) ) * ddy +   &
762                             ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) &
763                           ) * rflags_s_inner(k,j,i)
[1113]764          ENDDO
765       ENDDO
766    ENDDO
767    !$acc end kernels
768!
769!-- Compute possible PE-sum of divergences for flow_statistics
[1221]770    !$acc parallel loop collapse(3) present( d ) reduction(+:threadsum)
[1113]771    DO  i = nxl, nxr
772       DO  j = nys, nyn
[1221]773          DO  k = nzb+1, nzt
[1]774             threadsum = threadsum + ABS( d(k,j,i) )
775          ENDDO
776       ENDDO
777    ENDDO
[1257]778    !$acc end parallel loop
[1]779#endif
[667]780
[1]781    localsum = localsum + threadsum
782    !$OMP END PARALLEL
783
784!
785!-- For completeness, set the divergence sum of all statistic regions to those
786!-- of the total domain
787    sums_divnew_l(0:statistic_regions) = localsum
788
789    CALL cpu_log( log_point_s(1), 'divergence', 'stop' )
790
791    CALL cpu_log( log_point(8), 'pres', 'stop' )
792
793
794 END SUBROUTINE pres
Note: See TracBrowser for help on using the repository browser.