source: palm/trunk/SOURCE/pres.f90 @ 1230

 SUBROUTINE pres

!--------------------------------------------------------------------------------!
! This file is part of PALM.
!
! PALM is free software: you can redistribute it and/or modify it under the terms
! of the GNU General Public License as published by the Free Software Foundation,
! either version 3 of the License, or (at your option) any later version.
!
! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
! A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
!
! You should have received a copy of the GNU General Public License along with
! PALM. If not, see <http://www.gnu.org/licenses/>.
!
! Copyright 1997-2012  Leibniz University Hannover
!--------------------------------------------------------------------------------!
!
! Current revisions:
! ------------------
!
!
! Former revisions:
! -----------------
! $Id: pres.f90 1222 2013-09-10 14:48:09Z raasch $
!
! 1221 2013-09-10 08:59:13Z raasch
! openACC porting of reduction operations, loops for calculating d are
! using the rflags_s_inner multiply flag instead of the nzb_s_inner loop index
!
! 1212 2013-08-15 08:46:27Z raasch
! call of poisfft_hybrid removed
!
! 1117 2013-03-27 11:15:36Z suehring
! Bugfix in OpenMP parallelization.
!
! 1113 2013-03-10 02:48:14Z raasch
! GPU-porting of several loops, some loops rearranged
!
! 1111 2013-03-08 23:54:10Z
! openACC statements added,
! ibc_p_b = 2 removed
!
! 1092 2013-02-02 11:24:22Z raasch
! unused variables removed
!
! 1036 2012-10-22 13:43:42Z raasch
! code put under GPL (PALM 3.9)
!
! 1003 2012-09-14 14:35:53Z raasch
! adjustment of array tend for cases with unequal subdomain sizes removed
!
! 778 2011-11-07 14:18:25Z fricke
! New allocation of tend when multigrid is used and the collected field on PE0
! has more grid points than the subdomain of a PE.
!
! 719 2011-04-06 13:05:23Z gryschka
! Bugfix in volume flow control for double cyclic boundary conditions
!
! 709 2011-03-30 09:31:40Z raasch
! formatting adjustments
!
! 707 2011-03-29 11:39:40Z raasch
! Calculation of weighted average of p is now handled in the same way
! regardless of the number of ghost layers (advection scheme),
! multigrid and sor method are using p_loc for iterative advancements of
! pressure,
! localsum calculation modified for proper OpenMP reduction,
! bc_lr/ns replaced by bc_lr/ns_cyc
!
! 693 2011-03-08 09:..:..Z raasch
! bugfix: weighting coefficient added to ibm branch
!
! 680 2011-02-04 23:16:06Z gryschka
! bugfix: collective_wait
!
! 675 2011-01-19 10:56:55Z suehring
! Removed bugfix while copying tend.
!
! 673 2011-01-18 16:19:48Z suehring
! Weighting coefficients added for correct computation of the pressure during
! Runge-Kutta substeps.
!
! 667 2010-12-23 12:06:00Z suehring/gryschka
! New allocation of tend when ws-scheme and multigrid are used. This is due to
! performance reasons of the data exchange. The same is done with p after
! poismg is called.
! nxl-1, nxr+1, nys-1, nyn+1 replaced by nxlg, nxrg, nysg, nyng when no
! multigrid is used. Calls of exchange_horiz are modified.
! bugfix: After pressure correction no volume flow correction in case of
! non-cyclic boundary conditions
! (has to be done only before pressure correction)
! Call of SOR routine is referenced with ddzu_pres.
!
! 622 2010-12-10 08:08:13Z raasch
! optional barriers included in order to speed up collective operations
!
! 151 2008-03-07 13:42:18Z raasch
! Bugfix in volume flow control for non-cyclic boundary conditions
!
! 106 2007-08-16 14:30:26Z raasch
! Volume flow conservation added for the remaining three outflow boundaries
!
! 85 2007-05-11 09:35:14Z raasch
! Division by dt_3d replaced by multiplication of the inverse.
! For performance optimisation, this is done in the loop calculating the
! divergence instead of using a separate loop.
!
! 75 2007-03-22 09:54:05Z raasch
! Volume flow control for non-cyclic boundary conditions added (currently only
! for the north boundary!!), 2nd+3rd argument removed from exchange_horiz,
! mean vertical velocity is removed in case of Neumann boundary conditions
! both at the bottom and the top
!
! RCS Log replaced by Id keyword, revision history cleaned up
!
! Revision 1.25  2006/04/26 13:26:12  raasch
! OpenMP optimization (+localsum, threadsum)
!
! Revision 1.1  1997/07/24 11:24:44  raasch
! Initial revision
!
!
! Description:
! ------------
! Compute the divergence of the provisional velocity field. Solve the Poisson
! equation for the perturbation pressure. Compute the final velocities using
! this perturbation pressure. Compute the remaining divergence.
!------------------------------------------------------------------------------!
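!
!-- In schematic form, pres performs a pressure-correction (projection) step:
!-- the scaled divergence  d = div( u* ) / ( dt_3d * weight_pres )  is computed
!-- from the provisional velocities u*, the Poisson equation  Laplace(p~) = d
!-- is solved for the perturbation pressure p~ (held in array tend), and the
!-- velocities are corrected via  u = u* - dt_3d * weight_pres * grad( p~ ).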

    USE arrays_3d
    USE constants
    USE control_parameters
    USE cpulog
    USE grid_variables
    USE indices
    USE interfaces
    USE pegrid
    USE poisfft_mod
    USE statistics

    IMPLICIT NONE

    INTEGER ::  i, j, k

    REAL    ::  ddt_3d, localsum, threadsum, d_weight_pres

    REAL, DIMENSION(1:2) ::  volume_flow_l, volume_flow_offset
    REAL, DIMENSION(1:nzt) ::  w_l, w_l_l


    CALL cpu_log( log_point(8), 'pres', 'start' )

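!
!-- Precompute the inverse of the time step and of the pressure weighting
!-- factor of the current substep; multiplying by these inverses in the
!-- divergence loops below avoids repeated divisions.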
    ddt_3d = 1.0 / dt_3d
    d_weight_pres = 1.0 / weight_pres(intermediate_timestep_count)

!
!-- Multigrid method expects array d to have one ghost layer.
!--
    IF ( psolver == 'multigrid' )  THEN

       DEALLOCATE( d )
       ALLOCATE( d(nzb:nzt+1,nys-1:nyn+1,nxl-1:nxr+1) )

!
!--    Since p is later used to hold the weighted average of the substeps, it
!--    cannot be used in the iterative solver. Therefore, its initial value is
!--    stored on p_loc, which is then iteratively advanced in every substep.
       IF ( intermediate_timestep_count == 1 )  THEN
          DO  i = nxl-1, nxr+1
             DO  j = nys-1, nyn+1
                DO  k = nzb, nzt+1
                   p_loc(k,j,i) = p(k,j,i)
                ENDDO
             ENDDO
          ENDDO
       ENDIF

    ELSEIF ( psolver == 'sor'  .AND.  intermediate_timestep_count == 1 )  THEN

!
!--    Since p is later used to hold the weighted average of the substeps, it
!--    cannot be used in the iterative solver. Therefore, its initial value is
!--    stored on p_loc, which is then iteratively advanced in every substep.
       p_loc = p

    ENDIF

!
!-- Conserve the volume flow at the outflow in case of non-cyclic lateral
!-- boundary conditions
!-- WARNING: so far, this conservation does not work at the left/south
!--          boundary if the topography at the inflow differs from that at the
!--          outflow! For this case, volume_flow_area needs adjustment!
!
!-- Left/right
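!-- The volume flow through the open boundary plane is integrated, and the
!-- deviation from the prescribed initial volume flow, divided by the boundary
!-- area, is added as a constant offset to the normal velocity component so
!-- that the initial volume flow is restored.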
    IF ( conserve_volume_flow  .AND.  ( outflow_l .OR. outflow_r ) )  THEN

       volume_flow(1)   = 0.0
       volume_flow_l(1) = 0.0

       IF ( outflow_l )  THEN
          i = 0
       ELSEIF ( outflow_r )  THEN
          i = nx+1
       ENDIF

       DO  j = nys, nyn
!
!--       Sum up the volume flow through the left/right boundary
          DO  k = nzb_2d(j,i)+1, nzt
             volume_flow_l(1) = volume_flow_l(1) + u(k,j,i) * dzw(k)
          ENDDO
       ENDDO

#if defined( __parallel )
       IF ( collective_wait )  CALL MPI_BARRIER( comm1dy, ierr )
       CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 1, MPI_REAL, &
                           MPI_SUM, comm1dy, ierr )
#else
       volume_flow = volume_flow_l
#endif
       volume_flow_offset(1) = ( volume_flow_initial(1) - volume_flow(1) ) &
                               / volume_flow_area(1)

       DO  j = nysg, nyng
          DO  k = nzb_2d(j,i)+1, nzt
             u(k,j,i) = u(k,j,i) + volume_flow_offset(1)
          ENDDO
       ENDDO

    ENDIF

!
!-- South/north
    IF ( conserve_volume_flow  .AND.  ( outflow_n .OR. outflow_s ) )  THEN

       volume_flow(2)   = 0.0
       volume_flow_l(2) = 0.0

       IF ( outflow_s )  THEN
          j = 0
       ELSEIF ( outflow_n )  THEN
          j = ny+1
       ENDIF

       DO  i = nxl, nxr
!
!--       Sum up the volume flow through the south/north boundary
          DO  k = nzb_2d(j,i)+1, nzt
             volume_flow_l(2) = volume_flow_l(2) + v(k,j,i) * dzw(k)
          ENDDO
       ENDDO

#if defined( __parallel )
       IF ( collective_wait )  CALL MPI_BARRIER( comm1dx, ierr )
       CALL MPI_ALLREDUCE( volume_flow_l(2), volume_flow(2), 1, MPI_REAL, &
                           MPI_SUM, comm1dx, ierr )
#else
       volume_flow = volume_flow_l
#endif
       volume_flow_offset(2) = ( volume_flow_initial(2) - volume_flow(2) )    &
                               / volume_flow_area(2)

       DO  i = nxlg, nxrg
          DO  k = nzb_v_inner(j,i)+1, nzt
             v(k,j,i) = v(k,j,i) + volume_flow_offset(2)
          ENDDO
       ENDDO

    ENDIF

!
!-- Remove mean vertical velocity
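!-- With Neumann conditions for the perturbation pressure at both the bottom
!-- and the top boundary, the horizontal mean of w is not fixed by the pressure
!-- solver; the level-wise horizontal average of w (normalized by the number of
!-- non-topography grid points, ngp_2dh_outer) is therefore subtracted.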
    IF ( ibc_p_b == 1  .AND.  ibc_p_t == 1 )  THEN
       IF ( simulated_time > 0.0 )  THEN ! otherwise nzb_w_inner not yet known
          w_l = 0.0;  w_l_l = 0.0
          DO  i = nxl, nxr
             DO  j = nys, nyn
                DO  k = nzb_w_inner(j,i)+1, nzt
                   w_l_l(k) = w_l_l(k) + w(k,j,i)
                ENDDO
             ENDDO
          ENDDO
#if defined( __parallel )
          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
          CALL MPI_ALLREDUCE( w_l_l(1), w_l(1), nzt, MPI_REAL, MPI_SUM, &
                              comm2d, ierr )
#else
          w_l = w_l_l
#endif
          DO  k = 1, nzt
             w_l(k) = w_l(k) / ngp_2dh_outer(k,0)
          ENDDO
          DO  i = nxlg, nxrg
             DO  j = nysg, nyng
                DO  k = nzb_w_inner(j,i)+1, nzt
                   w(k,j,i) = w(k,j,i) - w_l(k)
                ENDDO
             ENDDO
          ENDDO
       ENDIF
    ENDIF

!
!-- Compute the divergence of the provisional velocity field.
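!-- The divergence is immediately scaled by ddt_3d * d_weight_pres, i.e. by
!-- 1 / ( dt_3d * weight_pres ), so that it can be used directly as right-hand
!-- side of the Poisson equation for the current substep. In the accelerator
!-- branch the rflags_s_inner multiply flag zeroes d inside the topography
!-- instead of restricting the k loop via nzb_s_inner.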
    CALL cpu_log( log_point_s(1), 'divergence', 'start' )

    IF ( psolver == 'multigrid' )  THEN
       !$OMP PARALLEL DO SCHEDULE( STATIC )
       DO  i = nxl-1, nxr+1
          DO  j = nys-1, nyn+1
             DO  k = nzb, nzt+1
                d(k,j,i) = 0.0
             ENDDO
          ENDDO
       ENDDO
    ELSE
       !$OMP PARALLEL DO SCHEDULE( STATIC )
       !$acc kernels present( d )
       !$acc loop
       DO  i = nxl, nxr
          DO  j = nys, nyn
             !$acc loop vector(32)
             DO  k = nzb+1, nzt
                d(k,j,i) = 0.0
             ENDDO
          ENDDO
       ENDDO
       !$acc end kernels
    ENDIF

    localsum  = 0.0
    threadsum = 0.0

#if defined( __ibm )
    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
    !$OMP DO SCHEDULE( STATIC )
    DO  i = nxl, nxr
       DO  j = nys, nyn
          DO  k = nzb_s_inner(j,i)+1, nzt
             d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
                          ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
                          ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) ) * ddt_3d      &
                                                                * d_weight_pres
          ENDDO
!
!--       Compute possible PE-sum of divergences for flow_statistics
          DO  k = nzb_s_inner(j,i)+1, nzt
             threadsum = threadsum + ABS( d(k,j,i) )
          ENDDO

       ENDDO
    ENDDO

    localsum = localsum + threadsum * dt_3d * &
                          weight_pres(intermediate_timestep_count)

    !$OMP END PARALLEL
#else

    !$OMP PARALLEL PRIVATE (i,j,k)
    !$OMP DO SCHEDULE( STATIC )
    !$acc kernels present( d, ddzw, rflags_s_inner, u, v, w )
    !$acc loop collapse( 3 )
    DO  i = nxl, nxr
       DO  j = nys, nyn
          DO  k = 1, nzt
             d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx +             &
                        ( v(k,j+1,i) - v(k,j,i) ) * ddy +               &
                        ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) ) * ddt_3d  &
                        * d_weight_pres * rflags_s_inner(k,j,i)
          ENDDO
       ENDDO
    ENDDO
    !$acc end kernels
    !$OMP END PARALLEL

!
!-- Compute possible PE-sum of divergences for flow_statistics
    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
    !$OMP DO SCHEDULE( STATIC )
    !$acc parallel loop collapse(3) present( d ) reduction(+:threadsum)
    DO  i = nxl, nxr
       DO  j = nys, nyn
          DO  k = nzb+1, nzt
             threadsum = threadsum + ABS( d(k,j,i) )
          ENDDO
       ENDDO
    ENDDO
    !$acc end parallel
    localsum = localsum + threadsum * dt_3d * &
                          weight_pres(intermediate_timestep_count)
    !$OMP END PARALLEL
#endif

!
!-- For completeness, set the divergence sum of all statistic regions to those
!-- of the total domain
    sums_divold_l(0:statistic_regions) = localsum

    CALL cpu_log( log_point_s(1), 'divergence', 'stop' )

!
!-- Compute the pressure perturbation by solving the Poisson equation
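!-- Depending on psolver, either the direct FFT/tridiagonal solver (poisfft),
!-- the iterative SOR red/black solver, or the multigrid solver is used; in
!-- all branches the resulting perturbation pressure ends up in array tend.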
    IF ( psolver(1:7) == 'poisfft' )  THEN

!
!--    Solve Poisson equation via FFT and solution of tridiagonal matrices
       IF ( psolver == 'poisfft' )  THEN

          CALL poisfft( d, tend )

       ENDIF

!
!--    Store computed perturbation pressure and set boundary condition in
!--    z-direction
       !$OMP PARALLEL DO
       !$acc kernels present( d, tend )
       !$acc loop
       DO  i = nxl, nxr
          DO  j = nys, nyn
             !$acc loop vector( 32 )
             DO  k = nzb+1, nzt
                tend(k,j,i) = d(k,j,i)
             ENDDO
          ENDDO
       ENDDO
       !$acc end kernels

!
!--    Bottom boundary:
!--    This condition is only required for internal output. The pressure
!--    gradient (dp(nzb+1)-dp(nzb))/dz is not used anywhere else.
       IF ( ibc_p_b == 1 )  THEN
!
!--       Neumann (dp/dz = 0)
          !$OMP PARALLEL DO
          !$acc kernels present( nzb_s_inner, tend )
          DO  i = nxlg, nxrg
             DO  j = nysg, nyng
                tend(nzb_s_inner(j,i),j,i) = tend(nzb_s_inner(j,i)+1,j,i)
             ENDDO
          ENDDO
          !$acc end kernels

       ELSE
!
!--       Dirichlet
          !$OMP PARALLEL DO
          !$acc kernels present( tend )
          DO  i = nxlg, nxrg
             DO  j = nysg, nyng
                tend(nzb_s_inner(j,i),j,i) = 0.0
             ENDDO
          ENDDO
          !$acc end kernels

       ENDIF

!
!--    Top boundary
       IF ( ibc_p_t == 1 )  THEN
!
!--       Neumann
          !$OMP PARALLEL DO
          !$acc kernels present( tend )
          DO  i = nxlg, nxrg
             DO  j = nysg, nyng
                tend(nzt+1,j,i) = tend(nzt,j,i)
             ENDDO
          ENDDO
          !$acc end kernels

       ELSE
!
!--       Dirichlet
          !$OMP PARALLEL DO
          !$acc kernels present( tend )
          DO  i = nxlg, nxrg
             DO  j = nysg, nyng
                tend(nzt+1,j,i) = 0.0
             ENDDO
          ENDDO
          !$acc end kernels

       ENDIF

!
!--    Exchange boundaries for the perturbation pressure (stored in tend)
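!--    The ghost points of tend are required by the velocity correction below,
!--    which evaluates the pressure differences tend(k,j,i)-tend(k,j,i-1) and
!--    tend(k,j,i)-tend(k,j-1,i) also at the subdomain edges. For single-core
!--    GPU runs the exchange operates on the device copy (on_device workaround).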
       IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
          on_device = .TRUE.         ! to be removed after complete porting
       ELSE                          ! of ghost point exchange
          !$acc update host( tend )
       ENDIF
       CALL exchange_horiz( tend, nbgp )
       IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
          on_device = .FALSE.        ! to be removed after complete porting
       ELSE                          ! of ghost point exchange
          !$acc update device( tend )
       ENDIF

    ELSEIF ( psolver == 'sor' )  THEN

!
!--    Solve Poisson equation for perturbation pressure using SOR-Red/Black
!--    scheme
       CALL sor( d, ddzu_pres, ddzw, p_loc )
       tend = p_loc

    ELSEIF ( psolver == 'multigrid' )  THEN

!
!--    Solve Poisson equation for perturbation pressure using Multigrid scheme,
!--    array tend is used to store the residuals, logical exchange_mg is used
!--    to distinguish data exchange in multigrid ( 1 ghost point ) and normal
!--    grid ( nbgp ghost points ).

!--    If the number of grid points of the gathered grid, which is collected
!--    on PE0, is larger than the number of grid points of a PE, then array
!--    tend will be enlarged.
       IF ( gathered_size > subdomain_size )  THEN
          DEALLOCATE( tend )
          ALLOCATE( tend(nzb:nzt_mg(mg_switch_to_pe0_level)+1,nys_mg(          &
                    mg_switch_to_pe0_level)-1:nyn_mg(mg_switch_to_pe0_level)+1,&
                    nxl_mg(mg_switch_to_pe0_level)-1:nxr_mg(                   &
                    mg_switch_to_pe0_level)+1) )
       ENDIF

       CALL poismg( tend )

       IF ( gathered_size > subdomain_size )  THEN
          DEALLOCATE( tend )
          ALLOCATE( tend(nzb:nzt+1,nysg:nyng,nxlg:nxrg) )
       ENDIF

!
!--    Restore perturbation pressure on tend because this array is used
!--    further below to correct the velocity fields
       DO  i = nxl-1, nxr+1
          DO  j = nys-1, nyn+1
             DO  k = nzb, nzt+1
                tend(k,j,i) = p_loc(k,j,i)
             ENDDO
          ENDDO
       ENDDO

    ENDIF

!
!-- Store perturbation pressure on array p, used for pressure data output.
!-- Ghost layers are added in the output routines (except sor-method: see below)
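!-- Array p accumulates the weight_substep-weighted sum of the substep
!-- perturbation pressures over the Runge-Kutta substeps: the first substep
!-- initializes p, subsequent substeps add their weighted contribution.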
    IF ( intermediate_timestep_count == 1 )  THEN
       !$OMP PARALLEL PRIVATE (i,j,k)
       !$OMP DO
       !$acc kernels present( p, tend, weight_substep )
       !$acc loop
       DO  i = nxl-1, nxr+1
          DO  j = nys-1, nyn+1
             !$acc loop vector( 32 )
             DO  k = nzb, nzt+1
                p(k,j,i) = tend(k,j,i) * &
                           weight_substep(intermediate_timestep_count)
             ENDDO
          ENDDO
       ENDDO
       !$acc end kernels
       !$OMP END PARALLEL

    ELSE
       !$OMP PARALLEL PRIVATE (i,j,k)
       !$OMP DO
       !$acc kernels present( p, tend, weight_substep )
       !$acc loop
       DO  i = nxl-1, nxr+1
          DO  j = nys-1, nyn+1
             !$acc loop vector( 32 )
             DO  k = nzb, nzt+1
                p(k,j,i) = p(k,j,i) + tend(k,j,i) * &
                           weight_substep(intermediate_timestep_count)
             ENDDO
          ENDDO
       ENDDO
       !$acc end kernels
       !$OMP END PARALLEL

    ENDIF

!
!-- SOR-method needs ghost layers for the next timestep
    IF ( psolver == 'sor' )  CALL exchange_horiz( p, nbgp )

!
!-- Correction of the provisional velocities with the current perturbation
!-- pressure just computed
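!-- Each component is updated as, e.g. for u,
!--    u(k,j,i) = u(k,j,i) - dt_3d * weight_pres * ( tend(k,j,i) - tend(k,j,i-1) ) * ddx
!-- and analogously for v (with ddy) and w (with ddzu(k+1)); grid points inside
!-- the topography (k <= nzb_u/v/w_inner) are left unchanged.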
    IF ( conserve_volume_flow  .AND.  ( bc_lr_cyc .OR. bc_ns_cyc ) )  THEN
       volume_flow_l(1) = 0.0
       volume_flow_l(2) = 0.0
    ENDIF

    !$OMP PARALLEL PRIVATE (i,j,k)
    !$OMP DO
    !$acc kernels present( ddzu, nzb_u_inner, nzb_v_inner, nzb_w_inner, tend, u, v, w, weight_pres )
    !$acc loop
    DO  i = nxl, nxr
       DO  j = nys, nyn
          !$acc loop vector( 32 )
          DO  k = 1, nzt
             IF ( k > nzb_w_inner(j,i) )  THEN
                w(k,j,i) = w(k,j,i) - dt_3d *                                 &
                           ( tend(k+1,j,i) - tend(k,j,i) ) * ddzu(k+1) *      &
                           weight_pres(intermediate_timestep_count)
             ENDIF
          ENDDO
          !$acc loop vector( 32 )
          DO  k = 1, nzt
             IF ( k > nzb_u_inner(j,i) )  THEN
                u(k,j,i) = u(k,j,i) - dt_3d *                                 &
                           ( tend(k,j,i) - tend(k,j,i-1) ) * ddx *            &
                           weight_pres(intermediate_timestep_count)
             ENDIF
          ENDDO
          !$acc loop vector( 32 )
          DO  k = 1, nzt
             IF ( k > nzb_v_inner(j,i) )  THEN
                v(k,j,i) = v(k,j,i) - dt_3d *                                 &
                           ( tend(k,j,i) - tend(k,j-1,i) ) * ddy *            &
                           weight_pres(intermediate_timestep_count)
             ENDIF
          ENDDO

       ENDDO
    ENDDO
    !$acc end kernels
    !$OMP END PARALLEL

!
!-- Sum up the volume flow through the right and north boundary
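!-- For doubly cyclic lateral boundaries the total volume flow is obtained by
!-- summing u over the plane i = nx and v over the plane j = ny on the PEs
!-- holding these planes; after the MPI reduction a constant offset is added
!-- to u and v (see below) so that the initial volume flow is recovered.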
    IF ( conserve_volume_flow  .AND.  bc_lr_cyc  .AND.  bc_ns_cyc  .AND.  &
         nxr == nx )  THEN

       !$OMP PARALLEL PRIVATE (j,k)
       !$OMP DO
       DO  j = nys, nyn
          !$OMP CRITICAL
          DO  k = nzb_2d(j,nx) + 1, nzt
             volume_flow_l(1) = volume_flow_l(1) + u(k,j,nx) * dzw(k)
          ENDDO
          !$OMP END CRITICAL
       ENDDO
       !$OMP END PARALLEL

    ENDIF

    IF ( conserve_volume_flow  .AND.  bc_ns_cyc  .AND.  bc_lr_cyc  .AND.  &
         nyn == ny )  THEN

       !$OMP PARALLEL PRIVATE (i,k)
       !$OMP DO
       DO  i = nxl, nxr
          !$OMP CRITICAL
          DO  k = nzb_2d(ny,i) + 1, nzt
             volume_flow_l(2) = volume_flow_l(2) + v(k,ny,i) * dzw(k)
          ENDDO
          !$OMP END CRITICAL
       ENDDO
       !$OMP END PARALLEL

    ENDIF

!
!-- Conserve the volume flow
    IF ( conserve_volume_flow  .AND.  ( bc_lr_cyc  .AND.  bc_ns_cyc ) )  THEN

#if defined( __parallel )
       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
       CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 2, MPI_REAL, &
                           MPI_SUM, comm2d, ierr )
#else
       volume_flow = volume_flow_l
#endif

       volume_flow_offset = ( volume_flow_initial - volume_flow ) / &
                            volume_flow_area

       !$OMP PARALLEL PRIVATE (i,j,k)
       !$OMP DO
       DO  i = nxl, nxr
          DO  j = nys, nyn
             DO  k = nzb_u_inner(j,i) + 1, nzt
                u(k,j,i) = u(k,j,i) + volume_flow_offset(1)
             ENDDO
             DO  k = nzb_v_inner(j,i) + 1, nzt
                v(k,j,i) = v(k,j,i) + volume_flow_offset(2)
             ENDDO
          ENDDO
       ENDDO

       !$OMP END PARALLEL

    ENDIF

!
!-- Exchange of boundaries for the velocities
    IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
       on_device = .TRUE.         ! to be removed after complete porting
    ELSE                          ! of ghost point exchange
       !$acc update host( u, v, w )
    ENDIF
    CALL exchange_horiz( u, nbgp )
    CALL exchange_horiz( v, nbgp )
    CALL exchange_horiz( w, nbgp )
    IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
       on_device = .FALSE.        ! to be removed after complete porting
    ELSE                          ! of ghost point exchange
       !$acc update device( u, v, w )
    ENDIF

!
!-- Compute the divergence of the corrected velocity field,
!-- a possible PE-sum is computed in flow_statistics
    CALL cpu_log( log_point_s(1), 'divergence', 'start' )
    sums_divnew_l = 0.0

!
!-- d must be reset to zero because it can contain nonzero values below the
!-- topography
    IF ( topography /= 'flat' )  d = 0.0

    localsum  = 0.0
    threadsum = 0.0

    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
    !$OMP DO SCHEDULE( STATIC )
#if defined( __ibm )
    DO  i = nxl, nxr
       DO  j = nys, nyn
          DO  k = nzb_s_inner(j,i)+1, nzt
             d(k,j,i) = ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
                        ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
                        ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k)
          ENDDO
          DO  k = nzb+1, nzt
             threadsum = threadsum + ABS( d(k,j,i) )
          ENDDO
       ENDDO
    ENDDO
#else
    !$acc kernels present( d, ddzw, rflags_s_inner, u, v, w )
    !$acc loop collapse( 3 )
    DO  i = nxl, nxr
       DO  j = nys, nyn
          DO  k = 1, nzt
             d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx +   &
                          ( v(k,j+1,i) - v(k,j,i) ) * ddy +   &
                          ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) &
                        ) * rflags_s_inner(k,j,i)
          ENDDO
       ENDDO
    ENDDO
    !$acc end kernels
!
!-- Compute possible PE-sum of divergences for flow_statistics
    !$acc parallel loop collapse(3) present( d ) reduction(+:threadsum)
    DO  i = nxl, nxr
       DO  j = nys, nyn
          DO  k = nzb+1, nzt
             threadsum = threadsum + ABS( d(k,j,i) )
          ENDDO
       ENDDO
    ENDDO
    !$acc end parallel
#endif

    localsum = localsum + threadsum
    !$OMP END PARALLEL

!
!-- For completeness, set the divergence sum of all statistic regions to those
!-- of the total domain
    sums_divnew_l(0:statistic_regions) = localsum

    CALL cpu_log( log_point_s(1), 'divergence', 'stop' )

    CALL cpu_log( log_point(8), 'pres', 'stop' )


 END SUBROUTINE pres