source: palm/trunk/SOURCE/pres.f90 @ 1217

[1]1 SUBROUTINE pres
2
[1036]3!--------------------------------------------------------------------------------!
4! This file is part of PALM.
5!
6! PALM is free software: you can redistribute it and/or modify it under the terms
7! of the GNU General Public License as published by the Free Software Foundation,
8! either version 3 of the License, or (at your option) any later version.
9!
10! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12! A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
13!
14! You should have received a copy of the GNU General Public License along with
15! PALM. If not, see <http://www.gnu.org/licenses/>.
16!
17! Copyright 1997-2012  Leibniz University Hannover
18!--------------------------------------------------------------------------------!
19!
[484]20! Current revisions:
[1212]21! ------------------
[708]22!
[1213]23!
[708]24! Former revisions:
25! -----------------
26! $Id: pres.f90 1213 2013-08-15 09:03:50Z raasch $
27!
[1213]28! 1212 2013-08-15 08:46:27Z raasch
29! call of poisfft_hybrid removed
30!
[1118]31! 1117 2013-03-27 11:15:36Z suehring
32! Bugfix in OpenMP parallelization.
33!
[1114]34! 1113 2013-03-10 02:48:14Z raasch
35! GPU-porting of several loops, some loops rearranged
36!
[1112]37! 1111 2013-03-08 23:54:10Z
38! OpenACC statements added,
39! ibc_p_b = 2 removed
40!
[1093]41! 1092 2013-02-02 11:24:22Z raasch
42! unused variables removed
43!
[1037]44! 1036 2012-10-22 13:43:42Z raasch
45! code put under GPL (PALM 3.9)
46!
[1004]47! 1003 2012-09-14 14:35:53Z raasch
48! adjustment of array tend for cases with unequal subdomain sizes removed
49!
[779]50! 778 2011-11-07 14:18:25Z fricke
51! New allocation of tend when multigrid is used and the collected field on PE0
52! has more grid points than the subdomain of a PE.
53!
[720]54! 719 2011-04-06 13:05:23Z gryschka
55! Bugfix in volume flow control for double cyclic boundary conditions
56!
[710]57! 709 2011-03-30 09:31:40Z raasch
58! formatting adjustments
59!
[708]60! 707 2011-03-29 11:39:40Z raasch
[707]61! Calculation of weighted average of p is now handled in the same way
62! regardless of the number of ghost layers (advection scheme),
63! multigrid and sor method are using p_loc for iterative advancements of
64! pressure,
65! localsum calculation modified for proper OpenMP reduction,
66! bc_lr/ns replaced by bc_lr/ns_cyc
[674]67!
[694]68! 693 2011-03-08 09:..:..Z raasch
[695]69! bugfix: weighting coefficient added to ibm branch
[694]70!
71! 680 2011-02-04 23:16:06Z gryschka
[681]72! bugfix: collective_wait
[668]73!
[676]74! 675 2011-01-19 10:56:55Z suehring
75! Removed bugfix while copying tend.
76!
[674]77! 673 2011-01-18 16:19:48Z suehring
78! Weighting coefficients added for correct computation of the pressure during
79! Runge-Kutta substeps.
80!
[668]81! 667 2010-12-23 12:06:00Z suehring/gryschka
[667]82! New allocation of tend when ws-scheme and multigrid are used. This is due to
83! performance reasons of the data_exchange. The same is done with p after
84! poismg is called.
85! nxl-1, nxr+1, nys-1, nyn+1 replaced by nxlg, nxrg, nysg, nyng when no
86! multigrid is used. Calls of exchange_horiz are modified.
87! bugfix: After pressure correction no volume flow correction in case of
88! non-cyclic boundary conditions
89! (has to be done only before pressure correction)
90! Call of SOR routine is referenced with ddzu_pres.
91!
[623]92! 622 2010-12-10 08:08:13Z raasch
93! optional barriers included in order to speed up collective operations
94!
[198]95! 151 2008-03-07 13:42:18Z raasch
96! Bugfix in volume flow control for non-cyclic boundary conditions
97!
[110]98! 106 2007-08-16 14:30:26Z raasch
99! Volume flow conservation added for the remaining three outflow boundaries
100!
[90]101! 85 2007-05-11 09:35:14Z raasch
102! Division through dt_3d replaced by multiplication of the inverse.
103! For performance optimisation, this is done in the loop calculating the
104! divergence instead of using a seperate loop.
105!
[77]106! 75 2007-03-22 09:54:05Z raasch
[75]107! Volume flow control for non-cyclic boundary conditions added (currently only
[76]108! for the north boundary!!), 2nd+3rd argument removed from exchange_horiz,
109! mean vertical velocity is removed in case of Neumann boundary conditions
110! both at the bottom and the top
[1]111!
[3]112! RCS Log replaced by Id keyword, revision history cleaned up
113!
[1]114! Revision 1.25  2006/04/26 13:26:12  raasch
115! OpenMP optimization (+localsum, threadsum)
116!
117! Revision 1.1  1997/07/24 11:24:44  raasch
118! Initial revision
119!
120!
121! Description:
122! ------------
123! Compute the divergence of the provisional velocity field. Solve the Poisson
124! equation for the perturbation pressure. Compute the final velocities using
125! this perturbation pressure. Compute the remaining divergence.
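!
! Schematic outline of the projection step performed below (informal notation
! using the variable names of this routine; the exact discrete operators are
! those coded further down):
!
!    d             = div( u* ) * ddt_3d * d_weight_pres     provisional divergence
!    Laplace( dp ) = d                                       solved by poisfft, sor, or multigrid
!    u**           = u* - dt_3d * weight_pres * grad( dp )   velocity correction
!
! Here u* denotes the provisional velocities (u,v,w), dp the perturbation
! pressure (held in tend), and p accumulates the weighted average of dp over
! the Runge-Kutta substeps (weight_substep).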
126!------------------------------------------------------------------------------!
127
128    USE arrays_3d
129    USE constants
130    USE control_parameters
131    USE cpulog
132    USE grid_variables
133    USE indices
134    USE interfaces
135    USE pegrid
136    USE poisfft_mod
137    USE statistics
138
139    IMPLICIT NONE
140
[1092]141    INTEGER ::  i, j, k
[1]142
[673]143    REAL    ::  ddt_3d, localsum, threadsum, d_weight_pres
[1]144
145    REAL, DIMENSION(1:2) ::  volume_flow_l, volume_flow_offset
[76]146    REAL, DIMENSION(1:nzt) ::  w_l, w_l_l
[1]147
148
149    CALL cpu_log( log_point(8), 'pres', 'start' )
150
[85]151
152    ddt_3d = 1.0 / dt_3d
[709]153    d_weight_pres = 1.0 / weight_pres(intermediate_timestep_count)
[85]154
[1]155!
[707]156!-- Multigrid method expects array d to have one ghost layer.
157!--
[1]158    IF ( psolver == 'multigrid' )  THEN
[667]159     
[1]160       DEALLOCATE( d )
[667]161       ALLOCATE( d(nzb:nzt+1,nys-1:nyn+1,nxl-1:nxr+1) ) 
[707]162
163!
164!--    Since p is later used to hold the weighted average of the substeps, it
165!--    cannot be used in the iterative solver. Therefore, its initial value is
166!--    stored on p_loc, which is then iteratively advanced in every substep.
167       IF ( intermediate_timestep_count == 1 )  THEN
168          DO  i = nxl-1, nxr+1
169             DO  j = nys-1, nyn+1
170                DO  k = nzb, nzt+1
171                   p_loc(k,j,i) = p(k,j,i)
172                ENDDO
173             ENDDO
174          ENDDO
[667]175       ENDIF
176       
[707]177    ELSEIF ( psolver == 'sor'  .AND.  intermediate_timestep_count == 1 )  THEN
178
179!
180!--    Since p is later used to hold the weighted average of the substeps, it
181!--    cannot be used in the iterative solver. Therefore, its initial value is
182!--    stored on p_loc, which is then iteratively advanced in every substep.
183       p_loc = p
184
[1]185    ENDIF
186
187!
[75]188!-- Conserve the volume flow at the outflow in case of non-cyclic lateral
189!-- boundary conditions
[106]190!-- WARNING: so far, this conservation does not work at the left/south
191!--          boundary if the topography at the inflow differs from that at the
192!--          outflow! For this case, volume_flow_area needs adjustment!
193!
194!-- Left/right
[709]195    IF ( conserve_volume_flow  .AND.  ( outflow_l .OR. outflow_r ) )  THEN
[680]196
[106]197       volume_flow(1)   = 0.0
198       volume_flow_l(1) = 0.0
199
200       IF ( outflow_l )  THEN
201          i = 0
202       ELSEIF ( outflow_r )  THEN
203          i = nx+1
204       ENDIF
205
206       DO  j = nys, nyn
207!
208!--       Sum up the volume flow through the left/right boundary
[709]209          DO  k = nzb_2d(j,i)+1, nzt
[667]210             volume_flow_l(1) = volume_flow_l(1) + u(k,j,i) * dzw(k)
[106]211          ENDDO
212       ENDDO
213
214#if defined( __parallel )   
[680]215       IF ( collective_wait )  CALL MPI_BARRIER( comm1dy, ierr )
[106]216       CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 1, MPI_REAL, &
217                           MPI_SUM, comm1dy, ierr )   
218#else
219       volume_flow = volume_flow_l 
220#endif
[709]221       volume_flow_offset(1) = ( volume_flow_initial(1) - volume_flow(1) ) &
[106]222                               / volume_flow_area(1)
223
[667]224       DO  j = nysg, nyng
[709]225          DO  k = nzb_2d(j,i)+1, nzt
[106]226             u(k,j,i) = u(k,j,i) + volume_flow_offset(1)
227          ENDDO
228       ENDDO
229
230    ENDIF
231
232!
233!-- South/north
[709]234    IF ( conserve_volume_flow  .AND.  ( outflow_n .OR. outflow_s ) )  THEN
[106]235
[75]236       volume_flow(2)   = 0.0
237       volume_flow_l(2) = 0.0
238
[106]239       IF ( outflow_s )  THEN
240          j = 0
241       ELSEIF ( outflow_n )  THEN
[75]242          j = ny+1
[106]243       ENDIF
244
245       DO  i = nxl, nxr
[75]246!
[106]247!--       Sum up the volume flow through the south/north boundary
[709]248          DO  k = nzb_2d(j,i)+1, nzt
[667]249             volume_flow_l(2) = volume_flow_l(2) + v(k,j,i) * dzw(k)
[75]250          ENDDO
[106]251       ENDDO
252
[75]253#if defined( __parallel )   
[680]254       IF ( collective_wait )  CALL MPI_BARRIER( comm1dx, ierr )
[75]255       CALL MPI_ALLREDUCE( volume_flow_l(2), volume_flow(2), 1, MPI_REAL, &
256                           MPI_SUM, comm1dx, ierr )   
257#else
258       volume_flow = volume_flow_l 
259#endif
260       volume_flow_offset(2) = ( volume_flow_initial(2) - volume_flow(2) )    &
[106]261                               / volume_flow_area(2)
[75]262
[667]263       DO  i = nxlg, nxrg
[709]264          DO  k = nzb_v_inner(j,i)+1, nzt
[106]265             v(k,j,i) = v(k,j,i) + volume_flow_offset(2)
[75]266          ENDDO
[106]267       ENDDO
[75]268
269    ENDIF
270
[76]271!
272!-- Remove mean vertical velocity
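!-- (done only if Neumann conditions for the perturbation pressure are used at
!-- both the bottom and the top boundary; presumably because, with pure Neumann
!-- conditions, the pressure correction cannot remove a nonzero horizontal mean
!-- of w, cf. the revision 75 entry in the header above)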
273    IF ( ibc_p_b == 1  .AND.  ibc_p_t == 1 )  THEN
[709]274       IF ( simulated_time > 0.0 )  THEN ! otherwise nzb_w_inner not yet known
[76]275          w_l = 0.0;  w_l_l = 0.0
276          DO  i = nxl, nxr
277             DO  j = nys, nyn
278                DO  k = nzb_w_inner(j,i)+1, nzt
279                   w_l_l(k) = w_l_l(k) + w(k,j,i)
280                ENDDO
281             ENDDO
282          ENDDO
283#if defined( __parallel )   
[622]284          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
[709]285          CALL MPI_ALLREDUCE( w_l_l(1), w_l(1), nzt, MPI_REAL, MPI_SUM, &
286                              comm2d, ierr )
[76]287#else
288          w_l = w_l_l 
289#endif
290          DO  k = 1, nzt
291             w_l(k) = w_l(k) / ngp_2dh_outer(k,0)
292          ENDDO
[667]293          DO  i = nxlg, nxrg
294             DO  j = nysg, nyng
[76]295                DO  k = nzb_w_inner(j,i)+1, nzt
296                   w(k,j,i) = w(k,j,i) - w_l(k)
297                ENDDO
298             ENDDO
299          ENDDO
300       ENDIF
301    ENDIF
[75]302
303!
[1]304!-- Compute the divergence of the provisional velocity field.
305    CALL cpu_log( log_point_s(1), 'divergence', 'start' )
306
307    IF ( psolver == 'multigrid' )  THEN
308       !$OMP PARALLEL DO SCHEDULE( STATIC )
309       DO  i = nxl-1, nxr+1
310          DO  j = nys-1, nyn+1
311             DO  k = nzb, nzt+1
312                d(k,j,i) = 0.0
313             ENDDO
314          ENDDO
315       ENDDO
316    ELSE
317       !$OMP PARALLEL DO SCHEDULE( STATIC )
[1111]318       !$acc kernels present( d )
319       !$acc loop
[1003]320       DO  i = nxl, nxr
321          DO  j = nys, nyn
[1111]322             !$acc loop vector(32)
[1003]323             DO  k = nzb+1, nzt
[1]324                d(k,j,i) = 0.0
325             ENDDO
326          ENDDO
327       ENDDO
[1111]328       !$acc end kernels
[1]329    ENDIF
330
331    localsum  = 0.0
332    threadsum = 0.0
333
334#if defined( __ibm )
335    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
336    !$OMP DO SCHEDULE( STATIC )
337    DO  i = nxl, nxr
338       DO  j = nys, nyn
339          DO  k = nzb_s_inner(j,i)+1, nzt
[85]340             d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
341                          ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
[673]342                          ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) ) * ddt_3d      &
343                                                                * d_weight_pres 
[1]344          ENDDO
345!
346!--       Compute possible PE-sum of divergences for flow_statistics
347          DO  k = nzb_s_inner(j,i)+1, nzt
348             threadsum = threadsum + ABS( d(k,j,i) )
349          ENDDO
350
351       ENDDO
352    ENDDO
353
[707]354    localsum = localsum + threadsum * dt_3d * &
355                          weight_pres(intermediate_timestep_count)
[693]356
[1]357    !$OMP END PARALLEL
358#else
359
[1111]360    !$OMP PARALLEL PRIVATE (i,j,k)
361    !$OMP DO SCHEDULE( STATIC )
362    !$acc kernels present( d, ddzw, nzb_s_inner, u, v, w )
363    !$acc loop
364    DO  i = nxl, nxr
365       DO  j = nys, nyn
366          !$acc loop vector(32)
367          DO  k = 1, nzt
368             IF ( k > nzb_s_inner(j,i) )  THEN
[85]369                d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
[1111]370                           ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
371                           ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) ) * ddt_3d      &
372                           * d_weight_pres
373             ENDIF
[1]374          ENDDO
375       ENDDO
[1111]376    ENDDO
377    !$acc end kernels
378    !$OMP END PARALLEL
[1]379
380!
381!-- Compute possible PE-sum of divergences for flow_statistics
382    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
383    !$OMP DO SCHEDULE( STATIC )
384    DO  i = nxl, nxr
385       DO  j = nys, nyn
386          DO  k = nzb+1, nzt
387             threadsum = threadsum + ABS( d(k,j,i) )
388          ENDDO
389       ENDDO
390    ENDDO
[707]391    localsum = localsum + threadsum * dt_3d * &
392                          weight_pres(intermediate_timestep_count)
[1]393    !$OMP END PARALLEL
394#endif
395
396!
397!-- For completeness, set the divergence sum of all statistic regions to that
398!-- of the total domain
399    sums_divold_l(0:statistic_regions) = localsum
400
401    CALL cpu_log( log_point_s(1), 'divergence', 'stop' )
402
403!
404!-- Compute the pressure perturbation solving the Poisson equation
405    IF ( psolver(1:7) == 'poisfft' )  THEN
406
407!
408!--    Solve Poisson equation via FFT and solution of tridiagonal matrices
409       IF ( psolver == 'poisfft' )  THEN
[1212]410
[1]411          CALL poisfft( d, tend )
[1113]412
[1]413       ENDIF
414
415!
416!--    Store computed perturbation pressure and set boundary condition in
417!--    z-direction
418       !$OMP PARALLEL DO
[1113]419       !$acc kernels present( d, tend )
420       !$acc loop
[1]421       DO  i = nxl, nxr
422          DO  j = nys, nyn
[1113]423             !$acc loop vector( 32 )
[1]424             DO  k = nzb+1, nzt
425                tend(k,j,i) = d(k,j,i)
426             ENDDO
427          ENDDO
428       ENDDO
[1113]429       !$acc end kernels
[1]430
431!
432!--    Bottom boundary:
433!--    This condition is only required for internal output. The pressure
434!--    gradient (dp(nzb+1)-dp(nzb))/dz is not used anywhere else.
435       IF ( ibc_p_b == 1 )  THEN
436!
437!--       Neumann (dp/dz = 0)
438          !$OMP PARALLEL DO
[1113]439          !$acc kernels present( nzb_s_inner, tend )
[667]440          DO  i = nxlg, nxrg
441             DO  j = nysg, nyng
[1]442                tend(nzb_s_inner(j,i),j,i) = tend(nzb_s_inner(j,i)+1,j,i)
443             ENDDO
444          ENDDO
[1113]445          !$acc end kernels
[1]446
447       ELSE
448!
449!--       Dirichlet
450          !$OMP PARALLEL DO
[1113]451          !$acc kernels present( tend )
[667]452          DO  i = nxlg, nxrg
453             DO  j = nysg, nyng
[1]454                tend(nzb_s_inner(j,i),j,i) = 0.0
455             ENDDO
456          ENDDO
[1113]457          !$acc end kernels
[1]458
459       ENDIF
460
461!
462!--    Top boundary
463       IF ( ibc_p_t == 1 )  THEN
464!
465!--       Neumann
466          !$OMP PARALLEL DO
[1113]467          !$acc kernels present( tend )
[667]468          DO  i = nxlg, nxrg
469             DO  j = nysg, nyng
[1]470                tend(nzt+1,j,i) = tend(nzt,j,i)
471             ENDDO
472          ENDDO
[1113]473          !$acc end kernels
[1]474
475       ELSE
476!
477!--       Dirichlet
478          !$OMP PARALLEL DO
[1113]479          !$acc kernels present( tend )
[667]480          DO  i = nxlg, nxrg
481             DO  j = nysg, nyng
[1]482                tend(nzt+1,j,i) = 0.0
483             ENDDO
484          ENDDO
[1113]485          !$acc end kernels
[1]486
487       ENDIF
488
489!
490!--    Exchange boundaries for the perturbation pressure (stored in tend)
[1113]491       IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
492          on_device = .TRUE.         ! to be removed after complete porting
493       ELSE                          ! of ghost point exchange
494          !$acc update host( tend )
495       ENDIF
[667]496       CALL exchange_horiz( tend, nbgp )
[1113]497       IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
498          on_device = .FALSE.        ! to be removed after complete porting
499       ELSE                          ! of ghost point exchange
500          !$acc update device( tend )
501       ENDIF
[1]502     
503    ELSEIF ( psolver == 'sor' )  THEN
504
505!
506!--    Solve Poisson equation for perturbation pressure using SOR-Red/Black
507!--    scheme
[707]508       CALL sor( d, ddzu_pres, ddzw, p_loc )
509       tend = p_loc
[1]510
511    ELSEIF ( psolver == 'multigrid' )  THEN
512
513!
514!--    Solve Poisson equation for perturbation pressure using Multigrid scheme,
[667]515!--    array tend is used to store the residuals, logical exchange_mg is used
516!--    to distinguish between data exchange in multigrid ( 1 ghost point )
517!--    and on the normal grid ( nbgp ghost points ).
[778]518
519!--    If the number of grid points of the gathered grid, which is collected
520!--    on PE0, is larger than the number of grid points of a PE, then array
521!--    tend will be enlarged.
522       IF ( gathered_size > subdomain_size )  THEN
523          DEALLOCATE( tend )
524          ALLOCATE( tend(nzb:nzt_mg(mg_switch_to_pe0_level)+1,nys_mg(          &
525                    mg_switch_to_pe0_level)-1:nyn_mg(mg_switch_to_pe0_level)+1,&
526                    nxl_mg(mg_switch_to_pe0_level)-1:nxr_mg(                   &
527                    mg_switch_to_pe0_level)+1) )
528       ENDIF
529
[1]530       CALL poismg( tend )
[707]531
[778]532       IF ( gathered_size > subdomain_size )  THEN
533          DEALLOCATE( tend )
534          ALLOCATE( tend(nzb:nzt+1,nysg:nyng,nxlg:nxrg) )
535       ENDIF
536
[1]537!
538!--    Restore perturbation pressure on tend because this array is used
539!--    further below to correct the velocity fields
[707]540       DO  i = nxl-1, nxr+1
541          DO  j = nys-1, nyn+1
542             DO  k = nzb, nzt+1
543                tend(k,j,i) = p_loc(k,j,i)
544             ENDDO
545          ENDDO
546       ENDDO
[667]547
[1]548    ENDIF
549
550!
[707]551!-- Store perturbation pressure on array p, used for pressure data output.
552!-- Ghost layers are added in the output routines (except sor-method: see below)
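!-- As an informal sketch: over the substeps m = 1..M of the current timestep,
!-- p is built up as  p = sum_m  weight_substep(m) * dp_m , where dp_m is the
!-- perturbation pressure of substep m (currently held in tend); the first
!-- substep initializes p, the later substeps add to it.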
553    IF ( intermediate_timestep_count == 1 )  THEN
554       !$OMP PARALLEL PRIVATE (i,j,k)
555       !$OMP DO
[1113]556       !$acc kernels present( p, tend, weight_substep )
557       !$acc loop
[707]558       DO  i = nxl-1, nxr+1
559          DO  j = nys-1, nyn+1
[1113]560             !$acc loop vector( 32 )
[707]561             DO  k = nzb, nzt+1
562                p(k,j,i) = tend(k,j,i) * &
563                           weight_substep(intermediate_timestep_count)
[673]564             ENDDO
[1]565          ENDDO
[707]566       ENDDO
[1113]567       !$acc end kernels
[707]568       !$OMP END PARALLEL
569
570    ELSE 
571       !$OMP PARALLEL PRIVATE (i,j,k)
572       !$OMP DO
[1113]573       !$acc kernels present( p, tend, weight_substep )
574       !$acc loop
[707]575       DO  i = nxl-1, nxr+1
576          DO  j = nys-1, nyn+1
[1113]577             !$acc loop vector( 32 )
[707]578             DO  k = nzb, nzt+1
579                p(k,j,i) = p(k,j,i) + tend(k,j,i) * &
580                           weight_substep(intermediate_timestep_count)
[673]581             ENDDO
582          ENDDO
[707]583       ENDDO
[1113]584       !$acc end kernels
[707]585       !$OMP END PARALLEL
586
587    ENDIF
[673]588       
[707]589!
590!-- SOR-method needs ghost layers for the next timestep
591    IF ( psolver == 'sor' )  CALL exchange_horiz( p, nbgp )
[682]592
[1]593!
594!-- Correction of the provisional velocities with the current perturbation
595!-- pressure just computed
[1111]596    !$acc update host( u, v, w )
[709]597    IF ( conserve_volume_flow  .AND.  ( bc_lr_cyc .OR. bc_ns_cyc ) )  THEN
[1]598       volume_flow_l(1) = 0.0
599       volume_flow_l(2) = 0.0
600    ENDIF
[707]601
[1]602    !$OMP PARALLEL PRIVATE (i,j,k)
603    !$OMP DO
[1113]604    !$acc kernels present( ddzu, nzb_u_inner, nzb_v_inner, nzb_w_inner, tend, u, v, w, weight_pres )
605    !$acc loop
[673]606    DO  i = nxl, nxr   
[1]607       DO  j = nys, nyn
[1113]608          !$acc loop vector( 32 )
609          DO  k = 1, nzt
610             IF ( k > nzb_w_inner(j,i) )  THEN
611                w(k,j,i) = w(k,j,i) - dt_3d *                                 &
612                           ( tend(k+1,j,i) - tend(k,j,i) ) * ddzu(k+1) *      &
613                           weight_pres(intermediate_timestep_count)
614             ENDIF
[1]615          ENDDO
[1113]616          !$acc loop vector( 32 )
617          DO  k = 1, nzt
618             IF ( k > nzb_u_inner(j,i) )  THEN
619                u(k,j,i) = u(k,j,i) - dt_3d *                                 &
620                           ( tend(k,j,i) - tend(k,j,i-1) ) * ddx *            &
621                           weight_pres(intermediate_timestep_count)
622             ENDIF
[1]623          ENDDO
[1113]624          !$acc loop vector( 32 )
625          DO  k = 1, nzt
626             IF ( k > nzb_v_inner(j,i) )  THEN
627                v(k,j,i) = v(k,j,i) - dt_3d *                                 &
628                           ( tend(k,j,i) - tend(k,j-1,i) ) * ddy *            &
629                           weight_pres(intermediate_timestep_count)
630             ENDIF
[673]631          ENDDO                                                         
[1]632
633       ENDDO
634    ENDDO
[1113]635    !$acc end kernels
[1]636    !$OMP END PARALLEL
[1113]637
638!
639!-- Sum up the volume flow through the right and north boundaries
640    IF ( conserve_volume_flow  .AND.  bc_lr_cyc  .AND.  bc_ns_cyc  .AND.  &
641         nxr == nx )  THEN
642
643       !$OMP PARALLEL PRIVATE (j,k)
644       !$OMP DO
645       DO  j = nys, nyn
646          !$OMP CRITICAL
647          DO  k = nzb_2d(j,nx) + 1, nzt
648             volume_flow_l(1) = volume_flow_l(1) + u(k,j,nx) * dzw(k)
649          ENDDO
650          !$OMP END CRITICAL
651       ENDDO
652       !$OMP END PARALLEL
653
654    ENDIF
655
656    IF ( conserve_volume_flow  .AND.  bc_ns_cyc  .AND.  bc_lr_cyc  .AND.  &
657         nyn == ny )  THEN
658
659       !$OMP PARALLEL PRIVATE (i,k)
660       !$OMP DO
661       DO  i = nxl, nxr
662          !$OMP CRITICAL
663          DO  k = nzb_2d(ny,i) + 1, nzt
664             volume_flow_l(2) = volume_flow_l(2) + v(k,ny,i) * dzw(k)
665          ENDDO
666          !$OMP END CRITICAL
667       ENDDO
668       !$OMP END PARALLEL
669
670    ENDIF
[673]671   
[1]672!
673!-- Conserve the volume flow
[707]674    IF ( conserve_volume_flow  .AND.  ( bc_lr_cyc  .AND.  bc_ns_cyc ) )  THEN
[1]675
676#if defined( __parallel )   
[622]677       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
[1]678       CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 2, MPI_REAL, &
679                           MPI_SUM, comm2d, ierr ) 
680#else
681       volume_flow = volume_flow_l 
682#endif   
683
684       volume_flow_offset = ( volume_flow_initial - volume_flow ) / &
685                            volume_flow_area
686
687       !$OMP PARALLEL PRIVATE (i,j,k)
688       !$OMP DO
689       DO  i = nxl, nxr
690          DO  j = nys, nyn
[667]691             DO  k = nzb_u_inner(j,i) + 1, nzt
692                u(k,j,i) = u(k,j,i) + volume_flow_offset(1)
[719]693             ENDDO
694             DO k = nzb_v_inner(j,i) + 1, nzt
[667]695                v(k,j,i) = v(k,j,i) + volume_flow_offset(2)
696             ENDDO
[1]697          ENDDO
698       ENDDO
[667]699
[1]700       !$OMP END PARALLEL
701
702    ENDIF
703
704!
705!-- Exchange of boundaries for the velocities
[1113]706    IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
707       on_device = .TRUE.         ! to be removed after complete porting
708    ELSE                          ! of ghost point exchange
709       !$acc update host( u, v, w )
710    ENDIF
[667]711    CALL exchange_horiz( u, nbgp )
712    CALL exchange_horiz( v, nbgp )
713    CALL exchange_horiz( w, nbgp )
[1113]714    IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
715       on_device = .FALSE.        ! to be removed after complete porting
716    ELSE                          ! of ghost point exchange
717       !$acc update device( u, v, w )
718    ENDIF
[1]719
720!
721!-- Compute the divergence of the corrected velocity field,
722!-- a possible PE-sum is computed in flow_statistics
723    CALL cpu_log( log_point_s(1), 'divergence', 'start' )
724    sums_divnew_l = 0.0
725
726!
727!-- d must be reset to zero because it can contain nonzero values below the
728!-- topography
729    IF ( topography /= 'flat' )  d = 0.0
730
731    localsum  = 0.0
732    threadsum = 0.0
733
734    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
735    !$OMP DO SCHEDULE( STATIC )
736#if defined( __ibm )
737    DO  i = nxl, nxr
738       DO  j = nys, nyn
739          DO  k = nzb_s_inner(j,i)+1, nzt
740             d(k,j,i) = ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
741                        ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
742                        ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k)
743          ENDDO
744          DO  k = nzb+1, nzt
745             threadsum = threadsum + ABS( d(k,j,i) )
746          ENDDO
747       ENDDO
748    ENDDO
749#else
[1113]750    !$acc kernels present( d, ddzw, nzb_s_inner, u, v, w )
751    !$acc loop
[1]752    DO  i = nxl, nxr
753       DO  j = nys, nyn
[1113]754          !$acc loop vector( 32 )
755          DO  k = 1, nzt
756             IF ( k > nzb_s_inner(j,i) )  THEN
757                d(k,j,i) = ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
758                           ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
759                           ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k)
760             ENDIF
761          ENDDO
762       ENDDO
763    ENDDO
764    !$acc end kernels
765!
766!-- Compute possible PE-sum of divergences for flow_statistics
767    DO  i = nxl, nxr
768       DO  j = nys, nyn
[1]769          DO  k = nzb_s_inner(j,i)+1, nzt
770             threadsum = threadsum + ABS( d(k,j,i) )
771          ENDDO
772       ENDDO
773    ENDDO
774#endif
[667]775
[1]776    localsum = localsum + threadsum
777    !$OMP END PARALLEL
778
779!
780!-- For completeness, set the divergence sum of all statistic regions to that
781!-- of the total domain
782    sums_divnew_l(0:statistic_regions) = localsum
783
784    CALL cpu_log( log_point_s(1), 'divergence', 'stop' )
785
786    CALL cpu_log( log_point(8), 'pres', 'stop' )
787
788
789 END SUBROUTINE pres