source: palm/trunk/SOURCE/pres.f90 @ 1307

Last change on this file since 1307 was 1307, checked in by raasch, 10 years ago

last commit documented

  • Property svn:keywords set to Id
File size: 24.3 KB
RevLine 
[1]1 SUBROUTINE pres
2
[1036]3!--------------------------------------------------------------------------------!
4! This file is part of PALM.
5!
6! PALM is free software: you can redistribute it and/or modify it under the terms
7! of the GNU General Public License as published by the Free Software Foundation,
8! either version 3 of the License, or (at your option) any later version.
9!
10! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12! A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
13!
14! You should have received a copy of the GNU General Public License along with
15! PALM. If not, see <http://www.gnu.org/licenses/>.
16!
17! Copyright 1997-2012  Leibniz University Hannover
18!--------------------------------------------------------------------------------!
19!
[484]20! Current revisions:
[1212]21! ------------------
[708]22!
[1307]23!
[708]24! Former revisions:
25! -----------------
26! $Id: pres.f90 1307 2014-03-13 14:39:47Z raasch $
27!
[1307]28! 1306 2014-03-13 14:30:59Z raasch
29! second argument removed from routine poisfft
30!
[1258]31! 1257 2013-11-08 15:18:40Z raasch
32! openacc loop and loop vector clauses removed, independent clauses added,
33! end parallel replaced by end parallel loop
34!
[1222]35! 1221 2013-09-10 08:59:13Z raasch
36! openACC porting of reduction operations, loops for calculating d are
37! using the rflags_s_inner multiply flag instead of the nzb_s_inner loop index
38!
[1213]39! 1212 2013-08-15 08:46:27Z raasch
40! call of poisfft_hybrid removed
41!
[1118]42! 1117 2013-03-27 11:15:36Z suehring
43! Bugfix in OpenMP parallelization.
44!
[1114]45! 1113 2013-03-10 02:48:14Z raasch
46! GPU-porting of several loops, some loops rearranged
47!
[1112]48! 1111 2013-03-08 23:54:10Z
49! openACC statements added,
50! ibc_p_b = 2 removed
51!
[1093]52! 1092 2013-02-02 11:24:22Z raasch
53! unused variables removed
54!
[1037]55! 1036 2012-10-22 13:43:42Z raasch
56! code put under GPL (PALM 3.9)
57!
[1004]58! 1003 2012-09-14 14:35:53Z raasch
59! adjustment of array tend for cases with unequal subdomain sizes removed
60!
[779]61! 778 2011-11-07 14:18:25Z fricke
62! New allocation of tend when multigrid is used and the collected field on PE0
63! has more grid points than the subdomain of an PE.
64!
[720]65! 719 2011-04-06 13:05:23Z gryschka
66! Bugfix in volume flow control for double cyclic boundary conditions
67!
[710]68! 709 2011-03-30 09:31:40Z raasch
69! formatting adjustments
70!
[708]71! 707 2011-03-29 11:39:40Z raasch
[707]72! Calculation of weighted average of p is now handled in the same way
73! regardless of the number of ghost layers (advection scheme),
74! multigrid and sor method are using p_loc for iterative advancements of
75! pressure,
76! localsum calculation modified for proper OpenMP reduction,
77! bc_lr/ns replaced by bc_lr/ns_cyc
[674]78!
[694]79! 693 2011-03-08 09:..:..Z raasch
[695]80! bugfix: weighting coefficient added to ibm branch
[694]81!
82! 680 2011-02-04 23:16:06Z gryschka
[681]83! bugfix: collective_wait
[668]84!
[676]85! 675 2011-01-19 10:56:55Z suehring
86! Removed bugfix while copying tend.
87!
[674]88! 673 2011-01-18 16:19:48Z suehring
89! Weighting coefficients added for right computation of the pressure during
90! Runge-Kutta substeps.
91!
[668]92! 667 2010-12-23 12:06:00Z suehring/gryschka
[667]93! New allocation of tend when ws-scheme and multigrid is used. This is due to
94! reasons of perforance of the data_exchange. The same is done with p after
95! poismg is called.
96! nxl-1, nxr+1, nys-1, nyn+1 replaced by nxlg, nxrg, nysg, nyng when no
97! multigrid is used. Calls of exchange_horiz are modified.
98! bugfix: After pressure correction no volume flow correction in case of
99! non-cyclic boundary conditions
100! (has to be done only before pressure correction)
101! Call of SOR routine is referenced with ddzu_pres.
102!
[623]103! 622 2010-12-10 08:08:13Z raasch
104! optional barriers included in order to speed up collective operations
105!
[198]106! 151 2008-03-07 13:42:18Z raasch
107! Bugfix in volume flow control for non-cyclic boundary conditions
108!
[110]109! 106 2007-08-16 14:30:26Z raasch
110! Volume flow conservation added for the remaining three outflow boundaries
111!
[90]112! 85 2007-05-11 09:35:14Z raasch
113! Division through dt_3d replaced by multiplication of the inverse.
114! For performance optimisation, this is done in the loop calculating the
115! divergence instead of using a seperate loop.
116!
[77]117! 75 2007-03-22 09:54:05Z raasch
[75]118! Volume flow control for non-cyclic boundary conditions added (currently only
[76]119! for the north boundary!!), 2nd+3rd argument removed from exchange horiz,
120! mean vertical velocity is removed in case of Neumann boundary conditions
121! both at the bottom and the top
[1]122!
[3]123! RCS Log replace by Id keyword, revision history cleaned up
124!
[1]125! Revision 1.25  2006/04/26 13:26:12  raasch
126! OpenMP optimization (+localsum, threadsum)
127!
128! Revision 1.1  1997/07/24 11:24:44  raasch
129! Initial revision
130!
131!
132! Description:
133! ------------
134! Compute the divergence of the provisional velocity field. Solve the Poisson
135! equation for the perturbation pressure. Compute the final velocities using
136! this perturbation pressure. Compute the remaining divergence.
137!------------------------------------------------------------------------------!
138
139    USE arrays_3d
140    USE constants
141    USE control_parameters
142    USE cpulog
143    USE grid_variables
144    USE indices
145    USE interfaces
146    USE pegrid
147    USE poisfft_mod
148    USE statistics
149
150    IMPLICIT NONE
151
[1092]152    INTEGER ::  i, j, k
[1]153
[673]154    REAL    ::  ddt_3d, localsum, threadsum, d_weight_pres
[1]155
156    REAL, DIMENSION(1:2) ::  volume_flow_l, volume_flow_offset
[76]157    REAL, DIMENSION(1:nzt) ::  w_l, w_l_l
[1]158
159
160    CALL cpu_log( log_point(8), 'pres', 'start' )
161
[85]162
163    ddt_3d = 1.0 / dt_3d
[709]164    d_weight_pres = 1.0 / weight_pres(intermediate_timestep_count)
[85]165
[1]166!
[707]167!-- Multigrid method expects array d to have one ghost layer.
168!--
[1]169    IF ( psolver == 'multigrid' )  THEN
[667]170     
[1]171       DEALLOCATE( d )
[667]172       ALLOCATE( d(nzb:nzt+1,nys-1:nyn+1,nxl-1:nxr+1) ) 
[707]173
174!
175!--    Since p is later used to hold the weighted average of the substeps, it
176!--    cannot be used in the iterative solver. Therefore, its initial value is
177!--    stored on p_loc, which is then iteratively advanced in every substep.
178       IF ( intermediate_timestep_count == 1 )  THEN
179          DO  i = nxl-1, nxr+1
180             DO  j = nys-1, nyn+1
181                DO  k = nzb, nzt+1
182                   p_loc(k,j,i) = p(k,j,i)
183                ENDDO
184             ENDDO
185          ENDDO
[667]186       ENDIF
187       
[707]188    ELSEIF ( psolver == 'sor'  .AND.  intermediate_timestep_count == 1 )  THEN
189
190!
191!--    Since p is later used to hold the weighted average of the substeps, it
192!--    cannot be used in the iterative solver. Therefore, its initial value is
193!--    stored on p_loc, which is then iteratively advanced in every substep.
194       p_loc = p
195
[1]196    ENDIF
197
198!
[75]199!-- Conserve the volume flow at the outflow in case of non-cyclic lateral
200!-- boundary conditions
[106]201!-- WARNING: so far, this conservation does not work at the left/south
202!--          boundary if the topography at the inflow differs from that at the
203!--          outflow! For this case, volume_flow_area needs adjustment!
204!
205!-- Left/right
[709]206    IF ( conserve_volume_flow  .AND.  ( outflow_l .OR. outflow_r ) )  THEN
[680]207
[106]208       volume_flow(1)   = 0.0
209       volume_flow_l(1) = 0.0
210
211       IF ( outflow_l )  THEN
212          i = 0
213       ELSEIF ( outflow_r )  THEN
214          i = nx+1
215       ENDIF
216
217       DO  j = nys, nyn
218!
219!--       Sum up the volume flow through the south/north boundary
[709]220          DO  k = nzb_2d(j,i)+1, nzt
[667]221             volume_flow_l(1) = volume_flow_l(1) + u(k,j,i) * dzw(k)
[106]222          ENDDO
223       ENDDO
224
225#if defined( __parallel )   
[680]226       IF ( collective_wait )  CALL MPI_BARRIER( comm1dy, ierr )
[106]227       CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 1, MPI_REAL, &
228                           MPI_SUM, comm1dy, ierr )   
229#else
230       volume_flow = volume_flow_l 
231#endif
[709]232       volume_flow_offset(1) = ( volume_flow_initial(1) - volume_flow(1) ) &
[106]233                               / volume_flow_area(1)
234
[667]235       DO  j = nysg, nyng
[709]236          DO  k = nzb_2d(j,i)+1, nzt
[106]237             u(k,j,i) = u(k,j,i) + volume_flow_offset(1)
238          ENDDO
239       ENDDO
240
241    ENDIF
242
243!
244!-- South/north
[709]245    IF ( conserve_volume_flow  .AND.  ( outflow_n .OR. outflow_s ) )  THEN
[106]246
[75]247       volume_flow(2)   = 0.0
248       volume_flow_l(2) = 0.0
249
[106]250       IF ( outflow_s )  THEN
251          j = 0
252       ELSEIF ( outflow_n )  THEN
[75]253          j = ny+1
[106]254       ENDIF
255
256       DO  i = nxl, nxr
[75]257!
[106]258!--       Sum up the volume flow through the south/north boundary
[709]259          DO  k = nzb_2d(j,i)+1, nzt
[667]260             volume_flow_l(2) = volume_flow_l(2) + v(k,j,i) * dzw(k)
[75]261          ENDDO
[106]262       ENDDO
263
[75]264#if defined( __parallel )   
[680]265       IF ( collective_wait )  CALL MPI_BARRIER( comm1dx, ierr )
[75]266       CALL MPI_ALLREDUCE( volume_flow_l(2), volume_flow(2), 1, MPI_REAL, &
267                           MPI_SUM, comm1dx, ierr )   
268#else
269       volume_flow = volume_flow_l 
270#endif
271       volume_flow_offset(2) = ( volume_flow_initial(2) - volume_flow(2) )    &
[106]272                               / volume_flow_area(2)
[75]273
[667]274       DO  i = nxlg, nxrg
[709]275          DO  k = nzb_v_inner(j,i)+1, nzt
[106]276             v(k,j,i) = v(k,j,i) + volume_flow_offset(2)
[75]277          ENDDO
[106]278       ENDDO
[75]279
280    ENDIF
281
[76]282!
283!-- Remove mean vertical velocity
284    IF ( ibc_p_b == 1  .AND.  ibc_p_t == 1 )  THEN
[709]285       IF ( simulated_time > 0.0 )  THEN ! otherwise nzb_w_inner not yet known
[76]286          w_l = 0.0;  w_l_l = 0.0
287          DO  i = nxl, nxr
288             DO  j = nys, nyn
289                DO  k = nzb_w_inner(j,i)+1, nzt
290                   w_l_l(k) = w_l_l(k) + w(k,j,i)
291                ENDDO
292             ENDDO
293          ENDDO
294#if defined( __parallel )   
[622]295          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
[709]296          CALL MPI_ALLREDUCE( w_l_l(1), w_l(1), nzt, MPI_REAL, MPI_SUM, &
297                              comm2d, ierr )
[76]298#else
299          w_l = w_l_l 
300#endif
301          DO  k = 1, nzt
302             w_l(k) = w_l(k) / ngp_2dh_outer(k,0)
303          ENDDO
[667]304          DO  i = nxlg, nxrg
305             DO  j = nysg, nyng
[76]306                DO  k = nzb_w_inner(j,i)+1, nzt
307                   w(k,j,i) = w(k,j,i) - w_l(k)
308                ENDDO
309             ENDDO
310          ENDDO
311       ENDIF
312    ENDIF
[75]313
314!
[1]315!-- Compute the divergence of the provisional velocity field.
316    CALL cpu_log( log_point_s(1), 'divergence', 'start' )
317
318    IF ( psolver == 'multigrid' )  THEN
319       !$OMP PARALLEL DO SCHEDULE( STATIC )
320       DO  i = nxl-1, nxr+1
321          DO  j = nys-1, nyn+1
322             DO  k = nzb, nzt+1
323                d(k,j,i) = 0.0
324             ENDDO
325          ENDDO
326       ENDDO
327    ELSE
328       !$OMP PARALLEL DO SCHEDULE( STATIC )
[1111]329       !$acc kernels present( d )
[1003]330       DO  i = nxl, nxr
331          DO  j = nys, nyn
332             DO  k = nzb+1, nzt
[1]333                d(k,j,i) = 0.0
334             ENDDO
335          ENDDO
336       ENDDO
[1111]337       !$acc end kernels
[1]338    ENDIF
339
340    localsum  = 0.0
341    threadsum = 0.0
342
343#if defined( __ibm )
344    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
345    !$OMP DO SCHEDULE( STATIC )
346    DO  i = nxl, nxr
347       DO  j = nys, nyn
348          DO  k = nzb_s_inner(j,i)+1, nzt
[85]349             d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
350                          ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
[673]351                          ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) ) * ddt_3d      &
352                                                                * d_weight_pres 
[1]353          ENDDO
354!
355!--       Compute possible PE-sum of divergences for flow_statistics
356          DO  k = nzb_s_inner(j,i)+1, nzt
357             threadsum = threadsum + ABS( d(k,j,i) )
358          ENDDO
359
360       ENDDO
361    ENDDO
362
[707]363    localsum = localsum + threadsum * dt_3d * &
364                          weight_pres(intermediate_timestep_count)
[693]365
[1]366    !$OMP END PARALLEL
367#else
368
[1111]369    !$OMP PARALLEL PRIVATE (i,j,k)
370    !$OMP DO SCHEDULE( STATIC )
[1221]371    !$acc kernels present( d, ddzw, rflags_s_inner, u, v, w )
372    !$acc loop collapse( 3 )
[1111]373    DO  i = nxl, nxr
374       DO  j = nys, nyn
375          DO  k = 1, nzt
[1221]376             d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx +             &
377                        ( v(k,j+1,i) - v(k,j,i) ) * ddy +               &
378                        ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) ) * ddt_3d  &
379                        * d_weight_pres * rflags_s_inner(k,j,i)
[1]380          ENDDO
381       ENDDO
[1111]382    ENDDO
383    !$acc end kernels
384    !$OMP END PARALLEL
[1]385
386!
387!-- Compute possible PE-sum of divergences for flow_statistics
388    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
389    !$OMP DO SCHEDULE( STATIC )
[1221]390    !$acc parallel loop collapse(3) present( d ) reduction(+:threadsum)
[1]391    DO  i = nxl, nxr
392       DO  j = nys, nyn
393          DO  k = nzb+1, nzt
394             threadsum = threadsum + ABS( d(k,j,i) )
395          ENDDO
396       ENDDO
397    ENDDO
[1257]398    !$acc end parallel loop
[707]399    localsum = localsum + threadsum * dt_3d * &
400                          weight_pres(intermediate_timestep_count)
[1]401    !$OMP END PARALLEL
402#endif
403
404!
405!-- For completeness, set the divergence sum of all statistic regions to those
406!-- of the total domain
407    sums_divold_l(0:statistic_regions) = localsum
408
409    CALL cpu_log( log_point_s(1), 'divergence', 'stop' )
410
411!
412!-- Compute the pressure perturbation solving the Poisson equation
413    IF ( psolver(1:7) == 'poisfft' )  THEN
414
415!
416!--    Solve Poisson equation via FFT and solution of tridiagonal matrices
417       IF ( psolver == 'poisfft' )  THEN
[1212]418
[1306]419          CALL poisfft( d )
[1113]420
[1]421       ENDIF
422
423!
424!--    Store computed perturbation pressure and set boundary condition in
425!--    z-direction
426       !$OMP PARALLEL DO
[1113]427       !$acc kernels present( d, tend )
[1]428       DO  i = nxl, nxr
429          DO  j = nys, nyn
430             DO  k = nzb+1, nzt
431                tend(k,j,i) = d(k,j,i)
432             ENDDO
433          ENDDO
434       ENDDO
[1113]435       !$acc end kernels
[1]436
437!
438!--    Bottom boundary:
439!--    This condition is only required for internal output. The pressure
440!--    gradient (dp(nzb+1)-dp(nzb))/dz is not used anywhere else.
441       IF ( ibc_p_b == 1 )  THEN
442!
443!--       Neumann (dp/dz = 0)
444          !$OMP PARALLEL DO
[1113]445          !$acc kernels present( nzb_s_inner, tend )
[667]446          DO  i = nxlg, nxrg
447             DO  j = nysg, nyng
[1]448                tend(nzb_s_inner(j,i),j,i) = tend(nzb_s_inner(j,i)+1,j,i)
449             ENDDO
450          ENDDO
[1113]451          !$acc end kernels
[1]452
453       ELSE
454!
455!--       Dirichlet
456          !$OMP PARALLEL DO
[1113]457          !$acc kernels present( tend )
[667]458          DO  i = nxlg, nxrg
459             DO  j = nysg, nyng
[1]460                tend(nzb_s_inner(j,i),j,i) = 0.0
461             ENDDO
462          ENDDO
[1113]463          !$acc end kernels
[1]464
465       ENDIF
466
467!
468!--    Top boundary
469       IF ( ibc_p_t == 1 )  THEN
470!
471!--       Neumann
472          !$OMP PARALLEL DO
[1113]473          !$acc kernels present( tend )
[667]474          DO  i = nxlg, nxrg
475             DO  j = nysg, nyng
[1]476                tend(nzt+1,j,i) = tend(nzt,j,i)
477             ENDDO
478          ENDDO
[1113]479          !$acc end kernels
[1]480
481       ELSE
482!
483!--       Dirichlet
484          !$OMP PARALLEL DO
[1113]485          !$acc kernels present( tend )
[667]486          DO  i = nxlg, nxrg
487             DO  j = nysg, nyng
[1]488                tend(nzt+1,j,i) = 0.0
489             ENDDO
490          ENDDO
[1113]491          !$acc end kernels
[1]492
493       ENDIF
494
495!
496!--    Exchange boundaries for p
[1113]497       IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
498          on_device = .TRUE.         ! to be removed after complete porting
499       ELSE                          ! of ghost point exchange
500          !$acc update host( tend )
501       ENDIF
[667]502       CALL exchange_horiz( tend, nbgp )
[1113]503       IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
504          on_device = .FALSE.        ! to be removed after complete porting
505       ELSE                          ! of ghost point exchange
506          !$acc update device( tend )
507       ENDIF
[1]508     
509    ELSEIF ( psolver == 'sor' )  THEN
510
511!
512!--    Solve Poisson equation for perturbation pressure using SOR-Red/Black
513!--    scheme
[707]514       CALL sor( d, ddzu_pres, ddzw, p_loc )
515       tend = p_loc
[1]516
517    ELSEIF ( psolver == 'multigrid' )  THEN
518
519!
520!--    Solve Poisson equation for perturbation pressure using Multigrid scheme,
[667]521!--    array tend is used to store the residuals, logical exchange_mg is used
522!--    to discern data exchange in multigrid ( 1 ghostpoint ) and normal grid
523!--    ( nbgp ghost points ).
[778]524
525!--    If the number of grid points of the gathered grid, which is collected
526!--    on PE0, is larger than the number of grid points of an PE, than array
527!--    tend will be enlarged.
528       IF ( gathered_size > subdomain_size )  THEN
529          DEALLOCATE( tend )
530          ALLOCATE( tend(nzb:nzt_mg(mg_switch_to_pe0_level)+1,nys_mg(          &
531                    mg_switch_to_pe0_level)-1:nyn_mg(mg_switch_to_pe0_level)+1,&
532                    nxl_mg(mg_switch_to_pe0_level)-1:nxr_mg(                   &
533                    mg_switch_to_pe0_level)+1) )
534       ENDIF
535
[1]536       CALL poismg( tend )
[707]537
[778]538       IF ( gathered_size > subdomain_size )  THEN
539          DEALLOCATE( tend )
540          ALLOCATE( tend(nzb:nzt+1,nysg:nyng,nxlg:nxrg) )
541       ENDIF
542
[1]543!
544!--    Restore perturbation pressure on tend because this array is used
545!--    further below to correct the velocity fields
[707]546       DO  i = nxl-1, nxr+1
547          DO  j = nys-1, nyn+1
548             DO  k = nzb, nzt+1
549                tend(k,j,i) = p_loc(k,j,i)
550             ENDDO
551          ENDDO
552       ENDDO
[667]553
[1]554    ENDIF
555
556!
[707]557!-- Store perturbation pressure on array p, used for pressure data output.
558!-- Ghost layers are added in the output routines (except sor-method: see below)
559    IF ( intermediate_timestep_count == 1 )  THEN
560       !$OMP PARALLEL PRIVATE (i,j,k)
561       !$OMP DO
[1113]562       !$acc kernels present( p, tend, weight_substep )
[1257]563       !$acc loop independent
[707]564       DO  i = nxl-1, nxr+1
[1257]565          !$acc loop independent
[707]566          DO  j = nys-1, nyn+1
[1257]567             !$acc loop independent
[707]568             DO  k = nzb, nzt+1
569                p(k,j,i) = tend(k,j,i) * &
570                           weight_substep(intermediate_timestep_count)
[673]571             ENDDO
[1]572          ENDDO
[707]573       ENDDO
[1113]574       !$acc end kernels
[707]575       !$OMP END PARALLEL
576
577    ELSE 
578       !$OMP PARALLEL PRIVATE (i,j,k)
579       !$OMP DO
[1113]580       !$acc kernels present( p, tend, weight_substep )
[1257]581       !$acc loop independent
[707]582       DO  i = nxl-1, nxr+1
[1257]583          !$acc loop independent
[707]584          DO  j = nys-1, nyn+1
[1257]585             !$acc loop independent
[707]586             DO  k = nzb, nzt+1
587                p(k,j,i) = p(k,j,i) + tend(k,j,i) * &
588                           weight_substep(intermediate_timestep_count)
[673]589             ENDDO
590          ENDDO
[707]591       ENDDO
[1113]592       !$acc end kernels
[707]593       !$OMP END PARALLEL
594
595    ENDIF
[673]596       
[707]597!
598!-- SOR-method needs ghost layers for the next timestep
599    IF ( psolver == 'sor' )  CALL exchange_horiz( p, nbgp )
[682]600
[1]601!
602!-- Correction of the provisional velocities with the current perturbation
603!-- pressure just computed
[709]604    IF ( conserve_volume_flow  .AND.  ( bc_lr_cyc .OR. bc_ns_cyc ) )  THEN
[1]605       volume_flow_l(1) = 0.0
606       volume_flow_l(2) = 0.0
607    ENDIF
[707]608
[1]609    !$OMP PARALLEL PRIVATE (i,j,k)
610    !$OMP DO
[1113]611    !$acc kernels present( ddzu, nzb_u_inner, nzb_v_inner, nzb_w_inner, tend, u, v, w, weight_pres )
[1257]612    !$acc loop independent
[673]613    DO  i = nxl, nxr   
[1257]614       !$acc loop independent
[1]615       DO  j = nys, nyn
[1257]616          !$acc loop independent
[1113]617          DO  k = 1, nzt
618             IF ( k > nzb_w_inner(j,i) )  THEN
619                w(k,j,i) = w(k,j,i) - dt_3d *                                 &
620                           ( tend(k+1,j,i) - tend(k,j,i) ) * ddzu(k+1) *      &
621                           weight_pres(intermediate_timestep_count)
622             ENDIF
[1]623          ENDDO
[1257]624          !$acc loop independent
[1113]625          DO  k = 1, nzt
626             IF ( k > nzb_u_inner(j,i) )  THEN
627                u(k,j,i) = u(k,j,i) - dt_3d *                                 &
628                           ( tend(k,j,i) - tend(k,j,i-1) ) * ddx *            &
629                           weight_pres(intermediate_timestep_count)
630             ENDIF
[1]631          ENDDO
[1257]632          !$acc loop independent
[1113]633          DO  k = 1, nzt
634             IF ( k > nzb_v_inner(j,i) )  THEN
635                v(k,j,i) = v(k,j,i) - dt_3d *                                 &
636                           ( tend(k,j,i) - tend(k,j-1,i) ) * ddy *            &
637                           weight_pres(intermediate_timestep_count)
638             ENDIF
[673]639          ENDDO                                                         
[1]640
641       ENDDO
642    ENDDO
[1113]643    !$acc end kernels
[1]644    !$OMP END PARALLEL
[1113]645
646!
647!-- Sum up the volume flow through the right and north boundary
648    IF ( conserve_volume_flow  .AND.  bc_lr_cyc  .AND.  bc_ns_cyc  .AND.  &
649         nxr == nx )  THEN
650
651       !$OMP PARALLEL PRIVATE (j,k)
652       !$OMP DO
653       DO  j = nys, nyn
654          !$OMP CRITICAL
655          DO  k = nzb_2d(j,nx) + 1, nzt
656             volume_flow_l(1) = volume_flow_l(1) + u(k,j,nx) * dzw(k)
657          ENDDO
658          !$OMP END CRITICAL
659       ENDDO
660       !$OMP END PARALLEL
661
662    ENDIF
663
664    IF ( conserve_volume_flow  .AND.  bc_ns_cyc  .AND.  bc_lr_cyc  .AND.  &
665         nyn == ny )  THEN
666
667       !$OMP PARALLEL PRIVATE (i,k)
668       !$OMP DO
669       DO  i = nxl, nxr
670          !$OMP CRITICAL
671          DO  k = nzb_2d(ny,i) + 1, nzt
672             volume_flow_l(2) = volume_flow_l(2) + v(k,ny,i) * dzw(k)
673           ENDDO
674          !$OMP END CRITICAL
675       ENDDO
676       !$OMP END PARALLEL
677
678    ENDIF
[673]679   
[1]680!
681!-- Conserve the volume flow
[707]682    IF ( conserve_volume_flow  .AND.  ( bc_lr_cyc  .AND.  bc_ns_cyc ) )  THEN
[1]683
684#if defined( __parallel )   
[622]685       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
[1]686       CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 2, MPI_REAL, &
687                           MPI_SUM, comm2d, ierr ) 
688#else
689       volume_flow = volume_flow_l 
690#endif   
691
692       volume_flow_offset = ( volume_flow_initial - volume_flow ) / &
693                            volume_flow_area
694
695       !$OMP PARALLEL PRIVATE (i,j,k)
696       !$OMP DO
697       DO  i = nxl, nxr
698          DO  j = nys, nyn
[667]699             DO  k = nzb_u_inner(j,i) + 1, nzt
700                u(k,j,i) = u(k,j,i) + volume_flow_offset(1)
[719]701             ENDDO
702             DO k = nzb_v_inner(j,i) + 1, nzt
[667]703                v(k,j,i) = v(k,j,i) + volume_flow_offset(2)
704             ENDDO
[1]705          ENDDO
706       ENDDO
[667]707
[1]708       !$OMP END PARALLEL
709
710    ENDIF
711
712!
713!-- Exchange of boundaries for the velocities
[1113]714    IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
715       on_device = .TRUE.         ! to be removed after complete porting
716    ELSE                          ! of ghost point exchange
717       !$acc update host( u, v, w )
718    ENDIF
[667]719    CALL exchange_horiz( u, nbgp )
720    CALL exchange_horiz( v, nbgp )
721    CALL exchange_horiz( w, nbgp )
[1113]722    IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
723       on_device = .FALSE.        ! to be removed after complete porting
724    ELSE                          ! of ghost point exchange
725       !$acc update device( u, v, w )
726    ENDIF
[1]727
728!
729!-- Compute the divergence of the corrected velocity field,
730!-- a possible PE-sum is computed in flow_statistics
731    CALL cpu_log( log_point_s(1), 'divergence', 'start' )
732    sums_divnew_l = 0.0
733
734!
735!-- d must be reset to zero because it can contain nonzero values below the
736!-- topography
737    IF ( topography /= 'flat' )  d = 0.0
738
739    localsum  = 0.0
740    threadsum = 0.0
741
742    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
743    !$OMP DO SCHEDULE( STATIC )
744#if defined( __ibm )
745    DO  i = nxl, nxr
746       DO  j = nys, nyn
747          DO  k = nzb_s_inner(j,i)+1, nzt
748             d(k,j,i) = ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
749                        ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
750                        ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k)
751          ENDDO
752          DO  k = nzb+1, nzt
753             threadsum = threadsum + ABS( d(k,j,i) )
754          ENDDO
755       ENDDO
756    ENDDO
757#else
[1221]758    !$acc kernels present( d, ddzw, rflags_s_inner, u, v, w )
759    !$acc loop collapse( 3 )
[1]760    DO  i = nxl, nxr
761       DO  j = nys, nyn
[1113]762          DO  k = 1, nzt
[1221]763                d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx +   &
764                             ( v(k,j+1,i) - v(k,j,i) ) * ddy +   &
765                             ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) &
766                           ) * rflags_s_inner(k,j,i)
[1113]767          ENDDO
768       ENDDO
769    ENDDO
770    !$acc end kernels
771!
772!-- Compute possible PE-sum of divergences for flow_statistics
[1221]773    !$acc parallel loop collapse(3) present( d ) reduction(+:threadsum)
[1113]774    DO  i = nxl, nxr
775       DO  j = nys, nyn
[1221]776          DO  k = nzb+1, nzt
[1]777             threadsum = threadsum + ABS( d(k,j,i) )
778          ENDDO
779       ENDDO
780    ENDDO
[1257]781    !$acc end parallel loop
[1]782#endif
[667]783
[1]784    localsum = localsum + threadsum
785    !$OMP END PARALLEL
786
787!
788!-- For completeness, set the divergence sum of all statistic regions to those
789!-- of the total domain
790    sums_divnew_l(0:statistic_regions) = localsum
791
792    CALL cpu_log( log_point_s(1), 'divergence', 'stop' )
793
794    CALL cpu_log( log_point(8), 'pres', 'stop' )
795
796
797 END SUBROUTINE pres
Note: See TracBrowser for help on using the repository browser.