source: palm/trunk/SOURCE/pres.f90 @ 1117

Last change on this file since 1117 was 1117, checked in by suehring, 11 years ago

Bugfix in OpenMP parallelization.

  • Property svn:keywords set to Id
File size: 24.0 KB
RevLine 
[1]1 SUBROUTINE pres
2
[1036]3!--------------------------------------------------------------------------------!
4! This file is part of PALM.
5!
6! PALM is free software: you can redistribute it and/or modify it under the terms
7! of the GNU General Public License as published by the Free Software Foundation,
8! either version 3 of the License, or (at your option) any later version.
9!
10! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12! A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
13!
14! You should have received a copy of the GNU General Public License along with
15! PALM. If not, see <http://www.gnu.org/licenses/>.
16!
17! Copyright 1997-2012  Leibniz University Hannover
18!--------------------------------------------------------------------------------!
19!
[484]20! Current revisions:
[1]21! -----------------
[1117]22! Bugfix in OpenMP parallelization.
[708]23!
24! Former revisions:
25! -----------------
26! $Id: pres.f90 1117 2013-03-27 11:15:36Z suehring $
27!
[1114]28! 1113 2013-03-10 02:48:14Z raasch
29! GPU-porting of several loops, some loops rearranged
30!
[1112]31! 1111 2013-03-08 23:54:10Z
32! openACC statements added,
33! ibc_p_b = 2 removed
34!
[1093]35! 1092 2013-02-02 11:24:22Z raasch
36! unused variables removed
37!
[1037]38! 1036 2012-10-22 13:43:42Z raasch
39! code put under GPL (PALM 3.9)
40!
[1004]41! 1003 2012-09-14 14:35:53Z raasch
42! adjustment of array tend for cases with unequal subdomain sizes removed
43!
[779]44! 778 2011-11-07 14:18:25Z fricke
45! New allocation of tend when multigrid is used and the collected field on PE0
46! has more grid points than the subdomain of an PE.
47!
[720]48! 719 2011-04-06 13:05:23Z gryschka
49! Bugfix in volume flow control for double cyclic boundary conditions
50!
[710]51! 709 2011-03-30 09:31:40Z raasch
52! formatting adjustments
53!
[708]54! 707 2011-03-29 11:39:40Z raasch
[707]55! Calculation of weighted average of p is now handled in the same way
56! regardless of the number of ghost layers (advection scheme),
57! multigrid and sor method are using p_loc for iterative advancements of
58! pressure,
59! localsum calculation modified for proper OpenMP reduction,
60! bc_lr/ns replaced by bc_lr/ns_cyc
[674]61!
[694]62! 693 2011-03-08 09:..:..Z raasch
[695]63! bugfix: weighting coefficient added to ibm branch
[694]64!
65! 680 2011-02-04 23:16:06Z gryschka
[681]66! bugfix: collective_wait
[668]67!
[676]68! 675 2011-01-19 10:56:55Z suehring
69! Removed bugfix while copying tend.
70!
[674]71! 673 2011-01-18 16:19:48Z suehring
72! Weighting coefficients added for right computation of the pressure during
73! Runge-Kutta substeps.
74!
[668]75! 667 2010-12-23 12:06:00Z suehring/gryschka
[667]76! New allocation of tend when ws-scheme and multigrid is used. This is due to
77! reasons of perforance of the data_exchange. The same is done with p after
78! poismg is called.
79! nxl-1, nxr+1, nys-1, nyn+1 replaced by nxlg, nxrg, nysg, nyng when no
80! multigrid is used. Calls of exchange_horiz are modified.
81! bugfix: After pressure correction no volume flow correction in case of
82! non-cyclic boundary conditions
83! (has to be done only before pressure correction)
84! Call of SOR routine is referenced with ddzu_pres.
85!
[623]86! 622 2010-12-10 08:08:13Z raasch
87! optional barriers included in order to speed up collective operations
88!
[198]89! 151 2008-03-07 13:42:18Z raasch
90! Bugfix in volume flow control for non-cyclic boundary conditions
91!
[110]92! 106 2007-08-16 14:30:26Z raasch
93! Volume flow conservation added for the remaining three outflow boundaries
94!
[90]95! 85 2007-05-11 09:35:14Z raasch
96! Division through dt_3d replaced by multiplication of the inverse.
97! For performance optimisation, this is done in the loop calculating the
98! divergence instead of using a seperate loop.
99!
[77]100! 75 2007-03-22 09:54:05Z raasch
[75]101! Volume flow control for non-cyclic boundary conditions added (currently only
[76]102! for the north boundary!!), 2nd+3rd argument removed from exchange horiz,
103! mean vertical velocity is removed in case of Neumann boundary conditions
104! both at the bottom and the top
[1]105!
[3]106! RCS Log replace by Id keyword, revision history cleaned up
107!
[1]108! Revision 1.25  2006/04/26 13:26:12  raasch
109! OpenMP optimization (+localsum, threadsum)
110!
111! Revision 1.1  1997/07/24 11:24:44  raasch
112! Initial revision
113!
114!
115! Description:
116! ------------
117! Compute the divergence of the provisional velocity field. Solve the Poisson
118! equation for the perturbation pressure. Compute the final velocities using
119! this perturbation pressure. Compute the remaining divergence.
120!------------------------------------------------------------------------------!
121
122    USE arrays_3d
123    USE constants
124    USE control_parameters
125    USE cpulog
126    USE grid_variables
127    USE indices
128    USE interfaces
129    USE pegrid
130    USE poisfft_mod
131    USE poisfft_hybrid_mod
132    USE statistics
133
134    IMPLICIT NONE
135
[1092]136    INTEGER ::  i, j, k
[1]137
[673]138    REAL    ::  ddt_3d, localsum, threadsum, d_weight_pres
[1]139
140    REAL, DIMENSION(1:2) ::  volume_flow_l, volume_flow_offset
[76]141    REAL, DIMENSION(1:nzt) ::  w_l, w_l_l
[1]142
143
144    CALL cpu_log( log_point(8), 'pres', 'start' )
145
[85]146
147    ddt_3d = 1.0 / dt_3d
[709]148    d_weight_pres = 1.0 / weight_pres(intermediate_timestep_count)
[85]149
[1]150!
[707]151!-- Multigrid method expects array d to have one ghost layer.
152!--
[1]153    IF ( psolver == 'multigrid' )  THEN
[667]154     
[1]155       DEALLOCATE( d )
[667]156       ALLOCATE( d(nzb:nzt+1,nys-1:nyn+1,nxl-1:nxr+1) ) 
[707]157
158!
159!--    Since p is later used to hold the weighted average of the substeps, it
160!--    cannot be used in the iterative solver. Therefore, its initial value is
161!--    stored on p_loc, which is then iteratively advanced in every substep.
162       IF ( intermediate_timestep_count == 1 )  THEN
163          DO  i = nxl-1, nxr+1
164             DO  j = nys-1, nyn+1
165                DO  k = nzb, nzt+1
166                   p_loc(k,j,i) = p(k,j,i)
167                ENDDO
168             ENDDO
169          ENDDO
[667]170       ENDIF
171       
[707]172    ELSEIF ( psolver == 'sor'  .AND.  intermediate_timestep_count == 1 )  THEN
173
174!
175!--    Since p is later used to hold the weighted average of the substeps, it
176!--    cannot be used in the iterative solver. Therefore, its initial value is
177!--    stored on p_loc, which is then iteratively advanced in every substep.
178       p_loc = p
179
[1]180    ENDIF
181
182!
[75]183!-- Conserve the volume flow at the outflow in case of non-cyclic lateral
184!-- boundary conditions
[106]185!-- WARNING: so far, this conservation does not work at the left/south
186!--          boundary if the topography at the inflow differs from that at the
187!--          outflow! For this case, volume_flow_area needs adjustment!
188!
189!-- Left/right
[709]190    IF ( conserve_volume_flow  .AND.  ( outflow_l .OR. outflow_r ) )  THEN
[680]191
[106]192       volume_flow(1)   = 0.0
193       volume_flow_l(1) = 0.0
194
195       IF ( outflow_l )  THEN
196          i = 0
197       ELSEIF ( outflow_r )  THEN
198          i = nx+1
199       ENDIF
200
201       DO  j = nys, nyn
202!
203!--       Sum up the volume flow through the south/north boundary
[709]204          DO  k = nzb_2d(j,i)+1, nzt
[667]205             volume_flow_l(1) = volume_flow_l(1) + u(k,j,i) * dzw(k)
[106]206          ENDDO
207       ENDDO
208
209#if defined( __parallel )   
[680]210       IF ( collective_wait )  CALL MPI_BARRIER( comm1dy, ierr )
[106]211       CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 1, MPI_REAL, &
212                           MPI_SUM, comm1dy, ierr )   
213#else
214       volume_flow = volume_flow_l 
215#endif
[709]216       volume_flow_offset(1) = ( volume_flow_initial(1) - volume_flow(1) ) &
[106]217                               / volume_flow_area(1)
218
[667]219       DO  j = nysg, nyng
[709]220          DO  k = nzb_2d(j,i)+1, nzt
[106]221             u(k,j,i) = u(k,j,i) + volume_flow_offset(1)
222          ENDDO
223       ENDDO
224
225    ENDIF
226
227!
228!-- South/north
[709]229    IF ( conserve_volume_flow  .AND.  ( outflow_n .OR. outflow_s ) )  THEN
[106]230
[75]231       volume_flow(2)   = 0.0
232       volume_flow_l(2) = 0.0
233
[106]234       IF ( outflow_s )  THEN
235          j = 0
236       ELSEIF ( outflow_n )  THEN
[75]237          j = ny+1
[106]238       ENDIF
239
240       DO  i = nxl, nxr
[75]241!
[106]242!--       Sum up the volume flow through the south/north boundary
[709]243          DO  k = nzb_2d(j,i)+1, nzt
[667]244             volume_flow_l(2) = volume_flow_l(2) + v(k,j,i) * dzw(k)
[75]245          ENDDO
[106]246       ENDDO
247
[75]248#if defined( __parallel )   
[680]249       IF ( collective_wait )  CALL MPI_BARRIER( comm1dx, ierr )
[75]250       CALL MPI_ALLREDUCE( volume_flow_l(2), volume_flow(2), 1, MPI_REAL, &
251                           MPI_SUM, comm1dx, ierr )   
252#else
253       volume_flow = volume_flow_l 
254#endif
255       volume_flow_offset(2) = ( volume_flow_initial(2) - volume_flow(2) )    &
[106]256                               / volume_flow_area(2)
[75]257
[667]258       DO  i = nxlg, nxrg
[709]259          DO  k = nzb_v_inner(j,i)+1, nzt
[106]260             v(k,j,i) = v(k,j,i) + volume_flow_offset(2)
[75]261          ENDDO
[106]262       ENDDO
[75]263
264    ENDIF
265
[76]266!
267!-- Remove mean vertical velocity
268    IF ( ibc_p_b == 1  .AND.  ibc_p_t == 1 )  THEN
[709]269       IF ( simulated_time > 0.0 )  THEN ! otherwise nzb_w_inner not yet known
[76]270          w_l = 0.0;  w_l_l = 0.0
271          DO  i = nxl, nxr
272             DO  j = nys, nyn
273                DO  k = nzb_w_inner(j,i)+1, nzt
274                   w_l_l(k) = w_l_l(k) + w(k,j,i)
275                ENDDO
276             ENDDO
277          ENDDO
278#if defined( __parallel )   
[622]279          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
[709]280          CALL MPI_ALLREDUCE( w_l_l(1), w_l(1), nzt, MPI_REAL, MPI_SUM, &
281                              comm2d, ierr )
[76]282#else
283          w_l = w_l_l 
284#endif
285          DO  k = 1, nzt
286             w_l(k) = w_l(k) / ngp_2dh_outer(k,0)
287          ENDDO
[667]288          DO  i = nxlg, nxrg
289             DO  j = nysg, nyng
[76]290                DO  k = nzb_w_inner(j,i)+1, nzt
291                   w(k,j,i) = w(k,j,i) - w_l(k)
292                ENDDO
293             ENDDO
294          ENDDO
295       ENDIF
296    ENDIF
[75]297
298!
[1]299!-- Compute the divergence of the provisional velocity field.
300    CALL cpu_log( log_point_s(1), 'divergence', 'start' )
301
302    IF ( psolver == 'multigrid' )  THEN
303       !$OMP PARALLEL DO SCHEDULE( STATIC )
304       DO  i = nxl-1, nxr+1
305          DO  j = nys-1, nyn+1
306             DO  k = nzb, nzt+1
307                d(k,j,i) = 0.0
308             ENDDO
309          ENDDO
310       ENDDO
311    ELSE
312       !$OMP PARALLEL DO SCHEDULE( STATIC )
[1111]313       !$acc kernels present( d )
314       !$acc loop
[1003]315       DO  i = nxl, nxr
316          DO  j = nys, nyn
[1111]317             !$acc loop vector(32)
[1003]318             DO  k = nzb+1, nzt
[1]319                d(k,j,i) = 0.0
320             ENDDO
321          ENDDO
322       ENDDO
[1111]323       !$acc end kernels
[1]324    ENDIF
325
326    localsum  = 0.0
327    threadsum = 0.0
328
329#if defined( __ibm )
330    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
331    !$OMP DO SCHEDULE( STATIC )
332    DO  i = nxl, nxr
333       DO  j = nys, nyn
334          DO  k = nzb_s_inner(j,i)+1, nzt
[85]335             d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
336                          ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
[673]337                          ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) ) * ddt_3d      &
338                                                                * d_weight_pres 
[1]339          ENDDO
340!
341!--       Compute possible PE-sum of divergences for flow_statistics
342          DO  k = nzb_s_inner(j,i)+1, nzt
343             threadsum = threadsum + ABS( d(k,j,i) )
344          ENDDO
345
346       ENDDO
347    ENDDO
348
[707]349    localsum = localsum + threadsum * dt_3d * &
350                          weight_pres(intermediate_timestep_count)
[693]351
[1]352    !$OMP END PARALLEL
353#else
354
[1111]355    !$OMP PARALLEL PRIVATE (i,j,k)
356    !$OMP DO SCHEDULE( STATIC )
357    !$acc kernels present( d, ddzw, nzb_s_inner, u, v, w )
358    !$acc loop
359    DO  i = nxl, nxr
360       DO  j = nys, nyn
361          !$acc loop vector(32)
362          DO  k = 1, nzt
363             IF ( k > nzb_s_inner(j,i) )  THEN
[85]364                d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
[1111]365                           ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
366                           ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) ) * ddt_3d      &
367                           * d_weight_pres
368             ENDIF
[1]369          ENDDO
370       ENDDO
[1111]371    ENDDO
372    !$acc end kernels
373    !$OMP END PARALLEL
[1]374
375!
376!-- Compute possible PE-sum of divergences for flow_statistics
377    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
378    !$OMP DO SCHEDULE( STATIC )
379    DO  i = nxl, nxr
380       DO  j = nys, nyn
381          DO  k = nzb+1, nzt
382             threadsum = threadsum + ABS( d(k,j,i) )
383          ENDDO
384       ENDDO
385    ENDDO
[707]386    localsum = localsum + threadsum * dt_3d * &
387                          weight_pres(intermediate_timestep_count)
[1]388    !$OMP END PARALLEL
389#endif
390
391!
392!-- For completeness, set the divergence sum of all statistic regions to those
393!-- of the total domain
394    sums_divold_l(0:statistic_regions) = localsum
395
396    CALL cpu_log( log_point_s(1), 'divergence', 'stop' )
397
398!
399!-- Compute the pressure perturbation solving the Poisson equation
400    IF ( psolver(1:7) == 'poisfft' )  THEN
401
402!
403!--    Solve Poisson equation via FFT and solution of tridiagonal matrices
404       IF ( psolver == 'poisfft' )  THEN
405!
406!--       Solver for 2d-decomposition
407          CALL poisfft( d, tend )
[1113]408
[1]409       ELSEIF ( psolver == 'poisfft_hybrid' )  THEN 
410!
411!--       Solver for 1d-decomposition (using MPI and OpenMP).
412!--       The old hybrid-solver is still included here, as long as there
413!--       are some optimization problems in poisfft
414          CALL poisfft_hybrid( d )
[1113]415
[1]416       ENDIF
417
418!
419!--    Store computed perturbation pressure and set boundary condition in
420!--    z-direction
421       !$OMP PARALLEL DO
[1113]422       !$acc kernels present( d, tend )
423       !$acc loop
[1]424       DO  i = nxl, nxr
425          DO  j = nys, nyn
[1113]426             !$acc loop vector( 32 )
[1]427             DO  k = nzb+1, nzt
428                tend(k,j,i) = d(k,j,i)
429             ENDDO
430          ENDDO
431       ENDDO
[1113]432       !$acc end kernels
[1]433
434!
435!--    Bottom boundary:
436!--    This condition is only required for internal output. The pressure
437!--    gradient (dp(nzb+1)-dp(nzb))/dz is not used anywhere else.
438       IF ( ibc_p_b == 1 )  THEN
439!
440!--       Neumann (dp/dz = 0)
441          !$OMP PARALLEL DO
[1113]442          !$acc kernels present( nzb_s_inner, tend )
[667]443          DO  i = nxlg, nxrg
444             DO  j = nysg, nyng
[1]445                tend(nzb_s_inner(j,i),j,i) = tend(nzb_s_inner(j,i)+1,j,i)
446             ENDDO
447          ENDDO
[1113]448          !$acc end kernels
[1]449
450       ELSE
451!
452!--       Dirichlet
453          !$OMP PARALLEL DO
[1113]454          !$acc kernels present( tend )
[667]455          DO  i = nxlg, nxrg
456             DO  j = nysg, nyng
[1]457                tend(nzb_s_inner(j,i),j,i) = 0.0
458             ENDDO
459          ENDDO
[1113]460          !$acc end kernels
[1]461
462       ENDIF
463
464!
465!--    Top boundary
466       IF ( ibc_p_t == 1 )  THEN
467!
468!--       Neumann
469          !$OMP PARALLEL DO
[1113]470          !$acc kernels present( tend )
[667]471          DO  i = nxlg, nxrg
472             DO  j = nysg, nyng
[1]473                tend(nzt+1,j,i) = tend(nzt,j,i)
474             ENDDO
475          ENDDO
[1113]476          !$acc end kernels
[1]477
478       ELSE
479!
480!--       Dirichlet
481          !$OMP PARALLEL DO
[1113]482          !$acc kernels present( tend )
[667]483          DO  i = nxlg, nxrg
484             DO  j = nysg, nyng
[1]485                tend(nzt+1,j,i) = 0.0
486             ENDDO
487          ENDDO
[1113]488          !$acc end kernels
[1]489
490       ENDIF
491
492!
493!--    Exchange boundaries for p
[1113]494       IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
495          on_device = .TRUE.         ! to be removed after complete porting
496       ELSE                          ! of ghost point exchange
497          !$acc update host( tend )
498       ENDIF
[667]499       CALL exchange_horiz( tend, nbgp )
[1113]500       IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
501          on_device = .FALSE.        ! to be removed after complete porting
502       ELSE                          ! of ghost point exchange
503          !$acc update device( tend )
504       ENDIF
[1]505     
506    ELSEIF ( psolver == 'sor' )  THEN
507
508!
509!--    Solve Poisson equation for perturbation pressure using SOR-Red/Black
510!--    scheme
[707]511       CALL sor( d, ddzu_pres, ddzw, p_loc )
512       tend = p_loc
[1]513
514    ELSEIF ( psolver == 'multigrid' )  THEN
515
516!
517!--    Solve Poisson equation for perturbation pressure using Multigrid scheme,
[667]518!--    array tend is used to store the residuals, logical exchange_mg is used
519!--    to discern data exchange in multigrid ( 1 ghostpoint ) and normal grid
520!--    ( nbgp ghost points ).
[778]521
522!--    If the number of grid points of the gathered grid, which is collected
523!--    on PE0, is larger than the number of grid points of an PE, than array
524!--    tend will be enlarged.
525       IF ( gathered_size > subdomain_size )  THEN
526          DEALLOCATE( tend )
527          ALLOCATE( tend(nzb:nzt_mg(mg_switch_to_pe0_level)+1,nys_mg(          &
528                    mg_switch_to_pe0_level)-1:nyn_mg(mg_switch_to_pe0_level)+1,&
529                    nxl_mg(mg_switch_to_pe0_level)-1:nxr_mg(                   &
530                    mg_switch_to_pe0_level)+1) )
531       ENDIF
532
[1]533       CALL poismg( tend )
[707]534
[778]535       IF ( gathered_size > subdomain_size )  THEN
536          DEALLOCATE( tend )
537          ALLOCATE( tend(nzb:nzt+1,nysg:nyng,nxlg:nxrg) )
538       ENDIF
539
[1]540!
541!--    Restore perturbation pressure on tend because this array is used
542!--    further below to correct the velocity fields
[707]543       DO  i = nxl-1, nxr+1
544          DO  j = nys-1, nyn+1
545             DO  k = nzb, nzt+1
546                tend(k,j,i) = p_loc(k,j,i)
547             ENDDO
548          ENDDO
549       ENDDO
[667]550
[1]551    ENDIF
552
553!
[707]554!-- Store perturbation pressure on array p, used for pressure data output.
555!-- Ghost layers are added in the output routines (except sor-method: see below)
556    IF ( intermediate_timestep_count == 1 )  THEN
557       !$OMP PARALLEL PRIVATE (i,j,k)
558       !$OMP DO
[1113]559       !$acc kernels present( p, tend, weight_substep )
560       !$acc loop
[707]561       DO  i = nxl-1, nxr+1
562          DO  j = nys-1, nyn+1
[1113]563             !$acc loop vector( 32 )
[707]564             DO  k = nzb, nzt+1
565                p(k,j,i) = tend(k,j,i) * &
566                           weight_substep(intermediate_timestep_count)
[673]567             ENDDO
[1]568          ENDDO
[707]569       ENDDO
[1113]570       !$acc end kernels
[707]571       !$OMP END PARALLEL
572
573    ELSE 
574       !$OMP PARALLEL PRIVATE (i,j,k)
575       !$OMP DO
[1113]576       !$acc kernels present( p, tend, weight_substep )
577       !$acc loop
[707]578       DO  i = nxl-1, nxr+1
579          DO  j = nys-1, nyn+1
[1113]580             !$acc loop vector( 32 )
[707]581             DO  k = nzb, nzt+1
582                p(k,j,i) = p(k,j,i) + tend(k,j,i) * &
583                           weight_substep(intermediate_timestep_count)
[673]584             ENDDO
585          ENDDO
[707]586       ENDDO
[1113]587       !$acc end kernels
[707]588       !$OMP END PARALLEL
589
590    ENDIF
[673]591       
[707]592!
593!-- SOR-method needs ghost layers for the next timestep
594    IF ( psolver == 'sor' )  CALL exchange_horiz( p, nbgp )
[682]595
[1]596!
597!-- Correction of the provisional velocities with the current perturbation
598!-- pressure just computed
[1111]599    !$acc update host( u, v, w )
[709]600    IF ( conserve_volume_flow  .AND.  ( bc_lr_cyc .OR. bc_ns_cyc ) )  THEN
[1]601       volume_flow_l(1) = 0.0
602       volume_flow_l(2) = 0.0
603    ENDIF
[707]604
[1]605    !$OMP PARALLEL PRIVATE (i,j,k)
606    !$OMP DO
[1113]607    !$acc kernels present( ddzu, nzb_u_inner, nzb_v_inner, nzb_w_inner, tend, u, v, w, weight_pres )
608    !$acc loop
[673]609    DO  i = nxl, nxr   
[1]610       DO  j = nys, nyn
[1113]611          !$acc loop vector( 32 )
612          DO  k = 1, nzt
613             IF ( k > nzb_w_inner(j,i) )  THEN
614                w(k,j,i) = w(k,j,i) - dt_3d *                                 &
615                           ( tend(k+1,j,i) - tend(k,j,i) ) * ddzu(k+1) *      &
616                           weight_pres(intermediate_timestep_count)
617             ENDIF
[1]618          ENDDO
[1113]619          !$acc loop vector( 32 )
620          DO  k = 1, nzt
621             IF ( k > nzb_u_inner(j,i) )  THEN
622                u(k,j,i) = u(k,j,i) - dt_3d *                                 &
623                           ( tend(k,j,i) - tend(k,j,i-1) ) * ddx *            &
624                           weight_pres(intermediate_timestep_count)
625             ENDIF
[1]626          ENDDO
[1113]627          !$acc loop vector( 32 )
628          DO  k = 1, nzt
629             IF ( k > nzb_v_inner(j,i) )  THEN
630                v(k,j,i) = v(k,j,i) - dt_3d *                                 &
631                           ( tend(k,j,i) - tend(k,j-1,i) ) * ddy *            &
632                           weight_pres(intermediate_timestep_count)
633             ENDIF
[673]634          ENDDO                                                         
[1]635
636       ENDDO
637    ENDDO
[1113]638    !$acc end kernels
[1]639    !$OMP END PARALLEL
[1113]640
641!
642!-- Sum up the volume flow through the right and north boundary
643    IF ( conserve_volume_flow  .AND.  bc_lr_cyc  .AND.  bc_ns_cyc  .AND.  &
644         nxr == nx )  THEN
645
646       !$OMP PARALLEL PRIVATE (j,k)
647       !$OMP DO
648       DO  j = nys, nyn
649          !$OMP CRITICAL
650          DO  k = nzb_2d(j,nx) + 1, nzt
651             volume_flow_l(1) = volume_flow_l(1) + u(k,j,nx) * dzw(k)
652          ENDDO
653          !$OMP END CRITICAL
654       ENDDO
655       !$OMP END PARALLEL
656
657    ENDIF
658
659    IF ( conserve_volume_flow  .AND.  bc_ns_cyc  .AND.  bc_lr_cyc  .AND.  &
660         nyn == ny )  THEN
661
662       !$OMP PARALLEL PRIVATE (i,k)
663       !$OMP DO
664       DO  i = nxl, nxr
665          !$OMP CRITICAL
666          DO  k = nzb_2d(ny,i) + 1, nzt
667             volume_flow_l(2) = volume_flow_l(2) + v(k,ny,i) * dzw(k)
668           ENDDO
669          !$OMP END CRITICAL
670       ENDDO
671       !$OMP END PARALLEL
672
673    ENDIF
[673]674   
[1]675!
676!-- Conserve the volume flow
[707]677    IF ( conserve_volume_flow  .AND.  ( bc_lr_cyc  .AND.  bc_ns_cyc ) )  THEN
[1]678
679#if defined( __parallel )   
[622]680       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
[1]681       CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 2, MPI_REAL, &
682                           MPI_SUM, comm2d, ierr ) 
683#else
684       volume_flow = volume_flow_l 
685#endif   
686
687       volume_flow_offset = ( volume_flow_initial - volume_flow ) / &
688                            volume_flow_area
689
690       !$OMP PARALLEL PRIVATE (i,j,k)
691       !$OMP DO
692       DO  i = nxl, nxr
693          DO  j = nys, nyn
[667]694             DO  k = nzb_u_inner(j,i) + 1, nzt
695                u(k,j,i) = u(k,j,i) + volume_flow_offset(1)
[719]696             ENDDO
697             DO k = nzb_v_inner(j,i) + 1, nzt
[667]698                v(k,j,i) = v(k,j,i) + volume_flow_offset(2)
699             ENDDO
[1]700          ENDDO
701       ENDDO
[667]702
[1]703       !$OMP END PARALLEL
704
705    ENDIF
706
707!
708!-- Exchange of boundaries for the velocities
[1113]709    IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
710       on_device = .TRUE.         ! to be removed after complete porting
711    ELSE                          ! of ghost point exchange
712       !$acc update host( u, v, w )
713    ENDIF
[667]714    CALL exchange_horiz( u, nbgp )
715    CALL exchange_horiz( v, nbgp )
716    CALL exchange_horiz( w, nbgp )
[1113]717    IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
718       on_device = .FALSE.        ! to be removed after complete porting
719    ELSE                          ! of ghost point exchange
720       !$acc update device( u, v, w )
721    ENDIF
[1]722
723!
724!-- Compute the divergence of the corrected velocity field,
725!-- a possible PE-sum is computed in flow_statistics
726    CALL cpu_log( log_point_s(1), 'divergence', 'start' )
727    sums_divnew_l = 0.0
728
729!
730!-- d must be reset to zero because it can contain nonzero values below the
731!-- topography
732    IF ( topography /= 'flat' )  d = 0.0
733
734    localsum  = 0.0
735    threadsum = 0.0
736
737    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
738    !$OMP DO SCHEDULE( STATIC )
739#if defined( __ibm )
740    DO  i = nxl, nxr
741       DO  j = nys, nyn
742          DO  k = nzb_s_inner(j,i)+1, nzt
743             d(k,j,i) = ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
744                        ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
745                        ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k)
746          ENDDO
747          DO  k = nzb+1, nzt
748             threadsum = threadsum + ABS( d(k,j,i) )
749          ENDDO
750       ENDDO
751    ENDDO
752#else
[1113]753    !$acc kernels present( d, ddzw, nzb_s_inner, u, v, w )
754    !$acc loop
[1]755    DO  i = nxl, nxr
756       DO  j = nys, nyn
[1113]757          !$acc loop vector( 32 )
758          DO  k = 1, nzt
759             IF ( k > nzb_s_inner(j,i) )  THEN
760                d(k,j,i) = ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
761                           ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
762                           ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k)
763             ENDIF
764          ENDDO
765       ENDDO
766    ENDDO
767    !$acc end kernels
768!
769!-- Compute possible PE-sum of divergences for flow_statistics
770    DO  i = nxl, nxr
771       DO  j = nys, nyn
[1]772          DO  k = nzb_s_inner(j,i)+1, nzt
773             threadsum = threadsum + ABS( d(k,j,i) )
774          ENDDO
775       ENDDO
776    ENDDO
777#endif
[667]778
[1]779    localsum = localsum + threadsum
780    !$OMP END PARALLEL
781
782!
783!-- For completeness, set the divergence sum of all statistic regions to those
784!-- of the total domain
785    sums_divnew_l(0:statistic_regions) = localsum
786
787    CALL cpu_log( log_point_s(1), 'divergence', 'stop' )
788
789    CALL cpu_log( log_point(8), 'pres', 'stop' )
790
791
792 END SUBROUTINE pres
Note: See TracBrowser for help on using the repository browser.