source: palm/trunk/SOURCE/pres.f90 @ 1118

Last change on this file since 1118 was 1118, checked in by suehring, 9 years ago

Last commit documented.

  • Property svn:keywords set to Id
File size: 24.1 KB
Line 
1 SUBROUTINE pres
2
3!--------------------------------------------------------------------------------!
4! This file is part of PALM.
5!
6! PALM is free software: you can redistribute it and/or modify it under the terms
7! of the GNU General Public License as published by the Free Software Foundation,
8! either version 3 of the License, or (at your option) any later version.
9!
10! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12! A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
13!
14! You should have received a copy of the GNU General Public License along with
15! PALM. If not, see <http://www.gnu.org/licenses/>.
16!
17! Copyright 1997-2012  Leibniz University Hannover
18!--------------------------------------------------------------------------------!
19!
20! Current revisions:
21! -----------------
22!
23! Former revisions:
24! -----------------
25! $Id: pres.f90 1118 2013-03-27 11:17:44Z suehring $
26!
27! 1117 2013-03-27 11:15:36Z suehring
28! Bugfix in OpenMP parallelization.
29!
30! 1113 2013-03-10 02:48:14Z raasch
31! GPU-porting of several loops, some loops rearranged
32!
33! 1111 2013-03-08 23:54:10Z
34! openACC statements added,
35! ibc_p_b = 2 removed
36!
37! 1092 2013-02-02 11:24:22Z raasch
38! unused variables removed
39!
40! 1036 2012-10-22 13:43:42Z raasch
41! code put under GPL (PALM 3.9)
42!
43! 1003 2012-09-14 14:35:53Z raasch
44! adjustment of array tend for cases with unequal subdomain sizes removed
45!
46! 778 2011-11-07 14:18:25Z fricke
47! New allocation of tend when multigrid is used and the collected field on PE0
48! has more grid points than the subdomain of an PE.
49!
50! 719 2011-04-06 13:05:23Z gryschka
51! Bugfix in volume flow control for double cyclic boundary conditions
52!
53! 709 2011-03-30 09:31:40Z raasch
54! formatting adjustments
55!
56! 707 2011-03-29 11:39:40Z raasch
57! Calculation of weighted average of p is now handled in the same way
58! regardless of the number of ghost layers (advection scheme),
59! multigrid and sor method are using p_loc for iterative advancements of
60! pressure,
61! localsum calculation modified for proper OpenMP reduction,
62! bc_lr/ns replaced by bc_lr/ns_cyc
63!
64! 693 2011-03-08 09:..:..Z raasch
65! bugfix: weighting coefficient added to ibm branch
66!
67! 680 2011-02-04 23:16:06Z gryschka
68! bugfix: collective_wait
69!
70! 675 2011-01-19 10:56:55Z suehring
71! Removed bugfix while copying tend.
72!
73! 673 2011-01-18 16:19:48Z suehring
74! Weighting coefficients added for right computation of the pressure during
75! Runge-Kutta substeps.
76!
77! 667 2010-12-23 12:06:00Z suehring/gryschka
78! New allocation of tend when ws-scheme and multigrid is used. This is due to
79! reasons of perforance of the data_exchange. The same is done with p after
80! poismg is called.
81! nxl-1, nxr+1, nys-1, nyn+1 replaced by nxlg, nxrg, nysg, nyng when no
82! multigrid is used. Calls of exchange_horiz are modified.
83! bugfix: After pressure correction no volume flow correction in case of
84! non-cyclic boundary conditions
85! (has to be done only before pressure correction)
86! Call of SOR routine is referenced with ddzu_pres.
87!
88! 622 2010-12-10 08:08:13Z raasch
89! optional barriers included in order to speed up collective operations
90!
91! 151 2008-03-07 13:42:18Z raasch
92! Bugfix in volume flow control for non-cyclic boundary conditions
93!
94! 106 2007-08-16 14:30:26Z raasch
95! Volume flow conservation added for the remaining three outflow boundaries
96!
97! 85 2007-05-11 09:35:14Z raasch
98! Division through dt_3d replaced by multiplication of the inverse.
99! For performance optimisation, this is done in the loop calculating the
100! divergence instead of using a seperate loop.
101!
102! 75 2007-03-22 09:54:05Z raasch
103! Volume flow control for non-cyclic boundary conditions added (currently only
104! for the north boundary!!), 2nd+3rd argument removed from exchange horiz,
105! mean vertical velocity is removed in case of Neumann boundary conditions
106! both at the bottom and the top
107!
108! RCS Log replace by Id keyword, revision history cleaned up
109!
110! Revision 1.25  2006/04/26 13:26:12  raasch
111! OpenMP optimization (+localsum, threadsum)
112!
113! Revision 1.1  1997/07/24 11:24:44  raasch
114! Initial revision
115!
116!
117! Description:
118! ------------
119! Compute the divergence of the provisional velocity field. Solve the Poisson
120! equation for the perturbation pressure. Compute the final velocities using
121! this perturbation pressure. Compute the remaining divergence.
122!------------------------------------------------------------------------------!
123
124    USE arrays_3d
125    USE constants
126    USE control_parameters
127    USE cpulog
128    USE grid_variables
129    USE indices
130    USE interfaces
131    USE pegrid
132    USE poisfft_mod
133    USE poisfft_hybrid_mod
134    USE statistics
135
136    IMPLICIT NONE
137
138    INTEGER ::  i, j, k
139
140    REAL    ::  ddt_3d, localsum, threadsum, d_weight_pres
141
142    REAL, DIMENSION(1:2) ::  volume_flow_l, volume_flow_offset
143    REAL, DIMENSION(1:nzt) ::  w_l, w_l_l
144
145
146    CALL cpu_log( log_point(8), 'pres', 'start' )
147
148
149    ddt_3d = 1.0 / dt_3d
150    d_weight_pres = 1.0 / weight_pres(intermediate_timestep_count)
151
152!
153!-- Multigrid method expects array d to have one ghost layer.
154!--
155    IF ( psolver == 'multigrid' )  THEN
156     
157       DEALLOCATE( d )
158       ALLOCATE( d(nzb:nzt+1,nys-1:nyn+1,nxl-1:nxr+1) ) 
159
160!
161!--    Since p is later used to hold the weighted average of the substeps, it
162!--    cannot be used in the iterative solver. Therefore, its initial value is
163!--    stored on p_loc, which is then iteratively advanced in every substep.
164       IF ( intermediate_timestep_count == 1 )  THEN
165          DO  i = nxl-1, nxr+1
166             DO  j = nys-1, nyn+1
167                DO  k = nzb, nzt+1
168                   p_loc(k,j,i) = p(k,j,i)
169                ENDDO
170             ENDDO
171          ENDDO
172       ENDIF
173       
174    ELSEIF ( psolver == 'sor'  .AND.  intermediate_timestep_count == 1 )  THEN
175
176!
177!--    Since p is later used to hold the weighted average of the substeps, it
178!--    cannot be used in the iterative solver. Therefore, its initial value is
179!--    stored on p_loc, which is then iteratively advanced in every substep.
180       p_loc = p
181
182    ENDIF
183
184!
185!-- Conserve the volume flow at the outflow in case of non-cyclic lateral
186!-- boundary conditions
187!-- WARNING: so far, this conservation does not work at the left/south
188!--          boundary if the topography at the inflow differs from that at the
189!--          outflow! For this case, volume_flow_area needs adjustment!
190!
191!-- Left/right
192    IF ( conserve_volume_flow  .AND.  ( outflow_l .OR. outflow_r ) )  THEN
193
194       volume_flow(1)   = 0.0
195       volume_flow_l(1) = 0.0
196
197       IF ( outflow_l )  THEN
198          i = 0
199       ELSEIF ( outflow_r )  THEN
200          i = nx+1
201       ENDIF
202
203       DO  j = nys, nyn
204!
205!--       Sum up the volume flow through the south/north boundary
206          DO  k = nzb_2d(j,i)+1, nzt
207             volume_flow_l(1) = volume_flow_l(1) + u(k,j,i) * dzw(k)
208          ENDDO
209       ENDDO
210
211#if defined( __parallel )   
212       IF ( collective_wait )  CALL MPI_BARRIER( comm1dy, ierr )
213       CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 1, MPI_REAL, &
214                           MPI_SUM, comm1dy, ierr )   
215#else
216       volume_flow = volume_flow_l 
217#endif
218       volume_flow_offset(1) = ( volume_flow_initial(1) - volume_flow(1) ) &
219                               / volume_flow_area(1)
220
221       DO  j = nysg, nyng
222          DO  k = nzb_2d(j,i)+1, nzt
223             u(k,j,i) = u(k,j,i) + volume_flow_offset(1)
224          ENDDO
225       ENDDO
226
227    ENDIF
228
229!
230!-- South/north
231    IF ( conserve_volume_flow  .AND.  ( outflow_n .OR. outflow_s ) )  THEN
232
233       volume_flow(2)   = 0.0
234       volume_flow_l(2) = 0.0
235
236       IF ( outflow_s )  THEN
237          j = 0
238       ELSEIF ( outflow_n )  THEN
239          j = ny+1
240       ENDIF
241
242       DO  i = nxl, nxr
243!
244!--       Sum up the volume flow through the south/north boundary
245          DO  k = nzb_2d(j,i)+1, nzt
246             volume_flow_l(2) = volume_flow_l(2) + v(k,j,i) * dzw(k)
247          ENDDO
248       ENDDO
249
250#if defined( __parallel )   
251       IF ( collective_wait )  CALL MPI_BARRIER( comm1dx, ierr )
252       CALL MPI_ALLREDUCE( volume_flow_l(2), volume_flow(2), 1, MPI_REAL, &
253                           MPI_SUM, comm1dx, ierr )   
254#else
255       volume_flow = volume_flow_l 
256#endif
257       volume_flow_offset(2) = ( volume_flow_initial(2) - volume_flow(2) )    &
258                               / volume_flow_area(2)
259
260       DO  i = nxlg, nxrg
261          DO  k = nzb_v_inner(j,i)+1, nzt
262             v(k,j,i) = v(k,j,i) + volume_flow_offset(2)
263          ENDDO
264       ENDDO
265
266    ENDIF
267
268!
269!-- Remove mean vertical velocity
270    IF ( ibc_p_b == 1  .AND.  ibc_p_t == 1 )  THEN
271       IF ( simulated_time > 0.0 )  THEN ! otherwise nzb_w_inner not yet known
272          w_l = 0.0;  w_l_l = 0.0
273          DO  i = nxl, nxr
274             DO  j = nys, nyn
275                DO  k = nzb_w_inner(j,i)+1, nzt
276                   w_l_l(k) = w_l_l(k) + w(k,j,i)
277                ENDDO
278             ENDDO
279          ENDDO
280#if defined( __parallel )   
281          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
282          CALL MPI_ALLREDUCE( w_l_l(1), w_l(1), nzt, MPI_REAL, MPI_SUM, &
283                              comm2d, ierr )
284#else
285          w_l = w_l_l 
286#endif
287          DO  k = 1, nzt
288             w_l(k) = w_l(k) / ngp_2dh_outer(k,0)
289          ENDDO
290          DO  i = nxlg, nxrg
291             DO  j = nysg, nyng
292                DO  k = nzb_w_inner(j,i)+1, nzt
293                   w(k,j,i) = w(k,j,i) - w_l(k)
294                ENDDO
295             ENDDO
296          ENDDO
297       ENDIF
298    ENDIF
299
300!
301!-- Compute the divergence of the provisional velocity field.
302    CALL cpu_log( log_point_s(1), 'divergence', 'start' )
303
304    IF ( psolver == 'multigrid' )  THEN
305       !$OMP PARALLEL DO SCHEDULE( STATIC )
306       DO  i = nxl-1, nxr+1
307          DO  j = nys-1, nyn+1
308             DO  k = nzb, nzt+1
309                d(k,j,i) = 0.0
310             ENDDO
311          ENDDO
312       ENDDO
313    ELSE
314       !$OMP PARALLEL DO SCHEDULE( STATIC )
315       !$acc kernels present( d )
316       !$acc loop
317       DO  i = nxl, nxr
318          DO  j = nys, nyn
319             !$acc loop vector(32)
320             DO  k = nzb+1, nzt
321                d(k,j,i) = 0.0
322             ENDDO
323          ENDDO
324       ENDDO
325       !$acc end kernels
326    ENDIF
327
328    localsum  = 0.0
329    threadsum = 0.0
330
331#if defined( __ibm )
332    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
333    !$OMP DO SCHEDULE( STATIC )
334    DO  i = nxl, nxr
335       DO  j = nys, nyn
336          DO  k = nzb_s_inner(j,i)+1, nzt
337             d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
338                          ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
339                          ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) ) * ddt_3d      &
340                                                                * d_weight_pres
341          ENDDO
342!
343!--       Compute possible PE-sum of divergences for flow_statistics
344          DO  k = nzb_s_inner(j,i)+1, nzt
345             threadsum = threadsum + ABS( d(k,j,i) )
346          ENDDO
347
348       ENDDO
349    ENDDO
350
351    localsum = localsum + threadsum * dt_3d * &
352                          weight_pres(intermediate_timestep_count)
353
354    !$OMP END PARALLEL
355#else
356
357    !$OMP PARALLEL PRIVATE (i,j,k)
358    !$OMP DO SCHEDULE( STATIC )
359    !$acc kernels present( d, ddzw, nzb_s_inner, u, v, w )
360    !$acc loop
361    DO  i = nxl, nxr
362       DO  j = nys, nyn
363          !$acc loop vector(32)
364          DO  k = 1, nzt
365             IF ( k > nzb_s_inner(j,i) )  THEN
366                d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
367                           ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
368                           ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) ) * ddt_3d      &
369                           * d_weight_pres
370             ENDIF
371          ENDDO
372       ENDDO
373    ENDDO
374    !$acc end kernels
375    !$OMP END PARALLEL
376
377!
378!-- Compute possible PE-sum of divergences for flow_statistics
379    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
380    !$OMP DO SCHEDULE( STATIC )
381    DO  i = nxl, nxr
382       DO  j = nys, nyn
383          DO  k = nzb+1, nzt
384             threadsum = threadsum + ABS( d(k,j,i) )
385          ENDDO
386       ENDDO
387    ENDDO
388    localsum = localsum + threadsum * dt_3d * &
389                          weight_pres(intermediate_timestep_count)
390    !$OMP END PARALLEL
391#endif
392
393!
394!-- For completeness, set the divergence sum of all statistic regions to those
395!-- of the total domain
396    sums_divold_l(0:statistic_regions) = localsum
397
398    CALL cpu_log( log_point_s(1), 'divergence', 'stop' )
399
400!
401!-- Compute the pressure perturbation solving the Poisson equation
402    IF ( psolver(1:7) == 'poisfft' )  THEN
403
404!
405!--    Solve Poisson equation via FFT and solution of tridiagonal matrices
406       IF ( psolver == 'poisfft' )  THEN
407!
408!--       Solver for 2d-decomposition
409          CALL poisfft( d, tend )
410
411       ELSEIF ( psolver == 'poisfft_hybrid' )  THEN 
412!
413!--       Solver for 1d-decomposition (using MPI and OpenMP).
414!--       The old hybrid-solver is still included here, as long as there
415!--       are some optimization problems in poisfft
416          CALL poisfft_hybrid( d )
417
418       ENDIF
419
420!
421!--    Store computed perturbation pressure and set boundary condition in
422!--    z-direction
423       !$OMP PARALLEL DO
424       !$acc kernels present( d, tend )
425       !$acc loop
426       DO  i = nxl, nxr
427          DO  j = nys, nyn
428             !$acc loop vector( 32 )
429             DO  k = nzb+1, nzt
430                tend(k,j,i) = d(k,j,i)
431             ENDDO
432          ENDDO
433       ENDDO
434       !$acc end kernels
435
436!
437!--    Bottom boundary:
438!--    This condition is only required for internal output. The pressure
439!--    gradient (dp(nzb+1)-dp(nzb))/dz is not used anywhere else.
440       IF ( ibc_p_b == 1 )  THEN
441!
442!--       Neumann (dp/dz = 0)
443          !$OMP PARALLEL DO
444          !$acc kernels present( nzb_s_inner, tend )
445          DO  i = nxlg, nxrg
446             DO  j = nysg, nyng
447                tend(nzb_s_inner(j,i),j,i) = tend(nzb_s_inner(j,i)+1,j,i)
448             ENDDO
449          ENDDO
450          !$acc end kernels
451
452       ELSE
453!
454!--       Dirichlet
455          !$OMP PARALLEL DO
456          !$acc kernels present( tend )
457          DO  i = nxlg, nxrg
458             DO  j = nysg, nyng
459                tend(nzb_s_inner(j,i),j,i) = 0.0
460             ENDDO
461          ENDDO
462          !$acc end kernels
463
464       ENDIF
465
466!
467!--    Top boundary
468       IF ( ibc_p_t == 1 )  THEN
469!
470!--       Neumann
471          !$OMP PARALLEL DO
472          !$acc kernels present( tend )
473          DO  i = nxlg, nxrg
474             DO  j = nysg, nyng
475                tend(nzt+1,j,i) = tend(nzt,j,i)
476             ENDDO
477          ENDDO
478          !$acc end kernels
479
480       ELSE
481!
482!--       Dirichlet
483          !$OMP PARALLEL DO
484          !$acc kernels present( tend )
485          DO  i = nxlg, nxrg
486             DO  j = nysg, nyng
487                tend(nzt+1,j,i) = 0.0
488             ENDDO
489          ENDDO
490          !$acc end kernels
491
492       ENDIF
493
494!
495!--    Exchange boundaries for p
496       IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
497          on_device = .TRUE.         ! to be removed after complete porting
498       ELSE                          ! of ghost point exchange
499          !$acc update host( tend )
500       ENDIF
501       CALL exchange_horiz( tend, nbgp )
502       IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
503          on_device = .FALSE.        ! to be removed after complete porting
504       ELSE                          ! of ghost point exchange
505          !$acc update device( tend )
506       ENDIF
507     
508    ELSEIF ( psolver == 'sor' )  THEN
509
510!
511!--    Solve Poisson equation for perturbation pressure using SOR-Red/Black
512!--    scheme
513       CALL sor( d, ddzu_pres, ddzw, p_loc )
514       tend = p_loc
515
516    ELSEIF ( psolver == 'multigrid' )  THEN
517
518!
519!--    Solve Poisson equation for perturbation pressure using Multigrid scheme,
520!--    array tend is used to store the residuals, logical exchange_mg is used
521!--    to discern data exchange in multigrid ( 1 ghostpoint ) and normal grid
522!--    ( nbgp ghost points ).
523
524!--    If the number of grid points of the gathered grid, which is collected
525!--    on PE0, is larger than the number of grid points of an PE, than array
526!--    tend will be enlarged.
527       IF ( gathered_size > subdomain_size )  THEN
528          DEALLOCATE( tend )
529          ALLOCATE( tend(nzb:nzt_mg(mg_switch_to_pe0_level)+1,nys_mg(          &
530                    mg_switch_to_pe0_level)-1:nyn_mg(mg_switch_to_pe0_level)+1,&
531                    nxl_mg(mg_switch_to_pe0_level)-1:nxr_mg(                   &
532                    mg_switch_to_pe0_level)+1) )
533       ENDIF
534
535       CALL poismg( tend )
536
537       IF ( gathered_size > subdomain_size )  THEN
538          DEALLOCATE( tend )
539          ALLOCATE( tend(nzb:nzt+1,nysg:nyng,nxlg:nxrg) )
540       ENDIF
541
542!
543!--    Restore perturbation pressure on tend because this array is used
544!--    further below to correct the velocity fields
545       DO  i = nxl-1, nxr+1
546          DO  j = nys-1, nyn+1
547             DO  k = nzb, nzt+1
548                tend(k,j,i) = p_loc(k,j,i)
549             ENDDO
550          ENDDO
551       ENDDO
552
553    ENDIF
554
555!
556!-- Store perturbation pressure on array p, used for pressure data output.
557!-- Ghost layers are added in the output routines (except sor-method: see below)
558    IF ( intermediate_timestep_count == 1 )  THEN
559       !$OMP PARALLEL PRIVATE (i,j,k)
560       !$OMP DO
561       !$acc kernels present( p, tend, weight_substep )
562       !$acc loop
563       DO  i = nxl-1, nxr+1
564          DO  j = nys-1, nyn+1
565             !$acc loop vector( 32 )
566             DO  k = nzb, nzt+1
567                p(k,j,i) = tend(k,j,i) * &
568                           weight_substep(intermediate_timestep_count)
569             ENDDO
570          ENDDO
571       ENDDO
572       !$acc end kernels
573       !$OMP END PARALLEL
574
575    ELSE 
576       !$OMP PARALLEL PRIVATE (i,j,k)
577       !$OMP DO
578       !$acc kernels present( p, tend, weight_substep )
579       !$acc loop
580       DO  i = nxl-1, nxr+1
581          DO  j = nys-1, nyn+1
582             !$acc loop vector( 32 )
583             DO  k = nzb, nzt+1
584                p(k,j,i) = p(k,j,i) + tend(k,j,i) * &
585                           weight_substep(intermediate_timestep_count)
586             ENDDO
587          ENDDO
588       ENDDO
589       !$acc end kernels
590       !$OMP END PARALLEL
591
592    ENDIF
593       
594!
595!-- SOR-method needs ghost layers for the next timestep
596    IF ( psolver == 'sor' )  CALL exchange_horiz( p, nbgp )
597
598!
599!-- Correction of the provisional velocities with the current perturbation
600!-- pressure just computed
601    !$acc update host( u, v, w )
602    IF ( conserve_volume_flow  .AND.  ( bc_lr_cyc .OR. bc_ns_cyc ) )  THEN
603       volume_flow_l(1) = 0.0
604       volume_flow_l(2) = 0.0
605    ENDIF
606
607    !$OMP PARALLEL PRIVATE (i,j,k)
608    !$OMP DO
609    !$acc kernels present( ddzu, nzb_u_inner, nzb_v_inner, nzb_w_inner, tend, u, v, w, weight_pres )
610    !$acc loop
611    DO  i = nxl, nxr   
612       DO  j = nys, nyn
613          !$acc loop vector( 32 )
614          DO  k = 1, nzt
615             IF ( k > nzb_w_inner(j,i) )  THEN
616                w(k,j,i) = w(k,j,i) - dt_3d *                                 &
617                           ( tend(k+1,j,i) - tend(k,j,i) ) * ddzu(k+1) *      &
618                           weight_pres(intermediate_timestep_count)
619             ENDIF
620          ENDDO
621          !$acc loop vector( 32 )
622          DO  k = 1, nzt
623             IF ( k > nzb_u_inner(j,i) )  THEN
624                u(k,j,i) = u(k,j,i) - dt_3d *                                 &
625                           ( tend(k,j,i) - tend(k,j,i-1) ) * ddx *            &
626                           weight_pres(intermediate_timestep_count)
627             ENDIF
628          ENDDO
629          !$acc loop vector( 32 )
630          DO  k = 1, nzt
631             IF ( k > nzb_v_inner(j,i) )  THEN
632                v(k,j,i) = v(k,j,i) - dt_3d *                                 &
633                           ( tend(k,j,i) - tend(k,j-1,i) ) * ddy *            &
634                           weight_pres(intermediate_timestep_count)
635             ENDIF
636          ENDDO                                                         
637
638       ENDDO
639    ENDDO
640    !$acc end kernels
641    !$OMP END PARALLEL
642
643!
644!-- Sum up the volume flow through the right and north boundary
645    IF ( conserve_volume_flow  .AND.  bc_lr_cyc  .AND.  bc_ns_cyc  .AND.  &
646         nxr == nx )  THEN
647
648       !$OMP PARALLEL PRIVATE (j,k)
649       !$OMP DO
650       DO  j = nys, nyn
651          !$OMP CRITICAL
652          DO  k = nzb_2d(j,nx) + 1, nzt
653             volume_flow_l(1) = volume_flow_l(1) + u(k,j,nx) * dzw(k)
654          ENDDO
655          !$OMP END CRITICAL
656       ENDDO
657       !$OMP END PARALLEL
658
659    ENDIF
660
661    IF ( conserve_volume_flow  .AND.  bc_ns_cyc  .AND.  bc_lr_cyc  .AND.  &
662         nyn == ny )  THEN
663
664       !$OMP PARALLEL PRIVATE (i,k)
665       !$OMP DO
666       DO  i = nxl, nxr
667          !$OMP CRITICAL
668          DO  k = nzb_2d(ny,i) + 1, nzt
669             volume_flow_l(2) = volume_flow_l(2) + v(k,ny,i) * dzw(k)
670           ENDDO
671          !$OMP END CRITICAL
672       ENDDO
673       !$OMP END PARALLEL
674
675    ENDIF
676   
677!
678!-- Conserve the volume flow
679    IF ( conserve_volume_flow  .AND.  ( bc_lr_cyc  .AND.  bc_ns_cyc ) )  THEN
680
681#if defined( __parallel )   
682       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
683       CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 2, MPI_REAL, &
684                           MPI_SUM, comm2d, ierr ) 
685#else
686       volume_flow = volume_flow_l 
687#endif   
688
689       volume_flow_offset = ( volume_flow_initial - volume_flow ) / &
690                            volume_flow_area
691
692       !$OMP PARALLEL PRIVATE (i,j,k)
693       !$OMP DO
694       DO  i = nxl, nxr
695          DO  j = nys, nyn
696             DO  k = nzb_u_inner(j,i) + 1, nzt
697                u(k,j,i) = u(k,j,i) + volume_flow_offset(1)
698             ENDDO
699             DO k = nzb_v_inner(j,i) + 1, nzt
700                v(k,j,i) = v(k,j,i) + volume_flow_offset(2)
701             ENDDO
702          ENDDO
703       ENDDO
704
705       !$OMP END PARALLEL
706
707    ENDIF
708
709!
710!-- Exchange of boundaries for the velocities
711    IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
712       on_device = .TRUE.         ! to be removed after complete porting
713    ELSE                          ! of ghost point exchange
714       !$acc update host( u, v, w )
715    ENDIF
716    CALL exchange_horiz( u, nbgp )
717    CALL exchange_horiz( v, nbgp )
718    CALL exchange_horiz( w, nbgp )
719    IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
720       on_device = .FALSE.        ! to be removed after complete porting
721    ELSE                          ! of ghost point exchange
722       !$acc update device( u, v, w )
723    ENDIF
724
725!
726!-- Compute the divergence of the corrected velocity field,
727!-- a possible PE-sum is computed in flow_statistics
728    CALL cpu_log( log_point_s(1), 'divergence', 'start' )
729    sums_divnew_l = 0.0
730
731!
732!-- d must be reset to zero because it can contain nonzero values below the
733!-- topography
734    IF ( topography /= 'flat' )  d = 0.0
735
736    localsum  = 0.0
737    threadsum = 0.0
738
739    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
740    !$OMP DO SCHEDULE( STATIC )
741#if defined( __ibm )
742    DO  i = nxl, nxr
743       DO  j = nys, nyn
744          DO  k = nzb_s_inner(j,i)+1, nzt
745             d(k,j,i) = ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
746                        ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
747                        ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k)
748          ENDDO
749          DO  k = nzb+1, nzt
750             threadsum = threadsum + ABS( d(k,j,i) )
751          ENDDO
752       ENDDO
753    ENDDO
754#else
755    !$acc kernels present( d, ddzw, nzb_s_inner, u, v, w )
756    !$acc loop
757    DO  i = nxl, nxr
758       DO  j = nys, nyn
759          !$acc loop vector( 32 )
760          DO  k = 1, nzt
761             IF ( k > nzb_s_inner(j,i) )  THEN
762                d(k,j,i) = ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
763                           ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
764                           ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k)
765             ENDIF
766          ENDDO
767       ENDDO
768    ENDDO
769    !$acc end kernels
770!
771!-- Compute possible PE-sum of divergences for flow_statistics
772    DO  i = nxl, nxr
773       DO  j = nys, nyn
774          DO  k = nzb_s_inner(j,i)+1, nzt
775             threadsum = threadsum + ABS( d(k,j,i) )
776          ENDDO
777       ENDDO
778    ENDDO
779#endif
780
781    localsum = localsum + threadsum
782    !$OMP END PARALLEL
783
784!
785!-- For completeness, set the divergence sum of all statistic regions to those
786!-- of the total domain
787    sums_divnew_l(0:statistic_regions) = localsum
788
789    CALL cpu_log( log_point_s(1), 'divergence', 'stop' )
790
791    CALL cpu_log( log_point(8), 'pres', 'stop' )
792
793
794 END SUBROUTINE pres
Note: See TracBrowser for help on using the repository browser.