source: palm/trunk/SOURCE/pres.f90 @ 1306

Last change on this file since 1306 was 1306, checked in by raasch, 8 years ago

openmp sections removed from fft/alltoall overlapping
second argument removed from routine poisfft
module for declaring kind definitions added

  • Property svn:keywords set to Id
File size: 24.3 KB
Line 
1 SUBROUTINE pres
2
3!--------------------------------------------------------------------------------!
4! This file is part of PALM.
5!
6! PALM is free software: you can redistribute it and/or modify it under the terms
7! of the GNU General Public License as published by the Free Software Foundation,
8! either version 3 of the License, or (at your option) any later version.
9!
10! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12! A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
13!
14! You should have received a copy of the GNU General Public License along with
15! PALM. If not, see <http://www.gnu.org/licenses/>.
16!
17! Copyright 1997-2012  Leibniz University Hannover
18!--------------------------------------------------------------------------------!
19!
20! Current revisions:
21! ------------------
22! second argument removed from routine poisfft
23!
24! Former revisions:
25! -----------------
26! $Id: pres.f90 1306 2014-03-13 14:30:59Z raasch $
27!
28! 1257 2013-11-08 15:18:40Z raasch
29! openacc loop and loop vector clauses removed, independent clauses added,
30! end parallel replaced by end parallel loop
31!
32! 1221 2013-09-10 08:59:13Z raasch
33! openACC porting of reduction operations, loops for calculating d are
34! using the rflags_s_inner multiply flag instead of the nzb_s_inner loop index
35!
36! 1212 2013-08-15 08:46:27Z raasch
37! call of poisfft_hybrid removed
38!
39! 1117 2013-03-27 11:15:36Z suehring
40! Bugfix in OpenMP parallelization.
41!
42! 1113 2013-03-10 02:48:14Z raasch
43! GPU-porting of several loops, some loops rearranged
44!
45! 1111 2013-03-08 23:54:10Z
46! openACC statements added,
47! ibc_p_b = 2 removed
48!
49! 1092 2013-02-02 11:24:22Z raasch
50! unused variables removed
51!
52! 1036 2012-10-22 13:43:42Z raasch
53! code put under GPL (PALM 3.9)
54!
55! 1003 2012-09-14 14:35:53Z raasch
56! adjustment of array tend for cases with unequal subdomain sizes removed
57!
58! 778 2011-11-07 14:18:25Z fricke
59! New allocation of tend when multigrid is used and the collected field on PE0
60! has more grid points than the subdomain of an PE.
61!
62! 719 2011-04-06 13:05:23Z gryschka
63! Bugfix in volume flow control for double cyclic boundary conditions
64!
65! 709 2011-03-30 09:31:40Z raasch
66! formatting adjustments
67!
68! 707 2011-03-29 11:39:40Z raasch
69! Calculation of weighted average of p is now handled in the same way
70! regardless of the number of ghost layers (advection scheme),
71! multigrid and sor method are using p_loc for iterative advancements of
72! pressure,
73! localsum calculation modified for proper OpenMP reduction,
74! bc_lr/ns replaced by bc_lr/ns_cyc
75!
76! 693 2011-03-08 09:..:..Z raasch
77! bugfix: weighting coefficient added to ibm branch
78!
79! 680 2011-02-04 23:16:06Z gryschka
80! bugfix: collective_wait
81!
82! 675 2011-01-19 10:56:55Z suehring
83! Removed bugfix while copying tend.
84!
85! 673 2011-01-18 16:19:48Z suehring
86! Weighting coefficients added for right computation of the pressure during
87! Runge-Kutta substeps.
88!
89! 667 2010-12-23 12:06:00Z suehring/gryschka
90! New allocation of tend when ws-scheme and multigrid is used. This is due to
91! reasons of perforance of the data_exchange. The same is done with p after
92! poismg is called.
93! nxl-1, nxr+1, nys-1, nyn+1 replaced by nxlg, nxrg, nysg, nyng when no
94! multigrid is used. Calls of exchange_horiz are modified.
95! bugfix: After pressure correction no volume flow correction in case of
96! non-cyclic boundary conditions
97! (has to be done only before pressure correction)
98! Call of SOR routine is referenced with ddzu_pres.
99!
100! 622 2010-12-10 08:08:13Z raasch
101! optional barriers included in order to speed up collective operations
102!
103! 151 2008-03-07 13:42:18Z raasch
104! Bugfix in volume flow control for non-cyclic boundary conditions
105!
106! 106 2007-08-16 14:30:26Z raasch
107! Volume flow conservation added for the remaining three outflow boundaries
108!
109! 85 2007-05-11 09:35:14Z raasch
110! Division through dt_3d replaced by multiplication of the inverse.
111! For performance optimisation, this is done in the loop calculating the
112! divergence instead of using a seperate loop.
113!
114! 75 2007-03-22 09:54:05Z raasch
115! Volume flow control for non-cyclic boundary conditions added (currently only
116! for the north boundary!!), 2nd+3rd argument removed from exchange horiz,
117! mean vertical velocity is removed in case of Neumann boundary conditions
118! both at the bottom and the top
119!
120! RCS Log replace by Id keyword, revision history cleaned up
121!
122! Revision 1.25  2006/04/26 13:26:12  raasch
123! OpenMP optimization (+localsum, threadsum)
124!
125! Revision 1.1  1997/07/24 11:24:44  raasch
126! Initial revision
127!
128!
129! Description:
130! ------------
131! Compute the divergence of the provisional velocity field. Solve the Poisson
132! equation for the perturbation pressure. Compute the final velocities using
133! this perturbation pressure. Compute the remaining divergence.
134!------------------------------------------------------------------------------!
135
136    USE arrays_3d
137    USE constants
138    USE control_parameters
139    USE cpulog
140    USE grid_variables
141    USE indices
142    USE interfaces
143    USE pegrid
144    USE poisfft_mod
145    USE statistics
146
147    IMPLICIT NONE
148
149    INTEGER ::  i, j, k
150
151    REAL    ::  ddt_3d, localsum, threadsum, d_weight_pres
152
153    REAL, DIMENSION(1:2) ::  volume_flow_l, volume_flow_offset
154    REAL, DIMENSION(1:nzt) ::  w_l, w_l_l
155
156
157    CALL cpu_log( log_point(8), 'pres', 'start' )
158
159
160    ddt_3d = 1.0 / dt_3d
161    d_weight_pres = 1.0 / weight_pres(intermediate_timestep_count)
162
163!
164!-- Multigrid method expects array d to have one ghost layer.
165!--
166    IF ( psolver == 'multigrid' )  THEN
167     
168       DEALLOCATE( d )
169       ALLOCATE( d(nzb:nzt+1,nys-1:nyn+1,nxl-1:nxr+1) ) 
170
171!
172!--    Since p is later used to hold the weighted average of the substeps, it
173!--    cannot be used in the iterative solver. Therefore, its initial value is
174!--    stored on p_loc, which is then iteratively advanced in every substep.
175       IF ( intermediate_timestep_count == 1 )  THEN
176          DO  i = nxl-1, nxr+1
177             DO  j = nys-1, nyn+1
178                DO  k = nzb, nzt+1
179                   p_loc(k,j,i) = p(k,j,i)
180                ENDDO
181             ENDDO
182          ENDDO
183       ENDIF
184       
185    ELSEIF ( psolver == 'sor'  .AND.  intermediate_timestep_count == 1 )  THEN
186
187!
188!--    Since p is later used to hold the weighted average of the substeps, it
189!--    cannot be used in the iterative solver. Therefore, its initial value is
190!--    stored on p_loc, which is then iteratively advanced in every substep.
191       p_loc = p
192
193    ENDIF
194
195!
196!-- Conserve the volume flow at the outflow in case of non-cyclic lateral
197!-- boundary conditions
198!-- WARNING: so far, this conservation does not work at the left/south
199!--          boundary if the topography at the inflow differs from that at the
200!--          outflow! For this case, volume_flow_area needs adjustment!
201!
202!-- Left/right
203    IF ( conserve_volume_flow  .AND.  ( outflow_l .OR. outflow_r ) )  THEN
204
205       volume_flow(1)   = 0.0
206       volume_flow_l(1) = 0.0
207
208       IF ( outflow_l )  THEN
209          i = 0
210       ELSEIF ( outflow_r )  THEN
211          i = nx+1
212       ENDIF
213
214       DO  j = nys, nyn
215!
216!--       Sum up the volume flow through the south/north boundary
217          DO  k = nzb_2d(j,i)+1, nzt
218             volume_flow_l(1) = volume_flow_l(1) + u(k,j,i) * dzw(k)
219          ENDDO
220       ENDDO
221
222#if defined( __parallel )   
223       IF ( collective_wait )  CALL MPI_BARRIER( comm1dy, ierr )
224       CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 1, MPI_REAL, &
225                           MPI_SUM, comm1dy, ierr )   
226#else
227       volume_flow = volume_flow_l 
228#endif
229       volume_flow_offset(1) = ( volume_flow_initial(1) - volume_flow(1) ) &
230                               / volume_flow_area(1)
231
232       DO  j = nysg, nyng
233          DO  k = nzb_2d(j,i)+1, nzt
234             u(k,j,i) = u(k,j,i) + volume_flow_offset(1)
235          ENDDO
236       ENDDO
237
238    ENDIF
239
240!
241!-- South/north
242    IF ( conserve_volume_flow  .AND.  ( outflow_n .OR. outflow_s ) )  THEN
243
244       volume_flow(2)   = 0.0
245       volume_flow_l(2) = 0.0
246
247       IF ( outflow_s )  THEN
248          j = 0
249       ELSEIF ( outflow_n )  THEN
250          j = ny+1
251       ENDIF
252
253       DO  i = nxl, nxr
254!
255!--       Sum up the volume flow through the south/north boundary
256          DO  k = nzb_2d(j,i)+1, nzt
257             volume_flow_l(2) = volume_flow_l(2) + v(k,j,i) * dzw(k)
258          ENDDO
259       ENDDO
260
261#if defined( __parallel )   
262       IF ( collective_wait )  CALL MPI_BARRIER( comm1dx, ierr )
263       CALL MPI_ALLREDUCE( volume_flow_l(2), volume_flow(2), 1, MPI_REAL, &
264                           MPI_SUM, comm1dx, ierr )   
265#else
266       volume_flow = volume_flow_l 
267#endif
268       volume_flow_offset(2) = ( volume_flow_initial(2) - volume_flow(2) )    &
269                               / volume_flow_area(2)
270
271       DO  i = nxlg, nxrg
272          DO  k = nzb_v_inner(j,i)+1, nzt
273             v(k,j,i) = v(k,j,i) + volume_flow_offset(2)
274          ENDDO
275       ENDDO
276
277    ENDIF
278
279!
280!-- Remove mean vertical velocity
281    IF ( ibc_p_b == 1  .AND.  ibc_p_t == 1 )  THEN
282       IF ( simulated_time > 0.0 )  THEN ! otherwise nzb_w_inner not yet known
283          w_l = 0.0;  w_l_l = 0.0
284          DO  i = nxl, nxr
285             DO  j = nys, nyn
286                DO  k = nzb_w_inner(j,i)+1, nzt
287                   w_l_l(k) = w_l_l(k) + w(k,j,i)
288                ENDDO
289             ENDDO
290          ENDDO
291#if defined( __parallel )   
292          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
293          CALL MPI_ALLREDUCE( w_l_l(1), w_l(1), nzt, MPI_REAL, MPI_SUM, &
294                              comm2d, ierr )
295#else
296          w_l = w_l_l 
297#endif
298          DO  k = 1, nzt
299             w_l(k) = w_l(k) / ngp_2dh_outer(k,0)
300          ENDDO
301          DO  i = nxlg, nxrg
302             DO  j = nysg, nyng
303                DO  k = nzb_w_inner(j,i)+1, nzt
304                   w(k,j,i) = w(k,j,i) - w_l(k)
305                ENDDO
306             ENDDO
307          ENDDO
308       ENDIF
309    ENDIF
310
311!
312!-- Compute the divergence of the provisional velocity field.
313    CALL cpu_log( log_point_s(1), 'divergence', 'start' )
314
315    IF ( psolver == 'multigrid' )  THEN
316       !$OMP PARALLEL DO SCHEDULE( STATIC )
317       DO  i = nxl-1, nxr+1
318          DO  j = nys-1, nyn+1
319             DO  k = nzb, nzt+1
320                d(k,j,i) = 0.0
321             ENDDO
322          ENDDO
323       ENDDO
324    ELSE
325       !$OMP PARALLEL DO SCHEDULE( STATIC )
326       !$acc kernels present( d )
327       DO  i = nxl, nxr
328          DO  j = nys, nyn
329             DO  k = nzb+1, nzt
330                d(k,j,i) = 0.0
331             ENDDO
332          ENDDO
333       ENDDO
334       !$acc end kernels
335    ENDIF
336
337    localsum  = 0.0
338    threadsum = 0.0
339
340#if defined( __ibm )
341    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
342    !$OMP DO SCHEDULE( STATIC )
343    DO  i = nxl, nxr
344       DO  j = nys, nyn
345          DO  k = nzb_s_inner(j,i)+1, nzt
346             d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
347                          ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
348                          ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) ) * ddt_3d      &
349                                                                * d_weight_pres
350          ENDDO
351!
352!--       Compute possible PE-sum of divergences for flow_statistics
353          DO  k = nzb_s_inner(j,i)+1, nzt
354             threadsum = threadsum + ABS( d(k,j,i) )
355          ENDDO
356
357       ENDDO
358    ENDDO
359
360    localsum = localsum + threadsum * dt_3d * &
361                          weight_pres(intermediate_timestep_count)
362
363    !$OMP END PARALLEL
364#else
365
366    !$OMP PARALLEL PRIVATE (i,j,k)
367    !$OMP DO SCHEDULE( STATIC )
368    !$acc kernels present( d, ddzw, rflags_s_inner, u, v, w )
369    !$acc loop collapse( 3 )
370    DO  i = nxl, nxr
371       DO  j = nys, nyn
372          DO  k = 1, nzt
373             d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx +             &
374                        ( v(k,j+1,i) - v(k,j,i) ) * ddy +               &
375                        ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) ) * ddt_3d  &
376                        * d_weight_pres * rflags_s_inner(k,j,i)
377          ENDDO
378       ENDDO
379    ENDDO
380    !$acc end kernels
381    !$OMP END PARALLEL
382
383!
384!-- Compute possible PE-sum of divergences for flow_statistics
385    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
386    !$OMP DO SCHEDULE( STATIC )
387    !$acc parallel loop collapse(3) present( d ) reduction(+:threadsum)
388    DO  i = nxl, nxr
389       DO  j = nys, nyn
390          DO  k = nzb+1, nzt
391             threadsum = threadsum + ABS( d(k,j,i) )
392          ENDDO
393       ENDDO
394    ENDDO
395    !$acc end parallel loop
396    localsum = localsum + threadsum * dt_3d * &
397                          weight_pres(intermediate_timestep_count)
398    !$OMP END PARALLEL
399#endif
400
401!
402!-- For completeness, set the divergence sum of all statistic regions to those
403!-- of the total domain
404    sums_divold_l(0:statistic_regions) = localsum
405
406    CALL cpu_log( log_point_s(1), 'divergence', 'stop' )
407
408!
409!-- Compute the pressure perturbation solving the Poisson equation
410    IF ( psolver(1:7) == 'poisfft' )  THEN
411
412!
413!--    Solve Poisson equation via FFT and solution of tridiagonal matrices
414       IF ( psolver == 'poisfft' )  THEN
415
416          CALL poisfft( d )
417
418       ENDIF
419
420!
421!--    Store computed perturbation pressure and set boundary condition in
422!--    z-direction
423       !$OMP PARALLEL DO
424       !$acc kernels present( d, tend )
425       DO  i = nxl, nxr
426          DO  j = nys, nyn
427             DO  k = nzb+1, nzt
428                tend(k,j,i) = d(k,j,i)
429             ENDDO
430          ENDDO
431       ENDDO
432       !$acc end kernels
433
434!
435!--    Bottom boundary:
436!--    This condition is only required for internal output. The pressure
437!--    gradient (dp(nzb+1)-dp(nzb))/dz is not used anywhere else.
438       IF ( ibc_p_b == 1 )  THEN
439!
440!--       Neumann (dp/dz = 0)
441          !$OMP PARALLEL DO
442          !$acc kernels present( nzb_s_inner, tend )
443          DO  i = nxlg, nxrg
444             DO  j = nysg, nyng
445                tend(nzb_s_inner(j,i),j,i) = tend(nzb_s_inner(j,i)+1,j,i)
446             ENDDO
447          ENDDO
448          !$acc end kernels
449
450       ELSE
451!
452!--       Dirichlet
453          !$OMP PARALLEL DO
454          !$acc kernels present( tend )
455          DO  i = nxlg, nxrg
456             DO  j = nysg, nyng
457                tend(nzb_s_inner(j,i),j,i) = 0.0
458             ENDDO
459          ENDDO
460          !$acc end kernels
461
462       ENDIF
463
464!
465!--    Top boundary
466       IF ( ibc_p_t == 1 )  THEN
467!
468!--       Neumann
469          !$OMP PARALLEL DO
470          !$acc kernels present( tend )
471          DO  i = nxlg, nxrg
472             DO  j = nysg, nyng
473                tend(nzt+1,j,i) = tend(nzt,j,i)
474             ENDDO
475          ENDDO
476          !$acc end kernels
477
478       ELSE
479!
480!--       Dirichlet
481          !$OMP PARALLEL DO
482          !$acc kernels present( tend )
483          DO  i = nxlg, nxrg
484             DO  j = nysg, nyng
485                tend(nzt+1,j,i) = 0.0
486             ENDDO
487          ENDDO
488          !$acc end kernels
489
490       ENDIF
491
492!
493!--    Exchange boundaries for p
494       IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
495          on_device = .TRUE.         ! to be removed after complete porting
496       ELSE                          ! of ghost point exchange
497          !$acc update host( tend )
498       ENDIF
499       CALL exchange_horiz( tend, nbgp )
500       IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
501          on_device = .FALSE.        ! to be removed after complete porting
502       ELSE                          ! of ghost point exchange
503          !$acc update device( tend )
504       ENDIF
505     
506    ELSEIF ( psolver == 'sor' )  THEN
507
508!
509!--    Solve Poisson equation for perturbation pressure using SOR-Red/Black
510!--    scheme
511       CALL sor( d, ddzu_pres, ddzw, p_loc )
512       tend = p_loc
513
514    ELSEIF ( psolver == 'multigrid' )  THEN
515
516!
517!--    Solve Poisson equation for perturbation pressure using Multigrid scheme,
518!--    array tend is used to store the residuals, logical exchange_mg is used
519!--    to discern data exchange in multigrid ( 1 ghostpoint ) and normal grid
520!--    ( nbgp ghost points ).
521
522!--    If the number of grid points of the gathered grid, which is collected
523!--    on PE0, is larger than the number of grid points of an PE, than array
524!--    tend will be enlarged.
525       IF ( gathered_size > subdomain_size )  THEN
526          DEALLOCATE( tend )
527          ALLOCATE( tend(nzb:nzt_mg(mg_switch_to_pe0_level)+1,nys_mg(          &
528                    mg_switch_to_pe0_level)-1:nyn_mg(mg_switch_to_pe0_level)+1,&
529                    nxl_mg(mg_switch_to_pe0_level)-1:nxr_mg(                   &
530                    mg_switch_to_pe0_level)+1) )
531       ENDIF
532
533       CALL poismg( tend )
534
535       IF ( gathered_size > subdomain_size )  THEN
536          DEALLOCATE( tend )
537          ALLOCATE( tend(nzb:nzt+1,nysg:nyng,nxlg:nxrg) )
538       ENDIF
539
540!
541!--    Restore perturbation pressure on tend because this array is used
542!--    further below to correct the velocity fields
543       DO  i = nxl-1, nxr+1
544          DO  j = nys-1, nyn+1
545             DO  k = nzb, nzt+1
546                tend(k,j,i) = p_loc(k,j,i)
547             ENDDO
548          ENDDO
549       ENDDO
550
551    ENDIF
552
553!
554!-- Store perturbation pressure on array p, used for pressure data output.
555!-- Ghost layers are added in the output routines (except sor-method: see below)
556    IF ( intermediate_timestep_count == 1 )  THEN
557       !$OMP PARALLEL PRIVATE (i,j,k)
558       !$OMP DO
559       !$acc kernels present( p, tend, weight_substep )
560       !$acc loop independent
561       DO  i = nxl-1, nxr+1
562          !$acc loop independent
563          DO  j = nys-1, nyn+1
564             !$acc loop independent
565             DO  k = nzb, nzt+1
566                p(k,j,i) = tend(k,j,i) * &
567                           weight_substep(intermediate_timestep_count)
568             ENDDO
569          ENDDO
570       ENDDO
571       !$acc end kernels
572       !$OMP END PARALLEL
573
574    ELSE 
575       !$OMP PARALLEL PRIVATE (i,j,k)
576       !$OMP DO
577       !$acc kernels present( p, tend, weight_substep )
578       !$acc loop independent
579       DO  i = nxl-1, nxr+1
580          !$acc loop independent
581          DO  j = nys-1, nyn+1
582             !$acc loop independent
583             DO  k = nzb, nzt+1
584                p(k,j,i) = p(k,j,i) + tend(k,j,i) * &
585                           weight_substep(intermediate_timestep_count)
586             ENDDO
587          ENDDO
588       ENDDO
589       !$acc end kernels
590       !$OMP END PARALLEL
591
592    ENDIF
593       
594!
595!-- SOR-method needs ghost layers for the next timestep
596    IF ( psolver == 'sor' )  CALL exchange_horiz( p, nbgp )
597
598!
599!-- Correction of the provisional velocities with the current perturbation
600!-- pressure just computed
601    IF ( conserve_volume_flow  .AND.  ( bc_lr_cyc .OR. bc_ns_cyc ) )  THEN
602       volume_flow_l(1) = 0.0
603       volume_flow_l(2) = 0.0
604    ENDIF
605
606    !$OMP PARALLEL PRIVATE (i,j,k)
607    !$OMP DO
608    !$acc kernels present( ddzu, nzb_u_inner, nzb_v_inner, nzb_w_inner, tend, u, v, w, weight_pres )
609    !$acc loop independent
610    DO  i = nxl, nxr   
611       !$acc loop independent
612       DO  j = nys, nyn
613          !$acc loop independent
614          DO  k = 1, nzt
615             IF ( k > nzb_w_inner(j,i) )  THEN
616                w(k,j,i) = w(k,j,i) - dt_3d *                                 &
617                           ( tend(k+1,j,i) - tend(k,j,i) ) * ddzu(k+1) *      &
618                           weight_pres(intermediate_timestep_count)
619             ENDIF
620          ENDDO
621          !$acc loop independent
622          DO  k = 1, nzt
623             IF ( k > nzb_u_inner(j,i) )  THEN
624                u(k,j,i) = u(k,j,i) - dt_3d *                                 &
625                           ( tend(k,j,i) - tend(k,j,i-1) ) * ddx *            &
626                           weight_pres(intermediate_timestep_count)
627             ENDIF
628          ENDDO
629          !$acc loop independent
630          DO  k = 1, nzt
631             IF ( k > nzb_v_inner(j,i) )  THEN
632                v(k,j,i) = v(k,j,i) - dt_3d *                                 &
633                           ( tend(k,j,i) - tend(k,j-1,i) ) * ddy *            &
634                           weight_pres(intermediate_timestep_count)
635             ENDIF
636          ENDDO                                                         
637
638       ENDDO
639    ENDDO
640    !$acc end kernels
641    !$OMP END PARALLEL
642
643!
644!-- Sum up the volume flow through the right and north boundary
645    IF ( conserve_volume_flow  .AND.  bc_lr_cyc  .AND.  bc_ns_cyc  .AND.  &
646         nxr == nx )  THEN
647
648       !$OMP PARALLEL PRIVATE (j,k)
649       !$OMP DO
650       DO  j = nys, nyn
651          !$OMP CRITICAL
652          DO  k = nzb_2d(j,nx) + 1, nzt
653             volume_flow_l(1) = volume_flow_l(1) + u(k,j,nx) * dzw(k)
654          ENDDO
655          !$OMP END CRITICAL
656       ENDDO
657       !$OMP END PARALLEL
658
659    ENDIF
660
661    IF ( conserve_volume_flow  .AND.  bc_ns_cyc  .AND.  bc_lr_cyc  .AND.  &
662         nyn == ny )  THEN
663
664       !$OMP PARALLEL PRIVATE (i,k)
665       !$OMP DO
666       DO  i = nxl, nxr
667          !$OMP CRITICAL
668          DO  k = nzb_2d(ny,i) + 1, nzt
669             volume_flow_l(2) = volume_flow_l(2) + v(k,ny,i) * dzw(k)
670           ENDDO
671          !$OMP END CRITICAL
672       ENDDO
673       !$OMP END PARALLEL
674
675    ENDIF
676   
677!
678!-- Conserve the volume flow
679    IF ( conserve_volume_flow  .AND.  ( bc_lr_cyc  .AND.  bc_ns_cyc ) )  THEN
680
681#if defined( __parallel )   
682       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
683       CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 2, MPI_REAL, &
684                           MPI_SUM, comm2d, ierr ) 
685#else
686       volume_flow = volume_flow_l 
687#endif   
688
689       volume_flow_offset = ( volume_flow_initial - volume_flow ) / &
690                            volume_flow_area
691
692       !$OMP PARALLEL PRIVATE (i,j,k)
693       !$OMP DO
694       DO  i = nxl, nxr
695          DO  j = nys, nyn
696             DO  k = nzb_u_inner(j,i) + 1, nzt
697                u(k,j,i) = u(k,j,i) + volume_flow_offset(1)
698             ENDDO
699             DO k = nzb_v_inner(j,i) + 1, nzt
700                v(k,j,i) = v(k,j,i) + volume_flow_offset(2)
701             ENDDO
702          ENDDO
703       ENDDO
704
705       !$OMP END PARALLEL
706
707    ENDIF
708
709!
710!-- Exchange of boundaries for the velocities
711    IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
712       on_device = .TRUE.         ! to be removed after complete porting
713    ELSE                          ! of ghost point exchange
714       !$acc update host( u, v, w )
715    ENDIF
716    CALL exchange_horiz( u, nbgp )
717    CALL exchange_horiz( v, nbgp )
718    CALL exchange_horiz( w, nbgp )
719    IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
720       on_device = .FALSE.        ! to be removed after complete porting
721    ELSE                          ! of ghost point exchange
722       !$acc update device( u, v, w )
723    ENDIF
724
725!
726!-- Compute the divergence of the corrected velocity field,
727!-- a possible PE-sum is computed in flow_statistics
728    CALL cpu_log( log_point_s(1), 'divergence', 'start' )
729    sums_divnew_l = 0.0
730
731!
732!-- d must be reset to zero because it can contain nonzero values below the
733!-- topography
734    IF ( topography /= 'flat' )  d = 0.0
735
736    localsum  = 0.0
737    threadsum = 0.0
738
739    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
740    !$OMP DO SCHEDULE( STATIC )
741#if defined( __ibm )
742    DO  i = nxl, nxr
743       DO  j = nys, nyn
744          DO  k = nzb_s_inner(j,i)+1, nzt
745             d(k,j,i) = ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
746                        ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
747                        ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k)
748          ENDDO
749          DO  k = nzb+1, nzt
750             threadsum = threadsum + ABS( d(k,j,i) )
751          ENDDO
752       ENDDO
753    ENDDO
754#else
755    !$acc kernels present( d, ddzw, rflags_s_inner, u, v, w )
756    !$acc loop collapse( 3 )
757    DO  i = nxl, nxr
758       DO  j = nys, nyn
759          DO  k = 1, nzt
760                d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx +   &
761                             ( v(k,j+1,i) - v(k,j,i) ) * ddy +   &
762                             ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) &
763                           ) * rflags_s_inner(k,j,i)
764          ENDDO
765       ENDDO
766    ENDDO
767    !$acc end kernels
768!
769!-- Compute possible PE-sum of divergences for flow_statistics
770    !$acc parallel loop collapse(3) present( d ) reduction(+:threadsum)
771    DO  i = nxl, nxr
772       DO  j = nys, nyn
773          DO  k = nzb+1, nzt
774             threadsum = threadsum + ABS( d(k,j,i) )
775          ENDDO
776       ENDDO
777    ENDDO
778    !$acc end parallel loop
779#endif
780
781    localsum = localsum + threadsum
782    !$OMP END PARALLEL
783
784!
785!-- For completeness, set the divergence sum of all statistic regions to those
786!-- of the total domain
787    sums_divnew_l(0:statistic_regions) = localsum
788
789    CALL cpu_log( log_point_s(1), 'divergence', 'stop' )
790
791    CALL cpu_log( log_point(8), 'pres', 'stop' )
792
793
794 END SUBROUTINE pres
Note: See TracBrowser for help on using the repository browser.