source: palm/trunk/SOURCE/pres.f90 @ 1319

Last change on this file since 1319 was 1319, checked in by raasch, 10 years ago

last commit documented

  • Property svn:keywords set to Id
File size: 24.4 KB
 SUBROUTINE pres

!--------------------------------------------------------------------------------!
! This file is part of PALM.
!
! PALM is free software: you can redistribute it and/or modify it under the terms
! of the GNU General Public License as published by the Free Software Foundation,
! either version 3 of the License, or (at your option) any later version.
!
! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
! A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
!
! You should have received a copy of the GNU General Public License along with
! PALM. If not, see <http://www.gnu.org/licenses/>.
!
! Copyright 1997-2014 Leibniz Universitaet Hannover
!--------------------------------------------------------------------------------!
!
! Current revisions:
! ------------------
!
!
! Former revisions:
! -----------------
! $Id: pres.f90 1319 2014-03-17 15:08:44Z raasch $
!
! 1318 2014-03-17 13:35:16Z raasch
! module interfaces removed
!
! 1306 2014-03-13 14:30:59Z raasch
! second argument removed from routine poisfft
!
! 1257 2013-11-08 15:18:40Z raasch
! openacc loop and loop vector clauses removed, independent clauses added,
! end parallel replaced by end parallel loop
!
! 1221 2013-09-10 08:59:13Z raasch
! openACC porting of reduction operations, loops for calculating d are
! using the rflags_s_inner multiply flag instead of the nzb_s_inner loop index
!
! 1212 2013-08-15 08:46:27Z raasch
! call of poisfft_hybrid removed
!
! 1117 2013-03-27 11:15:36Z suehring
! Bugfix in OpenMP parallelization.
!
! 1113 2013-03-10 02:48:14Z raasch
! GPU-porting of several loops, some loops rearranged
!
! 1111 2013-03-08 23:54:10Z
! openACC statements added,
! ibc_p_b = 2 removed
!
! 1092 2013-02-02 11:24:22Z raasch
! unused variables removed
!
! 1036 2012-10-22 13:43:42Z raasch
! code put under GPL (PALM 3.9)
!
! 1003 2012-09-14 14:35:53Z raasch
! adjustment of array tend for cases with unequal subdomain sizes removed
!
! 778 2011-11-07 14:18:25Z fricke
! New allocation of tend when multigrid is used and the collected field on PE0
! has more grid points than the subdomain of a PE.
!
! 719 2011-04-06 13:05:23Z gryschka
! Bugfix in volume flow control for double cyclic boundary conditions
!
! 709 2011-03-30 09:31:40Z raasch
! formatting adjustments
!
! 707 2011-03-29 11:39:40Z raasch
! Calculation of weighted average of p is now handled in the same way
! regardless of the number of ghost layers (advection scheme),
! multigrid and sor method are using p_loc for iterative advancements of
! pressure,
! localsum calculation modified for proper OpenMP reduction,
! bc_lr/ns replaced by bc_lr/ns_cyc
!
! 693 2011-03-08 09:..:..Z raasch
! bugfix: weighting coefficient added to ibm branch
!
! 680 2011-02-04 23:16:06Z gryschka
! bugfix: collective_wait
!
! 675 2011-01-19 10:56:55Z suehring
! Removed bugfix while copying tend.
!
! 673 2011-01-18 16:19:48Z suehring
! Weighting coefficients added for the correct computation of the pressure
! during Runge-Kutta substeps.
!
! 667 2010-12-23 12:06:00Z suehring/gryschka
! New allocation of tend when ws-scheme and multigrid are used. This is done
! for performance reasons in the data exchange. The same is done with p after
! poismg is called.
! nxl-1, nxr+1, nys-1, nyn+1 replaced by nxlg, nxrg, nysg, nyng when no
! multigrid is used. Calls of exchange_horiz are modified.
! bugfix: After pressure correction no volume flow correction in case of
! non-cyclic boundary conditions
! (has to be done only before pressure correction)
! Call of SOR routine is referenced with ddzu_pres.
!
! 622 2010-12-10 08:08:13Z raasch
! optional barriers included in order to speed up collective operations
!
! 151 2008-03-07 13:42:18Z raasch
! Bugfix in volume flow control for non-cyclic boundary conditions
!
! 106 2007-08-16 14:30:26Z raasch
! Volume flow conservation added for the remaining three outflow boundaries
!
! 85 2007-05-11 09:35:14Z raasch
! Division by dt_3d replaced by multiplication with the inverse.
! For performance optimisation, this is done in the loop calculating the
! divergence instead of using a separate loop.
!
! 75 2007-03-22 09:54:05Z raasch
! Volume flow control for non-cyclic boundary conditions added (currently only
! for the north boundary!!), 2nd+3rd argument removed from exchange_horiz,
! mean vertical velocity is removed in case of Neumann boundary conditions
! both at the bottom and the top
!
! RCS Log replaced by Id keyword, revision history cleaned up
!
! Revision 1.25  2006/04/26 13:26:12  raasch
! OpenMP optimization (+localsum, threadsum)
!
! Revision 1.1  1997/07/24 11:24:44  raasch
! Initial revision
!
!
! Description:
! ------------
! Compute the divergence of the provisional velocity field. Solve the Poisson
! equation for the perturbation pressure. Compute the final velocities using
! this perturbation pressure. Compute the remaining divergence.
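! In schematic form, with u* denoting the provisional velocity and p' the
! perturbation pressure, the routine evaluates
!    d = div( u* ) / ( dt_3d * weight_pres ),
! solves the Poisson equation
!    Laplace( p' ) = d,
! and applies the correction
!    u = u* - dt_3d * weight_pres * grad( p' ),
! which removes the divergence of the provisional field up to the accuracy of
! the pressure solver.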
!------------------------------------------------------------------------------!

    USE arrays_3d
    USE constants
    USE control_parameters
    USE cpulog
    USE grid_variables
    USE indices
    USE pegrid
    USE poisfft_mod
    USE statistics

    IMPLICIT NONE

    INTEGER ::  i, j, k

    REAL    ::  ddt_3d, localsum, threadsum, d_weight_pres

    REAL, DIMENSION(1:2) ::  volume_flow_l, volume_flow_offset
    REAL, DIMENSION(1:nzt) ::  w_l, w_l_l


    CALL cpu_log( log_point(8), 'pres', 'start' )

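!
!-- Precompute the reciprocals of the timestep and of the pressure weighting
!-- factor of the current Runge-Kutta substep, so that the divergence loop
!-- below can multiply instead of divide.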
    ddt_3d = 1.0 / dt_3d
    d_weight_pres = 1.0 / weight_pres(intermediate_timestep_count)

!
!-- Multigrid method expects array d to have one ghost layer.
!--
    IF ( psolver == 'multigrid' )  THEN

       DEALLOCATE( d )
       ALLOCATE( d(nzb:nzt+1,nys-1:nyn+1,nxl-1:nxr+1) )

!
!--    Since p is later used to hold the weighted average of the substeps, it
!--    cannot be used in the iterative solver. Therefore, its initial value is
!--    stored in p_loc, which is then iteratively advanced in every substep.
       IF ( intermediate_timestep_count == 1 )  THEN
          DO  i = nxl-1, nxr+1
             DO  j = nys-1, nyn+1
                DO  k = nzb, nzt+1
                   p_loc(k,j,i) = p(k,j,i)
                ENDDO
             ENDDO
          ENDDO
       ENDIF

    ELSEIF ( psolver == 'sor'  .AND.  intermediate_timestep_count == 1 )  THEN

!
!--    Since p is later used to hold the weighted average of the substeps, it
!--    cannot be used in the iterative solver. Therefore, its initial value is
!--    stored in p_loc, which is then iteratively advanced in every substep.
       p_loc = p

    ENDIF

!
!-- Conserve the volume flow at the outflow in case of non-cyclic lateral
!-- boundary conditions
!-- WARNING: so far, this conservation does not work at the left/south
!--          boundary if the topography at the inflow differs from that at the
!--          outflow! For this case, volume_flow_area needs adjustment!
!
!-- Left/right
    IF ( conserve_volume_flow  .AND.  ( outflow_l .OR. outflow_r ) )  THEN

       volume_flow(1)   = 0.0
       volume_flow_l(1) = 0.0

       IF ( outflow_l )  THEN
          i = 0
       ELSEIF ( outflow_r )  THEN
          i = nx+1
       ENDIF

       DO  j = nys, nyn
!
!--       Sum up the volume flow through the left/right boundary
          DO  k = nzb_2d(j,i)+1, nzt
             volume_flow_l(1) = volume_flow_l(1) + u(k,j,i) * dzw(k)
          ENDDO
       ENDDO

#if defined( __parallel )
       IF ( collective_wait )  CALL MPI_BARRIER( comm1dy, ierr )
       CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 1, MPI_REAL, &
                           MPI_SUM, comm1dy, ierr )
#else
       volume_flow = volume_flow_l
#endif
       volume_flow_offset(1) = ( volume_flow_initial(1) - volume_flow(1) ) &
                               / volume_flow_area(1)

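!
!--    Add this constant offset to u over the whole outflow cross-section so
!--    that the volume flow matches volume_flow_initial again.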
       DO  j = nysg, nyng
          DO  k = nzb_2d(j,i)+1, nzt
             u(k,j,i) = u(k,j,i) + volume_flow_offset(1)
          ENDDO
       ENDDO

    ENDIF

!
!-- South/north
    IF ( conserve_volume_flow  .AND.  ( outflow_n .OR. outflow_s ) )  THEN

       volume_flow(2)   = 0.0
       volume_flow_l(2) = 0.0

       IF ( outflow_s )  THEN
          j = 0
       ELSEIF ( outflow_n )  THEN
          j = ny+1
       ENDIF

       DO  i = nxl, nxr
!
!--       Sum up the volume flow through the south/north boundary
          DO  k = nzb_2d(j,i)+1, nzt
             volume_flow_l(2) = volume_flow_l(2) + v(k,j,i) * dzw(k)
          ENDDO
       ENDDO

#if defined( __parallel )
       IF ( collective_wait )  CALL MPI_BARRIER( comm1dx, ierr )
       CALL MPI_ALLREDUCE( volume_flow_l(2), volume_flow(2), 1, MPI_REAL, &
                           MPI_SUM, comm1dx, ierr )
#else
       volume_flow = volume_flow_l
#endif
       volume_flow_offset(2) = ( volume_flow_initial(2) - volume_flow(2) )    &
                               / volume_flow_area(2)

       DO  i = nxlg, nxrg
          DO  k = nzb_v_inner(j,i)+1, nzt
             v(k,j,i) = v(k,j,i) + volume_flow_offset(2)
          ENDDO
       ENDDO

    ENDIF

!
!-- Remove mean vertical velocity
    IF ( ibc_p_b == 1  .AND.  ibc_p_t == 1 )  THEN
       IF ( simulated_time > 0.0 )  THEN ! otherwise nzb_w_inner not yet known
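!
!--       Compute the horizontally averaged profile of w (w_l_l holds the
!--       PE-local sums, which are reduced over all PEs and divided by
!--       ngp_2dh_outer) and subtract this mean profile from w.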
          w_l = 0.0;  w_l_l = 0.0
          DO  i = nxl, nxr
             DO  j = nys, nyn
                DO  k = nzb_w_inner(j,i)+1, nzt
                   w_l_l(k) = w_l_l(k) + w(k,j,i)
                ENDDO
             ENDDO
          ENDDO
#if defined( __parallel )
          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
          CALL MPI_ALLREDUCE( w_l_l(1), w_l(1), nzt, MPI_REAL, MPI_SUM, &
                              comm2d, ierr )
#else
          w_l = w_l_l
#endif
          DO  k = 1, nzt
             w_l(k) = w_l(k) / ngp_2dh_outer(k,0)
          ENDDO
          DO  i = nxlg, nxrg
             DO  j = nysg, nyng
                DO  k = nzb_w_inner(j,i)+1, nzt
                   w(k,j,i) = w(k,j,i) - w_l(k)
                ENDDO
             ENDDO
          ENDDO
       ENDIF
    ENDIF

!
!-- Compute the divergence of the provisional velocity field.
    CALL cpu_log( log_point_s(1), 'divergence', 'start' )

    IF ( psolver == 'multigrid' )  THEN
       !$OMP PARALLEL DO SCHEDULE( STATIC )
       DO  i = nxl-1, nxr+1
          DO  j = nys-1, nyn+1
             DO  k = nzb, nzt+1
                d(k,j,i) = 0.0
             ENDDO
          ENDDO
       ENDDO
    ELSE
       !$OMP PARALLEL DO SCHEDULE( STATIC )
       !$acc kernels present( d )
       DO  i = nxl, nxr
          DO  j = nys, nyn
             DO  k = nzb+1, nzt
                d(k,j,i) = 0.0
             ENDDO
          ENDDO
       ENDDO
       !$acc end kernels
    ENDIF

    localsum  = 0.0
    threadsum = 0.0

#if defined( __ibm )
    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
    !$OMP DO SCHEDULE( STATIC )
    DO  i = nxl, nxr
       DO  j = nys, nyn
          DO  k = nzb_s_inner(j,i)+1, nzt
             d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
                          ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
                          ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) ) * ddt_3d      &
                                                                * d_weight_pres
          ENDDO
!
!--       Compute possible PE-sum of divergences for flow_statistics
          DO  k = nzb_s_inner(j,i)+1, nzt
             threadsum = threadsum + ABS( d(k,j,i) )
          ENDDO

       ENDDO
    ENDDO

    localsum = localsum + threadsum * dt_3d * &
                          weight_pres(intermediate_timestep_count)

    !$OMP END PARALLEL
#else

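!
!-- In this branch the topography is accounted for by the multiplicative flag
!-- rflags_s_inner (instead of starting the k loop at nzb_s_inner), which
!-- allows the three loops to be collapsed for the accelerator.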
    !$OMP PARALLEL PRIVATE (i,j,k)
    !$OMP DO SCHEDULE( STATIC )
    !$acc kernels present( d, ddzw, rflags_s_inner, u, v, w )
    !$acc loop collapse( 3 )
    DO  i = nxl, nxr
       DO  j = nys, nyn
          DO  k = 1, nzt
             d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx +             &
                        ( v(k,j+1,i) - v(k,j,i) ) * ddy +               &
                        ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) ) * ddt_3d  &
                        * d_weight_pres * rflags_s_inner(k,j,i)
          ENDDO
       ENDDO
    ENDDO
    !$acc end kernels
    !$OMP END PARALLEL

!
!-- Compute possible PE-sum of divergences for flow_statistics
    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
    !$OMP DO SCHEDULE( STATIC )
    !$acc parallel loop collapse(3) present( d ) reduction(+:threadsum)
    DO  i = nxl, nxr
       DO  j = nys, nyn
          DO  k = nzb+1, nzt
             threadsum = threadsum + ABS( d(k,j,i) )
          ENDDO
       ENDDO
    ENDDO
    !$acc end parallel loop
    localsum = localsum + threadsum * dt_3d * &
                          weight_pres(intermediate_timestep_count)
    !$OMP END PARALLEL
#endif

!
!-- For completeness, set the divergence sum of all statistic regions to that
!-- of the total domain
    sums_divold_l(0:statistic_regions) = localsum

    CALL cpu_log( log_point_s(1), 'divergence', 'stop' )

!
!-- Compute the perturbation pressure by solving the Poisson equation
    IF ( psolver(1:7) == 'poisfft' )  THEN

!
!--    Solve Poisson equation via FFT and solution of tridiagonal matrices
       IF ( psolver == 'poisfft' )  THEN

          CALL poisfft( d )

       ENDIF

!
!--    Store computed perturbation pressure and set boundary condition in
!--    z-direction
       !$OMP PARALLEL DO
       !$acc kernels present( d, tend )
       DO  i = nxl, nxr
          DO  j = nys, nyn
             DO  k = nzb+1, nzt
                tend(k,j,i) = d(k,j,i)
             ENDDO
          ENDDO
       ENDDO
       !$acc end kernels

!
!--    Bottom boundary:
!--    This condition is only required for internal output. The pressure
!--    gradient (dp(nzb+1)-dp(nzb))/dz is not used anywhere else.
       IF ( ibc_p_b == 1 )  THEN
!
!--       Neumann (dp/dz = 0)
          !$OMP PARALLEL DO
          !$acc kernels present( nzb_s_inner, tend )
          DO  i = nxlg, nxrg
             DO  j = nysg, nyng
                tend(nzb_s_inner(j,i),j,i) = tend(nzb_s_inner(j,i)+1,j,i)
             ENDDO
          ENDDO
          !$acc end kernels

       ELSE
!
!--       Dirichlet
          !$OMP PARALLEL DO
          !$acc kernels present( tend )
          DO  i = nxlg, nxrg
             DO  j = nysg, nyng
                tend(nzb_s_inner(j,i),j,i) = 0.0
             ENDDO
          ENDDO
          !$acc end kernels

       ENDIF

!
!--    Top boundary
       IF ( ibc_p_t == 1 )  THEN
!
!--       Neumann
          !$OMP PARALLEL DO
          !$acc kernels present( tend )
          DO  i = nxlg, nxrg
             DO  j = nysg, nyng
                tend(nzt+1,j,i) = tend(nzt,j,i)
             ENDDO
          ENDDO
          !$acc end kernels

       ELSE
!
!--       Dirichlet
          !$OMP PARALLEL DO
          !$acc kernels present( tend )
          DO  i = nxlg, nxrg
             DO  j = nysg, nyng
                tend(nzt+1,j,i) = 0.0
             ENDDO
          ENDDO
          !$acc end kernels

       ENDIF

!
!--    Exchange the ghost layers of the perturbation pressure (stored in tend)
       IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
          on_device = .TRUE.         ! to be removed after complete porting
       ELSE                          ! of ghost point exchange
          !$acc update host( tend )
       ENDIF
       CALL exchange_horiz( tend, nbgp )
       IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
          on_device = .FALSE.        ! to be removed after complete porting
       ELSE                          ! of ghost point exchange
          !$acc update device( tend )
       ENDIF

    ELSEIF ( psolver == 'sor' )  THEN

!
!--    Solve the Poisson equation for the perturbation pressure using the
!--    red/black SOR scheme
       CALL sor( d, ddzu_pres, ddzw, p_loc )
       tend = p_loc

    ELSEIF ( psolver == 'multigrid' )  THEN

!
!--    Solve the Poisson equation for the perturbation pressure using the
!--    multigrid scheme. Array tend is used to store the residuals; the
!--    logical exchange_mg distinguishes between data exchange on the
!--    multigrid ( 1 ghost point ) and on the normal grid ( nbgp ghost points ).

!--    If the number of grid points of the gathered grid, which is collected
!--    on PE0, is larger than the number of grid points of a PE, then array
!--    tend is enlarged.
       IF ( gathered_size > subdomain_size )  THEN
          DEALLOCATE( tend )
          ALLOCATE( tend(nzb:nzt_mg(mg_switch_to_pe0_level)+1,nys_mg(          &
                    mg_switch_to_pe0_level)-1:nyn_mg(mg_switch_to_pe0_level)+1,&
                    nxl_mg(mg_switch_to_pe0_level)-1:nxr_mg(                   &
                    mg_switch_to_pe0_level)+1) )
       ENDIF

       CALL poismg( tend )

       IF ( gathered_size > subdomain_size )  THEN
          DEALLOCATE( tend )
          ALLOCATE( tend(nzb:nzt+1,nysg:nyng,nxlg:nxrg) )
       ENDIF

!
!--    Restore the perturbation pressure in tend because this array is used
!--    further below to correct the velocity fields
       DO  i = nxl-1, nxr+1
          DO  j = nys-1, nyn+1
             DO  k = nzb, nzt+1
                tend(k,j,i) = p_loc(k,j,i)
             ENDDO
          ENDDO
       ENDDO

    ENDIF

!
!-- Store the perturbation pressure in array p, used for pressure data output.
!-- Ghost layers are added in the output routines (except for the sor-method:
!-- see below).
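!-- Over the substeps of one timestep, p thus accumulates the sum of the
!-- substep pressures weighted with weight_substep.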
    IF ( intermediate_timestep_count == 1 )  THEN
       !$OMP PARALLEL PRIVATE (i,j,k)
       !$OMP DO
       !$acc kernels present( p, tend, weight_substep )
       !$acc loop independent
       DO  i = nxl-1, nxr+1
          !$acc loop independent
          DO  j = nys-1, nyn+1
             !$acc loop independent
             DO  k = nzb, nzt+1
                p(k,j,i) = tend(k,j,i) * &
                           weight_substep(intermediate_timestep_count)
             ENDDO
          ENDDO
       ENDDO
       !$acc end kernels
       !$OMP END PARALLEL

    ELSE
       !$OMP PARALLEL PRIVATE (i,j,k)
       !$OMP DO
       !$acc kernels present( p, tend, weight_substep )
       !$acc loop independent
       DO  i = nxl-1, nxr+1
          !$acc loop independent
          DO  j = nys-1, nyn+1
             !$acc loop independent
             DO  k = nzb, nzt+1
                p(k,j,i) = p(k,j,i) + tend(k,j,i) * &
                           weight_substep(intermediate_timestep_count)
             ENDDO
          ENDDO
       ENDDO
       !$acc end kernels
       !$OMP END PARALLEL

    ENDIF

!
!-- SOR-method needs ghost layers for the next timestep
    IF ( psolver == 'sor' )  CALL exchange_horiz( p, nbgp )

!
!-- Correction of the provisional velocities with the current perturbation
!-- pressure just computed
    IF ( conserve_volume_flow  .AND.  ( bc_lr_cyc .OR. bc_ns_cyc ) )  THEN
       volume_flow_l(1) = 0.0
       volume_flow_l(2) = 0.0
    ENDIF

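!
!-- The factor dt_3d * weight_pres(intermediate_timestep_count) applied below
!-- cancels the factor ddt_3d * d_weight_pres used when assembling the
!-- divergence d, so the corrected field is divergence free up to the accuracy
!-- of the pressure solver.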
    !$OMP PARALLEL PRIVATE (i,j,k)
    !$OMP DO
    !$acc kernels present( ddzu, nzb_u_inner, nzb_v_inner, nzb_w_inner, tend, u, v, w, weight_pres )
    !$acc loop independent
    DO  i = nxl, nxr
       !$acc loop independent
       DO  j = nys, nyn
          !$acc loop independent
          DO  k = 1, nzt
             IF ( k > nzb_w_inner(j,i) )  THEN
                w(k,j,i) = w(k,j,i) - dt_3d *                                 &
                           ( tend(k+1,j,i) - tend(k,j,i) ) * ddzu(k+1) *      &
                           weight_pres(intermediate_timestep_count)
             ENDIF
          ENDDO
          !$acc loop independent
          DO  k = 1, nzt
             IF ( k > nzb_u_inner(j,i) )  THEN
                u(k,j,i) = u(k,j,i) - dt_3d *                                 &
                           ( tend(k,j,i) - tend(k,j,i-1) ) * ddx *            &
                           weight_pres(intermediate_timestep_count)
             ENDIF
          ENDDO
          !$acc loop independent
          DO  k = 1, nzt
             IF ( k > nzb_v_inner(j,i) )  THEN
                v(k,j,i) = v(k,j,i) - dt_3d *                                 &
                           ( tend(k,j,i) - tend(k,j-1,i) ) * ddy *            &
                           weight_pres(intermediate_timestep_count)
             ENDIF
          ENDDO

       ENDDO
    ENDDO
    !$acc end kernels
    !$OMP END PARALLEL

!
!-- Sum up the volume flow through the right and north boundaries
    IF ( conserve_volume_flow  .AND.  bc_lr_cyc  .AND.  bc_ns_cyc  .AND.  &
         nxr == nx )  THEN

       !$OMP PARALLEL PRIVATE (j,k)
       !$OMP DO
       DO  j = nys, nyn
          !$OMP CRITICAL
          DO  k = nzb_2d(j,nx) + 1, nzt
             volume_flow_l(1) = volume_flow_l(1) + u(k,j,nx) * dzw(k)
          ENDDO
          !$OMP END CRITICAL
       ENDDO
       !$OMP END PARALLEL

    ENDIF

    IF ( conserve_volume_flow  .AND.  bc_ns_cyc  .AND.  bc_lr_cyc  .AND.  &
         nyn == ny )  THEN

       !$OMP PARALLEL PRIVATE (i,k)
       !$OMP DO
       DO  i = nxl, nxr
          !$OMP CRITICAL
          DO  k = nzb_2d(ny,i) + 1, nzt
             volume_flow_l(2) = volume_flow_l(2) + v(k,ny,i) * dzw(k)
          ENDDO
          !$OMP END CRITICAL
       ENDDO
       !$OMP END PARALLEL

    ENDIF

!
!-- Conserve the volume flow
    IF ( conserve_volume_flow  .AND.  ( bc_lr_cyc  .AND.  bc_ns_cyc ) )  THEN

#if defined( __parallel )
       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
       CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 2, MPI_REAL, &
                           MPI_SUM, comm2d, ierr )
#else
       volume_flow = volume_flow_l
#endif

       volume_flow_offset = ( volume_flow_initial - volume_flow ) / &
                            volume_flow_area

       !$OMP PARALLEL PRIVATE (i,j,k)
       !$OMP DO
       DO  i = nxl, nxr
          DO  j = nys, nyn
             DO  k = nzb_u_inner(j,i) + 1, nzt
                u(k,j,i) = u(k,j,i) + volume_flow_offset(1)
             ENDDO
             DO  k = nzb_v_inner(j,i) + 1, nzt
                v(k,j,i) = v(k,j,i) + volume_flow_offset(2)
             ENDDO
          ENDDO
       ENDDO

       !$OMP END PARALLEL

    ENDIF

!
!-- Exchange of boundaries for the velocities
    IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
       on_device = .TRUE.         ! to be removed after complete porting
    ELSE                          ! of ghost point exchange
       !$acc update host( u, v, w )
    ENDIF
    CALL exchange_horiz( u, nbgp )
    CALL exchange_horiz( v, nbgp )
    CALL exchange_horiz( w, nbgp )
    IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
       on_device = .FALSE.        ! to be removed after complete porting
    ELSE                          ! of ghost point exchange
       !$acc update device( u, v, w )
    ENDIF

!
!-- Compute the divergence of the corrected velocity field;
!-- a possible PE-sum is computed in flow_statistics
    CALL cpu_log( log_point_s(1), 'divergence', 'start' )
    sums_divnew_l = 0.0

!
!-- d must be reset to zero because it can contain nonzero values below the
!-- topography
    IF ( topography /= 'flat' )  d = 0.0

    localsum  = 0.0
    threadsum = 0.0

    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
    !$OMP DO SCHEDULE( STATIC )
#if defined( __ibm )
    DO  i = nxl, nxr
       DO  j = nys, nyn
          DO  k = nzb_s_inner(j,i)+1, nzt
             d(k,j,i) = ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
                        ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
                        ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k)
          ENDDO
          DO  k = nzb+1, nzt
             threadsum = threadsum + ABS( d(k,j,i) )
          ENDDO
       ENDDO
    ENDDO
#else
    !$acc kernels present( d, ddzw, rflags_s_inner, u, v, w )
    !$acc loop collapse( 3 )
    DO  i = nxl, nxr
       DO  j = nys, nyn
          DO  k = 1, nzt
                d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx +   &
                             ( v(k,j+1,i) - v(k,j,i) ) * ddy +   &
                             ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) &
                           ) * rflags_s_inner(k,j,i)
          ENDDO
       ENDDO
    ENDDO
    !$acc end kernels
!
!-- Compute possible PE-sum of divergences for flow_statistics
    !$acc parallel loop collapse(3) present( d ) reduction(+:threadsum)
    DO  i = nxl, nxr
       DO  j = nys, nyn
          DO  k = nzb+1, nzt
             threadsum = threadsum + ABS( d(k,j,i) )
          ENDDO
       ENDDO
    ENDDO
    !$acc end parallel loop
#endif

    localsum = localsum + threadsum
    !$OMP END PARALLEL

!
!-- For completeness, set the divergence sum of all statistic regions to that
!-- of the total domain
    sums_divnew_l(0:statistic_regions) = localsum

    CALL cpu_log( log_point_s(1), 'divergence', 'stop' )

    CALL cpu_log( log_point(8), 'pres', 'stop' )


 END SUBROUTINE pres