source: palm/trunk/SOURCE/pres.f90 @ 1318

Last change on this file since 1318 was 1318, checked in by raasch, 10 years ago

former files/routines cpu_log and cpu_statistics combined into one module,
which also includes the former data module cpulog from the modules-file,
module interfaces removed

  • Property svn:keywords set to Id
File size: 24.3 KB
1 SUBROUTINE pres
2
3!--------------------------------------------------------------------------------!
4! This file is part of PALM.
5!
6! PALM is free software: you can redistribute it and/or modify it under the terms
7! of the GNU General Public License as published by the Free Software Foundation,
8! either version 3 of the License, or (at your option) any later version.
9!
10! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12! A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
13!
14! You should have received a copy of the GNU General Public License along with
15! PALM. If not, see <http://www.gnu.org/licenses/>.
16!
17! Copyright 1997-2014 Leibniz Universitaet Hannover
18!--------------------------------------------------------------------------------!
19!
20! Current revisions:
21! ------------------
22! module interfaces removed
23!
24! Former revisions:
25! -----------------
26! $Id: pres.f90 1318 2014-03-17 13:35:16Z raasch $
27!
28! 1306 2014-03-13 14:30:59Z raasch
29! second argument removed from routine poisfft
30!
31! 1257 2013-11-08 15:18:40Z raasch
32! openacc loop and loop vector clauses removed, independent clauses added,
33! end parallel replaced by end parallel loop
34!
35! 1221 2013-09-10 08:59:13Z raasch
36! openACC porting of reduction operations, loops for calculating d are
37! using the rflags_s_inner multiply flag instead of the nzb_s_inner loop index
38!
39! 1212 2013-08-15 08:46:27Z raasch
40! call of poisfft_hybrid removed
41!
42! 1117 2013-03-27 11:15:36Z suehring
43! Bugfix in OpenMP parallelization.
44!
45! 1113 2013-03-10 02:48:14Z raasch
46! GPU-porting of several loops, some loops rearranged
47!
48! 1111 2013-03-08 23:54:10Z
49! openACC statements added,
50! ibc_p_b = 2 removed
51!
52! 1092 2013-02-02 11:24:22Z raasch
53! unused variables removed
54!
55! 1036 2012-10-22 13:43:42Z raasch
56! code put under GPL (PALM 3.9)
57!
58! 1003 2012-09-14 14:35:53Z raasch
59! adjustment of array tend for cases with unequal subdomain sizes removed
60!
61! 778 2011-11-07 14:18:25Z fricke
62! New allocation of tend when multigrid is used and the collected field on PE0
63! has more grid points than the subdomain of a PE.
64!
65! 719 2011-04-06 13:05:23Z gryschka
66! Bugfix in volume flow control for double cyclic boundary conditions
67!
68! 709 2011-03-30 09:31:40Z raasch
69! formatting adjustments
70!
71! 707 2011-03-29 11:39:40Z raasch
72! Calculation of weighted average of p is now handled in the same way
73! regardless of the number of ghost layers (advection scheme),
74! multigrid and sor method are using p_loc for iterative advancements of
75! pressure,
76! localsum calculation modified for proper OpenMP reduction,
77! bc_lr/ns replaced by bc_lr/ns_cyc
78!
79! 693 2011-03-08 09:..:..Z raasch
80! bugfix: weighting coefficient added to ibm branch
81!
82! 680 2011-02-04 23:16:06Z gryschka
83! bugfix: collective_wait
84!
85! 675 2011-01-19 10:56:55Z suehring
86! Removed bugfix while copying tend.
87!
88! 673 2011-01-18 16:19:48Z suehring
89! Weighting coefficients added for correct computation of the pressure during
90! Runge-Kutta substeps.
91!
92! 667 2010-12-23 12:06:00Z suehring/gryschka
93! New allocation of tend when ws-scheme and multigrid are used. This is done
94! for performance reasons of the data_exchange. The same is done with p after
95! poismg is called.
96! nxl-1, nxr+1, nys-1, nyn+1 replaced by nxlg, nxrg, nysg, nyng when no
97! multigrid is used. Calls of exchange_horiz are modified.
98! bugfix: After pressure correction no volume flow correction in case of
99! non-cyclic boundary conditions
100! (has to be done only before pressure correction)
101! Call of SOR routine is referenced with ddzu_pres.
102!
103! 622 2010-12-10 08:08:13Z raasch
104! optional barriers included in order to speed up collective operations
105!
106! 151 2008-03-07 13:42:18Z raasch
107! Bugfix in volume flow control for non-cyclic boundary conditions
108!
109! 106 2007-08-16 14:30:26Z raasch
110! Volume flow conservation added for the remaining three outflow boundaries
111!
112! 85 2007-05-11 09:35:14Z raasch
113! Division by dt_3d replaced by multiplication with its inverse.
114! For performance optimisation, this is done in the loop calculating the
115! divergence instead of using a separate loop.
116!
117! 75 2007-03-22 09:54:05Z raasch
118! Volume flow control for non-cyclic boundary conditions added (currently only
119! for the north boundary!!), 2nd+3rd argument removed from exchange_horiz,
120! mean vertical velocity is removed in case of Neumann boundary conditions
121! both at the bottom and the top
122!
123! RCS Log replaced by Id keyword, revision history cleaned up
124!
125! Revision 1.25  2006/04/26 13:26:12  raasch
126! OpenMP optimization (+localsum, threadsum)
127!
128! Revision 1.1  1997/07/24 11:24:44  raasch
129! Initial revision
130!
131!
132! Description:
133! ------------
134! Compute the divergence of the provisional velocity field. Solve the Poisson
135! equation for the perturbation pressure. Compute the final velocities using
136! this perturbation pressure. Compute the remaining divergence.
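! In outline, the scheme below is essentially a fractional-step (projection)
! correction for the current Runge-Kutta substep with weighting factor
! weight_pres(intermediate_timestep_count):
!    d     = div( u* ) / ( dt_3d * weight_pres )    (scaled provisional divergence)
!    solve   Laplace( p' ) = d                      (poisfft, sor or multigrid)
!    u_new = u* - dt_3d * weight_pres * grad( p' )  (velocity correction)
! so that the divergence of the corrected field vanishes up to the accuracy of
! the pressure solver.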
137!------------------------------------------------------------------------------!
138
139    USE arrays_3d
140    USE constants
141    USE control_parameters
142    USE cpulog
143    USE grid_variables
144    USE indices
145    USE pegrid
146    USE poisfft_mod
147    USE statistics
148
149    IMPLICIT NONE
150
151    INTEGER ::  i, j, k
152
153    REAL    ::  ddt_3d, localsum, threadsum, d_weight_pres
154
155    REAL, DIMENSION(1:2) ::  volume_flow_l, volume_flow_offset
156    REAL, DIMENSION(1:nzt) ::  w_l, w_l_l
157
158
159    CALL cpu_log( log_point(8), 'pres', 'start' )
160
161
162    ddt_3d = 1.0 / dt_3d
163    d_weight_pres = 1.0 / weight_pres(intermediate_timestep_count)
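!
!-- ddt_3d and d_weight_pres are the inverses of the timestep and of the
!-- substep weighting factor; multiplying by them in the divergence loop below
!-- avoids divisions (cf. revision 85 in the header).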
164
165!
166!-- Multigrid method expects array d to have one ghost layer.
167!--
168    IF ( psolver == 'multigrid' )  THEN
169     
170       DEALLOCATE( d )
171       ALLOCATE( d(nzb:nzt+1,nys-1:nyn+1,nxl-1:nxr+1) ) 
172
173!
174!--    Since p is later used to hold the weighted average of the substeps, it
175!--    cannot be used in the iterative solver. Therefore, its initial value is
176!--    stored in p_loc, which is then iteratively advanced in every substep.
177       IF ( intermediate_timestep_count == 1 )  THEN
178          DO  i = nxl-1, nxr+1
179             DO  j = nys-1, nyn+1
180                DO  k = nzb, nzt+1
181                   p_loc(k,j,i) = p(k,j,i)
182                ENDDO
183             ENDDO
184          ENDDO
185       ENDIF
186       
187    ELSEIF ( psolver == 'sor'  .AND.  intermediate_timestep_count == 1 )  THEN
188
189!
190!--    Since p is later used to hold the weighted average of the substeps, it
191!--    cannot be used in the iterative solver. Therefore, its initial value is
192!--    stored in p_loc, which is then iteratively advanced in every substep.
193       p_loc = p
194
195    ENDIF
196
197!
198!-- Conserve the volume flow at the outflow in case of non-cyclic lateral
199!-- boundary conditions
200!-- WARNING: so far, this conservation does not work at the left/south
201!--          boundary if the topography at the inflow differs from that at the
202!--          outflow! For this case, volume_flow_area needs adjustment!
203!
204!-- Left/right
205    IF ( conserve_volume_flow  .AND.  ( outflow_l .OR. outflow_r ) )  THEN
206
207       volume_flow(1)   = 0.0
208       volume_flow_l(1) = 0.0
209
210       IF ( outflow_l )  THEN
211          i = 0
212       ELSEIF ( outflow_r )  THEN
213          i = nx+1
214       ENDIF
215
216       DO  j = nys, nyn
217!
218!--       Sum up the volume flow through the left/right boundary
219          DO  k = nzb_2d(j,i)+1, nzt
220             volume_flow_l(1) = volume_flow_l(1) + u(k,j,i) * dzw(k)
221          ENDDO
222       ENDDO
223
224#if defined( __parallel )   
225       IF ( collective_wait )  CALL MPI_BARRIER( comm1dy, ierr )
226       CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 1, MPI_REAL, &
227                           MPI_SUM, comm1dy, ierr )   
228#else
229       volume_flow = volume_flow_l 
230#endif
231       volume_flow_offset(1) = ( volume_flow_initial(1) - volume_flow(1) ) &
232                               / volume_flow_area(1)
233
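!
!--    volume_flow_offset(1) is the uniform velocity increment that restores
!--    the initial volume flow ( flow deficit divided by the outflow area );
!--    it is added to u at every grid point of this boundary.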
234       DO  j = nysg, nyng
235          DO  k = nzb_2d(j,i)+1, nzt
236             u(k,j,i) = u(k,j,i) + volume_flow_offset(1)
237          ENDDO
238       ENDDO
239
240    ENDIF
241
242!
243!-- South/north
244    IF ( conserve_volume_flow  .AND.  ( outflow_n .OR. outflow_s ) )  THEN
245
246       volume_flow(2)   = 0.0
247       volume_flow_l(2) = 0.0
248
249       IF ( outflow_s )  THEN
250          j = 0
251       ELSEIF ( outflow_n )  THEN
252          j = ny+1
253       ENDIF
254
255       DO  i = nxl, nxr
256!
257!--       Sum up the volume flow through the south/north boundary
258          DO  k = nzb_2d(j,i)+1, nzt
259             volume_flow_l(2) = volume_flow_l(2) + v(k,j,i) * dzw(k)
260          ENDDO
261       ENDDO
262
263#if defined( __parallel )   
264       IF ( collective_wait )  CALL MPI_BARRIER( comm1dx, ierr )
265       CALL MPI_ALLREDUCE( volume_flow_l(2), volume_flow(2), 1, MPI_REAL, &
266                           MPI_SUM, comm1dx, ierr )   
267#else
268       volume_flow = volume_flow_l 
269#endif
270       volume_flow_offset(2) = ( volume_flow_initial(2) - volume_flow(2) )    &
271                               / volume_flow_area(2)
272
273       DO  i = nxlg, nxrg
274          DO  k = nzb_v_inner(j,i)+1, nzt
275             v(k,j,i) = v(k,j,i) + volume_flow_offset(2)
276          ENDDO
277       ENDDO
278
279    ENDIF
280
281!
282!-- Remove mean vertical velocity
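!-- (horizontal average of w at each height, normalized by the number of
!-- horizontal grid points outside the topography, ngp_2dh_outer). This is
!-- done only for Neumann conditions on p at both the bottom and the top
!-- boundary ( ibc_p_b == 1 .AND. ibc_p_t == 1 ), cf. revision 75 in the header.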
283    IF ( ibc_p_b == 1  .AND.  ibc_p_t == 1 )  THEN
284       IF ( simulated_time > 0.0 )  THEN ! otherwise nzb_w_inner not yet known
285          w_l = 0.0;  w_l_l = 0.0
286          DO  i = nxl, nxr
287             DO  j = nys, nyn
288                DO  k = nzb_w_inner(j,i)+1, nzt
289                   w_l_l(k) = w_l_l(k) + w(k,j,i)
290                ENDDO
291             ENDDO
292          ENDDO
293#if defined( __parallel )   
294          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
295          CALL MPI_ALLREDUCE( w_l_l(1), w_l(1), nzt, MPI_REAL, MPI_SUM, &
296                              comm2d, ierr )
297#else
298          w_l = w_l_l 
299#endif
300          DO  k = 1, nzt
301             w_l(k) = w_l(k) / ngp_2dh_outer(k,0)
302          ENDDO
303          DO  i = nxlg, nxrg
304             DO  j = nysg, nyng
305                DO  k = nzb_w_inner(j,i)+1, nzt
306                   w(k,j,i) = w(k,j,i) - w_l(k)
307                ENDDO
308             ENDDO
309          ENDDO
310       ENDIF
311    ENDIF
312
313!
314!-- Compute the divergence of the provisional velocity field.
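!-- The divergence is scaled by ddt_3d * d_weight_pres = 1 / ( dt_3d *
!-- weight_pres ), so that the velocity correction further below, which
!-- subtracts dt_3d * weight_pres * grad( tend ), removes this divergence
!-- exactly (up to the accuracy of the pressure solver).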
315    CALL cpu_log( log_point_s(1), 'divergence', 'start' )
316
317    IF ( psolver == 'multigrid' )  THEN
318       !$OMP PARALLEL DO SCHEDULE( STATIC )
319       DO  i = nxl-1, nxr+1
320          DO  j = nys-1, nyn+1
321             DO  k = nzb, nzt+1
322                d(k,j,i) = 0.0
323             ENDDO
324          ENDDO
325       ENDDO
326    ELSE
327       !$OMP PARALLEL DO SCHEDULE( STATIC )
328       !$acc kernels present( d )
329       DO  i = nxl, nxr
330          DO  j = nys, nyn
331             DO  k = nzb+1, nzt
332                d(k,j,i) = 0.0
333             ENDDO
334          ENDDO
335       ENDDO
336       !$acc end kernels
337    ENDIF
338
339    localsum  = 0.0
340    threadsum = 0.0
341
342#if defined( __ibm )
343    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
344    !$OMP DO SCHEDULE( STATIC )
345    DO  i = nxl, nxr
346       DO  j = nys, nyn
347          DO  k = nzb_s_inner(j,i)+1, nzt
348             d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
349                          ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
350                          ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) ) * ddt_3d      &
351                                                                * d_weight_pres 
352          ENDDO
353!
354!--       Compute possible PE-sum of divergences for flow_statistics
355          DO  k = nzb_s_inner(j,i)+1, nzt
356             threadsum = threadsum + ABS( d(k,j,i) )
357          ENDDO
358
359       ENDDO
360    ENDDO
361
362    localsum = localsum + threadsum * dt_3d * &
363                          weight_pres(intermediate_timestep_count)
364
365    !$OMP END PARALLEL
366#else
367
368    !$OMP PARALLEL PRIVATE (i,j,k)
369    !$OMP DO SCHEDULE( STATIC )
370    !$acc kernels present( d, ddzw, rflags_s_inner, u, v, w )
371    !$acc loop collapse( 3 )
372    DO  i = nxl, nxr
373       DO  j = nys, nyn
374          DO  k = 1, nzt
375             d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx +             &
376                        ( v(k,j+1,i) - v(k,j,i) ) * ddy +               &
377                        ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) ) * ddt_3d  &
378                        * d_weight_pres * rflags_s_inner(k,j,i)
379          ENDDO
380       ENDDO
381    ENDDO
382    !$acc end kernels
383    !$OMP END PARALLEL
384
385!
386!-- Compute possible PE-sum of divergences for flow_statistics
387    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
388    !$OMP DO SCHEDULE( STATIC )
389    !$acc parallel loop collapse(3) present( d ) reduction(+:threadsum)
390    DO  i = nxl, nxr
391       DO  j = nys, nyn
392          DO  k = nzb+1, nzt
393             threadsum = threadsum + ABS( d(k,j,i) )
394          ENDDO
395       ENDDO
396    ENDDO
397    !$acc end parallel loop
398    localsum = localsum + threadsum * dt_3d * &
399                          weight_pres(intermediate_timestep_count)
400    !$OMP END PARALLEL
401#endif
402
403!
404!-- For completeness, set the divergence sums of all statistic regions to that
405!-- of the total domain
406    sums_divold_l(0:statistic_regions) = localsum
407
408    CALL cpu_log( log_point_s(1), 'divergence', 'stop' )
409
410!
411!-- Compute the pressure perturbation solving the Poisson equation
412    IF ( psolver(1:7) == 'poisfft' )  THEN
413
414!
415!--    Solve Poisson equation via FFT and solution of tridiagonal matrices
416       IF ( psolver == 'poisfft' )  THEN
417
418          CALL poisfft( d )
419
420       ENDIF
421
422!
423!--    Store computed perturbation pressure and set boundary condition in
424!--    z-direction
425       !$OMP PARALLEL DO
426       !$acc kernels present( d, tend )
427       DO  i = nxl, nxr
428          DO  j = nys, nyn
429             DO  k = nzb+1, nzt
430                tend(k,j,i) = d(k,j,i)
431             ENDDO
432          ENDDO
433       ENDDO
434       !$acc end kernels
435
436!
437!--    Bottom boundary:
438!--    This condition is only required for internal output. The pressure
439!--    gradient (dp(nzb+1)-dp(nzb))/dz is not used anywhere else.
440       IF ( ibc_p_b == 1 )  THEN
441!
442!--       Neumann (dp/dz = 0)
443          !$OMP PARALLEL DO
444          !$acc kernels present( nzb_s_inner, tend )
445          DO  i = nxlg, nxrg
446             DO  j = nysg, nyng
447                tend(nzb_s_inner(j,i),j,i) = tend(nzb_s_inner(j,i)+1,j,i)
448             ENDDO
449          ENDDO
450          !$acc end kernels
451
452       ELSE
453!
454!--       Dirichlet
455          !$OMP PARALLEL DO
456          !$acc kernels present( tend )
457          DO  i = nxlg, nxrg
458             DO  j = nysg, nyng
459                tend(nzb_s_inner(j,i),j,i) = 0.0
460             ENDDO
461          ENDDO
462          !$acc end kernels
463
464       ENDIF
465
466!
467!--    Top boundary
468       IF ( ibc_p_t == 1 )  THEN
469!
470!--       Neumann
471          !$OMP PARALLEL DO
472          !$acc kernels present( tend )
473          DO  i = nxlg, nxrg
474             DO  j = nysg, nyng
475                tend(nzt+1,j,i) = tend(nzt,j,i)
476             ENDDO
477          ENDDO
478          !$acc end kernels
479
480       ELSE
481!
482!--       Dirichlet
483          !$OMP PARALLEL DO
484          !$acc kernels present( tend )
485          DO  i = nxlg, nxrg
486             DO  j = nysg, nyng
487                tend(nzt+1,j,i) = 0.0
488             ENDDO
489          ENDDO
490          !$acc end kernels
491
492       ENDIF
493
494!
495!--    Exchange boundaries for p
496       IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
497          on_device = .TRUE.         ! to be removed after complete porting
498       ELSE                          ! of ghost point exchange
499          !$acc update host( tend )
500       ENDIF
501       CALL exchange_horiz( tend, nbgp )
502       IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
503          on_device = .FALSE.        ! to be removed after complete porting
504       ELSE                          ! of ghost point exchange
505          !$acc update device( tend )
506       ENDIF
507     
508    ELSEIF ( psolver == 'sor' )  THEN
509
510!
511!--    Solve Poisson equation for perturbation pressure using SOR-Red/Black
512!--    scheme
513       CALL sor( d, ddzu_pres, ddzw, p_loc )
514       tend = p_loc
515
516    ELSEIF ( psolver == 'multigrid' )  THEN
517
518!
519!--    Solve Poisson equation for perturbation pressure using Multigrid scheme,
520!--    array tend is used to store the residuals, logical exchange_mg is used
521!--    to discern data exchange in multigrid ( 1 ghost point ) and normal grid
522!--    ( nbgp ghost points ).
523
524!--    If the number of grid points of the gathered grid, which is collected
525!--    on PE0, is larger than the number of grid points of a PE, then array
526!--    tend will be enlarged.
527       IF ( gathered_size > subdomain_size )  THEN
528          DEALLOCATE( tend )
529          ALLOCATE( tend(nzb:nzt_mg(mg_switch_to_pe0_level)+1,nys_mg(          &
530                    mg_switch_to_pe0_level)-1:nyn_mg(mg_switch_to_pe0_level)+1,&
531                    nxl_mg(mg_switch_to_pe0_level)-1:nxr_mg(                   &
532                    mg_switch_to_pe0_level)+1) )
533       ENDIF
534
535       CALL poismg( tend )
536
537       IF ( gathered_size > subdomain_size )  THEN
538          DEALLOCATE( tend )
539          ALLOCATE( tend(nzb:nzt+1,nysg:nyng,nxlg:nxrg) )
540       ENDIF
541
542!
543!--    Restore perturbation pressure in tend because this array is used
544!--    further below to correct the velocity fields
545       DO  i = nxl-1, nxr+1
546          DO  j = nys-1, nyn+1
547             DO  k = nzb, nzt+1
548                tend(k,j,i) = p_loc(k,j,i)
549             ENDDO
550          ENDDO
551       ENDDO
552
553    ENDIF
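!
!-- At this point array tend holds the perturbation pressure of the current
!-- substep for each of the three pressure solvers (poisfft, sor, multigrid).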
554
555!
556!-- Store perturbation pressure in array p, used for pressure data output.
557!-- Ghost layers are added in the output routines (except sor-method: see below)
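!-- Summing tend * weight_substep over all substeps yields the weighted
!-- average of the perturbation pressure over the Runge-Kutta substeps
!-- (cf. revisions 673 and 707 in the header).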
558    IF ( intermediate_timestep_count == 1 )  THEN
559       !$OMP PARALLEL PRIVATE (i,j,k)
560       !$OMP DO
561       !$acc kernels present( p, tend, weight_substep )
562       !$acc loop independent
563       DO  i = nxl-1, nxr+1
564          !$acc loop independent
565          DO  j = nys-1, nyn+1
566             !$acc loop independent
567             DO  k = nzb, nzt+1
568                p(k,j,i) = tend(k,j,i) * &
569                           weight_substep(intermediate_timestep_count)
570             ENDDO
571          ENDDO
572       ENDDO
573       !$acc end kernels
574       !$OMP END PARALLEL
575
576    ELSE 
577       !$OMP PARALLEL PRIVATE (i,j,k)
578       !$OMP DO
579       !$acc kernels present( p, tend, weight_substep )
580       !$acc loop independent
581       DO  i = nxl-1, nxr+1
582          !$acc loop independent
583          DO  j = nys-1, nyn+1
584             !$acc loop independent
585             DO  k = nzb, nzt+1
586                p(k,j,i) = p(k,j,i) + tend(k,j,i) * &
587                           weight_substep(intermediate_timestep_count)
588             ENDDO
589          ENDDO
590       ENDDO
591       !$acc end kernels
592       !$OMP END PARALLEL
593
594    ENDIF
595       
596!
597!-- SOR-method needs ghost layers for the next timestep
598    IF ( psolver == 'sor' )  CALL exchange_horiz( p, nbgp )
599
600!
601!-- Correction of the provisional velocities with the current perturbation
602!-- pressure just computed
603    IF ( conserve_volume_flow  .AND.  ( bc_lr_cyc .OR. bc_ns_cyc ) )  THEN
604       volume_flow_l(1) = 0.0
605       volume_flow_l(2) = 0.0
606    ENDIF
607
608    !$OMP PARALLEL PRIVATE (i,j,k)
609    !$OMP DO
610    !$acc kernels present( ddzu, nzb_u_inner, nzb_v_inner, nzb_w_inner, tend, u, v, w, weight_pres )
611    !$acc loop independent
612    DO  i = nxl, nxr   
613       !$acc loop independent
614       DO  j = nys, nyn
615          !$acc loop independent
616          DO  k = 1, nzt
617             IF ( k > nzb_w_inner(j,i) )  THEN
618                w(k,j,i) = w(k,j,i) - dt_3d *                                 &
619                           ( tend(k+1,j,i) - tend(k,j,i) ) * ddzu(k+1) *      &
620                           weight_pres(intermediate_timestep_count)
621             ENDIF
622          ENDDO
623          !$acc loop independent
624          DO  k = 1, nzt
625             IF ( k > nzb_u_inner(j,i) )  THEN
626                u(k,j,i) = u(k,j,i) - dt_3d *                                 &
627                           ( tend(k,j,i) - tend(k,j,i-1) ) * ddx *            &
628                           weight_pres(intermediate_timestep_count)
629             ENDIF
630          ENDDO
631          !$acc loop independent
632          DO  k = 1, nzt
633             IF ( k > nzb_v_inner(j,i) )  THEN
634                v(k,j,i) = v(k,j,i) - dt_3d *                                 &
635                           ( tend(k,j,i) - tend(k,j-1,i) ) * ddy *            &
636                           weight_pres(intermediate_timestep_count)
637             ENDIF
638          ENDDO                                                         
639
640       ENDDO
641    ENDDO
642    !$acc end kernels
643    !$OMP END PARALLEL
644
645!
646!-- Sum up the volume flow through the right and north boundary
647    IF ( conserve_volume_flow  .AND.  bc_lr_cyc  .AND.  bc_ns_cyc  .AND.  &
648         nxr == nx )  THEN
649
650       !$OMP PARALLEL PRIVATE (j,k)
651       !$OMP DO
652       DO  j = nys, nyn
653          !$OMP CRITICAL
654          DO  k = nzb_2d(j,nx) + 1, nzt
655             volume_flow_l(1) = volume_flow_l(1) + u(k,j,nx) * dzw(k)
656          ENDDO
657          !$OMP END CRITICAL
658       ENDDO
659       !$OMP END PARALLEL
660
661    ENDIF
662
663    IF ( conserve_volume_flow  .AND.  bc_ns_cyc  .AND.  bc_lr_cyc  .AND.  &
664         nyn == ny )  THEN
665
666       !$OMP PARALLEL PRIVATE (i,k)
667       !$OMP DO
668       DO  i = nxl, nxr
669          !$OMP CRITICAL
670          DO  k = nzb_2d(ny,i) + 1, nzt
671             volume_flow_l(2) = volume_flow_l(2) + v(k,ny,i) * dzw(k)
672          ENDDO
673          !$OMP END CRITICAL
674       ENDDO
675       !$OMP END PARALLEL
676
677    ENDIF
678   
679!
680!-- Conserve the volume flow
681    IF ( conserve_volume_flow  .AND.  ( bc_lr_cyc  .AND.  bc_ns_cyc ) )  THEN
682
683#if defined( __parallel )   
684       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
685       CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 2, MPI_REAL, &
686                           MPI_SUM, comm2d, ierr ) 
687#else
688       volume_flow = volume_flow_l 
689#endif   
690
691       volume_flow_offset = ( volume_flow_initial - volume_flow ) / &
692                            volume_flow_area
693
694       !$OMP PARALLEL PRIVATE (i,j,k)
695       !$OMP DO
696       DO  i = nxl, nxr
697          DO  j = nys, nyn
698             DO  k = nzb_u_inner(j,i) + 1, nzt
699                u(k,j,i) = u(k,j,i) + volume_flow_offset(1)
700             ENDDO
701             DO k = nzb_v_inner(j,i) + 1, nzt
702                v(k,j,i) = v(k,j,i) + volume_flow_offset(2)
703             ENDDO
704          ENDDO
705       ENDDO
706
707       !$OMP END PARALLEL
708
709    ENDIF
710
711!
712!-- Exchange of boundaries for the velocities
713    IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
714       on_device = .TRUE.         ! to be removed after complete porting
715    ELSE                          ! of ghost point exchange
716       !$acc update host( u, v, w )
717    ENDIF
718    CALL exchange_horiz( u, nbgp )
719    CALL exchange_horiz( v, nbgp )
720    CALL exchange_horiz( w, nbgp )
721    IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
722       on_device = .FALSE.        ! to be removed after complete porting
723    ELSE                          ! of ghost point exchange
724       !$acc update device( u, v, w )
725    ENDIF
726
727!
728!-- Compute the divergence of the corrected velocity field;
729!-- a possible PE-sum is computed in flow_statistics
730    CALL cpu_log( log_point_s(1), 'divergence', 'start' )
731    sums_divnew_l = 0.0
732
733!
734!-- d must be reset to zero because it can contain nonzero values below the
735!-- topography
736    IF ( topography /= 'flat' )  d = 0.0
737
738    localsum  = 0.0
739    threadsum = 0.0
740
741    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
742    !$OMP DO SCHEDULE( STATIC )
743#if defined( __ibm )
744    DO  i = nxl, nxr
745       DO  j = nys, nyn
746          DO  k = nzb_s_inner(j,i)+1, nzt
747             d(k,j,i) = ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
748                        ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
749                        ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k)
750          ENDDO
751          DO  k = nzb+1, nzt
752             threadsum = threadsum + ABS( d(k,j,i) )
753          ENDDO
754       ENDDO
755    ENDDO
756#else
757    !$acc kernels present( d, ddzw, rflags_s_inner, u, v, w )
758    !$acc loop collapse( 3 )
759    DO  i = nxl, nxr
760       DO  j = nys, nyn
761          DO  k = 1, nzt
762                d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx +   &
763                             ( v(k,j+1,i) - v(k,j,i) ) * ddy +   &
764                             ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) &
765                           ) * rflags_s_inner(k,j,i)
766          ENDDO
767       ENDDO
768    ENDDO
769    !$acc end kernels
770!
771!-- Compute possible PE-sum of divergences for flow_statistics
772    !$acc parallel loop collapse(3) present( d ) reduction(+:threadsum)
773    DO  i = nxl, nxr
774       DO  j = nys, nyn
775          DO  k = nzb+1, nzt
776             threadsum = threadsum + ABS( d(k,j,i) )
777          ENDDO
778       ENDDO
779    ENDDO
780    !$acc end parallel loop
781#endif
782
783    localsum = localsum + threadsum
784    !$OMP END PARALLEL
785
786!
787!-- For completeness, set the divergence sums of all statistic regions to that
788!-- of the total domain
789    sums_divnew_l(0:statistic_regions) = localsum
790
791    CALL cpu_log( log_point_s(1), 'divergence', 'stop' )
792
793    CALL cpu_log( log_point(8), 'pres', 'stop' )
794
795
796 END SUBROUTINE pres