source: palm/trunk/SOURCE/pres.f90 @ 1212

Last change on this file since 1212 was 1212, checked in by raasch, 8 years ago

tridia-solver moved to seperate module; the tridiagonal matrix coefficients of array tri are calculated only once at the beginning

  • Property svn:keywords set to Id
File size: 23.8 KB
Line 
1 SUBROUTINE pres
2
3!--------------------------------------------------------------------------------!
4! This file is part of PALM.
5!
6! PALM is free software: you can redistribute it and/or modify it under the terms
7! of the GNU General Public License as published by the Free Software Foundation,
8! either version 3 of the License, or (at your option) any later version.
9!
10! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
11! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12! A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
13!
14! You should have received a copy of the GNU General Public License along with
15! PALM. If not, see <http://www.gnu.org/licenses/>.
16!
17! Copyright 1997-2012  Leibniz University Hannover
18!--------------------------------------------------------------------------------!
19!
20! Current revisions:
21! ------------------
22! call of poisfft_hybrid removed
23!
24! Former revisions:
25! -----------------
26! $Id: pres.f90 1212 2013-08-15 08:46:27Z raasch $
27!
28! 1117 2013-03-27 11:15:36Z suehring
29! Bugfix in OpenMP parallelization.
30!
31! 1113 2013-03-10 02:48:14Z raasch
32! GPU-porting of several loops, some loops rearranged
33!
34! 1111 2013-03-08 23:54:10Z
35! openACC statements added,
36! ibc_p_b = 2 removed
37!
38! 1092 2013-02-02 11:24:22Z raasch
39! unused variables removed
40!
41! 1036 2012-10-22 13:43:42Z raasch
42! code put under GPL (PALM 3.9)
43!
44! 1003 2012-09-14 14:35:53Z raasch
45! adjustment of array tend for cases with unequal subdomain sizes removed
46!
47! 778 2011-11-07 14:18:25Z fricke
48! New allocation of tend when multigrid is used and the collected field on PE0
49! has more grid points than the subdomain of an PE.
50!
51! 719 2011-04-06 13:05:23Z gryschka
52! Bugfix in volume flow control for double cyclic boundary conditions
53!
54! 709 2011-03-30 09:31:40Z raasch
55! formatting adjustments
56!
57! 707 2011-03-29 11:39:40Z raasch
58! Calculation of weighted average of p is now handled in the same way
59! regardless of the number of ghost layers (advection scheme),
60! multigrid and sor method are using p_loc for iterative advancements of
61! pressure,
62! localsum calculation modified for proper OpenMP reduction,
63! bc_lr/ns replaced by bc_lr/ns_cyc
64!
65! 693 2011-03-08 09:..:..Z raasch
66! bugfix: weighting coefficient added to ibm branch
67!
68! 680 2011-02-04 23:16:06Z gryschka
69! bugfix: collective_wait
70!
71! 675 2011-01-19 10:56:55Z suehring
72! Removed bugfix while copying tend.
73!
74! 673 2011-01-18 16:19:48Z suehring
75! Weighting coefficients added for right computation of the pressure during
76! Runge-Kutta substeps.
77!
78! 667 2010-12-23 12:06:00Z suehring/gryschka
79! New allocation of tend when ws-scheme and multigrid is used. This is due to
80! reasons of perforance of the data_exchange. The same is done with p after
81! poismg is called.
82! nxl-1, nxr+1, nys-1, nyn+1 replaced by nxlg, nxrg, nysg, nyng when no
83! multigrid is used. Calls of exchange_horiz are modified.
84! bugfix: After pressure correction no volume flow correction in case of
85! non-cyclic boundary conditions
86! (has to be done only before pressure correction)
87! Call of SOR routine is referenced with ddzu_pres.
88!
89! 622 2010-12-10 08:08:13Z raasch
90! optional barriers included in order to speed up collective operations
91!
92! 151 2008-03-07 13:42:18Z raasch
93! Bugfix in volume flow control for non-cyclic boundary conditions
94!
95! 106 2007-08-16 14:30:26Z raasch
96! Volume flow conservation added for the remaining three outflow boundaries
97!
98! 85 2007-05-11 09:35:14Z raasch
99! Division through dt_3d replaced by multiplication of the inverse.
100! For performance optimisation, this is done in the loop calculating the
101! divergence instead of using a seperate loop.
102!
103! 75 2007-03-22 09:54:05Z raasch
104! Volume flow control for non-cyclic boundary conditions added (currently only
105! for the north boundary!!), 2nd+3rd argument removed from exchange horiz,
106! mean vertical velocity is removed in case of Neumann boundary conditions
107! both at the bottom and the top
108!
109! RCS Log replace by Id keyword, revision history cleaned up
110!
111! Revision 1.25  2006/04/26 13:26:12  raasch
112! OpenMP optimization (+localsum, threadsum)
113!
114! Revision 1.1  1997/07/24 11:24:44  raasch
115! Initial revision
116!
117!
118! Description:
119! ------------
120! Compute the divergence of the provisional velocity field. Solve the Poisson
121! equation for the perturbation pressure. Compute the final velocities using
122! this perturbation pressure. Compute the remaining divergence.
123!------------------------------------------------------------------------------!
124
125    USE arrays_3d
126    USE constants
127    USE control_parameters
128    USE cpulog
129    USE grid_variables
130    USE indices
131    USE interfaces
132    USE pegrid
133    USE poisfft_mod
134    USE statistics
135
136    IMPLICIT NONE
137
138    INTEGER ::  i, j, k
139
140    REAL    ::  ddt_3d, localsum, threadsum, d_weight_pres
141
142    REAL, DIMENSION(1:2) ::  volume_flow_l, volume_flow_offset
143    REAL, DIMENSION(1:nzt) ::  w_l, w_l_l
144
145
146    CALL cpu_log( log_point(8), 'pres', 'start' )
147
148
149    ddt_3d = 1.0 / dt_3d
150    d_weight_pres = 1.0 / weight_pres(intermediate_timestep_count)
151
152!
153!-- Multigrid method expects array d to have one ghost layer.
154!--
155    IF ( psolver == 'multigrid' )  THEN
156     
157       DEALLOCATE( d )
158       ALLOCATE( d(nzb:nzt+1,nys-1:nyn+1,nxl-1:nxr+1) ) 
159
160!
161!--    Since p is later used to hold the weighted average of the substeps, it
162!--    cannot be used in the iterative solver. Therefore, its initial value is
163!--    stored on p_loc, which is then iteratively advanced in every substep.
164       IF ( intermediate_timestep_count == 1 )  THEN
165          DO  i = nxl-1, nxr+1
166             DO  j = nys-1, nyn+1
167                DO  k = nzb, nzt+1
168                   p_loc(k,j,i) = p(k,j,i)
169                ENDDO
170             ENDDO
171          ENDDO
172       ENDIF
173       
174    ELSEIF ( psolver == 'sor'  .AND.  intermediate_timestep_count == 1 )  THEN
175
176!
177!--    Since p is later used to hold the weighted average of the substeps, it
178!--    cannot be used in the iterative solver. Therefore, its initial value is
179!--    stored on p_loc, which is then iteratively advanced in every substep.
180       p_loc = p
181
182    ENDIF
183
184!
185!-- Conserve the volume flow at the outflow in case of non-cyclic lateral
186!-- boundary conditions
187!-- WARNING: so far, this conservation does not work at the left/south
188!--          boundary if the topography at the inflow differs from that at the
189!--          outflow! For this case, volume_flow_area needs adjustment!
190!
191!-- Left/right
192    IF ( conserve_volume_flow  .AND.  ( outflow_l .OR. outflow_r ) )  THEN
193
194       volume_flow(1)   = 0.0
195       volume_flow_l(1) = 0.0
196
197       IF ( outflow_l )  THEN
198          i = 0
199       ELSEIF ( outflow_r )  THEN
200          i = nx+1
201       ENDIF
202
203       DO  j = nys, nyn
204!
205!--       Sum up the volume flow through the south/north boundary
206          DO  k = nzb_2d(j,i)+1, nzt
207             volume_flow_l(1) = volume_flow_l(1) + u(k,j,i) * dzw(k)
208          ENDDO
209       ENDDO
210
211#if defined( __parallel )   
212       IF ( collective_wait )  CALL MPI_BARRIER( comm1dy, ierr )
213       CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 1, MPI_REAL, &
214                           MPI_SUM, comm1dy, ierr )   
215#else
216       volume_flow = volume_flow_l 
217#endif
218       volume_flow_offset(1) = ( volume_flow_initial(1) - volume_flow(1) ) &
219                               / volume_flow_area(1)
220
221       DO  j = nysg, nyng
222          DO  k = nzb_2d(j,i)+1, nzt
223             u(k,j,i) = u(k,j,i) + volume_flow_offset(1)
224          ENDDO
225       ENDDO
226
227    ENDIF
228
229!
230!-- South/north
231    IF ( conserve_volume_flow  .AND.  ( outflow_n .OR. outflow_s ) )  THEN
232
233       volume_flow(2)   = 0.0
234       volume_flow_l(2) = 0.0
235
236       IF ( outflow_s )  THEN
237          j = 0
238       ELSEIF ( outflow_n )  THEN
239          j = ny+1
240       ENDIF
241
242       DO  i = nxl, nxr
243!
244!--       Sum up the volume flow through the south/north boundary
245          DO  k = nzb_2d(j,i)+1, nzt
246             volume_flow_l(2) = volume_flow_l(2) + v(k,j,i) * dzw(k)
247          ENDDO
248       ENDDO
249
250#if defined( __parallel )   
251       IF ( collective_wait )  CALL MPI_BARRIER( comm1dx, ierr )
252       CALL MPI_ALLREDUCE( volume_flow_l(2), volume_flow(2), 1, MPI_REAL, &
253                           MPI_SUM, comm1dx, ierr )   
254#else
255       volume_flow = volume_flow_l 
256#endif
257       volume_flow_offset(2) = ( volume_flow_initial(2) - volume_flow(2) )    &
258                               / volume_flow_area(2)
259
260       DO  i = nxlg, nxrg
261          DO  k = nzb_v_inner(j,i)+1, nzt
262             v(k,j,i) = v(k,j,i) + volume_flow_offset(2)
263          ENDDO
264       ENDDO
265
266    ENDIF
267
268!
269!-- Remove mean vertical velocity
270    IF ( ibc_p_b == 1  .AND.  ibc_p_t == 1 )  THEN
271       IF ( simulated_time > 0.0 )  THEN ! otherwise nzb_w_inner not yet known
272          w_l = 0.0;  w_l_l = 0.0
273          DO  i = nxl, nxr
274             DO  j = nys, nyn
275                DO  k = nzb_w_inner(j,i)+1, nzt
276                   w_l_l(k) = w_l_l(k) + w(k,j,i)
277                ENDDO
278             ENDDO
279          ENDDO
280#if defined( __parallel )   
281          IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
282          CALL MPI_ALLREDUCE( w_l_l(1), w_l(1), nzt, MPI_REAL, MPI_SUM, &
283                              comm2d, ierr )
284#else
285          w_l = w_l_l 
286#endif
287          DO  k = 1, nzt
288             w_l(k) = w_l(k) / ngp_2dh_outer(k,0)
289          ENDDO
290          DO  i = nxlg, nxrg
291             DO  j = nysg, nyng
292                DO  k = nzb_w_inner(j,i)+1, nzt
293                   w(k,j,i) = w(k,j,i) - w_l(k)
294                ENDDO
295             ENDDO
296          ENDDO
297       ENDIF
298    ENDIF
299
300!
301!-- Compute the divergence of the provisional velocity field.
302    CALL cpu_log( log_point_s(1), 'divergence', 'start' )
303
304    IF ( psolver == 'multigrid' )  THEN
305       !$OMP PARALLEL DO SCHEDULE( STATIC )
306       DO  i = nxl-1, nxr+1
307          DO  j = nys-1, nyn+1
308             DO  k = nzb, nzt+1
309                d(k,j,i) = 0.0
310             ENDDO
311          ENDDO
312       ENDDO
313    ELSE
314       !$OMP PARALLEL DO SCHEDULE( STATIC )
315       !$acc kernels present( d )
316       !$acc loop
317       DO  i = nxl, nxr
318          DO  j = nys, nyn
319             !$acc loop vector(32)
320             DO  k = nzb+1, nzt
321                d(k,j,i) = 0.0
322             ENDDO
323          ENDDO
324       ENDDO
325       !$acc end kernels
326    ENDIF
327
328    localsum  = 0.0
329    threadsum = 0.0
330
331#if defined( __ibm )
332    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
333    !$OMP DO SCHEDULE( STATIC )
334    DO  i = nxl, nxr
335       DO  j = nys, nyn
336          DO  k = nzb_s_inner(j,i)+1, nzt
337             d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
338                          ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
339                          ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) ) * ddt_3d      &
340                                                                * d_weight_pres
341          ENDDO
342!
343!--       Compute possible PE-sum of divergences for flow_statistics
344          DO  k = nzb_s_inner(j,i)+1, nzt
345             threadsum = threadsum + ABS( d(k,j,i) )
346          ENDDO
347
348       ENDDO
349    ENDDO
350
351    localsum = localsum + threadsum * dt_3d * &
352                          weight_pres(intermediate_timestep_count)
353
354    !$OMP END PARALLEL
355#else
356
357    !$OMP PARALLEL PRIVATE (i,j,k)
358    !$OMP DO SCHEDULE( STATIC )
359    !$acc kernels present( d, ddzw, nzb_s_inner, u, v, w )
360    !$acc loop
361    DO  i = nxl, nxr
362       DO  j = nys, nyn
363          !$acc loop vector(32)
364          DO  k = 1, nzt
365             IF ( k > nzb_s_inner(j,i) )  THEN
366                d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
367                           ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
368                           ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) ) * ddt_3d      &
369                           * d_weight_pres
370             ENDIF
371          ENDDO
372       ENDDO
373    ENDDO
374    !$acc end kernels
375    !$OMP END PARALLEL
376
377!
378!-- Compute possible PE-sum of divergences for flow_statistics
379    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
380    !$OMP DO SCHEDULE( STATIC )
381    DO  i = nxl, nxr
382       DO  j = nys, nyn
383          DO  k = nzb+1, nzt
384             threadsum = threadsum + ABS( d(k,j,i) )
385          ENDDO
386       ENDDO
387    ENDDO
388    localsum = localsum + threadsum * dt_3d * &
389                          weight_pres(intermediate_timestep_count)
390    !$OMP END PARALLEL
391#endif
392
393!
394!-- For completeness, set the divergence sum of all statistic regions to those
395!-- of the total domain
396    sums_divold_l(0:statistic_regions) = localsum
397
398    CALL cpu_log( log_point_s(1), 'divergence', 'stop' )
399
400!
401!-- Compute the pressure perturbation solving the Poisson equation
402    IF ( psolver(1:7) == 'poisfft' )  THEN
403
404!
405!--    Solve Poisson equation via FFT and solution of tridiagonal matrices
406       IF ( psolver == 'poisfft' )  THEN
407
408          CALL poisfft( d, tend )
409
410       ENDIF
411
412!
413!--    Store computed perturbation pressure and set boundary condition in
414!--    z-direction
415       !$OMP PARALLEL DO
416       !$acc kernels present( d, tend )
417       !$acc loop
418       DO  i = nxl, nxr
419          DO  j = nys, nyn
420             !$acc loop vector( 32 )
421             DO  k = nzb+1, nzt
422                tend(k,j,i) = d(k,j,i)
423             ENDDO
424          ENDDO
425       ENDDO
426       !$acc end kernels
427
428!
429!--    Bottom boundary:
430!--    This condition is only required for internal output. The pressure
431!--    gradient (dp(nzb+1)-dp(nzb))/dz is not used anywhere else.
432       IF ( ibc_p_b == 1 )  THEN
433!
434!--       Neumann (dp/dz = 0)
435          !$OMP PARALLEL DO
436          !$acc kernels present( nzb_s_inner, tend )
437          DO  i = nxlg, nxrg
438             DO  j = nysg, nyng
439                tend(nzb_s_inner(j,i),j,i) = tend(nzb_s_inner(j,i)+1,j,i)
440             ENDDO
441          ENDDO
442          !$acc end kernels
443
444       ELSE
445!
446!--       Dirichlet
447          !$OMP PARALLEL DO
448          !$acc kernels present( tend )
449          DO  i = nxlg, nxrg
450             DO  j = nysg, nyng
451                tend(nzb_s_inner(j,i),j,i) = 0.0
452             ENDDO
453          ENDDO
454          !$acc end kernels
455
456       ENDIF
457
458!
459!--    Top boundary
460       IF ( ibc_p_t == 1 )  THEN
461!
462!--       Neumann
463          !$OMP PARALLEL DO
464          !$acc kernels present( tend )
465          DO  i = nxlg, nxrg
466             DO  j = nysg, nyng
467                tend(nzt+1,j,i) = tend(nzt,j,i)
468             ENDDO
469          ENDDO
470          !$acc end kernels
471
472       ELSE
473!
474!--       Dirichlet
475          !$OMP PARALLEL DO
476          !$acc kernels present( tend )
477          DO  i = nxlg, nxrg
478             DO  j = nysg, nyng
479                tend(nzt+1,j,i) = 0.0
480             ENDDO
481          ENDDO
482          !$acc end kernels
483
484       ENDIF
485
486!
487!--    Exchange boundaries for p
488       IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
489          on_device = .TRUE.         ! to be removed after complete porting
490       ELSE                          ! of ghost point exchange
491          !$acc update host( tend )
492       ENDIF
493       CALL exchange_horiz( tend, nbgp )
494       IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
495          on_device = .FALSE.        ! to be removed after complete porting
496       ELSE                          ! of ghost point exchange
497          !$acc update device( tend )
498       ENDIF
499     
500    ELSEIF ( psolver == 'sor' )  THEN
501
502!
503!--    Solve Poisson equation for perturbation pressure using SOR-Red/Black
504!--    scheme
505       CALL sor( d, ddzu_pres, ddzw, p_loc )
506       tend = p_loc
507
508    ELSEIF ( psolver == 'multigrid' )  THEN
509
510!
511!--    Solve Poisson equation for perturbation pressure using Multigrid scheme,
512!--    array tend is used to store the residuals, logical exchange_mg is used
513!--    to discern data exchange in multigrid ( 1 ghostpoint ) and normal grid
514!--    ( nbgp ghost points ).
515
516!--    If the number of grid points of the gathered grid, which is collected
517!--    on PE0, is larger than the number of grid points of an PE, than array
518!--    tend will be enlarged.
519       IF ( gathered_size > subdomain_size )  THEN
520          DEALLOCATE( tend )
521          ALLOCATE( tend(nzb:nzt_mg(mg_switch_to_pe0_level)+1,nys_mg(          &
522                    mg_switch_to_pe0_level)-1:nyn_mg(mg_switch_to_pe0_level)+1,&
523                    nxl_mg(mg_switch_to_pe0_level)-1:nxr_mg(                   &
524                    mg_switch_to_pe0_level)+1) )
525       ENDIF
526
527       CALL poismg( tend )
528
529       IF ( gathered_size > subdomain_size )  THEN
530          DEALLOCATE( tend )
531          ALLOCATE( tend(nzb:nzt+1,nysg:nyng,nxlg:nxrg) )
532       ENDIF
533
534!
535!--    Restore perturbation pressure on tend because this array is used
536!--    further below to correct the velocity fields
537       DO  i = nxl-1, nxr+1
538          DO  j = nys-1, nyn+1
539             DO  k = nzb, nzt+1
540                tend(k,j,i) = p_loc(k,j,i)
541             ENDDO
542          ENDDO
543       ENDDO
544
545    ENDIF
546
547!
548!-- Store perturbation pressure on array p, used for pressure data output.
549!-- Ghost layers are added in the output routines (except sor-method: see below)
550    IF ( intermediate_timestep_count == 1 )  THEN
551       !$OMP PARALLEL PRIVATE (i,j,k)
552       !$OMP DO
553       !$acc kernels present( p, tend, weight_substep )
554       !$acc loop
555       DO  i = nxl-1, nxr+1
556          DO  j = nys-1, nyn+1
557             !$acc loop vector( 32 )
558             DO  k = nzb, nzt+1
559                p(k,j,i) = tend(k,j,i) * &
560                           weight_substep(intermediate_timestep_count)
561             ENDDO
562          ENDDO
563       ENDDO
564       !$acc end kernels
565       !$OMP END PARALLEL
566
567    ELSE 
568       !$OMP PARALLEL PRIVATE (i,j,k)
569       !$OMP DO
570       !$acc kernels present( p, tend, weight_substep )
571       !$acc loop
572       DO  i = nxl-1, nxr+1
573          DO  j = nys-1, nyn+1
574             !$acc loop vector( 32 )
575             DO  k = nzb, nzt+1
576                p(k,j,i) = p(k,j,i) + tend(k,j,i) * &
577                           weight_substep(intermediate_timestep_count)
578             ENDDO
579          ENDDO
580       ENDDO
581       !$acc end kernels
582       !$OMP END PARALLEL
583
584    ENDIF
585       
586!
587!-- SOR-method needs ghost layers for the next timestep
588    IF ( psolver == 'sor' )  CALL exchange_horiz( p, nbgp )
589
590!
591!-- Correction of the provisional velocities with the current perturbation
592!-- pressure just computed
593    !$acc update host( u, v, w )
594    IF ( conserve_volume_flow  .AND.  ( bc_lr_cyc .OR. bc_ns_cyc ) )  THEN
595       volume_flow_l(1) = 0.0
596       volume_flow_l(2) = 0.0
597    ENDIF
598
599    !$OMP PARALLEL PRIVATE (i,j,k)
600    !$OMP DO
601    !$acc kernels present( ddzu, nzb_u_inner, nzb_v_inner, nzb_w_inner, tend, u, v, w, weight_pres )
602    !$acc loop
603    DO  i = nxl, nxr   
604       DO  j = nys, nyn
605          !$acc loop vector( 32 )
606          DO  k = 1, nzt
607             IF ( k > nzb_w_inner(j,i) )  THEN
608                w(k,j,i) = w(k,j,i) - dt_3d *                                 &
609                           ( tend(k+1,j,i) - tend(k,j,i) ) * ddzu(k+1) *      &
610                           weight_pres(intermediate_timestep_count)
611             ENDIF
612          ENDDO
613          !$acc loop vector( 32 )
614          DO  k = 1, nzt
615             IF ( k > nzb_u_inner(j,i) )  THEN
616                u(k,j,i) = u(k,j,i) - dt_3d *                                 &
617                           ( tend(k,j,i) - tend(k,j,i-1) ) * ddx *            &
618                           weight_pres(intermediate_timestep_count)
619             ENDIF
620          ENDDO
621          !$acc loop vector( 32 )
622          DO  k = 1, nzt
623             IF ( k > nzb_v_inner(j,i) )  THEN
624                v(k,j,i) = v(k,j,i) - dt_3d *                                 &
625                           ( tend(k,j,i) - tend(k,j-1,i) ) * ddy *            &
626                           weight_pres(intermediate_timestep_count)
627             ENDIF
628          ENDDO                                                         
629
630       ENDDO
631    ENDDO
632    !$acc end kernels
633    !$OMP END PARALLEL
634
635!
636!-- Sum up the volume flow through the right and north boundary
637    IF ( conserve_volume_flow  .AND.  bc_lr_cyc  .AND.  bc_ns_cyc  .AND.  &
638         nxr == nx )  THEN
639
640       !$OMP PARALLEL PRIVATE (j,k)
641       !$OMP DO
642       DO  j = nys, nyn
643          !$OMP CRITICAL
644          DO  k = nzb_2d(j,nx) + 1, nzt
645             volume_flow_l(1) = volume_flow_l(1) + u(k,j,nx) * dzw(k)
646          ENDDO
647          !$OMP END CRITICAL
648       ENDDO
649       !$OMP END PARALLEL
650
651    ENDIF
652
653    IF ( conserve_volume_flow  .AND.  bc_ns_cyc  .AND.  bc_lr_cyc  .AND.  &
654         nyn == ny )  THEN
655
656       !$OMP PARALLEL PRIVATE (i,k)
657       !$OMP DO
658       DO  i = nxl, nxr
659          !$OMP CRITICAL
660          DO  k = nzb_2d(ny,i) + 1, nzt
661             volume_flow_l(2) = volume_flow_l(2) + v(k,ny,i) * dzw(k)
662           ENDDO
663          !$OMP END CRITICAL
664       ENDDO
665       !$OMP END PARALLEL
666
667    ENDIF
668   
669!
670!-- Conserve the volume flow
671    IF ( conserve_volume_flow  .AND.  ( bc_lr_cyc  .AND.  bc_ns_cyc ) )  THEN
672
673#if defined( __parallel )   
674       IF ( collective_wait )  CALL MPI_BARRIER( comm2d, ierr )
675       CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 2, MPI_REAL, &
676                           MPI_SUM, comm2d, ierr ) 
677#else
678       volume_flow = volume_flow_l 
679#endif   
680
681       volume_flow_offset = ( volume_flow_initial - volume_flow ) / &
682                            volume_flow_area
683
684       !$OMP PARALLEL PRIVATE (i,j,k)
685       !$OMP DO
686       DO  i = nxl, nxr
687          DO  j = nys, nyn
688             DO  k = nzb_u_inner(j,i) + 1, nzt
689                u(k,j,i) = u(k,j,i) + volume_flow_offset(1)
690             ENDDO
691             DO k = nzb_v_inner(j,i) + 1, nzt
692                v(k,j,i) = v(k,j,i) + volume_flow_offset(2)
693             ENDDO
694          ENDDO
695       ENDDO
696
697       !$OMP END PARALLEL
698
699    ENDIF
700
701!
702!-- Exchange of boundaries for the velocities
703    IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
704       on_device = .TRUE.         ! to be removed after complete porting
705    ELSE                          ! of ghost point exchange
706       !$acc update host( u, v, w )
707    ENDIF
708    CALL exchange_horiz( u, nbgp )
709    CALL exchange_horiz( v, nbgp )
710    CALL exchange_horiz( w, nbgp )
711    IF ( numprocs == 1 )  THEN    ! workaround for single-core GPU runs
712       on_device = .FALSE.        ! to be removed after complete porting
713    ELSE                          ! of ghost point exchange
714       !$acc update device( u, v, w )
715    ENDIF
716
717!
718!-- Compute the divergence of the corrected velocity field,
719!-- a possible PE-sum is computed in flow_statistics
720    CALL cpu_log( log_point_s(1), 'divergence', 'start' )
721    sums_divnew_l = 0.0
722
723!
724!-- d must be reset to zero because it can contain nonzero values below the
725!-- topography
726    IF ( topography /= 'flat' )  d = 0.0
727
728    localsum  = 0.0
729    threadsum = 0.0
730
731    !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
732    !$OMP DO SCHEDULE( STATIC )
733#if defined( __ibm )
734    DO  i = nxl, nxr
735       DO  j = nys, nyn
736          DO  k = nzb_s_inner(j,i)+1, nzt
737             d(k,j,i) = ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
738                        ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
739                        ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k)
740          ENDDO
741          DO  k = nzb+1, nzt
742             threadsum = threadsum + ABS( d(k,j,i) )
743          ENDDO
744       ENDDO
745    ENDDO
746#else
747    !$acc kernels present( d, ddzw, nzb_s_inner, u, v, w )
748    !$acc loop
749    DO  i = nxl, nxr
750       DO  j = nys, nyn
751          !$acc loop vector( 32 )
752          DO  k = 1, nzt
753             IF ( k > nzb_s_inner(j,i) )  THEN
754                d(k,j,i) = ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
755                           ( v(k,j+1,i) - v(k,j,i) ) * ddy + &
756                           ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k)
757             ENDIF
758          ENDDO
759       ENDDO
760    ENDDO
761    !$acc end kernels
762!
763!-- Compute possible PE-sum of divergences for flow_statistics
764    DO  i = nxl, nxr
765       DO  j = nys, nyn
766          DO  k = nzb_s_inner(j,i)+1, nzt
767             threadsum = threadsum + ABS( d(k,j,i) )
768          ENDDO
769       ENDDO
770    ENDDO
771#endif
772
773    localsum = localsum + threadsum
774    !$OMP END PARALLEL
775
776!
777!-- For completeness, set the divergence sum of all statistic regions to those
778!-- of the total domain
779    sums_divnew_l(0:statistic_regions) = localsum
780
781    CALL cpu_log( log_point_s(1), 'divergence', 'stop' )
782
783    CALL cpu_log( log_point(8), 'pres', 'stop' )
784
785
786 END SUBROUTINE pres
Note: See TracBrowser for help on using the repository browser.