Home

Context Navigation

source: palm/trunk/SOURCE/pres.f90 @ 1307

Last change on this file since 1307 was 1307, checked in by raasch, 10 years ago
last commit documented
Property svn:keywords set to `Id`
File size: 24.3 KB

Rev	Line
[1]	1	SUBROUTINE pres
	2
[1036]	3	!--------------------------------------------------------------------------------!
	4	! This file is part of PALM.
	5	!
	6	! PALM is free software: you can redistribute it and/or modify it under the terms
	7	! of the GNU General Public License as published by the Free Software Foundation,
	8	! either version 3 of the License, or (at your option) any later version.
	9	!
	10	! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
	11	! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
	12	! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
	13	!
	14	! You should have received a copy of the GNU General Public License along with
	15	! PALM. If not, see <http://www.gnu.org/licenses/>.
	16	!
	17	! Copyright 1997-2012 Leibniz University Hannover
	18	!--------------------------------------------------------------------------------!
	19	!
[484]	20	! Current revisions:
[1212]	21	! ------------------
[708]	22	!
[1307]	23	!
[708]	24	! Former revisions:
	25	! -----------------
	26	! $Id: pres.f90 1307 2014-03-13 14:39:47Z raasch $
	27	!
[1307]	28	! 1306 2014-03-13 14:30:59Z raasch
	29	! second argument removed from routine poisfft
	30	!
[1258]	31	! 1257 2013-11-08 15:18:40Z raasch
	32	! openacc loop and loop vector clauses removed, independent clauses added,
	33	! end parallel replaced by end parallel loop
	34	!
[1222]	35	! 1221 2013-09-10 08:59:13Z raasch
	36	! openACC porting of reduction operations, loops for calculating d are
	37	! using the rflags_s_inner multiply flag instead of the nzb_s_inner loop index
	38	!
[1213]	39	! 1212 2013-08-15 08:46:27Z raasch
	40	! call of poisfft_hybrid removed
	41	!
[1118]	42	! 1117 2013-03-27 11:15:36Z suehring
	43	! Bugfix in OpenMP parallelization.
	44	!
[1114]	45	! 1113 2013-03-10 02:48:14Z raasch
	46	! GPU-porting of several loops, some loops rearranged
	47	!
[1112]	48	! 1111 2013-03-08 23:54:10Z
	49	! openACC statements added,
	50	! ibc_p_b = 2 removed
	51	!
[1093]	52	! 1092 2013-02-02 11:24:22Z raasch
	53	! unused variables removed
	54	!
[1037]	55	! 1036 2012-10-22 13:43:42Z raasch
	56	! code put under GPL (PALM 3.9)
	57	!
[1004]	58	! 1003 2012-09-14 14:35:53Z raasch
	59	! adjustment of array tend for cases with unequal subdomain sizes removed
	60	!
[779]	61	! 778 2011-11-07 14:18:25Z fricke
	62	! New allocation of tend when multigrid is used and the collected field on PE0
	63	! has more grid points than the subdomain of an PE.
	64	!
[720]	65	! 719 2011-04-06 13:05:23Z gryschka
	66	! Bugfix in volume flow control for double cyclic boundary conditions
	67	!
[710]	68	! 709 2011-03-30 09:31:40Z raasch
	69	! formatting adjustments
	70	!
[708]	71	! 707 2011-03-29 11:39:40Z raasch
[707]	72	! Calculation of weighted average of p is now handled in the same way
	73	! regardless of the number of ghost layers (advection scheme),
	74	! multigrid and sor method are using p_loc for iterative advancements of
	75	! pressure,
	76	! localsum calculation modified for proper OpenMP reduction,
	77	! bc_lr/ns replaced by bc_lr/ns_cyc
[674]	78	!
[694]	79	! 693 2011-03-08 09:..:..Z raasch
[695]	80	! bugfix: weighting coefficient added to ibm branch
[694]	81	!
	82	! 680 2011-02-04 23:16:06Z gryschka
[681]	83	! bugfix: collective_wait
[668]	84	!
[676]	85	! 675 2011-01-19 10:56:55Z suehring
	86	! Removed bugfix while copying tend.
	87	!
[674]	88	! 673 2011-01-18 16:19:48Z suehring
	89	! Weighting coefficients added for right computation of the pressure during
	90	! Runge-Kutta substeps.
	91	!
[668]	92	! 667 2010-12-23 12:06:00Z suehring/gryschka
[667]	93	! New allocation of tend when ws-scheme and multigrid is used. This is due to
	94	! reasons of perforance of the data_exchange. The same is done with p after
	95	! poismg is called.
	96	! nxl-1, nxr+1, nys-1, nyn+1 replaced by nxlg, nxrg, nysg, nyng when no
	97	! multigrid is used. Calls of exchange_horiz are modified.
	98	! bugfix: After pressure correction no volume flow correction in case of
	99	! non-cyclic boundary conditions
	100	! (has to be done only before pressure correction)
	101	! Call of SOR routine is referenced with ddzu_pres.
	102	!
[623]	103	! 622 2010-12-10 08:08:13Z raasch
	104	! optional barriers included in order to speed up collective operations
	105	!
[198]	106	! 151 2008-03-07 13:42:18Z raasch
	107	! Bugfix in volume flow control for non-cyclic boundary conditions
	108	!
[110]	109	! 106 2007-08-16 14:30:26Z raasch
	110	! Volume flow conservation added for the remaining three outflow boundaries
	111	!
[90]	112	! 85 2007-05-11 09:35:14Z raasch
	113	! Division through dt_3d replaced by multiplication of the inverse.
	114	! For performance optimisation, this is done in the loop calculating the
	115	! divergence instead of using a seperate loop.
	116	!
[77]	117	! 75 2007-03-22 09:54:05Z raasch
[75]	118	! Volume flow control for non-cyclic boundary conditions added (currently only
[76]	119	! for the north boundary!!), 2nd+3rd argument removed from exchange horiz,
	120	! mean vertical velocity is removed in case of Neumann boundary conditions
	121	! both at the bottom and the top
[1]	122	!
[3]	123	! RCS Log replace by Id keyword, revision history cleaned up
	124	!
[1]	125	! Revision 1.25 2006/04/26 13:26:12 raasch
	126	! OpenMP optimization (+localsum, threadsum)
	127	!
	128	! Revision 1.1 1997/07/24 11:24:44 raasch
	129	! Initial revision
	130	!
	131	!
	132	! Description:
	133	! ------------
	134	! Compute the divergence of the provisional velocity field. Solve the Poisson
	135	! equation for the perturbation pressure. Compute the final velocities using
	136	! this perturbation pressure. Compute the remaining divergence.
	137	!------------------------------------------------------------------------------!
	138
	139	USE arrays_3d
	140	USE constants
	141	USE control_parameters
	142	USE cpulog
	143	USE grid_variables
	144	USE indices
	145	USE interfaces
	146	USE pegrid
	147	USE poisfft_mod
	148	USE statistics
	149
	150	IMPLICIT NONE
	151
[1092]	152	INTEGER :: i, j, k
[1]	153
[673]	154	REAL :: ddt_3d, localsum, threadsum, d_weight_pres
[1]	155
	156	REAL, DIMENSION(1:2) :: volume_flow_l, volume_flow_offset
[76]	157	REAL, DIMENSION(1:nzt) :: w_l, w_l_l
[1]	158
	159
	160	CALL cpu_log( log_point(8), 'pres', 'start' )
	161
[85]	162
	163	ddt_3d = 1.0 / dt_3d
[709]	164	d_weight_pres = 1.0 / weight_pres(intermediate_timestep_count)
[85]	165
[1]	166	!
[707]	167	!-- Multigrid method expects array d to have one ghost layer.
	168	!--
[1]	169	IF ( psolver == 'multigrid' ) THEN
[667]	170
[1]	171	DEALLOCATE( d )
[667]	172	ALLOCATE( d(nzb:nzt+1,nys-1:nyn+1,nxl-1:nxr+1) )
[707]	173
	174	!
	175	!-- Since p is later used to hold the weighted average of the substeps, it
	176	!-- cannot be used in the iterative solver. Therefore, its initial value is
	177	!-- stored on p_loc, which is then iteratively advanced in every substep.
	178	IF ( intermediate_timestep_count == 1 ) THEN
	179	DO i = nxl-1, nxr+1
	180	DO j = nys-1, nyn+1
	181	DO k = nzb, nzt+1
	182	p_loc(k,j,i) = p(k,j,i)
	183	ENDDO
	184	ENDDO
	185	ENDDO
[667]	186	ENDIF
	187
[707]	188	ELSEIF ( psolver == 'sor' .AND. intermediate_timestep_count == 1 ) THEN
	189
	190	!
	191	!-- Since p is later used to hold the weighted average of the substeps, it
	192	!-- cannot be used in the iterative solver. Therefore, its initial value is
	193	!-- stored on p_loc, which is then iteratively advanced in every substep.
	194	p_loc = p
	195
[1]	196	ENDIF
	197
	198	!
[75]	199	!-- Conserve the volume flow at the outflow in case of non-cyclic lateral
	200	!-- boundary conditions
[106]	201	!-- WARNING: so far, this conservation does not work at the left/south
	202	!-- boundary if the topography at the inflow differs from that at the
	203	!-- outflow! For this case, volume_flow_area needs adjustment!
	204	!
	205	!-- Left/right
[709]	206	IF ( conserve_volume_flow .AND. ( outflow_l .OR. outflow_r ) ) THEN
[680]	207
[106]	208	volume_flow(1) = 0.0
	209	volume_flow_l(1) = 0.0
	210
	211	IF ( outflow_l ) THEN
	212	i = 0
	213	ELSEIF ( outflow_r ) THEN
	214	i = nx+1
	215	ENDIF
	216
	217	DO j = nys, nyn
	218	!
	219	!-- Sum up the volume flow through the south/north boundary
[709]	220	DO k = nzb_2d(j,i)+1, nzt
[667]	221	volume_flow_l(1) = volume_flow_l(1) + u(k,j,i) * dzw(k)
[106]	222	ENDDO
	223	ENDDO
	224
	225	#if defined( __parallel )
[680]	226	IF ( collective_wait ) CALL MPI_BARRIER( comm1dy, ierr )
[106]	227	CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 1, MPI_REAL, &
	228	MPI_SUM, comm1dy, ierr )
	229	#else
	230	volume_flow = volume_flow_l
	231	#endif
[709]	232	volume_flow_offset(1) = ( volume_flow_initial(1) - volume_flow(1) ) &
[106]	233	/ volume_flow_area(1)
	234
[667]	235	DO j = nysg, nyng
[709]	236	DO k = nzb_2d(j,i)+1, nzt
[106]	237	u(k,j,i) = u(k,j,i) + volume_flow_offset(1)
	238	ENDDO
	239	ENDDO
	240
	241	ENDIF
	242
	243	!
	244	!-- South/north
[709]	245	IF ( conserve_volume_flow .AND. ( outflow_n .OR. outflow_s ) ) THEN
[106]	246
[75]	247	volume_flow(2) = 0.0
	248	volume_flow_l(2) = 0.0
	249
[106]	250	IF ( outflow_s ) THEN
	251	j = 0
	252	ELSEIF ( outflow_n ) THEN
[75]	253	j = ny+1
[106]	254	ENDIF
	255
	256	DO i = nxl, nxr
[75]	257	!
[106]	258	!-- Sum up the volume flow through the south/north boundary
[709]	259	DO k = nzb_2d(j,i)+1, nzt
[667]	260	volume_flow_l(2) = volume_flow_l(2) + v(k,j,i) * dzw(k)
[75]	261	ENDDO
[106]	262	ENDDO
	263
[75]	264	#if defined( __parallel )
[680]	265	IF ( collective_wait ) CALL MPI_BARRIER( comm1dx, ierr )
[75]	266	CALL MPI_ALLREDUCE( volume_flow_l(2), volume_flow(2), 1, MPI_REAL, &
	267	MPI_SUM, comm1dx, ierr )
	268	#else
	269	volume_flow = volume_flow_l
	270	#endif
	271	volume_flow_offset(2) = ( volume_flow_initial(2) - volume_flow(2) ) &
[106]	272	/ volume_flow_area(2)
[75]	273
[667]	274	DO i = nxlg, nxrg
[709]	275	DO k = nzb_v_inner(j,i)+1, nzt
[106]	276	v(k,j,i) = v(k,j,i) + volume_flow_offset(2)
[75]	277	ENDDO
[106]	278	ENDDO
[75]	279
	280	ENDIF
	281
[76]	282	!
	283	!-- Remove mean vertical velocity
	284	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN
[709]	285	IF ( simulated_time > 0.0 ) THEN ! otherwise nzb_w_inner not yet known
[76]	286	w_l = 0.0; w_l_l = 0.0
	287	DO i = nxl, nxr
	288	DO j = nys, nyn
	289	DO k = nzb_w_inner(j,i)+1, nzt
	290	w_l_l(k) = w_l_l(k) + w(k,j,i)
	291	ENDDO
	292	ENDDO
	293	ENDDO
	294	#if defined( __parallel )
[622]	295	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[709]	296	CALL MPI_ALLREDUCE( w_l_l(1), w_l(1), nzt, MPI_REAL, MPI_SUM, &
	297	comm2d, ierr )
[76]	298	#else
	299	w_l = w_l_l
	300	#endif
	301	DO k = 1, nzt
	302	w_l(k) = w_l(k) / ngp_2dh_outer(k,0)
	303	ENDDO
[667]	304	DO i = nxlg, nxrg
	305	DO j = nysg, nyng
[76]	306	DO k = nzb_w_inner(j,i)+1, nzt
	307	w(k,j,i) = w(k,j,i) - w_l(k)
	308	ENDDO
	309	ENDDO
	310	ENDDO
	311	ENDIF
	312	ENDIF
[75]	313
	314	!
[1]	315	!-- Compute the divergence of the provisional velocity field.
	316	CALL cpu_log( log_point_s(1), 'divergence', 'start' )
	317
	318	IF ( psolver == 'multigrid' ) THEN
	319	!$OMP PARALLEL DO SCHEDULE( STATIC )
	320	DO i = nxl-1, nxr+1
	321	DO j = nys-1, nyn+1
	322	DO k = nzb, nzt+1
	323	d(k,j,i) = 0.0
	324	ENDDO
	325	ENDDO
	326	ENDDO
	327	ELSE
	328	!$OMP PARALLEL DO SCHEDULE( STATIC )
[1111]	329	!$acc kernels present( d )
[1003]	330	DO i = nxl, nxr
	331	DO j = nys, nyn
	332	DO k = nzb+1, nzt
[1]	333	d(k,j,i) = 0.0
	334	ENDDO
	335	ENDDO
	336	ENDDO
[1111]	337	!$acc end kernels
[1]	338	ENDIF
	339
	340	localsum = 0.0
	341	threadsum = 0.0
	342
	343	#if defined( __ibm )
	344	!$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
	345	!$OMP DO SCHEDULE( STATIC )
	346	DO i = nxl, nxr
	347	DO j = nys, nyn
	348	DO k = nzb_s_inner(j,i)+1, nzt
[85]	349	d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
	350	( v(k,j+1,i) - v(k,j,i) ) * ddy + &
[673]	351	( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) ) * ddt_3d &
	352	* d_weight_pres
[1]	353	ENDDO
	354	!
	355	!-- Compute possible PE-sum of divergences for flow_statistics
	356	DO k = nzb_s_inner(j,i)+1, nzt
	357	threadsum = threadsum + ABS( d(k,j,i) )
	358	ENDDO
	359
	360	ENDDO
	361	ENDDO
	362
[707]	363	localsum = localsum + threadsum * dt_3d * &
	364	weight_pres(intermediate_timestep_count)
[693]	365
[1]	366	!$OMP END PARALLEL
	367	#else
	368
[1111]	369	!$OMP PARALLEL PRIVATE (i,j,k)
	370	!$OMP DO SCHEDULE( STATIC )
[1221]	371	!$acc kernels present( d, ddzw, rflags_s_inner, u, v, w )
	372	!$acc loop collapse( 3 )
[1111]	373	DO i = nxl, nxr
	374	DO j = nys, nyn
	375	DO k = 1, nzt
[1221]	376	d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
	377	( v(k,j+1,i) - v(k,j,i) ) * ddy + &
	378	( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) ) * ddt_3d &
	379	* d_weight_pres * rflags_s_inner(k,j,i)
[1]	380	ENDDO
	381	ENDDO
[1111]	382	ENDDO
	383	!$acc end kernels
	384	!$OMP END PARALLEL
[1]	385
	386	!
	387	!-- Compute possible PE-sum of divergences for flow_statistics
	388	!$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
	389	!$OMP DO SCHEDULE( STATIC )
[1221]	390	!$acc parallel loop collapse(3) present( d ) reduction(+:threadsum)
[1]	391	DO i = nxl, nxr
	392	DO j = nys, nyn
	393	DO k = nzb+1, nzt
	394	threadsum = threadsum + ABS( d(k,j,i) )
	395	ENDDO
	396	ENDDO
	397	ENDDO
[1257]	398	!$acc end parallel loop
[707]	399	localsum = localsum + threadsum * dt_3d * &
	400	weight_pres(intermediate_timestep_count)
[1]	401	!$OMP END PARALLEL
	402	#endif
	403
	404	!
	405	!-- For completeness, set the divergence sum of all statistic regions to those
	406	!-- of the total domain
	407	sums_divold_l(0:statistic_regions) = localsum
	408
	409	CALL cpu_log( log_point_s(1), 'divergence', 'stop' )
	410
	411	!
	412	!-- Compute the pressure perturbation solving the Poisson equation
	413	IF ( psolver(1:7) == 'poisfft' ) THEN
	414
	415	!
	416	!-- Solve Poisson equation via FFT and solution of tridiagonal matrices
	417	IF ( psolver == 'poisfft' ) THEN
[1212]	418
[1306]	419	CALL poisfft( d )
[1113]	420
[1]	421	ENDIF
	422
	423	!
	424	!-- Store computed perturbation pressure and set boundary condition in
	425	!-- z-direction
	426	!$OMP PARALLEL DO
[1113]	427	!$acc kernels present( d, tend )
[1]	428	DO i = nxl, nxr
	429	DO j = nys, nyn
	430	DO k = nzb+1, nzt
	431	tend(k,j,i) = d(k,j,i)
	432	ENDDO
	433	ENDDO
	434	ENDDO
[1113]	435	!$acc end kernels
[1]	436
	437	!
	438	!-- Bottom boundary:
	439	!-- This condition is only required for internal output. The pressure
	440	!-- gradient (dp(nzb+1)-dp(nzb))/dz is not used anywhere else.
	441	IF ( ibc_p_b == 1 ) THEN
	442	!
	443	!-- Neumann (dp/dz = 0)
	444	!$OMP PARALLEL DO
[1113]	445	!$acc kernels present( nzb_s_inner, tend )
[667]	446	DO i = nxlg, nxrg
	447	DO j = nysg, nyng
[1]	448	tend(nzb_s_inner(j,i),j,i) = tend(nzb_s_inner(j,i)+1,j,i)
	449	ENDDO
	450	ENDDO
[1113]	451	!$acc end kernels
[1]	452
	453	ELSE
	454	!
	455	!-- Dirichlet
	456	!$OMP PARALLEL DO
[1113]	457	!$acc kernels present( tend )
[667]	458	DO i = nxlg, nxrg
	459	DO j = nysg, nyng
[1]	460	tend(nzb_s_inner(j,i),j,i) = 0.0
	461	ENDDO
	462	ENDDO
[1113]	463	!$acc end kernels
[1]	464
	465	ENDIF
	466
	467	!
	468	!-- Top boundary
	469	IF ( ibc_p_t == 1 ) THEN
	470	!
	471	!-- Neumann
	472	!$OMP PARALLEL DO
[1113]	473	!$acc kernels present( tend )
[667]	474	DO i = nxlg, nxrg
	475	DO j = nysg, nyng
[1]	476	tend(nzt+1,j,i) = tend(nzt,j,i)
	477	ENDDO
	478	ENDDO
[1113]	479	!$acc end kernels
[1]	480
	481	ELSE
	482	!
	483	!-- Dirichlet
	484	!$OMP PARALLEL DO
[1113]	485	!$acc kernels present( tend )
[667]	486	DO i = nxlg, nxrg
	487	DO j = nysg, nyng
[1]	488	tend(nzt+1,j,i) = 0.0
	489	ENDDO
	490	ENDDO
[1113]	491	!$acc end kernels
[1]	492
	493	ENDIF
	494
	495	!
	496	!-- Exchange boundaries for p
[1113]	497	IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs
	498	on_device = .TRUE. ! to be removed after complete porting
	499	ELSE ! of ghost point exchange
	500	!$acc update host( tend )
	501	ENDIF
[667]	502	CALL exchange_horiz( tend, nbgp )
[1113]	503	IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs
	504	on_device = .FALSE. ! to be removed after complete porting
	505	ELSE ! of ghost point exchange
	506	!$acc update device( tend )
	507	ENDIF
[1]	508
	509	ELSEIF ( psolver == 'sor' ) THEN
	510
	511	!
	512	!-- Solve Poisson equation for perturbation pressure using SOR-Red/Black
	513	!-- scheme
[707]	514	CALL sor( d, ddzu_pres, ddzw, p_loc )
	515	tend = p_loc
[1]	516
	517	ELSEIF ( psolver == 'multigrid' ) THEN
	518
	519	!
	520	!-- Solve Poisson equation for perturbation pressure using Multigrid scheme,
[667]	521	!-- array tend is used to store the residuals, logical exchange_mg is used
	522	!-- to discern data exchange in multigrid ( 1 ghostpoint ) and normal grid
	523	!-- ( nbgp ghost points ).
[778]	524
	525	!-- If the number of grid points of the gathered grid, which is collected
	526	!-- on PE0, is larger than the number of grid points of an PE, than array
	527	!-- tend will be enlarged.
	528	IF ( gathered_size > subdomain_size ) THEN
	529	DEALLOCATE( tend )
	530	ALLOCATE( tend(nzb:nzt_mg(mg_switch_to_pe0_level)+1,nys_mg( &
	531	mg_switch_to_pe0_level)-1:nyn_mg(mg_switch_to_pe0_level)+1,&
	532	nxl_mg(mg_switch_to_pe0_level)-1:nxr_mg( &
	533	mg_switch_to_pe0_level)+1) )
	534	ENDIF
	535
[1]	536	CALL poismg( tend )
[707]	537
[778]	538	IF ( gathered_size > subdomain_size ) THEN
	539	DEALLOCATE( tend )
	540	ALLOCATE( tend(nzb:nzt+1,nysg:nyng,nxlg:nxrg) )
	541	ENDIF
	542
[1]	543	!
	544	!-- Restore perturbation pressure on tend because this array is used
	545	!-- further below to correct the velocity fields
[707]	546	DO i = nxl-1, nxr+1
	547	DO j = nys-1, nyn+1
	548	DO k = nzb, nzt+1
	549	tend(k,j,i) = p_loc(k,j,i)
	550	ENDDO
	551	ENDDO
	552	ENDDO
[667]	553
[1]	554	ENDIF
	555
	556	!
[707]	557	!-- Store perturbation pressure on array p, used for pressure data output.
	558	!-- Ghost layers are added in the output routines (except sor-method: see below)
	559	IF ( intermediate_timestep_count == 1 ) THEN
	560	!$OMP PARALLEL PRIVATE (i,j,k)
	561	!$OMP DO
[1113]	562	!$acc kernels present( p, tend, weight_substep )
[1257]	563	!$acc loop independent
[707]	564	DO i = nxl-1, nxr+1
[1257]	565	!$acc loop independent
[707]	566	DO j = nys-1, nyn+1
[1257]	567	!$acc loop independent
[707]	568	DO k = nzb, nzt+1
	569	p(k,j,i) = tend(k,j,i) * &
	570	weight_substep(intermediate_timestep_count)
[673]	571	ENDDO
[1]	572	ENDDO
[707]	573	ENDDO
[1113]	574	!$acc end kernels
[707]	575	!$OMP END PARALLEL
	576
	577	ELSE
	578	!$OMP PARALLEL PRIVATE (i,j,k)
	579	!$OMP DO
[1113]	580	!$acc kernels present( p, tend, weight_substep )
[1257]	581	!$acc loop independent
[707]	582	DO i = nxl-1, nxr+1
[1257]	583	!$acc loop independent
[707]	584	DO j = nys-1, nyn+1
[1257]	585	!$acc loop independent
[707]	586	DO k = nzb, nzt+1
	587	p(k,j,i) = p(k,j,i) + tend(k,j,i) * &
	588	weight_substep(intermediate_timestep_count)
[673]	589	ENDDO
	590	ENDDO
[707]	591	ENDDO
[1113]	592	!$acc end kernels
[707]	593	!$OMP END PARALLEL
	594
	595	ENDIF
[673]	596
[707]	597	!
	598	!-- SOR-method needs ghost layers for the next timestep
	599	IF ( psolver == 'sor' ) CALL exchange_horiz( p, nbgp )
[682]	600
[1]	601	!
	602	!-- Correction of the provisional velocities with the current perturbation
	603	!-- pressure just computed
[709]	604	IF ( conserve_volume_flow .AND. ( bc_lr_cyc .OR. bc_ns_cyc ) ) THEN
[1]	605	volume_flow_l(1) = 0.0
	606	volume_flow_l(2) = 0.0
	607	ENDIF
[707]	608
[1]	609	!$OMP PARALLEL PRIVATE (i,j,k)
	610	!$OMP DO
[1113]	611	!$acc kernels present( ddzu, nzb_u_inner, nzb_v_inner, nzb_w_inner, tend, u, v, w, weight_pres )
[1257]	612	!$acc loop independent
[673]	613	DO i = nxl, nxr
[1257]	614	!$acc loop independent
[1]	615	DO j = nys, nyn
[1257]	616	!$acc loop independent
[1113]	617	DO k = 1, nzt
	618	IF ( k > nzb_w_inner(j,i) ) THEN
	619	w(k,j,i) = w(k,j,i) - dt_3d * &
	620	( tend(k+1,j,i) - tend(k,j,i) ) * ddzu(k+1) * &
	621	weight_pres(intermediate_timestep_count)
	622	ENDIF
[1]	623	ENDDO
[1257]	624	!$acc loop independent
[1113]	625	DO k = 1, nzt
	626	IF ( k > nzb_u_inner(j,i) ) THEN
	627	u(k,j,i) = u(k,j,i) - dt_3d * &
	628	( tend(k,j,i) - tend(k,j,i-1) ) * ddx * &
	629	weight_pres(intermediate_timestep_count)
	630	ENDIF
[1]	631	ENDDO
[1257]	632	!$acc loop independent
[1113]	633	DO k = 1, nzt
	634	IF ( k > nzb_v_inner(j,i) ) THEN
	635	v(k,j,i) = v(k,j,i) - dt_3d * &
	636	( tend(k,j,i) - tend(k,j-1,i) ) * ddy * &
	637	weight_pres(intermediate_timestep_count)
	638	ENDIF
[673]	639	ENDDO
[1]	640
	641	ENDDO
	642	ENDDO
[1113]	643	!$acc end kernels
[1]	644	!$OMP END PARALLEL
[1113]	645
	646	!
	647	!-- Sum up the volume flow through the right and north boundary
	648	IF ( conserve_volume_flow .AND. bc_lr_cyc .AND. bc_ns_cyc .AND. &
	649	nxr == nx ) THEN
	650
	651	!$OMP PARALLEL PRIVATE (j,k)
	652	!$OMP DO
	653	DO j = nys, nyn
	654	!$OMP CRITICAL
	655	DO k = nzb_2d(j,nx) + 1, nzt
	656	volume_flow_l(1) = volume_flow_l(1) + u(k,j,nx) * dzw(k)
	657	ENDDO
	658	!$OMP END CRITICAL
	659	ENDDO
	660	!$OMP END PARALLEL
	661
	662	ENDIF
	663
	664	IF ( conserve_volume_flow .AND. bc_ns_cyc .AND. bc_lr_cyc .AND. &
	665	nyn == ny ) THEN
	666
	667	!$OMP PARALLEL PRIVATE (i,k)
	668	!$OMP DO
	669	DO i = nxl, nxr
	670	!$OMP CRITICAL
	671	DO k = nzb_2d(ny,i) + 1, nzt
	672	volume_flow_l(2) = volume_flow_l(2) + v(k,ny,i) * dzw(k)
	673	ENDDO
	674	!$OMP END CRITICAL
	675	ENDDO
	676	!$OMP END PARALLEL
	677
	678	ENDIF
[673]	679
[1]	680	!
	681	!-- Conserve the volume flow
[707]	682	IF ( conserve_volume_flow .AND. ( bc_lr_cyc .AND. bc_ns_cyc ) ) THEN
[1]	683
	684	#if defined( __parallel )
[622]	685	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	686	CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 2, MPI_REAL, &
	687	MPI_SUM, comm2d, ierr )
	688	#else
	689	volume_flow = volume_flow_l
	690	#endif
	691
	692	volume_flow_offset = ( volume_flow_initial - volume_flow ) / &
	693	volume_flow_area
	694
	695	!$OMP PARALLEL PRIVATE (i,j,k)
	696	!$OMP DO
	697	DO i = nxl, nxr
	698	DO j = nys, nyn
[667]	699	DO k = nzb_u_inner(j,i) + 1, nzt
	700	u(k,j,i) = u(k,j,i) + volume_flow_offset(1)
[719]	701	ENDDO
	702	DO k = nzb_v_inner(j,i) + 1, nzt
[667]	703	v(k,j,i) = v(k,j,i) + volume_flow_offset(2)
	704	ENDDO
[1]	705	ENDDO
	706	ENDDO
[667]	707
[1]	708	!$OMP END PARALLEL
	709
	710	ENDIF
	711
	712	!
	713	!-- Exchange of boundaries for the velocities
[1113]	714	IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs
	715	on_device = .TRUE. ! to be removed after complete porting
	716	ELSE ! of ghost point exchange
	717	!$acc update host( u, v, w )
	718	ENDIF
[667]	719	CALL exchange_horiz( u, nbgp )
	720	CALL exchange_horiz( v, nbgp )
	721	CALL exchange_horiz( w, nbgp )
[1113]	722	IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs
	723	on_device = .FALSE. ! to be removed after complete porting
	724	ELSE ! of ghost point exchange
	725	!$acc update device( u, v, w )
	726	ENDIF
[1]	727
	728	!
	729	!-- Compute the divergence of the corrected velocity field,
	730	!-- a possible PE-sum is computed in flow_statistics
	731	CALL cpu_log( log_point_s(1), 'divergence', 'start' )
	732	sums_divnew_l = 0.0
	733
	734	!
	735	!-- d must be reset to zero because it can contain nonzero values below the
	736	!-- topography
	737	IF ( topography /= 'flat' ) d = 0.0
	738
	739	localsum = 0.0
	740	threadsum = 0.0
	741
	742	!$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
	743	!$OMP DO SCHEDULE( STATIC )
	744	#if defined( __ibm )
	745	DO i = nxl, nxr
	746	DO j = nys, nyn
	747	DO k = nzb_s_inner(j,i)+1, nzt
	748	d(k,j,i) = ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
	749	( v(k,j+1,i) - v(k,j,i) ) * ddy + &
	750	( w(k,j,i) - w(k-1,j,i) ) * ddzw(k)
	751	ENDDO
	752	DO k = nzb+1, nzt
	753	threadsum = threadsum + ABS( d(k,j,i) )
	754	ENDDO
	755	ENDDO
	756	ENDDO
	757	#else
[1221]	758	!$acc kernels present( d, ddzw, rflags_s_inner, u, v, w )
	759	!$acc loop collapse( 3 )
[1]	760	DO i = nxl, nxr
	761	DO j = nys, nyn
[1113]	762	DO k = 1, nzt
[1221]	763	d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * ddx + &
	764	( v(k,j+1,i) - v(k,j,i) ) * ddy + &
	765	( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) &
	766	) * rflags_s_inner(k,j,i)
[1113]	767	ENDDO
	768	ENDDO
	769	ENDDO
	770	!$acc end kernels
	771	!
	772	!-- Compute possible PE-sum of divergences for flow_statistics
[1221]	773	!$acc parallel loop collapse(3) present( d ) reduction(+:threadsum)
[1113]	774	DO i = nxl, nxr
	775	DO j = nys, nyn
[1221]	776	DO k = nzb+1, nzt
[1]	777	threadsum = threadsum + ABS( d(k,j,i) )
	778	ENDDO
	779	ENDDO
	780	ENDDO
[1257]	781	!$acc end parallel loop
[1]	782	#endif
[667]	783
[1]	784	localsum = localsum + threadsum
	785	!$OMP END PARALLEL
	786
	787	!
	788	!-- For completeness, set the divergence sum of all statistic regions to those
	789	!-- of the total domain
	790	sums_divnew_l(0:statistic_regions) = localsum
	791
	792	CALL cpu_log( log_point_s(1), 'divergence', 'stop' )
	793
	794	CALL cpu_log( log_point(8), 'pres', 'stop' )
	795
	796
	797	END SUBROUTINE pres

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |