Home

Context Navigation

source: palm/trunk/SOURCE/pres.f90 @ 2073

Last change on this file since 2073 was 2073, checked in by raasch, 7 years ago
mostly openmp related bugfixes
Property svn:keywords set to `Id`
File size: 28.0 KB

Rev	Line
[1682]	1	!> @file pres.f90
[2000]	2	!------------------------------------------------------------------------------!
[1036]	3	! This file is part of PALM.
	4	!
[2000]	5	! PALM is free software: you can redistribute it and/or modify it under the
	6	! terms of the GNU General Public License as published by the Free Software
	7	! Foundation, either version 3 of the License, or (at your option) any later
	8	! version.
[1036]	9	!
	10	! PALM is distributed in the hope that it will be useful, but WITHOUT ANY
	11	! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
	12	! A PARTICULAR PURPOSE. See the GNU General Public License for more details.
	13	!
	14	! You should have received a copy of the GNU General Public License along with
	15	! PALM. If not, see <http://www.gnu.org/licenses/>.
	16	!
[1818]	17	! Copyright 1997-2016 Leibniz Universitaet Hannover
[2000]	18	!------------------------------------------------------------------------------!
[1036]	19	!
[484]	20	! Current revisions:
[1212]	21	! ------------------
[2073]	22	! openmp bugfix for calculation of new divergence
[1932]	23	!
[1919]	24	! Former revisions:
	25	! -----------------
	26	! $Id: pres.f90 2073 2016-11-30 14:34:05Z raasch $
	27	!
[2038]	28	! 2037 2016-10-26 11:15:40Z knoop
	29	! Anelastic approximation implemented
	30	!
[2001]	31	! 2000 2016-08-20 18:09:15Z knoop
	32	! Forced header and separation lines into 80 columns
	33	!
[1933]	34	! 1932 2016-06-10 12:09:21Z suehring
	35	! Initial version of purely vertical nesting introduced.
	36	!
[1932]	37	! 1931 2016-06-10 12:06:59Z suehring
	38	! Rename multigrid into multigrid_noopt and multigrid_fast into multigrid
	39	!
[1930]	40	! 1929 2016-06-09 16:25:25Z suehring
	41	! Bugfix: weight_substep for initial call, replace by local variable
	42	!
[1919]	43	! 1918 2016-05-27 14:35:57Z raasch
[1918]	44	! sum of divergence is also calculated when pres is called before the initial
	45	! first time step,
	46	! stearing is modified by using intermediate_timestep_count = 0 in order to
	47	! determine if pres is called before the first initial timestep,
	48	! bugfix: in case of Neumann conditions for pressure both at bottom and top,
	49	! mean vertical velocity is also removed for the first time step
	50	! bugfix for calculating divergences
[1321]	51	!
[1909]	52	! 1908 2016-05-25 17:22:32Z suehring
	53	! New divergence for output into RUN_CONTROL file is calculated only at last
	54	! Runge-Kutta step
	55	!
[1846]	56	! 1845 2016-04-08 08:29:13Z raasch
	57	! nzb_2d replace by nzb_u\|v_inner
	58	!
[1800]	59	! 1799 2016-04-05 08:35:55Z gronemeier
	60	! Bugfix: excluded third dimension from horizontal volume flow calculation
	61	!
[1763]	62	! 1762 2016-02-25 12:31:13Z hellstea
	63	! Introduction of nested domain feature
	64	!
[1683]	65	! 1682 2015-10-07 23:56:08Z knoop
	66	! Code annotations made doxygen readable
	67	!
[1576]	68	! 1575 2015-03-27 09:56:27Z raasch
	69	! poismg_fast + respective module added, adjustments for psolver-queries
	70	!
[1343]	71	! 1342 2014-03-26 17:04:47Z kanani
	72	! REAL constants defined as wp-kind
	73	!
[1321]	74	! 1320 2014-03-20 08:40:49Z raasch
[1320]	75	! ONLY-attribute added to USE-statements,
	76	! kind-parameters added to all INTEGER and REAL declaration statements,
	77	! kinds are defined in new module kinds,
	78	! old module precision_kind is removed,
	79	! revision history before 2012 removed,
	80	! comment fields (!:) to be used for variable explanations added to
	81	! all variable declaration statements
[708]	82	!
[1319]	83	! 1318 2014-03-17 13:35:16Z raasch
	84	! module interfaces removed
	85	!
[1307]	86	! 1306 2014-03-13 14:30:59Z raasch
	87	! second argument removed from routine poisfft
	88	!
[1258]	89	! 1257 2013-11-08 15:18:40Z raasch
	90	! openacc loop and loop vector clauses removed, independent clauses added,
	91	! end parallel replaced by end parallel loop
	92	!
[1222]	93	! 1221 2013-09-10 08:59:13Z raasch
	94	! openACC porting of reduction operations, loops for calculating d are
	95	! using the rflags_s_inner multiply flag instead of the nzb_s_inner loop index
	96	!
[1213]	97	! 1212 2013-08-15 08:46:27Z raasch
	98	! call of poisfft_hybrid removed
	99	!
[1118]	100	! 1117 2013-03-27 11:15:36Z suehring
	101	! Bugfix in OpenMP parallelization.
	102	!
[1114]	103	! 1113 2013-03-10 02:48:14Z raasch
	104	! GPU-porting of several loops, some loops rearranged
	105	!
[1112]	106	! 1111 2013-03-08 23:54:10Z
	107	! openACC statements added,
	108	! ibc_p_b = 2 removed
	109	!
[1093]	110	! 1092 2013-02-02 11:24:22Z raasch
	111	! unused variables removed
	112	!
[1037]	113	! 1036 2012-10-22 13:43:42Z raasch
	114	! code put under GPL (PALM 3.9)
	115	!
[1004]	116	! 1003 2012-09-14 14:35:53Z raasch
	117	! adjustment of array tend for cases with unequal subdomain sizes removed
	118	!
[1]	119	! Revision 1.1 1997/07/24 11:24:44 raasch
	120	! Initial revision
	121	!
	122	!
	123	! Description:
	124	! ------------
[1682]	125	!> Compute the divergence of the provisional velocity field. Solve the Poisson
	126	!> equation for the perturbation pressure. Compute the final velocities using
	127	!> this perturbation pressure. Compute the remaining divergence.
[1]	128	!------------------------------------------------------------------------------!
[1682]	129	SUBROUTINE pres
	130
[1]	131
[1320]	132	USE arrays_3d, &
[2037]	133	ONLY: d, ddzu, ddzu_pres, ddzw, dzw, p, p_loc, rho_air, rho_air_zw, &
	134	tend, u, v, w
[1320]	135
	136	USE control_parameters, &
	137	ONLY: bc_lr_cyc, bc_ns_cyc, conserve_volume_flow, dt_3d, &
	138	gathered_size, ibc_p_b, ibc_p_t, intermediate_timestep_count, &
[1908]	139	intermediate_timestep_count_max, mg_switch_to_pe0_level, &
[1933]	140	nest_domain, on_device, outflow_l, outflow_n, outflow_r, &
[1918]	141	outflow_s, psolver, subdomain_size, topography, volume_flow, &
	142	volume_flow_area, volume_flow_initial
[1320]	143
	144	USE cpulog, &
	145	ONLY: cpu_log, log_point, log_point_s
	146
	147	USE grid_variables, &
	148	ONLY: ddx, ddy
	149
	150	USE indices, &
	151	ONLY: nbgp, ngp_2dh_outer, nx, nxl, nxlg, nxl_mg, nxr, nxrg, nxr_mg, &
	152	ny, nys, nysg, nys_mg, nyn, nyng, nyn_mg, nzb, nzb_s_inner, &
[1845]	153	nzb_u_inner, nzb_v_inner, nzb_w_inner, nzt, nzt_mg, &
[1320]	154	rflags_s_inner
	155
	156	USE kinds
	157
[1]	158	USE pegrid
[1933]	159
	160	USE pmc_interface, &
	161	ONLY: nesting_mode
[1]	162
[1320]	163	USE poisfft_mod, &
	164	ONLY: poisfft
	165
[1575]	166	USE poismg_mod
	167
[1320]	168	USE statistics, &
	169	ONLY: statistic_regions, sums_divnew_l, sums_divold_l, weight_pres, &
	170	weight_substep
	171
[1]	172	IMPLICIT NONE
	173
[1682]	174	INTEGER(iwp) :: i !<
	175	INTEGER(iwp) :: j !<
	176	INTEGER(iwp) :: k !<
[1]	177
[1682]	178	REAL(wp) :: ddt_3d !<
[1918]	179	REAL(wp) :: d_weight_pres !<
[1682]	180	REAL(wp) :: localsum !<
	181	REAL(wp) :: threadsum !<
[1918]	182	REAL(wp) :: weight_pres_l !<
[1929]	183	REAL(wp) :: weight_substep_l !<
[1]	184
[1762]	185	REAL(wp), DIMENSION(1:3) :: volume_flow_l !<
	186	REAL(wp), DIMENSION(1:3) :: volume_flow_offset !<
[1682]	187	REAL(wp), DIMENSION(1:nzt) :: w_l !<
	188	REAL(wp), DIMENSION(1:nzt) :: w_l_l !<
[1]	189
[1933]	190	LOGICAL :: nest_domain_nvn !<
[1]	191
[1933]	192
[1]	193	CALL cpu_log( log_point(8), 'pres', 'start' )
	194
[85]	195
[1918]	196	!
	197	!-- Calculate quantities to be used locally
[1342]	198	ddt_3d = 1.0_wp / dt_3d
[1918]	199	IF ( intermediate_timestep_count == 0 ) THEN
	200	!
	201	!-- If pres is called before initial time step
[1929]	202	weight_pres_l = 1.0_wp
	203	d_weight_pres = 1.0_wp
	204	weight_substep_l = 1.0_wp
[1918]	205	ELSE
[1929]	206	weight_pres_l = weight_pres(intermediate_timestep_count)
	207	d_weight_pres = 1.0_wp / weight_pres(intermediate_timestep_count)
	208	weight_substep_l = weight_substep(intermediate_timestep_count)
[1918]	209	ENDIF
[85]	210
[1]	211	!
[707]	212	!-- Multigrid method expects array d to have one ghost layer.
	213	!--
[1575]	214	IF ( psolver(1:9) == 'multigrid' ) THEN
[667]	215
[1]	216	DEALLOCATE( d )
[667]	217	ALLOCATE( d(nzb:nzt+1,nys-1:nyn+1,nxl-1:nxr+1) )
[707]	218
	219	!
	220	!-- Since p is later used to hold the weighted average of the substeps, it
	221	!-- cannot be used in the iterative solver. Therefore, its initial value is
	222	!-- stored on p_loc, which is then iteratively advanced in every substep.
[1762]	223	IF ( intermediate_timestep_count <= 1 ) THEN
[707]	224	DO i = nxl-1, nxr+1
	225	DO j = nys-1, nyn+1
	226	DO k = nzb, nzt+1
	227	p_loc(k,j,i) = p(k,j,i)
	228	ENDDO
	229	ENDDO
	230	ENDDO
[667]	231	ENDIF
	232
[1918]	233	ELSEIF ( psolver == 'sor' .AND. intermediate_timestep_count <= 1 ) THEN
[707]	234
	235	!
	236	!-- Since p is later used to hold the weighted average of the substeps, it
	237	!-- cannot be used in the iterative solver. Therefore, its initial value is
	238	!-- stored on p_loc, which is then iteratively advanced in every substep.
	239	p_loc = p
	240
[1]	241	ENDIF
	242
	243	!
[75]	244	!-- Conserve the volume flow at the outflow in case of non-cyclic lateral
	245	!-- boundary conditions
[106]	246	!-- WARNING: so far, this conservation does not work at the left/south
	247	!-- boundary if the topography at the inflow differs from that at the
	248	!-- outflow! For this case, volume_flow_area needs adjustment!
	249	!
	250	!-- Left/right
[709]	251	IF ( conserve_volume_flow .AND. ( outflow_l .OR. outflow_r ) ) THEN
[680]	252
[1342]	253	volume_flow(1) = 0.0_wp
	254	volume_flow_l(1) = 0.0_wp
[106]	255
	256	IF ( outflow_l ) THEN
	257	i = 0
	258	ELSEIF ( outflow_r ) THEN
	259	i = nx+1
	260	ENDIF
	261
	262	DO j = nys, nyn
	263	!
	264	!-- Sum up the volume flow through the south/north boundary
[1845]	265	DO k = nzb_u_inner(j,i)+1, nzt
[667]	266	volume_flow_l(1) = volume_flow_l(1) + u(k,j,i) * dzw(k)
[106]	267	ENDDO
	268	ENDDO
	269
	270	#if defined( __parallel )
[680]	271	IF ( collective_wait ) CALL MPI_BARRIER( comm1dy, ierr )
[106]	272	CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 1, MPI_REAL, &
	273	MPI_SUM, comm1dy, ierr )
	274	#else
	275	volume_flow = volume_flow_l
	276	#endif
[709]	277	volume_flow_offset(1) = ( volume_flow_initial(1) - volume_flow(1) ) &
[106]	278	/ volume_flow_area(1)
	279
[667]	280	DO j = nysg, nyng
[1845]	281	DO k = nzb_u_inner(j,i)+1, nzt
[106]	282	u(k,j,i) = u(k,j,i) + volume_flow_offset(1)
	283	ENDDO
	284	ENDDO
	285
	286	ENDIF
	287
	288	!
	289	!-- South/north
[709]	290	IF ( conserve_volume_flow .AND. ( outflow_n .OR. outflow_s ) ) THEN
[106]	291
[1342]	292	volume_flow(2) = 0.0_wp
	293	volume_flow_l(2) = 0.0_wp
[75]	294
[106]	295	IF ( outflow_s ) THEN
	296	j = 0
	297	ELSEIF ( outflow_n ) THEN
[75]	298	j = ny+1
[106]	299	ENDIF
	300
	301	DO i = nxl, nxr
[75]	302	!
[106]	303	!-- Sum up the volume flow through the south/north boundary
[1845]	304	DO k = nzb_v_inner(j,i)+1, nzt
[667]	305	volume_flow_l(2) = volume_flow_l(2) + v(k,j,i) * dzw(k)
[75]	306	ENDDO
[106]	307	ENDDO
	308
[75]	309	#if defined( __parallel )
[680]	310	IF ( collective_wait ) CALL MPI_BARRIER( comm1dx, ierr )
[75]	311	CALL MPI_ALLREDUCE( volume_flow_l(2), volume_flow(2), 1, MPI_REAL, &
	312	MPI_SUM, comm1dx, ierr )
	313	#else
	314	volume_flow = volume_flow_l
	315	#endif
	316	volume_flow_offset(2) = ( volume_flow_initial(2) - volume_flow(2) ) &
[106]	317	/ volume_flow_area(2)
[75]	318
[667]	319	DO i = nxlg, nxrg
[709]	320	DO k = nzb_v_inner(j,i)+1, nzt
[106]	321	v(k,j,i) = v(k,j,i) + volume_flow_offset(2)
[75]	322	ENDDO
[106]	323	ENDDO
[75]	324
	325	ENDIF
	326
[76]	327	!
[1762]	328	!-- Remove mean vertical velocity in case that Neumann conditions are
[1933]	329	!-- used both at bottom and top boundary, and if not a nested domain in a
	330	!-- normal nesting run. In case of vertical nesting, this must be done.
	331	!-- Therefore an auxiliary logical variable nest_domain_nvn is used here, and
	332	!-- nvn stands for non-vertical nesting.
[1918]	333	!-- This cannot be done before the first initial time step because ngp_2dh_outer
	334	!-- is not yet known then.
[1933]	335	nest_domain_nvn = nest_domain
	336	IF ( nest_domain .AND. nesting_mode == 'vertical' ) THEN
	337	nest_domain_nvn = .FALSE.
	338	ENDIF
	339
	340	IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 .AND. &
	341	.NOT. nest_domain_nvn .AND. intermediate_timestep_count /= 0 ) &
[1918]	342	THEN
	343	w_l = 0.0_wp; w_l_l = 0.0_wp
	344	DO i = nxl, nxr
	345	DO j = nys, nyn
	346	DO k = nzb_w_inner(j,i)+1, nzt
	347	w_l_l(k) = w_l_l(k) + w(k,j,i)
[76]	348	ENDDO
	349	ENDDO
[1918]	350	ENDDO
[76]	351	#if defined( __parallel )
[1918]	352	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
	353	CALL MPI_ALLREDUCE( w_l_l(1), w_l(1), nzt, MPI_REAL, MPI_SUM, &
	354	comm2d, ierr )
[76]	355	#else
[1918]	356	w_l = w_l_l
[76]	357	#endif
[1918]	358	DO k = 1, nzt
	359	w_l(k) = w_l(k) / ngp_2dh_outer(k,0)
	360	ENDDO
	361	DO i = nxlg, nxrg
	362	DO j = nysg, nyng
	363	DO k = nzb_w_inner(j,i)+1, nzt
	364	w(k,j,i) = w(k,j,i) - w_l(k)
[76]	365	ENDDO
	366	ENDDO
[1918]	367	ENDDO
[76]	368	ENDIF
[75]	369
	370	!
[1]	371	!-- Compute the divergence of the provisional velocity field.
	372	CALL cpu_log( log_point_s(1), 'divergence', 'start' )
	373
[1575]	374	IF ( psolver(1:9) == 'multigrid' ) THEN
[1]	375	!$OMP PARALLEL DO SCHEDULE( STATIC )
	376	DO i = nxl-1, nxr+1
	377	DO j = nys-1, nyn+1
	378	DO k = nzb, nzt+1
[1342]	379	d(k,j,i) = 0.0_wp
[1]	380	ENDDO
	381	ENDDO
	382	ENDDO
	383	ELSE
	384	!$OMP PARALLEL DO SCHEDULE( STATIC )
[1111]	385	!$acc kernels present( d )
[1003]	386	DO i = nxl, nxr
	387	DO j = nys, nyn
	388	DO k = nzb+1, nzt
[1342]	389	d(k,j,i) = 0.0_wp
[1]	390	ENDDO
	391	ENDDO
	392	ENDDO
[1111]	393	!$acc end kernels
[1]	394	ENDIF
	395
[1342]	396	localsum = 0.0_wp
	397	threadsum = 0.0_wp
[1]	398
	399	#if defined( __ibm )
	400	!$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
	401	!$OMP DO SCHEDULE( STATIC )
	402	DO i = nxl, nxr
	403	DO j = nys, nyn
	404	DO k = nzb_s_inner(j,i)+1, nzt
[2037]	405	d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * rho_air(k) * ddx + &
	406	( v(k,j+1,i) - v(k,j,i) ) * rho_air(k) * ddy + &
	407	( w(k,j,i) * rho_air_zw(k) - &
	408	w(k-1,j,i) * rho_air_zw(k-1) ) * ddzw(k) &
	409	) * ddt_3d * d_weight_pres
[1]	410	ENDDO
	411	!
	412	!-- Compute possible PE-sum of divergences for flow_statistics
	413	DO k = nzb_s_inner(j,i)+1, nzt
	414	threadsum = threadsum + ABS( d(k,j,i) )
	415	ENDDO
	416
	417	ENDDO
	418	ENDDO
	419
[1918]	420	IF ( intermediate_timestep_count == intermediate_timestep_count_max .OR. &
	421	intermediate_timestep_count == 0 ) THEN
	422	localsum = localsum + threadsum * dt_3d * weight_pres_l
[1908]	423	ENDIF
[1]	424	!$OMP END PARALLEL
	425	#else
	426
[1111]	427	!$OMP PARALLEL PRIVATE (i,j,k)
	428	!$OMP DO SCHEDULE( STATIC )
[1221]	429	!$acc kernels present( d, ddzw, rflags_s_inner, u, v, w )
	430	!$acc loop collapse( 3 )
[1111]	431	DO i = nxl, nxr
	432	DO j = nys, nyn
	433	DO k = 1, nzt
[2037]	434	d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * rho_air(k) * ddx + &
	435	( v(k,j+1,i) - v(k,j,i) ) * rho_air(k) * ddy + &
	436	( w(k,j,i) * rho_air_zw(k) - &
	437	w(k-1,j,i) * rho_air_zw(k-1) ) * ddzw(k) &
	438	) * ddt_3d * d_weight_pres * rflags_s_inner(k,j,i)
[1]	439	ENDDO
	440	ENDDO
[1111]	441	ENDDO
	442	!$acc end kernels
	443	!$OMP END PARALLEL
[1]	444
	445	!
[1908]	446	!-- Compute possible PE-sum of divergences for flow_statistics. Carry out
	447	!-- computation only at last Runge-Kutta substep.
[1918]	448	IF ( intermediate_timestep_count == intermediate_timestep_count_max .OR. &
	449	intermediate_timestep_count == 0 ) THEN
[1908]	450	!$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
	451	!$OMP DO SCHEDULE( STATIC )
	452	!$acc parallel loop collapse(3) present( d ) reduction(+:threadsum)
	453	DO i = nxl, nxr
	454	DO j = nys, nyn
	455	DO k = nzb+1, nzt
	456	threadsum = threadsum + ABS( d(k,j,i) )
	457	ENDDO
[1]	458	ENDDO
	459	ENDDO
[1908]	460	!$acc end parallel loop
[1918]	461	localsum = localsum + threadsum * dt_3d * weight_pres_l
[1908]	462	!$OMP END PARALLEL
	463	ENDIF
[1]	464	#endif
	465
	466	!
	467	!-- For completeness, set the divergence sum of all statistic regions to those
	468	!-- of the total domain
[1918]	469	IF ( intermediate_timestep_count == intermediate_timestep_count_max .OR. &
	470	intermediate_timestep_count == 0 ) THEN
[1908]	471	sums_divold_l(0:statistic_regions) = localsum
[1918]	472	ENDIF
[1]	473
	474	CALL cpu_log( log_point_s(1), 'divergence', 'stop' )
	475
	476	!
	477	!-- Compute the pressure perturbation solving the Poisson equation
[1575]	478	IF ( psolver == 'poisfft' ) THEN
[1]	479
	480	!
	481	!-- Solve Poisson equation via FFT and solution of tridiagonal matrices
[1575]	482	CALL poisfft( d )
[1212]	483
[1]	484	!
	485	!-- Store computed perturbation pressure and set boundary condition in
	486	!-- z-direction
	487	!$OMP PARALLEL DO
[1113]	488	!$acc kernels present( d, tend )
[1]	489	DO i = nxl, nxr
	490	DO j = nys, nyn
	491	DO k = nzb+1, nzt
	492	tend(k,j,i) = d(k,j,i)
	493	ENDDO
	494	ENDDO
	495	ENDDO
[1113]	496	!$acc end kernels
[1]	497
	498	!
	499	!-- Bottom boundary:
	500	!-- This condition is only required for internal output. The pressure
	501	!-- gradient (dp(nzb+1)-dp(nzb))/dz is not used anywhere else.
	502	IF ( ibc_p_b == 1 ) THEN
	503	!
	504	!-- Neumann (dp/dz = 0)
	505	!$OMP PARALLEL DO
[1113]	506	!$acc kernels present( nzb_s_inner, tend )
[667]	507	DO i = nxlg, nxrg
	508	DO j = nysg, nyng
[1]	509	tend(nzb_s_inner(j,i),j,i) = tend(nzb_s_inner(j,i)+1,j,i)
	510	ENDDO
	511	ENDDO
[1113]	512	!$acc end kernels
[1]	513
	514	ELSE
	515	!
	516	!-- Dirichlet
	517	!$OMP PARALLEL DO
[1113]	518	!$acc kernels present( tend )
[667]	519	DO i = nxlg, nxrg
	520	DO j = nysg, nyng
[1342]	521	tend(nzb_s_inner(j,i),j,i) = 0.0_wp
[1]	522	ENDDO
	523	ENDDO
[1113]	524	!$acc end kernels
[1]	525
	526	ENDIF
	527
	528	!
	529	!-- Top boundary
	530	IF ( ibc_p_t == 1 ) THEN
	531	!
	532	!-- Neumann
	533	!$OMP PARALLEL DO
[1113]	534	!$acc kernels present( tend )
[667]	535	DO i = nxlg, nxrg
	536	DO j = nysg, nyng
[1]	537	tend(nzt+1,j,i) = tend(nzt,j,i)
	538	ENDDO
	539	ENDDO
[1113]	540	!$acc end kernels
[1]	541
	542	ELSE
	543	!
	544	!-- Dirichlet
	545	!$OMP PARALLEL DO
[1113]	546	!$acc kernels present( tend )
[667]	547	DO i = nxlg, nxrg
	548	DO j = nysg, nyng
[1342]	549	tend(nzt+1,j,i) = 0.0_wp
[1]	550	ENDDO
	551	ENDDO
[1113]	552	!$acc end kernels
[1]	553
	554	ENDIF
	555
	556	!
	557	!-- Exchange boundaries for p
[1113]	558	IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs
	559	on_device = .TRUE. ! to be removed after complete porting
	560	ELSE ! of ghost point exchange
	561	!$acc update host( tend )
	562	ENDIF
[667]	563	CALL exchange_horiz( tend, nbgp )
[1113]	564	IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs
	565	on_device = .FALSE. ! to be removed after complete porting
	566	ELSE ! of ghost point exchange
	567	!$acc update device( tend )
	568	ENDIF
[1]	569
	570	ELSEIF ( psolver == 'sor' ) THEN
	571
	572	!
	573	!-- Solve Poisson equation for perturbation pressure using SOR-Red/Black
	574	!-- scheme
[707]	575	CALL sor( d, ddzu_pres, ddzw, p_loc )
	576	tend = p_loc
[1]	577
[1575]	578	ELSEIF ( psolver(1:9) == 'multigrid' ) THEN
[1]	579
	580	!
	581	!-- Solve Poisson equation for perturbation pressure using Multigrid scheme,
[667]	582	!-- array tend is used to store the residuals, logical exchange_mg is used
	583	!-- to discern data exchange in multigrid ( 1 ghostpoint ) and normal grid
	584	!-- ( nbgp ghost points ).
[778]	585
	586	!-- If the number of grid points of the gathered grid, which is collected
	587	!-- on PE0, is larger than the number of grid points of an PE, than array
	588	!-- tend will be enlarged.
	589	IF ( gathered_size > subdomain_size ) THEN
	590	DEALLOCATE( tend )
	591	ALLOCATE( tend(nzb:nzt_mg(mg_switch_to_pe0_level)+1,nys_mg( &
	592	mg_switch_to_pe0_level)-1:nyn_mg(mg_switch_to_pe0_level)+1,&
	593	nxl_mg(mg_switch_to_pe0_level)-1:nxr_mg( &
	594	mg_switch_to_pe0_level)+1) )
	595	ENDIF
	596
[1575]	597	IF ( psolver == 'multigrid' ) THEN
	598	CALL poismg( tend )
	599	ELSE
[1931]	600	CALL poismg_noopt( tend )
[1575]	601	ENDIF
[707]	602
[778]	603	IF ( gathered_size > subdomain_size ) THEN
	604	DEALLOCATE( tend )
	605	ALLOCATE( tend(nzb:nzt+1,nysg:nyng,nxlg:nxrg) )
	606	ENDIF
	607
[1]	608	!
	609	!-- Restore perturbation pressure on tend because this array is used
	610	!-- further below to correct the velocity fields
[707]	611	DO i = nxl-1, nxr+1
	612	DO j = nys-1, nyn+1
	613	DO k = nzb, nzt+1
	614	tend(k,j,i) = p_loc(k,j,i)
	615	ENDDO
	616	ENDDO
	617	ENDDO
[667]	618
[1]	619	ENDIF
	620
	621	!
[707]	622	!-- Store perturbation pressure on array p, used for pressure data output.
	623	!-- Ghost layers are added in the output routines (except sor-method: see below)
[1918]	624	IF ( intermediate_timestep_count <= 1 ) THEN
[707]	625	!$OMP PARALLEL PRIVATE (i,j,k)
	626	!$OMP DO
[1929]	627	!$acc kernels present( p, tend, weight_substep_l )
[1257]	628	!$acc loop independent
[707]	629	DO i = nxl-1, nxr+1
[1257]	630	!$acc loop independent
[707]	631	DO j = nys-1, nyn+1
[1257]	632	!$acc loop independent
[707]	633	DO k = nzb, nzt+1
	634	p(k,j,i) = tend(k,j,i) * &
[1929]	635	weight_substep_l
[673]	636	ENDDO
[1]	637	ENDDO
[707]	638	ENDDO
[1113]	639	!$acc end kernels
[707]	640	!$OMP END PARALLEL
	641
[1762]	642	ELSEIF ( intermediate_timestep_count > 1 ) THEN
[707]	643	!$OMP PARALLEL PRIVATE (i,j,k)
	644	!$OMP DO
[1929]	645	!$acc kernels present( p, tend, weight_substep_l )
[1257]	646	!$acc loop independent
[707]	647	DO i = nxl-1, nxr+1
[1257]	648	!$acc loop independent
[707]	649	DO j = nys-1, nyn+1
[1257]	650	!$acc loop independent
[707]	651	DO k = nzb, nzt+1
	652	p(k,j,i) = p(k,j,i) + tend(k,j,i) * &
[1929]	653	weight_substep_l
[673]	654	ENDDO
	655	ENDDO
[707]	656	ENDDO
[1113]	657	!$acc end kernels
[707]	658	!$OMP END PARALLEL
	659
	660	ENDIF
[673]	661
[707]	662	!
	663	!-- SOR-method needs ghost layers for the next timestep
	664	IF ( psolver == 'sor' ) CALL exchange_horiz( p, nbgp )
[682]	665
[1]	666	!
	667	!-- Correction of the provisional velocities with the current perturbation
	668	!-- pressure just computed
[709]	669	IF ( conserve_volume_flow .AND. ( bc_lr_cyc .OR. bc_ns_cyc ) ) THEN
[1342]	670	volume_flow_l(1) = 0.0_wp
	671	volume_flow_l(2) = 0.0_wp
[1]	672	ENDIF
[707]	673
[1]	674	!$OMP PARALLEL PRIVATE (i,j,k)
	675	!$OMP DO
[1918]	676	!$acc kernels present( ddzu, nzb_u_inner, nzb_v_inner, nzb_w_inner, tend, u, v, w )
[1257]	677	!$acc loop independent
[673]	678	DO i = nxl, nxr
[1257]	679	!$acc loop independent
[1]	680	DO j = nys, nyn
[1257]	681	!$acc loop independent
[1113]	682	DO k = 1, nzt
	683	IF ( k > nzb_w_inner(j,i) ) THEN
	684	w(k,j,i) = w(k,j,i) - dt_3d * &
	685	( tend(k+1,j,i) - tend(k,j,i) ) * ddzu(k+1) * &
[1918]	686	weight_pres_l
[1113]	687	ENDIF
[1]	688	ENDDO
[1257]	689	!$acc loop independent
[1113]	690	DO k = 1, nzt
	691	IF ( k > nzb_u_inner(j,i) ) THEN
	692	u(k,j,i) = u(k,j,i) - dt_3d * &
	693	( tend(k,j,i) - tend(k,j,i-1) ) * ddx * &
[1918]	694	weight_pres_l
[1113]	695	ENDIF
[1]	696	ENDDO
[1257]	697	!$acc loop independent
[1113]	698	DO k = 1, nzt
	699	IF ( k > nzb_v_inner(j,i) ) THEN
	700	v(k,j,i) = v(k,j,i) - dt_3d * &
	701	( tend(k,j,i) - tend(k,j-1,i) ) * ddy * &
[1918]	702	weight_pres_l
[1113]	703	ENDIF
[673]	704	ENDDO
[1]	705
	706	ENDDO
	707	ENDDO
[1113]	708	!$acc end kernels
[1]	709	!$OMP END PARALLEL
[1113]	710
	711	!
	712	!-- Sum up the volume flow through the right and north boundary
	713	IF ( conserve_volume_flow .AND. bc_lr_cyc .AND. bc_ns_cyc .AND. &
	714	nxr == nx ) THEN
	715
	716	!$OMP PARALLEL PRIVATE (j,k)
	717	!$OMP DO
	718	DO j = nys, nyn
	719	!$OMP CRITICAL
[1845]	720	DO k = nzb_u_inner(j,nx) + 1, nzt
[1113]	721	volume_flow_l(1) = volume_flow_l(1) + u(k,j,nx) * dzw(k)
	722	ENDDO
	723	!$OMP END CRITICAL
	724	ENDDO
	725	!$OMP END PARALLEL
	726
	727	ENDIF
	728
	729	IF ( conserve_volume_flow .AND. bc_ns_cyc .AND. bc_lr_cyc .AND. &
	730	nyn == ny ) THEN
	731
	732	!$OMP PARALLEL PRIVATE (i,k)
	733	!$OMP DO
	734	DO i = nxl, nxr
	735	!$OMP CRITICAL
[1845]	736	DO k = nzb_v_inner(ny,i) + 1, nzt
[1113]	737	volume_flow_l(2) = volume_flow_l(2) + v(k,ny,i) * dzw(k)
	738	ENDDO
	739	!$OMP END CRITICAL
	740	ENDDO
	741	!$OMP END PARALLEL
	742
	743	ENDIF
[673]	744
[1]	745	!
	746	!-- Conserve the volume flow
[707]	747	IF ( conserve_volume_flow .AND. ( bc_lr_cyc .AND. bc_ns_cyc ) ) THEN
[1]	748
	749	#if defined( __parallel )
[622]	750	IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )
[1]	751	CALL MPI_ALLREDUCE( volume_flow_l(1), volume_flow(1), 2, MPI_REAL, &
	752	MPI_SUM, comm2d, ierr )
	753	#else
	754	volume_flow = volume_flow_l
	755	#endif
	756
[1799]	757	volume_flow_offset(1:2) = ( volume_flow_initial(1:2) - volume_flow(1:2) ) / &
	758	volume_flow_area(1:2)
[1]	759
	760	!$OMP PARALLEL PRIVATE (i,j,k)
	761	!$OMP DO
	762	DO i = nxl, nxr
	763	DO j = nys, nyn
[667]	764	DO k = nzb_u_inner(j,i) + 1, nzt
	765	u(k,j,i) = u(k,j,i) + volume_flow_offset(1)
[719]	766	ENDDO
[1845]	767	DO k = nzb_v_inner(j,i) + 1, nzt
[667]	768	v(k,j,i) = v(k,j,i) + volume_flow_offset(2)
	769	ENDDO
[1]	770	ENDDO
	771	ENDDO
[667]	772
[1]	773	!$OMP END PARALLEL
	774
	775	ENDIF
	776
	777	!
	778	!-- Exchange of boundaries for the velocities
[1113]	779	IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs
	780	on_device = .TRUE. ! to be removed after complete porting
	781	ELSE ! of ghost point exchange
	782	!$acc update host( u, v, w )
	783	ENDIF
[667]	784	CALL exchange_horiz( u, nbgp )
	785	CALL exchange_horiz( v, nbgp )
	786	CALL exchange_horiz( w, nbgp )
[1113]	787	IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs
	788	on_device = .FALSE. ! to be removed after complete porting
	789	ELSE ! of ghost point exchange
	790	!$acc update device( u, v, w )
	791	ENDIF
[1]	792
	793	!
	794	!-- Compute the divergence of the corrected velocity field,
[1908]	795	!-- A possible PE-sum is computed in flow_statistics. Carry out computation
	796	!-- only at last Runge-Kutta step.
[1918]	797	IF ( intermediate_timestep_count == intermediate_timestep_count_max .OR. &
	798	intermediate_timestep_count == 0 ) THEN
[1908]	799	CALL cpu_log( log_point_s(1), 'divergence', 'start' )
	800	sums_divnew_l = 0.0_wp
[1]	801
	802	!
[1908]	803	!-- d must be reset to zero because it can contain nonzero values below the
	804	!-- topography
	805	IF ( topography /= 'flat' ) d = 0.0_wp
[1]	806
[1908]	807	localsum = 0.0_wp
	808	threadsum = 0.0_wp
[1]	809
[1908]	810	!$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum)
[2073]	811	#if defined( __ibm )
[1908]	812	!$OMP DO SCHEDULE( STATIC )
	813	DO i = nxl, nxr
	814	DO j = nys, nyn
	815	DO k = nzb_s_inner(j,i)+1, nzt
[2037]	816	d(k,j,i) = ( u(k,j,i+1) - u(k,j,i) ) * rho_air(k) * ddx + &
	817	( v(k,j+1,i) - v(k,j,i) ) * rho_air(k) * ddy + &
	818	( w(k,j,i) * rho_air_zw(k) - &
	819	w(k-1,j,i) * rho_air_zw(k-1) ) * ddzw(k)
[1908]	820	ENDDO
	821	DO k = nzb+1, nzt
	822	threadsum = threadsum + ABS( d(k,j,i) )
	823	ENDDO
[1]	824	ENDDO
	825	ENDDO
	826	#else
[2073]	827	!$OMP DO SCHEDULE( STATIC )
[1908]	828	!$acc kernels present( d, ddzw, rflags_s_inner, u, v, w )
	829	!$acc loop collapse( 3 )
	830	DO i = nxl, nxr
	831	DO j = nys, nyn
	832	DO k = 1, nzt
[2037]	833	d(k,j,i) = ( ( u(k,j,i+1) - u(k,j,i) ) * rho_air(k) * ddx + &
	834	( v(k,j+1,i) - v(k,j,i) ) * rho_air(k) * ddy + &
	835	( w(k,j,i) * rho_air_zw(k) - &
	836	w(k-1,j,i) * rho_air_zw(k-1) ) * ddzw(k) &
	837	) * rflags_s_inner(k,j,i)
[1908]	838	ENDDO
[1113]	839	ENDDO
	840	ENDDO
[1908]	841	!$acc end kernels
[1113]	842	!
[1908]	843	!-- Compute possible PE-sum of divergences for flow_statistics
[2073]	844	!$OMP DO SCHEDULE( STATIC )
[1908]	845	!$acc parallel loop collapse(3) present( d ) reduction(+:threadsum)
	846	DO i = nxl, nxr
	847	DO j = nys, nyn
	848	DO k = nzb+1, nzt
	849	threadsum = threadsum + ABS( d(k,j,i) )
	850	ENDDO
[1]	851	ENDDO
	852	ENDDO
[1908]	853	!$acc end parallel loop
[1]	854	#endif
[667]	855
[1908]	856	localsum = localsum + threadsum
	857	!$OMP END PARALLEL
[1]	858
	859	!
[1908]	860	!-- For completeness, set the divergence sum of all statistic regions to those
	861	!-- of the total domain
	862	sums_divnew_l(0:statistic_regions) = localsum
[1]	863
[1908]	864	CALL cpu_log( log_point_s(1), 'divergence', 'stop' )
[1]	865
[1908]	866	ENDIF
	867
[1]	868	CALL cpu_log( log_point(8), 'pres', 'stop' )
	869
	870
	871	END SUBROUTINE pres

Note: See TracBrowser for help on using the repository browser.

Download in other formats:

| Impressum | ©Leibniz Universität Hannover |