Changeset 2118 for palm/trunk
- Timestamp:
- Jan 17, 2017 4:38:49 PM (8 years ago)
- Location:
- palm/trunk/SOURCE
- Files:
-
- 1 deleted
- 32 edited
Legend:
- Unmodified
- Added
- Removed
-
TabularUnified palm/trunk/SOURCE/Makefile ¶
r2051 r2118 20 20 # Current revisions: 21 21 # ------------------ 22 # 22 # -cuda_fft_interfaces_mod 23 23 # 24 24 # Former revisions: … … 312 312 check_for_restart.f90 check_open.f90 check_parameters.f90 \ 313 313 close_file.f90 compute_vpt.f90 coriolis.f90 cpulog_mod.f90 \ 314 cuda_fft_interfaces_mod.f90data_log.f90 data_output_dvrp.f90 \314 data_log.f90 data_output_dvrp.f90 \ 315 315 data_output_mask.f90 data_output_profiles.f90 \ 316 316 data_output_ptseries.f90 data_output_spectra.f90 data_output_flight.f90\ … … 426 426 cpulog_mod.o: modules.o mod_kinds.o 427 427 cpu_statistics.o: modules.o mod_kinds.o 428 cuda_fft_interfaces_mod.o: cuda_fft_interfaces_mod.f90 modules.o mod_kinds.o429 428 data_log.o: modules.o mod_kinds.o 430 429 data_output_dvrp.o: modules.o cpulog_mod.o mod_kinds.o … … 456 455 exchange_horiz.o: modules.o cpulog_mod.o mod_kinds.o 457 456 exchange_horiz_2d.o: modules.o cpulog_mod.o mod_kinds.o pmc_interface_mod.o 458 fft_xy_mod.o: cuda_fft_interfaces_mod.omodules.o mod_kinds.o singleton_mod.o temperton_fft_mod.o457 fft_xy_mod.o: modules.o mod_kinds.o singleton_mod.o temperton_fft_mod.o 459 458 flow_statistics.o: modules.o cpulog_mod.o mod_kinds.o land_surface_model_mod.o \ 460 459 netcdf_interface_mod.o radiation_model_mod.o -
TabularUnified palm/trunk/SOURCE/advec_ws.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ------------------ 22 ! 22 ! OpenACC version of subroutines removed 23 23 ! 24 24 ! Former revisions: … … 181 181 182 182 PRIVATE 183 PUBLIC advec_s_ws, advec_s_ws_acc, advec_u_ws, advec_u_ws_acc, & 184 advec_v_ws, advec_v_ws_acc, advec_w_ws, advec_w_ws_acc, & 185 ws_init, ws_init_flags, ws_statistics 183 PUBLIC advec_s_ws, advec_u_ws, advec_v_ws, advec_w_ws, ws_init, & 184 ws_init_flags, ws_statistics 186 185 187 186 INTERFACE ws_init … … 207 206 END INTERFACE advec_u_ws 208 207 209 INTERFACE advec_u_ws_acc210 MODULE PROCEDURE advec_u_ws_acc211 END INTERFACE advec_u_ws_acc212 213 208 INTERFACE advec_v_ws 214 209 MODULE PROCEDURE advec_v_ws … … 216 211 END INTERFACE advec_v_ws 217 212 218 INTERFACE advec_v_ws_acc219 MODULE PROCEDURE advec_v_ws_acc220 END INTERFACE advec_v_ws_acc221 222 213 INTERFACE advec_w_ws 223 214 MODULE PROCEDURE advec_w_ws 224 215 MODULE PROCEDURE advec_w_ws_ij 225 216 END INTERFACE advec_w_ws 226 227 INTERFACE advec_w_ws_acc228 MODULE PROCEDURE advec_w_ws_acc229 END INTERFACE advec_w_ws_acc230 217 231 218 CONTAINS … … 4029 4016 4030 4017 4031 !------------------------------------------------------------------------------!4032 ! Description:4033 ! ------------4034 !> Scalar advection - Call for all grid points - accelerator version4035 !------------------------------------------------------------------------------!4036 SUBROUTINE advec_s_ws_acc ( sk, sk_char )4037 4038 USE arrays_3d, &4039 ONLY: ddzw, drho_air, tend, u, v, w, rho_air, rho_air_zw4040 4041 USE constants, &4042 ONLY: adv_sca_1, adv_sca_3, adv_sca_54043 4044 USE control_parameters, &4045 ONLY: intermediate_timestep_count, monotonic_adjustment, u_gtrans,&4046 v_gtrans4047 4048 USE grid_variables, &4049 ONLY: ddx, ddy4050 4051 USE indices, &4052 ONLY: i_left, i_right, j_north, j_south, nxlg, nxrg, nyng, nysg, &4053 nzb, nzb_max, nzt, wall_flags_04054 4055 USE kinds4056 4057 ! USE statistics, &4058 ! ONLY: sums_wspts_ws_l, sums_wsqs_ws_l, sums_wssas_ws_l, &4059 ! sums_wsqrs_ws_l, sums_wsnrs_ws_l, weight_substep4060 4061 IMPLICIT NONE4062 4063 CHARACTER (LEN = *), INTENT(IN) :: sk_char !<4064 4065 INTEGER(iwp) :: i !<4066 INTEGER(iwp) :: ibit0 !<4067 INTEGER(iwp) :: ibit1 !<4068 INTEGER(iwp) :: ibit2 !<4069 INTEGER(iwp) :: ibit3 !<4070 INTEGER(iwp) :: ibit4 !<4071 INTEGER(iwp) :: ibit5 !<4072 INTEGER(iwp) :: ibit6 !<4073 INTEGER(iwp) :: ibit7 !<4074 INTEGER(iwp) :: ibit8 !<4075 INTEGER(iwp) :: j !<4076 INTEGER(iwp) :: k !<4077 INTEGER(iwp) :: k_mm !<4078 INTEGER(iwp) :: k_mmm !<4079 INTEGER(iwp) :: k_pp !<4080 INTEGER(iwp) :: k_ppp !<4081 INTEGER(iwp) :: tn = 0 !<4082 4083 REAL(wp) :: diss_d !<4084 REAL(wp) :: diss_l !<4085 REAL(wp) :: diss_n !<4086 REAL(wp) :: diss_r !<4087 REAL(wp) :: diss_s !<4088 REAL(wp) :: diss_t !<4089 REAL(wp) :: div !<4090 REAL(wp) :: flux_d !<4091 REAL(wp) :: flux_l !<4092 REAL(wp) :: flux_n !<4093 REAL(wp) :: flux_r !<4094 REAL(wp) :: flux_s !<4095 REAL(wp) :: flux_t !<4096 REAL(wp) :: fd_1 !<4097 REAL(wp) :: fl_1 !<4098 REAL(wp) :: fn_1 !<4099 REAL(wp) :: fr_1 !<4100 REAL(wp) :: fs_1 !<4101 REAL(wp) :: ft_1 !<4102 REAL(wp) :: phi_d !<4103 REAL(wp) :: phi_l !<4104 REAL(wp) :: phi_n !<4105 REAL(wp) :: phi_r !<4106 REAL(wp) :: phi_s !<4107 REAL(wp) :: phi_t !<4108 REAL(wp) :: rd !<4109 REAL(wp) :: rl !<4110 REAL(wp) :: rn !<4111 REAL(wp) :: rr !<4112 REAL(wp) :: rs !<4113 REAL(wp) :: rt !<4114 REAL(wp) :: u_comp !<4115 REAL(wp) :: v_comp !<4116 4117 REAL(wp), INTENT(IN), DIMENSION(nzb:nzt+1,nysg:nyng,nxlg:nxrg) :: sk !<4118 4119 !4120 !-- Computation of fluxes and tendency terms4121 !$acc kernels present( ddzw, sk, tend, u, v, w, wall_flags_0 )4122 DO i = i_left, i_right4123 DO j = j_south, j_north4124 DO k = nzb+1, nzt4125 4126 ibit2 = IBITS(wall_flags_0(k,j,i-1),2,1)4127 ibit1 = IBITS(wall_flags_0(k,j,i-1),1,1)4128 ibit0 = IBITS(wall_flags_0(k,j,i-1),0,1)4129 4130 u_comp = u(k,j,i) - u_gtrans4131 flux_l = u_comp * ( &4132 ( 37.0_wp * ibit2 * adv_sca_5 &4133 + 7.0_wp * ibit1 * adv_sca_3 &4134 + ibit0 * adv_sca_1 &4135 ) * &4136 ( sk(k,j,i) + sk(k,j,i-1) ) &4137 - ( 8.0_wp * ibit2 * adv_sca_5 &4138 + ibit1 * adv_sca_3 &4139 ) * &4140 ( sk(k,j,i+1) + sk(k,j,i-2) ) &4141 + ( ibit2 * adv_sca_5 &4142 ) * &4143 ( sk(k,j,i+2) + sk(k,j,i-3) ) &4144 )4145 4146 diss_l = -ABS( u_comp ) * ( &4147 ( 10.0_wp * ibit2 * adv_sca_5 &4148 + 3.0_wp * ibit1 * adv_sca_3 &4149 + ibit0 * adv_sca_1 &4150 ) * &4151 ( sk(k,j,i) - sk(k,j,i-1) ) &4152 - ( 5.0_wp * ibit2 * adv_sca_5 &4153 + ibit1 * adv_sca_3 &4154 ) * &4155 ( sk(k,j,i+1) - sk(k,j,i-2) ) &4156 + ( ibit2 * adv_sca_5 &4157 ) * &4158 ( sk(k,j,i+2) - sk(k,j,i-3) ) &4159 )4160 4161 ibit2 = IBITS(wall_flags_0(k,j,i),2,1)4162 ibit1 = IBITS(wall_flags_0(k,j,i),1,1)4163 ibit0 = IBITS(wall_flags_0(k,j,i),0,1)4164 4165 u_comp = u(k,j,i+1) - u_gtrans4166 flux_r = u_comp * ( &4167 ( 37.0_wp * ibit2 * adv_sca_5 &4168 + 7.0_wp * ibit1 * adv_sca_3 &4169 + ibit0 * adv_sca_1 &4170 ) * &4171 ( sk(k,j,i+1) + sk(k,j,i) ) &4172 - ( 8.0_wp * ibit2 * adv_sca_5 &4173 + ibit1 * adv_sca_3 &4174 ) * &4175 ( sk(k,j,i+2) + sk(k,j,i-1) ) &4176 + ( ibit2 * adv_sca_5 &4177 ) * &4178 ( sk(k,j,i+3) + sk(k,j,i-2) ) &4179 )4180 4181 diss_r = -ABS( u_comp ) * ( &4182 ( 10.0_wp * ibit2 * adv_sca_5 &4183 + 3.0_wp * ibit1 * adv_sca_3 &4184 + ibit0 * adv_sca_1 &4185 ) * &4186 ( sk(k,j,i+1) - sk(k,j,i) ) &4187 - ( 5.0_wp * ibit2 * adv_sca_5 &4188 + ibit1 * adv_sca_3 &4189 ) * &4190 ( sk(k,j,i+2) - sk(k,j,i-1) ) &4191 + ( ibit2 * adv_sca_5 &4192 ) * &4193 ( sk(k,j,i+3) - sk(k,j,i-2) ) &4194 )4195 4196 ibit5 = IBITS(wall_flags_0(k,j-1,i),5,1)4197 ibit4 = IBITS(wall_flags_0(k,j-1,i),4,1)4198 ibit3 = IBITS(wall_flags_0(k,j-1,i),3,1)4199 4200 v_comp = v(k,j,i) - v_gtrans4201 flux_s = v_comp * ( &4202 ( 37.0_wp * ibit5 * adv_sca_5 &4203 + 7.0_wp * ibit4 * adv_sca_3 &4204 + ibit3 * adv_sca_1 &4205 ) * &4206 ( sk(k,j,i) + sk(k,j-1,i) ) &4207 - ( 8.0_wp * ibit5 * adv_sca_5 &4208 + ibit4 * adv_sca_3 &4209 ) * &4210 ( sk(k,j+1,i) + sk(k,j-2,i) ) &4211 + ( ibit5 * adv_sca_5 &4212 ) * &4213 ( sk(k,j+2,i) + sk(k,j-3,i) ) &4214 )4215 4216 diss_s = -ABS( v_comp ) * ( &4217 ( 10.0_wp * ibit5 * adv_sca_5 &4218 + 3.0_wp * ibit4 * adv_sca_3 &4219 + ibit3 * adv_sca_1 &4220 ) * &4221 ( sk(k,j,i) - sk(k,j-1,i) ) &4222 - ( 5.0_wp * ibit5 * adv_sca_5 &4223 + ibit4 * adv_sca_3 &4224 ) * &4225 ( sk(k,j+1,i) - sk(k,j-2,i) ) &4226 + ( ibit5 * adv_sca_5 &4227 ) * &4228 ( sk(k,j+2,i) - sk(k,j-3,i) ) &4229 )4230 4231 ibit5 = IBITS(wall_flags_0(k,j,i),5,1)4232 ibit4 = IBITS(wall_flags_0(k,j,i),4,1)4233 ibit3 = IBITS(wall_flags_0(k,j,i),3,1)4234 4235 v_comp = v(k,j+1,i) - v_gtrans4236 flux_n = v_comp * ( &4237 ( 37.0_wp * ibit5 * adv_sca_5 &4238 + 7.0_wp * ibit4 * adv_sca_3 &4239 + ibit3 * adv_sca_1 &4240 ) * &4241 ( sk(k,j+1,i) + sk(k,j,i) ) &4242 - ( 8.0_wp * ibit5 * adv_sca_5 &4243 + ibit4 * adv_sca_3 &4244 ) * &4245 ( sk(k,j+2,i) + sk(k,j-1,i) ) &4246 + ( ibit5 * adv_sca_5 &4247 ) * &4248 ( sk(k,j+3,i) + sk(k,j-2,i) ) &4249 )4250 4251 diss_n = -ABS( v_comp ) * ( &4252 ( 10.0_wp * ibit5 * adv_sca_5 &4253 + 3.0_wp * ibit4 * adv_sca_3 &4254 + ibit3 * adv_sca_1 &4255 ) * &4256 ( sk(k,j+1,i) - sk(k,j,i) ) &4257 - ( 5.0_wp * ibit5 * adv_sca_5 &4258 + ibit4 * adv_sca_3 &4259 ) * &4260 ( sk(k,j+2,i) - sk(k,j-1,i) ) &4261 + ( ibit5 * adv_sca_5 &4262 ) * &4263 ( sk(k,j+3,i) - sk(k,j-2,i) ) &4264 )4265 4266 !4267 !-- indizes k_m, k_mm, ... should be known at these point4268 ibit8 = IBITS(wall_flags_0(k-1,j,i),8,1)4269 ibit7 = IBITS(wall_flags_0(k-1,j,i),7,1)4270 ibit6 = IBITS(wall_flags_0(k-1,j,i),6,1)4271 4272 k_pp = k + 2 * ibit84273 k_mm = k - 2 * ( ibit7 + ibit8 )4274 k_mmm = k - 3 * ibit84275 4276 flux_d = w(k-1,j,i) * ( &4277 ( 37.0_wp * ibit8 * adv_sca_5 &4278 + 7.0_wp * ibit7 * adv_sca_3 &4279 + ibit6 * adv_sca_1 &4280 ) * &4281 ( sk(k,j,i) + sk(k-1,j,i) ) &4282 - ( 8.0_wp * ibit8 * adv_sca_5 &4283 + ibit7 * adv_sca_3 &4284 ) * &4285 ( sk(k+1,j,i) + sk(k_mm,j,i) ) &4286 + ( ibit8 * adv_sca_5 &4287 ) * ( sk(k_pp,j,i)+ sk(k_mmm,j,i) ) &4288 )4289 4290 diss_d = -ABS( w(k-1,j,i) ) * ( &4291 ( 10.0_wp * ibit8 * adv_sca_5 &4292 + 3.0_wp * ibit7 * adv_sca_3 &4293 + ibit6 * adv_sca_1 &4294 ) * &4295 ( sk(k,j,i) - sk(k-1,j,i) ) &4296 - ( 5.0_wp * ibit8 * adv_sca_5 &4297 + ibit7 * adv_sca_3 &4298 ) * &4299 ( sk(k+1,j,i) - sk(k_mm,j,i) ) &4300 + ( ibit8 * adv_sca_5 &4301 ) * &4302 ( sk(k_pp,j,i) - sk(k_mmm,j,i) ) &4303 )4304 4305 ibit8 = IBITS(wall_flags_0(k,j,i),8,1)4306 ibit7 = IBITS(wall_flags_0(k,j,i),7,1)4307 ibit6 = IBITS(wall_flags_0(k,j,i),6,1)4308 4309 k_ppp = k + 3 * ibit84310 k_pp = k + 2 * ( 1 - ibit6 )4311 k_mm = k - 2 * ibit84312 4313 flux_t = w(k,j,i) * rho_air_zw(k) * ( &4314 ( 37.0_wp * ibit8 * adv_sca_5 &4315 + 7.0_wp * ibit7 * adv_sca_3 &4316 + ibit6 * adv_sca_1 &4317 ) * &4318 ( sk(k+1,j,i) + sk(k,j,i) ) &4319 - ( 8.0_wp * ibit8 * adv_sca_5 &4320 + ibit7 * adv_sca_3 &4321 ) * &4322 ( sk(k_pp,j,i) + sk(k-1,j,i) ) &4323 + ( ibit8 * adv_sca_5 &4324 ) * ( sk(k_ppp,j,i)+ sk(k_mm,j,i) ) &4325 )4326 4327 diss_t = -ABS( w(k,j,i) ) * rho_air_zw(k) * ( &4328 ( 10.0_wp * ibit8 * adv_sca_5 &4329 + 3.0_wp * ibit7 * adv_sca_3 &4330 + ibit6 * adv_sca_1 &4331 ) * &4332 ( sk(k+1,j,i) - sk(k,j,i) ) &4333 - ( 5.0_wp * ibit8 * adv_sca_5 &4334 + ibit7 * adv_sca_3 &4335 ) * &4336 ( sk(k_pp,j,i) - sk(k-1,j,i) ) &4337 + ( ibit8 * adv_sca_5 &4338 ) * &4339 ( sk(k_ppp,j,i) - sk(k_mm,j,i) ) &4340 )4341 !4342 !-- Apply monotonic adjustment.4343 IF ( monotonic_adjustment ) THEN4344 !4345 !-- At first, calculate first order fluxes.4346 u_comp = u(k,j,i) - u_gtrans4347 fl_1 = ( u_comp * ( sk(k,j,i) + sk(k,j,i-1) ) &4348 -ABS( u_comp ) * ( sk(k,j,i) - sk(k,j,i-1) ) &4349 ) * adv_sca_14350 4351 u_comp = u(k,j,i+1) - u_gtrans4352 fr_1 = ( u_comp * ( sk(k,j,i+1) + sk(k,j,i) ) &4353 -ABS( u_comp ) * ( sk(k,j,i+1) - sk(k,j,i) ) &4354 ) * adv_sca_14355 4356 v_comp = v(k,j,i) - v_gtrans4357 fs_1 = ( v_comp * ( sk(k,j,i) + sk(k,j-1,i) ) &4358 -ABS( v_comp ) * ( sk(k,j,i) - sk(k,j-1,i) ) &4359 ) * adv_sca_14360 4361 v_comp = v(k,j+1,i) - v_gtrans4362 fn_1 = ( v_comp * ( sk(k,j+1,i) + sk(k,j,i) ) &4363 -ABS( v_comp ) * ( sk(k,j+1,i) - sk(k,j,i) ) &4364 ) * adv_sca_14365 4366 fd_1 = ( w(k-1,j,i) * ( sk(k,j,i) + sk(k-1,j,i) ) &4367 -ABS( w(k-1,j,i) ) * ( sk(k,j,i) - sk(k-1,j,i) ) &4368 ) * adv_sca_1 * rho_air_zw(k)4369 4370 ft_1 = ( w(k,j,i) * ( sk(k+1,j,i) + sk(k,j,i) ) &4371 -ABS( w(k,j,i) ) * ( sk(k+1,j,i) - sk(k,j,i) ) &4372 ) * adv_sca_1 * rho_air_zw(k)4373 !4374 !-- Calculate ratio of upwind gradients. Note, Min/Max is just4375 !-- to avoid if statements.4376 rl = ( MAX( 0.0_wp, u(k,j,i) - u_gtrans ) * &4377 ABS( ( sk(k,j,i-1) - sk(k,j,i-2) ) /&4378 ( sk(k,j,i) - sk(k,j,i-1) + 1E-20_wp ) &4379 ) + &4380 MIN( 0.0_wp, u(k,j,i) - u_gtrans ) * &4381 ABS( ( sk(k,j,i) - sk(k,j,i+1) ) /&4382 ( sk(k,j,i-1) - sk(k,j,i) + 1E-20_wp ) &4383 ) &4384 ) / ABS( u(k,j,i) - u_gtrans + 1E-20_wp )4385 4386 rr = ( MAX( 0.0_wp, u(k,j,i+1) - u_gtrans ) * &4387 ABS( ( sk(k,j,i) - sk(k,j,i-1) ) /&4388 ( sk(k,j,i+1) - sk(k,j,i) + 1E-20_wp ) &4389 ) + &4390 MIN( 0.0_wp, u(k,j,i+1) - u_gtrans ) * &4391 ABS( ( sk(k,j,i+1) - sk(k,j,i+2) ) /&4392 ( sk(k,j,i) - sk(k,j,i+1) + 1E-20_wp ) &4393 ) &4394 ) / ABS( u(k,j,i+1) - u_gtrans + 1E-20_wp )4395 4396 rs = ( MAX( 0.0_wp, v(k,j,i) - v_gtrans ) * &4397 ABS( ( sk(k,j-1,i) - sk(k,j-2,i) ) /&4398 ( sk(k,j,i) - sk(k,j-1,i) + 1E-20_wp ) &4399 ) + &4400 MIN( 0.0_wp, v(k,j,i) - v_gtrans ) * &4401 ABS( ( sk(k,j,i) - sk(k,j+1,i) ) /&4402 ( sk(k,j-1,i) - sk(k,j,i) + 1E-20_wp ) &4403 ) &4404 ) / ABS( v(k,j,i) - v_gtrans + 1E-20_wp )4405 4406 rn = ( MAX( 0.0_wp, v(k,j+1,i) - v_gtrans ) * &4407 ABS( ( sk(k,j,i) - sk(k,j-1,i) ) /&4408 ( sk(k,j+1,i) - sk(k,j,i) + 1E-20_wp ) &4409 ) + &4410 MIN( 0.0_wp, v(k,j+1,i) - v_gtrans ) * &4411 ABS( ( sk(k,j+1,i) - sk(k,j+2,i) ) /&4412 ( sk(k,j,i) - sk(k,j+1,i) + 1E-20_wp ) &4413 ) &4414 ) / ABS( v(k,j+1,i) - v_gtrans + 1E-20_wp )4415 !4416 !-- Reuse k_mm and compute k_mmm for the vertical gradient ratios.4417 !-- Note, for vertical advection below the third grid point above4418 !-- surface ( or below the model top) rd and rt are set to 0, i.e.4419 !-- use of first order scheme is enforced.4420 k_mmm = k - 3 * ibit84421 4422 rd = ( MAX( 0.0_wp, w(k-1,j,i) ) * &4423 ABS( ( sk(k_mm,j,i) - sk(k_mmm,j,i) ) / &4424 ( sk(k-1,j,i) - sk(k_mm,j,i) + 1E-20_wp ) &4425 ) + &4426 MIN( 0.0_wp, w(k-1,j,i) ) * &4427 ABS( ( sk(k-1,j,i) - sk(k,j,i) ) / &4428 ( sk(k_mm,j,i) - sk(k-1,j,i) + 1E-20_wp ) &4429 ) &4430 ) * ibit8 / ABS( w(k-1,j,i) + 1E-20_wp )4431 4432 rt = ( MAX( 0.0_wp, w(k,j,i) ) * &4433 ABS( ( sk(k,j,i) - sk(k-1,j,i) ) / &4434 ( sk(k+1,j,i) - sk(k,j,i) + 1E-20_wp ) &4435 ) + &4436 MIN( 0.0_wp, w(k,j,i) ) * &4437 ABS( ( sk(k+1,j,i) - sk(k_pp,j,i) ) / &4438 ( sk(k,j,i) - sk(k+1,j,i) + 1E-20_wp ) &4439 ) &4440 ) * ibit8 / ABS( w(k,j,i) + 1E-20_wp )4441 !4442 !-- Calculate empirical limiter function (van Albada2 limiter).4443 phi_l = MIN( 1.0_wp, ( 2.0_wp * ABS( rl ) ) / &4444 ( rl**2 + 1.0_wp ) )4445 phi_r = MIN( 1.0_wp, ( 2.0_wp * ABS( rr ) ) / &4446 ( rr**2 + 1.0_wp ) )4447 phi_s = MIN( 1.0_wp, ( 2.0_wp * ABS( rs ) ) / &4448 ( rs**2 + 1.0_wp ) )4449 phi_n = MIN( 1.0_wp, ( 2.0_wp * ABS( rn ) ) / &4450 ( rn**2 + 1.0_wp ) )4451 phi_d = MIN( 1.0_wp, ( 2.0_wp * ABS( rd ) ) / &4452 ( rd**2 + 1.0_wp ) )4453 phi_t = MIN( 1.0_wp, ( 2.0_wp * ABS( rt ) ) / &4454 ( rt**2 + 1.0_wp ) )4455 !4456 !-- Calculate the resulting monotone flux.4457 flux_l = fl_1 - phi_l * ( fl_1 - flux_l )4458 flux_r = fr_1 - phi_r * ( fr_1 - flux_r )4459 flux_s = fs_1 - phi_s * ( fs_1 - flux_s )4460 flux_n = fn_1 - phi_n * ( fn_1 - flux_n )4461 flux_d = fd_1 - phi_d * ( fd_1 - flux_d )4462 flux_t = ft_1 - phi_t * ( ft_1 - flux_t )4463 !4464 !-- Moreover, modify dissipation flux according to the limiter.4465 diss_l = diss_l * phi_l4466 diss_r = diss_r * phi_r4467 diss_s = diss_s * phi_s4468 diss_n = diss_n * phi_n4469 diss_d = diss_d * phi_d4470 diss_t = diss_t * phi_t4471 4472 ENDIF4473 !4474 !-- Calculate the divergence of the velocity field. A respective4475 !-- correction is needed to overcome numerical instabilities caused4476 !-- by a not sufficient reduction of divergences near topography.4477 div = ( u(k,j,i+1) * ( ibit0 + ibit1 + ibit2 ) &4478 - u(k,j,i) * ( IBITS(wall_flags_0(k,j,i-1),0,1) &4479 + IBITS(wall_flags_0(k,j,i-1),1,1) &4480 + IBITS(wall_flags_0(k,j,i-1),2,1) &4481 ) &4482 ) * rho_air(k) * ddx &4483 + ( v(k,j+1,i) * ( ibit3 + ibit4 + ibit5 ) &4484 - v(k,j,i) * ( IBITS(wall_flags_0(k,j-1,i),3,1) &4485 + IBITS(wall_flags_0(k,j-1,i),4,1) &4486 + IBITS(wall_flags_0(k,j-1,i),5,1) &4487 ) &4488 ) * rho_air(k) * ddy &4489 + ( w(k,j,i) * rho_air_zw(k) * &4490 ( ibit6 + ibit7 + ibit8 ) &4491 - w(k-1,j,i) * rho_air_zw(k-1) * &4492 ( IBITS(wall_flags_0(k-1,j,i),6,1) &4493 + IBITS(wall_flags_0(k-1,j,i),7,1) &4494 + IBITS(wall_flags_0(k-1,j,i),8,1) &4495 ) &4496 ) * ddzw(k)4497 4498 4499 tend(k,j,i) = - ( &4500 ( flux_r + diss_r - flux_l - diss_l ) * ddx &4501 + ( flux_n + diss_n - flux_s - diss_s ) * ddy &4502 + ( ( flux_t + diss_t ) - &4503 ( flux_d + diss_d ) &4504 ) * drho_air(k) * ddzw(k) &4505 ) + div * sk(k,j,i)4506 4507 !++4508 !-- Evaluation of statistics4509 ! SELECT CASE ( sk_char )4510 !4511 ! CASE ( 'pt' )4512 ! sums_wspts_ws_l(k,tn) = sums_wspts_ws_l(k,tn) &4513 ! + ( flux_t + diss_t ) &4514 ! * weight_substep(intermediate_timestep_count)4515 ! CASE ( 'sa' )4516 ! sums_wssas_ws_l(k,tn) = sums_wssas_ws_l(k,tn) &4517 ! + ( flux_t + diss_t ) &4518 ! * weight_substep(intermediate_timestep_count)4519 ! CASE ( 'q' )4520 ! sums_wsqs_ws_l(k,tn) = sums_wsqs_ws_l(k,tn) &4521 ! + ( flux_t + diss_t ) &4522 ! * weight_substep(intermediate_timestep_count)4523 ! CASE ( 'qr' )4524 ! sums_wsqrs_ws_l(k,tn) = sums_wsqrs_ws_l(k,tn) &4525 ! + ( flux_t + diss_t ) &4526 ! * weight_substep(intermediate_timestep_count)4527 ! CASE ( 'nr' )4528 ! sums_wsnrs_ws_l(k,tn) = sums_wsnrs_ws_l(k,tn) &4529 ! + ( flux_t + diss_t ) &4530 ! * weight_substep(intermediate_timestep_count)4531 !4532 ! END SELECT4533 4534 ENDDO4535 ENDDO4536 ENDDO4537 !$acc end kernels4538 4539 END SUBROUTINE advec_s_ws_acc4540 4541 4018 4542 4019 !------------------------------------------------------------------------------! … … 5039 4516 ! Description: 5040 4517 ! ------------ 5041 !> Advection of u - Call for all grid points - accelerator version5042 !------------------------------------------------------------------------------!5043 SUBROUTINE advec_u_ws_acc5044 5045 USE arrays_3d, &5046 ONLY: ddzw, drho_air, tend, u, v, w, rho_air, rho_air_zw5047 5048 USE constants, &5049 ONLY: adv_mom_1, adv_mom_3, adv_mom_55050 5051 USE control_parameters, &5052 ONLY: intermediate_timestep_count, u_gtrans, v_gtrans5053 5054 USE grid_variables, &5055 ONLY: ddx, ddy5056 5057 USE indices, &5058 ONLY: i_left, i_right, j_north, j_south, nxl, nxr, nyn, nys, nzb, &5059 nzb_max, nzt, wall_flags_05060 5061 USE kinds5062 5063 ! USE statistics, &5064 ! ONLY: hom, sums_us2_ws_l, sums_wsus_ws_l, weight_substep5065 5066 IMPLICIT NONE5067 5068 INTEGER(iwp) :: i !<5069 INTEGER(iwp) :: ibit9 !<5070 INTEGER(iwp) :: ibit10 !<5071 INTEGER(iwp) :: ibit11 !<5072 INTEGER(iwp) :: ibit12 !<5073 INTEGER(iwp) :: ibit13 !<5074 INTEGER(iwp) :: ibit14 !<5075 INTEGER(iwp) :: ibit15 !<5076 INTEGER(iwp) :: ibit16 !<5077 INTEGER(iwp) :: ibit17 !<5078 INTEGER(iwp) :: j !<5079 INTEGER(iwp) :: k !<5080 INTEGER(iwp) :: k_mmm !<5081 INTEGER(iwp) :: k_mm !<5082 INTEGER(iwp) :: k_pp !<5083 INTEGER(iwp) :: k_ppp !<5084 INTEGER(iwp) :: tn = 0 !<5085 5086 REAL(wp) :: diss_d !<5087 REAL(wp) :: diss_l !<5088 REAL(wp) :: diss_n !<5089 REAL(wp) :: diss_r !<5090 REAL(wp) :: diss_s !<5091 REAL(wp) :: diss_t !<5092 REAL(wp) :: div !<5093 REAL(wp) :: flux_d !<5094 REAL(wp) :: flux_l !<5095 REAL(wp) :: flux_n !<5096 REAL(wp) :: flux_r !<5097 REAL(wp) :: flux_s !<5098 REAL(wp) :: flux_t !<5099 REAL(wp) :: gu !<5100 REAL(wp) :: gv !<5101 REAL(wp) :: u_comp !<5102 REAL(wp) :: u_comp_l !<5103 REAL(wp) :: v_comp !<5104 REAL(wp) :: v_comp_s !<5105 REAL(wp) :: w_comp !<5106 5107 5108 gu = 2.0_wp * u_gtrans5109 gv = 2.0_wp * v_gtrans5110 5111 !5112 !-- Computation of fluxes and tendency terms5113 !$acc kernels present( ddzw, tend, u, v, w, wall_flags_0 )5114 DO i = i_left, i_right5115 DO j = j_south, j_north5116 DO k = nzb+1, nzt5117 5118 ibit11 = IBITS(wall_flags_0(k,j,i-1),11,1)5119 ibit10 = IBITS(wall_flags_0(k,j,i-1),10,1)5120 ibit9 = IBITS(wall_flags_0(k,j,i-1),9,1)5121 5122 u_comp_l = u(k,j,i) + u(k,j,i-1) - gu5123 flux_l = u_comp_l * ( &5124 ( 37.0_wp * ibit11 * adv_mom_5 &5125 + 7.0_wp * ibit10 * adv_mom_3 &5126 + ibit9 * adv_mom_1 &5127 ) * &5128 ( u(k,j,i) + u(k,j,i-1) ) &5129 - ( 8.0_wp * ibit11 * adv_mom_5 &5130 + ibit10 * adv_mom_3 &5131 ) * &5132 ( u(k,j,i+1) + u(k,j,i-2) ) &5133 + ( ibit11 * adv_mom_5 &5134 ) * &5135 ( u(k,j,i+2) + u(k,j,i-3) ) &5136 )5137 5138 diss_l = - ABS( u_comp_l ) * ( &5139 ( 10.0_wp * ibit11 * adv_mom_5 &5140 + 3.0_wp * ibit10 * adv_mom_3 &5141 + ibit9 * adv_mom_1 &5142 ) * &5143 ( u(k,j,i) - u(k,j,i-1) ) &5144 - ( 5.0_wp * ibit11 * adv_mom_5 &5145 + ibit10 * adv_mom_3 &5146 ) * &5147 ( u(k,j,i+1) - u(k,j,i-2) ) &5148 + ( ibit11 * adv_mom_5 &5149 ) * &5150 ( u(k,j,i+2) - u(k,j,i-3) ) &5151 )5152 5153 ibit11 = IBITS(wall_flags_0(k,j,i),11,1)5154 ibit10 = IBITS(wall_flags_0(k,j,i),10,1)5155 ibit9 = IBITS(wall_flags_0(k,j,i),9,1)5156 5157 u_comp = u(k,j,i+1) + u(k,j,i)5158 flux_r = ( u_comp - gu ) * ( &5159 ( 37.0_wp * ibit11 * adv_mom_5 &5160 + 7.0_wp * ibit10 * adv_mom_3 &5161 + ibit9 * adv_mom_1 &5162 ) * &5163 ( u(k,j,i+1) + u(k,j,i) ) &5164 - ( 8.0_wp * ibit11 * adv_mom_5 &5165 + ibit10 * adv_mom_3 &5166 ) * &5167 ( u(k,j,i+2) + u(k,j,i-1) ) &5168 + ( ibit11 * adv_mom_5 &5169 ) * &5170 ( u(k,j,i+3) + u(k,j,i-2) ) &5171 )5172 5173 diss_r = - ABS( u_comp - gu ) * ( &5174 ( 10.0_wp * ibit11 * adv_mom_5 &5175 + 3.0_wp * ibit10 * adv_mom_3 &5176 + ibit9 * adv_mom_1 &5177 ) * &5178 ( u(k,j,i+1) - u(k,j,i) ) &5179 - ( 5.0_wp * ibit11 * adv_mom_5 &5180 + ibit10 * adv_mom_3 &5181 ) * &5182 ( u(k,j,i+2) - u(k,j,i-1) ) &5183 + ( ibit11 * adv_mom_5 &5184 ) * &5185 ( u(k,j,i+3) - u(k,j,i-2) ) &5186 )5187 5188 ibit14 = IBITS(wall_flags_0(k,j-1,i),14,1)5189 ibit13 = IBITS(wall_flags_0(k,j-1,i),13,1)5190 ibit12 = IBITS(wall_flags_0(k,j-1,i),12,1)5191 5192 v_comp_s = v(k,j,i) + v(k,j,i-1) - gv5193 flux_s = v_comp_s * ( &5194 ( 37.0_wp * ibit14 * adv_mom_5 &5195 + 7.0_wp * ibit13 * adv_mom_3 &5196 + ibit12 * adv_mom_1 &5197 ) * &5198 ( u(k,j,i) + u(k,j-1,i) ) &5199 - ( 8.0_wp * ibit14 * adv_mom_5 &5200 + ibit13 * adv_mom_3 &5201 ) * &5202 ( u(k,j+1,i) + u(k,j-2,i) ) &5203 + ( ibit14 * adv_mom_5 &5204 ) * &5205 ( u(k,j+2,i) + u(k,j-3,i) ) &5206 )5207 5208 diss_s = - ABS ( v_comp_s ) * ( &5209 ( 10.0_wp * ibit14 * adv_mom_5 &5210 + 3.0_wp * ibit13 * adv_mom_3 &5211 + ibit12 * adv_mom_1 &5212 ) * &5213 ( u(k,j,i) - u(k,j-1,i) ) &5214 - ( 5.0_wp * ibit14 * adv_mom_5 &5215 + ibit13 * adv_mom_3 &5216 ) * &5217 ( u(k,j+1,i) - u(k,j-2,i) ) &5218 + ( ibit14 * adv_mom_5 &5219 ) * &5220 ( u(k,j+2,i) - u(k,j-3,i) ) &5221 )5222 5223 5224 ibit14 = IBITS(wall_flags_0(k,j,i),14,1)5225 ibit13 = IBITS(wall_flags_0(k,j,i),13,1)5226 ibit12 = IBITS(wall_flags_0(k,j,i),12,1)5227 5228 v_comp = v(k,j+1,i) + v(k,j+1,i-1) - gv5229 flux_n = v_comp * ( &5230 ( 37.0_wp * ibit14 * adv_mom_5 &5231 + 7.0_wp * ibit13 * adv_mom_3 &5232 + ibit12 * adv_mom_1 &5233 ) * &5234 ( u(k,j+1,i) + u(k,j,i) ) &5235 - ( 8.0_wp * ibit14 * adv_mom_5 &5236 + ibit13 * adv_mom_3 &5237 ) * &5238 ( u(k,j+2,i) + u(k,j-1,i) ) &5239 + ( ibit14 * adv_mom_5 &5240 ) * &5241 ( u(k,j+3,i) + u(k,j-2,i) ) &5242 )5243 5244 diss_n = - ABS ( v_comp ) * ( &5245 ( 10.0_wp * ibit14 * adv_mom_5 &5246 + 3.0_wp * ibit13 * adv_mom_3 &5247 + ibit12 * adv_mom_1 &5248 ) * &5249 ( u(k,j+1,i) - u(k,j,i) ) &5250 - ( 5.0_wp * ibit14 * adv_mom_5 &5251 + ibit13 * adv_mom_3 &5252 ) * &5253 ( u(k,j+2,i) - u(k,j-1,i) ) &5254 + ( ibit14 * adv_mom_5 &5255 ) * &5256 ( u(k,j+3,i) - u(k,j-2,i) ) &5257 )5258 5259 ibit17 = IBITS(wall_flags_0(k-1,j,i),17,1)5260 ibit16 = IBITS(wall_flags_0(k-1,j,i),16,1)5261 ibit15 = IBITS(wall_flags_0(k-1,j,i),15,1)5262 5263 k_pp = k + 2 * ibit175264 k_mm = k - 2 * ( ibit16 + ibit17 )5265 k_mmm = k - 3 * ibit175266 5267 w_comp = w(k-1,j,i) + w(k-1,j,i-1)5268 flux_d = w_comp * ( &5269 ( 37.0_wp * ibit17 * adv_mom_5 &5270 + 7.0_wp * ibit16 * adv_mom_3 &5271 + ibit15 * adv_mom_1 &5272 ) * &5273 ( u(k,j,i) + u(k-1,j,i) ) &5274 - ( 8.0_wp * ibit17 * adv_mom_5 &5275 + ibit16 * adv_mom_3 &5276 ) * &5277 ( u(k+1,j,i) + u(k_mm,j,i) ) &5278 + ( ibit17 * adv_mom_5 &5279 ) * &5280 ( u(k_pp,j,i) + u(k_mmm,j,i) ) &5281 )5282 5283 diss_d = - ABS( w_comp ) * ( &5284 ( 10.0_wp * ibit17 * adv_mom_5 &5285 + 3.0_wp * ibit16 * adv_mom_3 &5286 + ibit15 * adv_mom_1 &5287 ) * &5288 ( u(k,j,i) - u(k-1,j,i) ) &5289 - ( 5.0_wp * ibit17 * adv_mom_5 &5290 + ibit16 * adv_mom_3 &5291 ) * &5292 ( u(k+1,j,i) - u(k_mm,j,i) ) &5293 + ( ibit17 * adv_mom_5 &5294 ) * &5295 ( u(k_pp,j,i) - u(k_mmm,j,i) ) &5296 )5297 !5298 !-- k index has to be modified near bottom and top, else array5299 !-- subscripts will be exceeded.5300 ibit17 = IBITS(wall_flags_0(k,j,i),17,1)5301 ibit16 = IBITS(wall_flags_0(k,j,i),16,1)5302 ibit15 = IBITS(wall_flags_0(k,j,i),15,1)5303 5304 k_ppp = k + 3 * ibit175305 k_pp = k + 2 * ( 1 - ibit15 )5306 k_mm = k - 2 * ibit175307 5308 w_comp = w(k,j,i) + w(k,j,i-1)5309 flux_t = w_comp * rho_air_zw(k) * ( &5310 ( 37.0_wp * ibit17 * adv_mom_5 &5311 + 7.0_wp * ibit16 * adv_mom_3 &5312 + ibit15 * adv_mom_1 &5313 ) * &5314 ( u(k+1,j,i) + u(k,j,i) ) &5315 - ( 8.0_wp * ibit17 * adv_mom_5 &5316 + ibit16 * adv_mom_3 &5317 ) * &5318 ( u(k_pp,j,i) + u(k-1,j,i) ) &5319 + ( ibit17 * adv_mom_5 &5320 ) * &5321 ( u(k_ppp,j,i) + u(k_mm,j,i) ) &5322 )5323 5324 diss_t = - ABS( w_comp ) * rho_air_zw(k) * ( &5325 ( 10.0_wp * ibit17 * adv_mom_5 &5326 + 3.0_wp * ibit16 * adv_mom_3 &5327 + ibit15 * adv_mom_1 &5328 ) * &5329 ( u(k+1,j,i) - u(k,j,i) ) &5330 - ( 5.0_wp * ibit17 * adv_mom_5 &5331 + ibit16 * adv_mom_3 &5332 ) * &5333 ( u(k_pp,j,i) - u(k-1,j,i) ) &5334 + ( ibit17 * adv_mom_5 &5335 ) * &5336 ( u(k_ppp,j,i) - u(k_mm,j,i) ) &5337 )5338 !5339 !-- Calculate the divergence of the velocity field. A respective5340 !-- correction is needed to overcome numerical instabilities caused5341 !-- by a not sufficient reduction of divergences near topography.5342 div = ( ( u_comp * ( ibit9 + ibit10 + ibit11 ) &5343 - ( u(k,j,i) + u(k,j,i-1) ) &5344 * ( IBITS(wall_flags_0(k,j,i-1),9,1) &5345 + IBITS(wall_flags_0(k,j,i-1),10,1) &5346 + IBITS(wall_flags_0(k,j,i-1),11,1) &5347 ) &5348 ) * rho_air(k) * ddx &5349 + ( ( v_comp + gv ) * ( ibit12 + ibit13 + ibit14 ) &5350 - ( v(k,j,i) + v(k,j,i-1 ) ) &5351 * ( IBITS(wall_flags_0(k,j-1,i),12,1) &5352 + IBITS(wall_flags_0(k,j-1,i),13,1) &5353 + IBITS(wall_flags_0(k,j-1,i),14,1) &5354 ) &5355 ) * rho_air(k) * ddy &5356 + ( w_comp * rho_air_zw(k) * ( ibit15 + ibit16 + ibit17 ) &5357 - ( w(k-1,j,i) + w(k-1,j,i-1) ) * rho_air_zw(k-1) &5358 * ( IBITS(wall_flags_0(k-1,j,i),15,1) &5359 + IBITS(wall_flags_0(k-1,j,i),16,1) &5360 + IBITS(wall_flags_0(k-1,j,i),17,1) &5361 ) &5362 ) * ddzw(k) &5363 ) * 0.5_wp5364 5365 5366 tend(k,j,i) = - ( &5367 ( flux_r + diss_r - flux_l - diss_l ) * ddx &5368 + ( flux_n + diss_n - flux_s - diss_s ) * ddy &5369 + ( ( flux_t + diss_t ) - &5370 ( flux_d + diss_d ) &5371 ) * drho_air(k) * ddzw(k) &5372 ) + div * u(k,j,i)5373 5374 !++5375 !-- Statistical Evaluation of u'u'. The factor has to be applied5376 !-- for right evaluation when gallilei_trans = .T. .5377 ! sums_us2_ws_l(k,tn) = sums_us2_ws_l(k,tn) &5378 ! + ( flux_r * &5379 ! ( u_comp - 2.0_wp * hom(k,1,1,0) ) &5380 ! / ( u_comp - gu + 1.0E-20_wp ) &5381 ! + diss_r * &5382 ! ABS( u_comp - 2.0_wp * hom(k,1,1,0) ) &5383 ! / ( ABS( u_comp - gu ) + 1.0E-20_wp ) ) &5384 ! * weight_substep(intermediate_timestep_count)5385 !5386 !-- Statistical Evaluation of w'u'.5387 ! sums_wsus_ws_l(k,tn) = sums_wsus_ws_l(k,tn) &5388 ! + ( flux_t + diss_t ) &5389 ! * weight_substep(intermediate_timestep_count)5390 ENDDO5391 ENDDO5392 ENDDO5393 !$acc end kernels5394 5395 !++5396 ! sums_us2_ws_l(nzb,tn) = sums_us2_ws_l(nzb+1,tn)5397 5398 END SUBROUTINE advec_u_ws_acc5399 5400 5401 !------------------------------------------------------------------------------!5402 ! Description:5403 ! ------------5404 4518 !> Advection of v - Call for all grid points 5405 4519 !------------------------------------------------------------------------------! … … 5906 5020 5907 5021 5908 !------------------------------------------------------------------------------!5909 ! Description:5910 ! ------------5911 !> Advection of v - Call for all grid points - accelerator version5912 !------------------------------------------------------------------------------!5913 SUBROUTINE advec_v_ws_acc5914 5915 USE arrays_3d, &5916 ONLY: ddzw, drho_air, tend, u, v, w, rho_air, rho_air_zw5917 5918 USE constants, &5919 ONLY: adv_mom_1, adv_mom_3, adv_mom_55920 5921 USE control_parameters, &5922 ONLY: intermediate_timestep_count, u_gtrans, v_gtrans5923 5924 USE grid_variables, &5925 ONLY: ddx, ddy5926 5927 USE indices, &5928 ONLY: i_left, i_right, j_north, j_south, nxl, nxr, nyn, nys, nzb, &5929 nzb_max, nzt, wall_flags_05930 5931 USE kinds5932 5933 ! USE statistics, &5934 ! ONLY: hom, sums_vs2_ws_l, sums_wsvs_ws_l, weight_substep5935 5936 IMPLICIT NONE5937 5938 5939 INTEGER(iwp) :: i !<5940 INTEGER(iwp) :: ibit18 !<5941 INTEGER(iwp) :: ibit19 !<5942 INTEGER(iwp) :: ibit20 !<5943 INTEGER(iwp) :: ibit21 !<5944 INTEGER(iwp) :: ibit22 !<5945 INTEGER(iwp) :: ibit23 !<5946 INTEGER(iwp) :: ibit24 !<5947 INTEGER(iwp) :: ibit25 !<5948 INTEGER(iwp) :: ibit26 !<5949 INTEGER(iwp) :: j !<5950 INTEGER(iwp) :: k !<5951 INTEGER(iwp) :: k_mm !<5952 INTEGER(iwp) :: k_mmm !<5953 INTEGER(iwp) :: k_pp !<5954 INTEGER(iwp) :: k_ppp !<5955 INTEGER(iwp) :: tn = 0 !<5956 5957 REAL(wp) :: diss_d !<5958 REAL(wp) :: diss_l !<5959 REAL(wp) :: diss_n !<5960 REAL(wp) :: diss_r !<5961 REAL(wp) :: diss_s !<5962 REAL(wp) :: diss_t !<5963 REAL(wp) :: div !<5964 REAL(wp) :: flux_d !<5965 REAL(wp) :: flux_l !<5966 REAL(wp) :: flux_n !<5967 REAL(wp) :: flux_r !<5968 REAL(wp) :: flux_s !<5969 REAL(wp) :: flux_t !<5970 REAL(wp) :: gu !<5971 REAL(wp) :: gv !<5972 REAL(wp) :: u_comp !<5973 REAL(wp) :: u_comp_l !<5974 REAL(wp) :: v_comp !<5975 REAL(wp) :: v_comp_s !<5976 REAL(wp) :: w_comp !<5977 5978 gu = 2.0_wp * u_gtrans5979 gv = 2.0_wp * v_gtrans5980 5981 !5982 !-- Computation of fluxes and tendency terms5983 !$acc kernels present( ddzw, tend, u, v, w, wall_flags_0 )5984 DO i = i_left, i_right5985 DO j = j_south, j_north5986 DO k = nzb+1, nzt5987 5988 ibit20 = IBITS(wall_flags_0(k,j,i-1),20,1)5989 ibit19 = IBITS(wall_flags_0(k,j,i-1),19,1)5990 ibit18 = IBITS(wall_flags_0(k,j,i-1),18,1)5991 5992 u_comp_l = u(k,j-1,i) + u(k,j,i) - gu5993 flux_l = u_comp_l * ( &5994 ( 37.0_wp * ibit20 * adv_mom_5 &5995 + 7.0_wp * ibit19 * adv_mom_3 &5996 + ibit18 * adv_mom_1 &5997 ) * &5998 ( v(k,j,i) + v(k,j,i-1) ) &5999 - ( 8.0_wp * ibit20 * adv_mom_5 &6000 + ibit19 * adv_mom_3 &6001 ) * &6002 ( v(k,j,i+1) + v(k,j,i-2) ) &6003 + ( ibit20 * adv_mom_5 &6004 ) * &6005 ( v(k,j,i+2) + v(k,j,i-3) ) &6006 )6007 6008 diss_l = - ABS( u_comp_l ) * ( &6009 ( 10.0_wp * ibit20 * adv_mom_5 &6010 + 3.0_wp * ibit19 * adv_mom_3 &6011 + ibit18 * adv_mom_1 &6012 ) * &6013 ( v(k,j,i) - v(k,j,i-1) ) &6014 - ( 5.0_wp * ibit20 * adv_mom_5 &6015 + ibit19 * adv_mom_3 &6016 ) * &6017 ( v(k,j,i+1) - v(k,j,i-2) ) &6018 + ( ibit20 * adv_mom_5 &6019 ) * &6020 ( v(k,j,i+2) - v(k,j,i-3) ) &6021 )6022 6023 ibit20 = IBITS(wall_flags_0(k,j,i),20,1)6024 ibit19 = IBITS(wall_flags_0(k,j,i),19,1)6025 ibit18 = IBITS(wall_flags_0(k,j,i),18,1)6026 6027 u_comp = u(k,j-1,i+1) + u(k,j,i+1) - gu6028 flux_r = u_comp * ( &6029 ( 37.0_wp * ibit20 * adv_mom_5 &6030 + 7.0_wp * ibit19 * adv_mom_3 &6031 + ibit18 * adv_mom_1 &6032 ) * &6033 ( v(k,j,i+1) + v(k,j,i) ) &6034 - ( 8.0_wp * ibit20 * adv_mom_5 &6035 + ibit19 * adv_mom_3 &6036 ) * &6037 ( v(k,j,i+2) + v(k,j,i-1) ) &6038 + ( ibit20 * adv_mom_5 &6039 ) * &6040 ( v(k,j,i+3) + v(k,j,i-2) ) &6041 )6042 6043 diss_r = - ABS( u_comp ) * ( &6044 ( 10.0_wp * ibit20 * adv_mom_5 &6045 + 3.0_wp * ibit19 * adv_mom_3 &6046 + ibit18 * adv_mom_1 &6047 ) * &6048 ( v(k,j,i+1) - v(k,j,i) ) &6049 - ( 5.0_wp * ibit20 * adv_mom_5 &6050 + ibit19 * adv_mom_3 &6051 ) * &6052 ( v(k,j,i+2) - v(k,j,i-1) ) &6053 + ( ibit20 * adv_mom_5 &6054 ) * &6055 ( v(k,j,i+3) - v(k,j,i-2) ) &6056 )6057 6058 ibit23 = IBITS(wall_flags_0(k,j-1,i),23,1)6059 ibit22 = IBITS(wall_flags_0(k,j-1,i),22,1)6060 ibit21 = IBITS(wall_flags_0(k,j-1,i),21,1)6061 6062 6063 v_comp_s = v(k,j,i) + v(k,j-1,i) - gv6064 flux_s = v_comp_s * ( &6065 ( 37.0_wp * ibit23 * adv_mom_5 &6066 + 7.0_wp * ibit22 * adv_mom_3 &6067 + ibit21 * adv_mom_1 &6068 ) * &6069 ( v(k,j,i) + v(k,j-1,i) ) &6070 - ( 8.0_wp * ibit23 * adv_mom_5 &6071 + ibit22 * adv_mom_3 &6072 ) * &6073 ( v(k,j+1,i) + v(k,j-2,i) ) &6074 + ( ibit23 * adv_mom_5 &6075 ) * &6076 ( v(k,j+2,i) + v(k,j-3,i) ) &6077 )6078 6079 diss_s = - ABS( v_comp_s ) * ( &6080 ( 10.0_wp * ibit23 * adv_mom_5 &6081 + 3.0_wp * ibit22 * adv_mom_3 &6082 + ibit21 * adv_mom_1 &6083 ) * &6084 ( v(k,j,i) - v(k,j-1,i) ) &6085 - ( 5.0_wp * ibit23 * adv_mom_5 &6086 + ibit22 * adv_mom_3 &6087 ) * &6088 ( v(k,j+1,i) - v(k,j-2,i) ) &6089 + ( ibit23 * adv_mom_5 &6090 ) * &6091 ( v(k,j+2,i) - v(k,j-3,i) ) &6092 )6093 6094 ibit23 = IBITS(wall_flags_0(k,j,i),23,1)6095 ibit22 = IBITS(wall_flags_0(k,j,i),22,1)6096 ibit21 = IBITS(wall_flags_0(k,j,i),21,1)6097 6098 v_comp = v(k,j+1,i) + v(k,j,i)6099 flux_n = ( v_comp - gv ) * ( &6100 ( 37.0_wp * ibit23 * adv_mom_5 &6101 + 7.0_wp * ibit22 * adv_mom_3 &6102 + ibit21 * adv_mom_1 &6103 ) * &6104 ( v(k,j+1,i) + v(k,j,i) ) &6105 - ( 8.0_wp * ibit23 * adv_mom_5 &6106 + ibit22 * adv_mom_3 &6107 ) * &6108 ( v(k,j+2,i) + v(k,j-1,i) ) &6109 + ( ibit23 * adv_mom_5 &6110 ) * &6111 ( v(k,j+3,i) + v(k,j-2,i) ) &6112 )6113 6114 diss_n = - ABS( v_comp - gv ) * ( &6115 ( 10.0_wp * ibit23 * adv_mom_5 &6116 + 3.0_wp * ibit22 * adv_mom_3 &6117 + ibit21 * adv_mom_1 &6118 ) * &6119 ( v(k,j+1,i) - v(k,j,i) ) &6120 - ( 5.0_wp * ibit23 * adv_mom_5 &6121 + ibit22 * adv_mom_3 &6122 ) * &6123 ( v(k,j+2,i) - v(k,j-1,i) ) &6124 + ( ibit23 * adv_mom_5 &6125 ) * &6126 ( v(k,j+3,i) - v(k,j-2,i) ) &6127 )6128 6129 ibit26 = IBITS(wall_flags_0(k-1,j,i),26,1)6130 ibit25 = IBITS(wall_flags_0(k-1,j,i),25,1)6131 ibit24 = IBITS(wall_flags_0(k-1,j,i),24,1)6132 6133 k_pp = k + 2 * ibit266134 k_mm = k - 2 * ( ibit25 + ibit26 )6135 k_mmm = k - 3 * ibit266136 6137 w_comp = w(k-1,j-1,i) + w(k-1,j,i)6138 flux_d = w_comp * ( &6139 ( 37.0_wp * ibit26 * adv_mom_5 &6140 + 7.0_wp * ibit25 * adv_mom_3 &6141 + ibit24 * adv_mom_1 &6142 ) * &6143 ( v(k,j,i) + v(k-1,j,i) ) &6144 - ( 8.0_wp * ibit26 * adv_mom_5 &6145 + ibit25 * adv_mom_3 &6146 ) * &6147 ( v(k+1,j,i) + v(k_mm,j,i) ) &6148 + ( ibit26 * adv_mom_5 &6149 ) * &6150 ( v(k_pp,j,i) + v(k_mmm,j,i) ) &6151 )6152 6153 diss_d = - ABS( w_comp ) * ( &6154 ( 10.0_wp * ibit26 * adv_mom_5 &6155 + 3.0_wp * ibit25 * adv_mom_3 &6156 + ibit24 * adv_mom_1 &6157 ) * &6158 ( v(k,j,i) - v(k-1,j,i) ) &6159 - ( 5.0_wp * ibit26 * adv_mom_5 &6160 + ibit25 * adv_mom_3 &6161 ) * &6162 ( v(k+1,j,i) - v(k_mm,j,i) ) &6163 + ( ibit26 * adv_mom_5 &6164 ) * &6165 ( v(k_pp,j,i) - v(k_mmm,j,i) ) &6166 )6167 !6168 !-- k index has to be modified near bottom and top, else array6169 !-- subscripts will be exceeded.6170 ibit26 = IBITS(wall_flags_0(k,j,i),26,1)6171 ibit25 = IBITS(wall_flags_0(k,j,i),25,1)6172 ibit24 = IBITS(wall_flags_0(k,j,i),24,1)6173 6174 k_ppp = k + 3 * ibit266175 k_pp = k + 2 * ( 1 - ibit24 )6176 k_mm = k - 2 * ibit266177 6178 w_comp = w(k,j-1,i) + w(k,j,i)6179 flux_t = w_comp * rho_air_zw(k) * ( &6180 ( 37.0_wp * ibit26 * adv_mom_5 &6181 + 7.0_wp * ibit25 * adv_mom_3 &6182 + ibit24 * adv_mom_1 &6183 ) * &6184 ( v(k+1,j,i) + v(k,j,i) ) &6185 - ( 8.0_wp * ibit26 * adv_mom_5 &6186 + ibit25 * adv_mom_3 &6187 ) * &6188 ( v(k_pp,j,i) + v(k-1,j,i) ) &6189 + ( ibit26 * adv_mom_5 &6190 ) * &6191 ( v(k_ppp,j,i) + v(k_mm,j,i) ) &6192 )6193 6194 diss_t = - ABS( w_comp ) * rho_air_zw(k) * ( &6195 ( 10.0_wp * ibit26 * adv_mom_5 &6196 + 3.0_wp * ibit25 * adv_mom_3 &6197 + ibit24 * adv_mom_1 &6198 ) * &6199 ( v(k+1,j,i) - v(k,j,i) ) &6200 - ( 5.0_wp * ibit26 * adv_mom_5 &6201 + ibit25 * adv_mom_3 &6202 ) * &6203 ( v(k_pp,j,i) - v(k-1,j,i) ) &6204 + ( ibit26 * adv_mom_5 &6205 ) * &6206 ( v(k_ppp,j,i) - v(k_mm,j,i) ) &6207 )6208 !6209 !-- Calculate the divergence of the velocity field. A respective6210 !-- correction is needed to overcome numerical instabilities caused6211 !-- by a not sufficient reduction of divergences near topography.6212 div = ( ( ( u_comp + gu ) &6213 * ( ibit18 + ibit19 + ibit20 ) &6214 - ( u(k,j-1,i) + u(k,j,i) ) &6215 * ( IBITS(wall_flags_0(k,j,i-1),18,1) &6216 + IBITS(wall_flags_0(k,j,i-1),19,1) &6217 + IBITS(wall_flags_0(k,j,i-1),20,1) &6218 ) &6219 ) * rho_air(k) * ddx &6220 + ( v_comp &6221 * ( ibit21 + ibit22 + ibit23 ) &6222 - ( v(k,j,i) + v(k,j-1,i) ) &6223 * ( IBITS(wall_flags_0(k,j-1,i),21,1) &6224 + IBITS(wall_flags_0(k,j-1,i),22,1) &6225 + IBITS(wall_flags_0(k,j-1,i),23,1) &6226 ) &6227 ) * rho_air(k) * ddy &6228 + ( w_comp * rho_air_zw(k) &6229 * ( ibit24 + ibit25 + ibit26 ) &6230 - ( w(k-1,j-1,i) + w(k-1,j,i) ) * rho_air_zw(k-1) &6231 * ( IBITS(wall_flags_0(k-1,j,i),24,1) &6232 + IBITS(wall_flags_0(k-1,j,i),25,1) &6233 + IBITS(wall_flags_0(k-1,j,i),26,1) &6234 ) &6235 ) * ddzw(k) &6236 ) * 0.5_wp6237 6238 6239 tend(k,j,i) = - ( &6240 ( flux_r + diss_r - flux_l - diss_l ) * ddx &6241 + ( flux_n + diss_n - flux_s - diss_s ) * ddy &6242 + ( ( flux_t + diss_t ) - &6243 ( flux_d + diss_d ) &6244 ) * drho_air(k) * ddzw(k) &6245 ) + div * v(k,j,i)6246 6247 6248 !++6249 !-- Statistical Evaluation of v'v'. The factor has to be applied6250 !-- for right evaluation when gallilei_trans = .T. .6251 ! sums_vs2_ws_l(k,tn) = sums_vs2_ws_l(k,tn) &6252 ! + ( flux_n &6253 ! * ( v_comp - 2.0_wp * hom(k,1,2,0) ) &6254 ! / ( v_comp - gv + 1.0E-20_wp ) &6255 ! + diss_n &6256 ! * ABS( v_comp - 2.0_wp * hom(k,1,2,0) ) &6257 ! / ( ABS( v_comp - gv ) +1.0E-20_wp ) ) &6258 ! * weight_substep(intermediate_timestep_count)6259 !6260 !-- Statistical Evaluation of w'v'.6261 ! sums_wsvs_ws_l(k,tn) = sums_wsvs_ws_l(k,tn) &6262 ! + ( flux_t + diss_t ) &6263 ! * weight_substep(intermediate_timestep_count)6264 6265 ENDDO6266 ENDDO6267 ENDDO6268 !$acc end kernels6269 6270 !++6271 ! sums_vs2_ws_l(nzb,tn) = sums_vs2_ws_l(nzb+1,tn)6272 6273 END SUBROUTINE advec_v_ws_acc6274 5022 6275 5023 … … 6756 5504 6757 5505 6758 !------------------------------------------------------------------------------!6759 ! Description:6760 ! ------------6761 !> Advection of w - Call for all grid points - accelerator version6762 !------------------------------------------------------------------------------!6763 SUBROUTINE advec_w_ws_acc6764 6765 USE arrays_3d, &6766 ONLY: ddzu, drho_air_zw, tend, u, v, w, rho_air, rho_air_zw6767 6768 USE constants, &6769 ONLY: adv_mom_1, adv_mom_3, adv_mom_56770 6771 USE control_parameters, &6772 ONLY: intermediate_timestep_count, u_gtrans, v_gtrans6773 6774 USE grid_variables, &6775 ONLY: ddx, ddy6776 6777 USE indices, &6778 ONLY: i_left, i_right, j_north, j_south, nxl, nxr, nyn, nys, nzb, &6779 nzb_max, nzt, wall_flags_0, wall_flags_006780 6781 USE kinds6782 6783 ! USE statistics, &6784 ! ONLY: hom, sums_ws2_ws_l, weight_substep6785 6786 IMPLICIT NONE6787 6788 INTEGER(iwp) :: i !<6789 INTEGER(iwp) :: ibit27 !<6790 INTEGER(iwp) :: ibit28 !<6791 INTEGER(iwp) :: ibit29 !<6792 INTEGER(iwp) :: ibit30 !<6793 INTEGER(iwp) :: ibit31 !<6794 INTEGER(iwp) :: ibit32 !<6795 INTEGER(iwp) :: ibit33 !<6796 INTEGER(iwp) :: ibit34 !<6797 INTEGER(iwp) :: ibit35 !<6798 INTEGER(iwp) :: j !<6799 INTEGER(iwp) :: k !<6800 INTEGER(iwp) :: k_mmm !<6801 INTEGER(iwp) :: k_mm !<6802 INTEGER(iwp) :: k_pp !<6803 INTEGER(iwp) :: k_ppp !<6804 INTEGER(iwp) :: tn = 0 !<6805 6806 REAL(wp) :: diss_d !<6807 REAL(wp) :: diss_l !<6808 REAL(wp) :: diss_n !<6809 REAL(wp) :: diss_r !<6810 REAL(wp) :: diss_s !<6811 REAL(wp) :: diss_t !<6812 REAL(wp) :: div !<6813 REAL(wp) :: flux_d !<6814 REAL(wp) :: flux_l !<6815 REAL(wp) :: flux_n !<6816 REAL(wp) :: flux_r !<6817 REAL(wp) :: flux_s !<6818 REAL(wp) :: flux_t !<6819 REAL(wp) :: gu !<6820 REAL(wp) :: gv !<6821 REAL(wp) :: u_comp !<6822 REAL(wp) :: u_comp_l !<6823 REAL(wp) :: v_comp !<6824 REAL(wp) :: v_comp_s !<6825 REAL(wp) :: w_comp !<6826 6827 gu = 2.0_wp * u_gtrans6828 gv = 2.0_wp * v_gtrans6829 6830 6831 !6832 !-- Computation of fluxes and tendency terms6833 !$acc kernels present( ddzu, tend, u, v, w, wall_flags_0, wall_flags_00 )6834 DO i = i_left, i_right6835 DO j = j_south, j_north6836 DO k = nzb+1, nzt6837 6838 ibit27 = IBITS(wall_flags_0(k,j,i-1),27,1)6839 ibit28 = IBITS(wall_flags_0(k,j,i-1),28,1)6840 ibit29 = IBITS(wall_flags_0(k,j,i-1),29,1)6841 6842 u_comp_l = u(k+1,j,i) + u(k,j,i) - gu6843 flux_l = u_comp_l * ( &6844 ( 37.0_wp * ibit29 * adv_mom_5 &6845 + 7.0_wp * ibit28 * adv_mom_3 &6846 + ibit27 * adv_mom_1 &6847 ) * &6848 ( w(k,j,i) + w(k,j,i-1) ) &6849 - ( 8.0_wp * ibit29 * adv_mom_5 &6850 + ibit28 * adv_mom_3 &6851 ) * &6852 ( w(k,j,i+1) + w(k,j,i-2) ) &6853 + ( ibit29 * adv_mom_5 &6854 ) * &6855 ( w(k,j,i+2) + w(k,j,i-3) ) &6856 )6857 6858 diss_l = - ABS( u_comp_l ) * ( &6859 ( 10.0_wp * ibit29 * adv_mom_5 &6860 + 3.0_wp * ibit28 * adv_mom_3 &6861 + ibit27 * adv_mom_1 &6862 ) * &6863 ( w(k,j,i) - w(k,j,i-1) ) &6864 - ( 5.0_wp * ibit29 * adv_mom_5 &6865 + ibit28 * adv_mom_3 &6866 ) * &6867 ( w(k,j,i+1) - w(k,j,i-2) ) &6868 + ( ibit29 * adv_mom_5 &6869 ) * &6870 ( w(k,j,i+2) - w(k,j,i-3) ) &6871 )6872 6873 ibit27 = IBITS(wall_flags_0(k,j,i),27,1)6874 ibit28 = IBITS(wall_flags_0(k,j,i),28,1)6875 ibit29 = IBITS(wall_flags_0(k,j,i),29,1)6876 6877 u_comp = u(k+1,j,i+1) + u(k,j,i+1) - gu6878 flux_r = u_comp * ( &6879 ( 37.0_wp * ibit29 * adv_mom_5 &6880 + 7.0_wp * ibit28 * adv_mom_3 &6881 + ibit27 * adv_mom_1 &6882 ) * &6883 ( w(k,j,i+1) + w(k,j,i) ) &6884 - ( 8.0_wp * ibit29 * adv_mom_5 &6885 + ibit28 * adv_mom_3 &6886 ) * &6887 ( w(k,j,i+2) + w(k,j,i-1) ) &6888 + ( ibit29 * adv_mom_5 &6889 ) * &6890 ( w(k,j,i+3) + w(k,j,i-2) ) &6891 )6892 6893 diss_r = - ABS( u_comp ) * ( &6894 ( 10.0_wp * ibit29 * adv_mom_5 &6895 + 3.0_wp * ibit28 * adv_mom_3 &6896 + ibit27 * adv_mom_1 &6897 ) * &6898 ( w(k,j,i+1) - w(k,j,i) ) &6899 - ( 5.0_wp * ibit29 * adv_mom_5 &6900 + ibit28 * adv_mom_3 &6901 ) * &6902 ( w(k,j,i+2) - w(k,j,i-1) ) &6903 + ( ibit29 * adv_mom_5 &6904 ) * &6905 ( w(k,j,i+3) - w(k,j,i-2) ) &6906 )6907 ibit32 = IBITS(wall_flags_00(k,j-1,i),0,1)6908 ibit31 = IBITS(wall_flags_0(k,j-1,i),31,1)6909 ibit30 = IBITS(wall_flags_0(k,j-1,i),30,1)6910 6911 v_comp_s = v(k+1,j,i) + v(k,j,i) - gv6912 flux_s = v_comp_s * ( &6913 ( 37.0_wp * ibit32 * adv_mom_5 &6914 + 7.0_wp * ibit31 * adv_mom_3 &6915 + ibit30 * adv_mom_1 &6916 ) * &6917 ( w(k,j,i) + w(k,j-1,i) ) &6918 - ( 8.0_wp * ibit32 * adv_mom_5 &6919 + ibit31 * adv_mom_3 &6920 ) * &6921 ( w(k,j+1,i) + w(k,j-2,i) ) &6922 + ( ibit32 * adv_mom_5 &6923 ) * &6924 ( w(k,j+2,i) + w(k,j-3,i) ) &6925 )6926 6927 diss_s = - ABS( v_comp_s ) * ( &6928 ( 10.0_wp * ibit32 * adv_mom_5 &6929 + 3.0_wp * ibit31 * adv_mom_3 &6930 + ibit30 * adv_mom_1 &6931 ) * &6932 ( w(k,j,i) - w(k,j-1,i) ) &6933 - ( 5.0_wp * ibit32 * adv_mom_5 &6934 + ibit31 * adv_mom_3 &6935 ) * &6936 ( w(k,j+1,i) - w(k,j-2,i) ) &6937 + ( ibit32 * adv_mom_5 &6938 ) * &6939 ( w(k,j+2,i) - w(k,j-3,i) ) &6940 )6941 6942 ibit32 = IBITS(wall_flags_00(k,j,i),0,1)6943 ibit31 = IBITS(wall_flags_0(k,j,i),31,1)6944 ibit30 = IBITS(wall_flags_0(k,j,i),30,1)6945 6946 v_comp = v(k+1,j+1,i) + v(k,j+1,i) - gv6947 flux_n = v_comp * ( &6948 ( 37.0_wp * ibit32 * adv_mom_5 &6949 + 7.0_wp * ibit31 * adv_mom_3 &6950 + ibit30 * adv_mom_1 &6951 ) * &6952 ( w(k,j+1,i) + w(k,j,i) ) &6953 - ( 8.0_wp * ibit32 * adv_mom_5 &6954 + ibit31 * adv_mom_3 &6955 ) * &6956 ( w(k,j+2,i) + w(k,j-1,i) ) &6957 + ( ibit32 * adv_mom_5 &6958 ) * &6959 ( w(k,j+3,i) + w(k,j-2,i) ) &6960 )6961 6962 diss_n = - ABS( v_comp ) * ( &6963 ( 10.0_wp * ibit32 * adv_mom_5 &6964 + 3.0_wp * ibit31 * adv_mom_3 &6965 + ibit30 * adv_mom_1 &6966 ) * &6967 ( w(k,j+1,i) - w(k,j,i) ) &6968 - ( 5.0_wp * ibit32 * adv_mom_5 &6969 + ibit31 * adv_mom_3 &6970 ) * &6971 ( w(k,j+2,i) - w(k,j-1,i) ) &6972 + ( ibit32 * adv_mom_5 &6973 ) * &6974 ( w(k,j+3,i) - w(k,j-2,i) ) &6975 )6976 6977 ibit35 = IBITS(wall_flags_00(k-1,j,i),3,1)6978 ibit34 = IBITS(wall_flags_00(k-1,j,i),2,1)6979 ibit33 = IBITS(wall_flags_00(k-1,j,i),1,1)6980 6981 k_pp = k + 2 * ibit356982 k_mm = k - 2 * ( ibit34 + ibit35 )6983 k_mmm = k - 3 * ibit356984 6985 w_comp = w(k,j,i) + w(k-1,j,i)6986 flux_d = w_comp * ( &6987 ( 37.0_wp * ibit35 * adv_mom_5 &6988 + 7.0_wp * ibit34 * adv_mom_3 &6989 + ibit33 * adv_mom_1 &6990 ) * &6991 ( w(k,j,i) + w(k-1,j,i) ) &6992 - ( 8.0_wp * ibit35 * adv_mom_5 &6993 + ibit34 * adv_mom_3 &6994 ) * &6995 ( w(k+1,j,i) + w(k_mm,j,i) ) &6996 + ( ibit35 * adv_mom_5 &6997 ) * &6998 ( w(k_pp,j,i) + w(k_mmm,j,i) ) &6999 )7000 7001 diss_d = - ABS( w_comp ) * ( &7002 ( 10.0_wp * ibit35 * adv_mom_5 &7003 + 3.0_wp * ibit34 * adv_mom_3 &7004 + ibit33 * adv_mom_1 &7005 ) * &7006 ( w(k,j,i) - w(k-1,j,i) ) &7007 - ( 5.0_wp * ibit35 * adv_mom_5 &7008 + ibit34 * adv_mom_3 &7009 ) * &7010 ( w(k+1,j,i) - w(k_mm,j,i) ) &7011 + ( ibit35 * adv_mom_5 &7012 ) * &7013 ( w(k_pp,j,i) - w(k_mmm,j,i) ) &7014 )7015 7016 !7017 !-- k index has to be modified near bottom and top, else array7018 !-- subscripts will be exceeded.7019 ibit35 = IBITS(wall_flags_00(k,j,i),3,1)7020 ibit34 = IBITS(wall_flags_00(k,j,i),2,1)7021 ibit33 = IBITS(wall_flags_00(k,j,i),1,1)7022 7023 k_ppp = k + 3 * ibit357024 k_pp = k + 2 * ( 1 - ibit33 )7025 k_mm = k - 2 * ibit357026 7027 w_comp = w(k+1,j,i) + w(k,j,i)7028 flux_t = w_comp * rho_air(k+1) * ( &7029 ( 37.0_wp * ibit35 * adv_mom_5 &7030 + 7.0_wp * ibit34 * adv_mom_3 &7031 + ibit33 * adv_mom_1 &7032 ) * &7033 ( w(k+1,j,i) + w(k,j,i) ) &7034 - ( 8.0_wp * ibit35 * adv_mom_5 &7035 + ibit34 * adv_mom_3 &7036 ) * &7037 ( w(k_pp,j,i) + w(k-1,j,i) ) &7038 + ( ibit35 * adv_mom_5 &7039 ) * &7040 ( w(k_ppp,j,i) + w(k_mm,j,i) ) &7041 )7042 7043 diss_t = - ABS( w_comp ) * rho_air(k+1) * ( &7044 ( 10.0_wp * ibit35 * adv_mom_5 &7045 + 3.0_wp * ibit34 * adv_mom_3 &7046 + ibit33 * adv_mom_1 &7047 ) * &7048 ( w(k+1,j,i) - w(k,j,i) ) &7049 - ( 5.0_wp * ibit35 * adv_mom_5 &7050 + ibit34 * adv_mom_3 &7051 ) * &7052 ( w(k_pp,j,i) - w(k-1,j,i) ) &7053 + ( ibit35 * adv_mom_5 &7054 ) * &7055 ( w(k_ppp,j,i) - w(k_mm,j,i) ) &7056 )7057 !7058 !-- Calculate the divergence of the velocity field. A respective7059 !-- correction is needed to overcome numerical instabilities caused7060 !-- by a not sufficient reduction of divergences near topography.7061 div = ( ( ( u_comp + gu ) * ( ibit27 + ibit28 + ibit29 ) &7062 - ( u(k+1,j,i) + u(k,j,i) ) &7063 * ( IBITS(wall_flags_0(k,j,i-1),27,1) &7064 + IBITS(wall_flags_0(k,j,i-1),28,1) &7065 + IBITS(wall_flags_0(k,j,i-1),29,1) &7066 ) &7067 ) * rho_air_zw(k) * ddx &7068 + ( ( v_comp + gv ) * ( ibit30 + ibit31 + ibit32 ) &7069 - ( v(k+1,j,i) + v(k,j,i) ) &7070 * ( IBITS(wall_flags_0(k,j-1,i),30,1) &7071 + IBITS(wall_flags_0(k,j-1,i),31,1) &7072 + IBITS(wall_flags_00(k,j-1,i),0,1) &7073 ) &7074 ) * rho_air_zw(k) * ddy &7075 + ( w_comp * rho_air(k+1) * ( ibit33 + ibit34 + ibit35 ) &7076 - ( w(k,j,i) + w(k-1,j,i) ) * rho_air(k) &7077 * ( IBITS(wall_flags_00(k-1,j,i),1,1) &7078 + IBITS(wall_flags_00(k-1,j,i),2,1) &7079 + IBITS(wall_flags_00(k-1,j,i),3,1) &7080 ) &7081 ) * ddzu(k+1) &7082 ) * 0.5_wp7083 7084 7085 tend(k,j,i) = - ( &7086 ( flux_r + diss_r - flux_l - diss_l ) * ddx &7087 + ( flux_n + diss_n - flux_s - diss_s ) * ddy &7088 + ( ( flux_t + diss_t ) - &7089 ( flux_d + diss_d ) * rho_air(k) &7090 ) * drho_air_zw(k) * ddzu(k+1) &7091 ) + div * w(k,j,i)7092 7093 7094 !++7095 !-- Statistical Evaluation of w'w'.7096 ! sums_ws2_ws_l(k,tn) = sums_ws2_ws_l(k,tn) &7097 ! + ( flux_t + diss_t ) &7098 ! * weight_substep(intermediate_timestep_count)7099 7100 ENDDO7101 ENDDO7102 ENDDO7103 !$acc end kernels7104 7105 END SUBROUTINE advec_w_ws_acc7106 7107 5506 END MODULE advec_ws -
TabularUnified palm/trunk/SOURCE/boundary_conds.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC directives removed 23 23 ! 24 24 ! Former revisions: … … 188 188 !-- Bottom boundary 189 189 IF ( ibc_uv_b == 1 ) THEN 190 !$acc kernels present( u_p, v_p )191 190 u_p(nzb,:,:) = u_p(nzb+1,:,:) 192 191 v_p(nzb,:,:) = v_p(nzb+1,:,:) 193 !$acc end kernels 194 ENDIF 195 196 !$acc kernels present( nzb_w_inner, w_p ) 192 ENDIF 193 197 194 DO i = nxlg, nxrg 198 195 DO j = nysg, nyng … … 200 197 ENDDO 201 198 ENDDO 202 !$acc end kernels203 199 204 200 ! 205 201 !-- Top boundary. A nested domain ( ibc_uv_t = 3 ) does not require settings. 206 202 IF ( ibc_uv_t == 0 ) THEN 207 !$acc kernels present( u_init, u_p, v_init, v_p )208 203 u_p(nzt+1,:,:) = u_init(nzt+1) 209 204 v_p(nzt+1,:,:) = v_init(nzt+1) 210 !$acc end kernels211 205 ELSEIF ( ibc_uv_t == 1 ) THEN 212 !$acc kernels present( u_p, v_p )213 206 u_p(nzt+1,:,:) = u_p(nzt,:,:) 214 207 v_p(nzt+1,:,:) = v_p(nzt,:,:) 215 !$acc end kernels216 208 ENDIF 217 209 218 210 IF ( .NOT. nest_domain ) THEN 219 !$acc kernels present( w_p )220 211 w_p(nzt:nzt+1,:,:) = 0.0_wp ! nzt is not a prognostic level (but cf. pres) 221 !$acc end kernels222 212 ENDIF 223 213 … … 227 217 !-- the sea surface temperature of the coupled ocean model. 228 218 IF ( ibc_pt_b == 0 ) THEN 229 !$acc kernels present( nzb_s_inner, pt, pt_p )230 !$acc loop independent231 219 DO i = nxlg, nxrg 232 !$acc loop independent233 220 DO j = nysg, nyng 234 221 pt_p(nzb_s_inner(j,i),j,i) = pt(nzb_s_inner(j,i),j,i) 235 222 ENDDO 236 223 ENDDO 237 !$acc end kernels238 224 ELSEIF ( ibc_pt_b == 1 ) THEN 239 !$acc kernels present( nzb_s_inner, pt_p )240 !$acc loop independent241 225 DO i = nxlg, nxrg 242 !$acc loop independent243 226 DO j = nysg, nyng 244 227 pt_p(nzb_s_inner(j,i),j,i) = pt_p(nzb_s_inner(j,i)+1,j,i) 245 228 ENDDO 246 229 ENDDO 247 !$acc end kernels248 230 ENDIF 249 231 … … 251 233 !-- Temperature at top boundary 252 234 IF ( ibc_pt_t == 0 ) THEN 253 !$acc kernels present( pt, pt_p )254 235 pt_p(nzt+1,:,:) = pt(nzt+1,:,:) 255 236 ! … … 259 240 pt_p(nzt+1,:,:) = pt_init(nzt+1) 260 241 ENDIF 261 !$acc end kernels262 242 ELSEIF ( ibc_pt_t == 1 ) THEN 263 !$acc kernels present( pt_p )264 243 pt_p(nzt+1,:,:) = pt_p(nzt,:,:) 265 !$acc end kernels266 244 ELSEIF ( ibc_pt_t == 2 ) THEN 267 !$acc kernels present( dzu, pt_p )268 245 pt_p(nzt+1,:,:) = pt_p(nzt,:,:) + bc_pt_t_val * dzu(nzt+1) 269 !$acc end kernels270 246 ENDIF 271 247 … … 274 250 !-- Generally Neumann conditions with de/dz=0 are assumed 275 251 IF ( .NOT. constant_diffusion ) THEN 276 !$acc kernels present( e_p, nzb_s_inner )277 !$acc loop independent278 252 DO i = nxlg, nxrg 279 !$acc loop independent280 253 DO j = nysg, nyng 281 254 e_p(nzb_s_inner(j,i),j,i) = e_p(nzb_s_inner(j,i)+1,j,i) … … 285 258 e_p(nzt+1,:,:) = e_p(nzt,:,:) 286 259 ENDIF 287 !$acc end kernels288 260 ENDIF 289 261 -
TabularUnified palm/trunk/SOURCE/buoyancy.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ------------------ 22 ! 22 ! OpenACC version of subroutine removed 23 23 ! 24 24 ! Former revisions: … … 102 102 103 103 PRIVATE 104 PUBLIC buoyancy , buoyancy_acc104 PUBLIC buoyancy 105 105 106 106 INTERFACE buoyancy … … 108 108 MODULE PROCEDURE buoyancy_ij 109 109 END INTERFACE buoyancy 110 111 INTERFACE buoyancy_acc112 MODULE PROCEDURE buoyancy_acc113 END INTERFACE buoyancy_acc114 110 115 111 CONTAINS … … 212 208 213 209 END SUBROUTINE buoyancy 214 215 216 !------------------------------------------------------------------------------!217 ! Description:218 ! ------------219 !> Call for all grid points - accelerator version220 !------------------------------------------------------------------------------!221 SUBROUTINE buoyancy_acc( var, wind_component )222 223 USE arrays_3d, &224 ONLY: pt, pt_slope_ref, ref_state, tend225 226 USE control_parameters, &227 ONLY: atmos_ocean_sign, cos_alpha_surface, g, message_string, &228 pt_surface, sin_alpha_surface, sloping_surface229 230 USE indices, &231 ONLY: i_left, i_right, j_north, j_south, nxl, nxlg, nxlu, nxr, &232 nxrg, nyn, nyng, nys, nysg, nzb, nzb_s_inner, nzt233 234 USE kinds235 236 USE pegrid237 238 239 IMPLICIT NONE240 241 INTEGER(iwp) :: i !<242 INTEGER(iwp) :: j !<243 INTEGER(iwp) :: k !<244 INTEGER(iwp) :: wind_component !<245 246 #if defined( __nopointer )247 REAL(wp), DIMENSION(nzb:nzt+1,nysg:nyng,nxlg:nxrg) :: var !<248 #else249 REAL(wp), DIMENSION(:,:,:), POINTER :: var250 #endif251 252 253 IF ( .NOT. sloping_surface ) THEN254 !255 !-- Normal case: horizontal surface256 !$acc kernels present( nzb_s_inner, ref_state, tend, var )257 !$acc loop258 DO i = i_left, i_right259 DO j = j_south, j_north260 !$acc loop independent vector261 DO k = nzb_s_inner(j,i)+1, nzt-1262 tend(k,j,i) = tend(k,j,i) + atmos_ocean_sign * g * 0.5_wp * &263 ( &264 ( var(k,j,i) - ref_state(k) ) / ref_state(k) + &265 ( var(k+1,j,i) - ref_state(k+1) ) / ref_state(k+1) &266 )267 ENDDO268 ENDDO269 ENDDO270 !$acc end kernels271 272 ELSE273 !274 !-- Buoyancy term for a surface with a slope in x-direction. The equations275 !-- for both the u and w velocity-component contain proportionate terms.276 !-- Temperature field at time t=0 serves as environmental temperature.277 !-- Reference temperature (pt_surface) is the one at the lower left corner278 !-- of the total domain.279 IF ( wind_component == 1 ) THEN280 281 DO i = nxlu, nxr282 DO j = nys, nyn283 DO k = nzb_s_inner(j,i)+1, nzt-1284 tend(k,j,i) = tend(k,j,i) + g * sin_alpha_surface * &285 0.5_wp * ( ( pt(k,j,i-1) + pt(k,j,i) ) &286 - ( pt_slope_ref(k,i-1) + pt_slope_ref(k,i) ) &287 ) / pt_surface288 ENDDO289 ENDDO290 ENDDO291 292 ELSEIF ( wind_component == 3 ) THEN293 294 DO i = nxl, nxr295 DO j = nys, nyn296 DO k = nzb_s_inner(j,i)+1, nzt-1297 tend(k,j,i) = tend(k,j,i) + g * cos_alpha_surface * &298 0.5_wp * ( ( pt(k,j,i) + pt(k+1,j,i) ) &299 - ( pt_slope_ref(k,i) + pt_slope_ref(k+1,i) ) &300 ) / pt_surface301 ENDDO302 ENDDO303 ENDDO304 305 ELSE306 307 WRITE( message_string, * ) 'no term for component "', &308 wind_component,'"'309 CALL message( 'buoyancy', 'PA0159', 1, 2, 0, 6, 0 )310 311 ENDIF312 313 ENDIF314 315 END SUBROUTINE buoyancy_acc316 210 317 211 -
TabularUnified palm/trunk/SOURCE/check_parameters.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC related parts of code removed 23 23 ! 24 24 ! Former revisions: … … 518 518 IF ( transpose_compute_overlap ) THEN 519 519 IF ( numprocs == 1 ) STOP '+++ transpose-compute-overlap not implemented for single PE runs' 520 #if defined( __openacc )521 STOP '+++ transpose-compute-overlap not implemented for GPU usage'522 #endif523 520 ENDIF 524 521 … … 774 771 SELECT CASE ( TRIM( loop_optimization ) ) 775 772 776 CASE ( ' acc', 'cache', 'vector' )773 CASE ( 'cache', 'vector' ) 777 774 CONTINUE 778 775 -
TabularUnified palm/trunk/SOURCE/coriolis.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC version of subroutine removed 23 23 ! 24 24 ! Former revisions: … … 35 35 ! 1850 2016-04-08 13:29:27Z maronga 36 36 ! Module renamed 37 !38 37 ! 39 38 ! 1682 2015-10-07 23:56:08Z knoop … … 76 75 77 76 PRIVATE 78 PUBLIC coriolis , coriolis_acc77 PUBLIC coriolis 79 78 80 79 INTERFACE coriolis … … 82 81 MODULE PROCEDURE coriolis_ij 83 82 END INTERFACE coriolis 84 85 INTERFACE coriolis_acc86 MODULE PROCEDURE coriolis_acc87 END INTERFACE coriolis_acc88 83 89 84 CONTAINS … … 177 172 ! Description: 178 173 ! ------------ 179 !> Call for all grid points - accelerator version180 !------------------------------------------------------------------------------!181 SUBROUTINE coriolis_acc( component )182 183 USE arrays_3d, &184 ONLY: tend, u, ug, v, vg, w185 186 USE control_parameters, &187 ONLY: f, fs, message_string188 189 USE indices, &190 ONLY: i_left, i_right, j_north, j_south, nzb_u_inner, &191 nzb_v_inner, nzb_w_inner, nzt192 193 USE kinds194 195 IMPLICIT NONE196 197 INTEGER(iwp) :: component !<198 INTEGER(iwp) :: i !<199 INTEGER(iwp) :: j !<200 INTEGER(iwp) :: k !<201 202 203 !204 !-- Compute Coriolis terms for the three velocity components205 SELECT CASE ( component )206 207 !208 !-- u-component209 CASE ( 1 )210 !$acc kernels present( nzb_u_inner, tend, v, vg, w )211 DO i = i_left, i_right212 DO j = j_south, j_north213 DO k = 1, nzt214 IF ( k > nzb_u_inner(j,i) ) THEN215 tend(k,j,i) = tend(k,j,i) + f * ( 0.25_wp * &216 ( v(k,j,i-1) + v(k,j,i) + v(k,j+1,i-1) + &217 v(k,j+1,i) ) - vg(k) ) &218 - fs * ( 0.25_wp * &219 ( w(k-1,j,i-1) + w(k-1,j,i) + w(k,j,i-1) &220 + w(k,j,i) ) &221 )222 ENDIF223 ENDDO224 ENDDO225 ENDDO226 !$acc end kernels227 228 !229 !-- v-component230 CASE ( 2 )231 !$acc kernels present( nzb_v_inner, tend, u, ug )232 DO i = i_left, i_right233 DO j = j_south, j_north234 DO k = 1, nzt235 IF ( k > nzb_v_inner(j,i) ) THEN236 tend(k,j,i) = tend(k,j,i) - f * ( 0.25_wp * &237 ( u(k,j-1,i) + u(k,j,i) + u(k,j-1,i+1) + &238 u(k,j,i+1) ) - ug(k) )239 ENDIF240 ENDDO241 ENDDO242 ENDDO243 !$acc end kernels244 245 !246 !-- w-component247 CASE ( 3 )248 !$acc kernels present( nzb_w_inner, tend, u )249 DO i = i_left, i_right250 DO j = j_south, j_north251 DO k = 1, nzt252 IF ( k > nzb_w_inner(j,i) ) THEN253 tend(k,j,i) = tend(k,j,i) + fs * 0.25_wp * &254 ( u(k,j,i) + u(k+1,j,i) + u(k,j,i+1) + &255 u(k+1,j,i+1) )256 ENDIF257 ENDDO258 ENDDO259 ENDDO260 !$acc end kernels261 262 CASE DEFAULT263 264 WRITE( message_string, * ) ' wrong component: ', component265 CALL message( 'coriolis', 'PA0173', 1, 2, 0, 6, 0 )266 267 END SELECT268 269 END SUBROUTINE coriolis_acc270 271 272 !------------------------------------------------------------------------------!273 ! Description:274 ! ------------275 174 !> Call for grid point i,j 276 175 !------------------------------------------------------------------------------! -
TabularUnified palm/trunk/SOURCE/cpulog_mod.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC relevant code removed 23 23 ! 24 24 ! Former revisions: … … 401 401 average_cputime 402 402 403 IF ( num_acc_per_node /= 0 ) WRITE ( 18, 108 ) num_acc_per_node404 403 WRITE ( 18, 110 ) 405 404 #else … … 409 408 average_cputime 410 409 411 IF ( num_acc_per_node /= 0 ) WRITE ( 18, 109 ) num_acc_per_node412 410 WRITE ( 18, 110 ) 413 411 #endif … … 565 563 106 FORMAT (/'Exchange of ghostpoints via MPI_ISEND/MPI_IRECV') 566 564 107 FORMAT (//) 567 108 FORMAT ('Accelerator boards per node: ',14X,I2)568 109 FORMAT ('Accelerator boards: ',23X,I2)569 565 110 FORMAT ('-------------------------------------------------------------', & 570 566 &'---------'//& -
TabularUnified palm/trunk/SOURCE/diffusion_e.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC version of subroutine removed 23 23 ! 24 24 ! Former revisions: … … 34 34 ! 1873 2016-04-18 14:50:06Z maronga 35 35 ! Module renamed (removed _mod) 36 !37 36 ! 38 37 ! 1850 2016-04-08 13:29:27Z maronga … … 108 107 109 108 PRIVATE 110 PUBLIC diffusion_e , diffusion_e_acc109 PUBLIC diffusion_e 111 110 112 111 … … 116 115 END INTERFACE diffusion_e 117 116 118 INTERFACE diffusion_e_acc119 MODULE PROCEDURE diffusion_e_acc120 END INTERFACE diffusion_e_acc121 122 117 CONTAINS 123 118 … … 337 332 338 333 END SUBROUTINE diffusion_e 339 340 341 !------------------------------------------------------------------------------!342 ! Description:343 ! ------------344 !> Call for all grid points - accelerator version345 !------------------------------------------------------------------------------!346 SUBROUTINE diffusion_e_acc( var, var_reference )347 348 USE arrays_3d, &349 ONLY: dd2zu, ddzu, ddzw, diss, e, km, l_grid, tend, zu, zw, &350 drho_air, rho_air_zw351 352 USE control_parameters, &353 ONLY: atmos_ocean_sign, g, use_single_reference_value, &354 wall_adjustment, wall_adjustment_factor355 356 USE grid_variables, &357 ONLY: ddx2, ddy2358 359 USE indices, &360 ONLY: i_left, i_right, j_north, j_south, nxlg, nxrg, nyng, nysg, &361 nzb, nzb_s_inner, nzt362 363 USE kinds364 365 USE microphysics_mod, &366 ONLY: collision_turbulence367 368 USE particle_attributes, &369 ONLY: use_sgs_for_particles, wang_kernel370 371 IMPLICIT NONE372 373 INTEGER(iwp) :: i !<374 INTEGER(iwp) :: j !<375 INTEGER(iwp) :: k !<376 REAL(wp) :: dissipation !<377 REAL(wp) :: dvar_dz !<378 REAL(wp) :: l !<379 REAL(wp) :: ll !<380 REAL(wp) :: l_stable !<381 REAL(wp) :: var_reference !<382 383 #if defined( __nopointer )384 REAL(wp), DIMENSION(nzb:nzt+1,nysg:nyng,nxlg:nxrg) :: var !<385 #else386 REAL(wp), DIMENSION(:,:,:), POINTER :: var !<387 #endif388 389 390 !391 !-- This if clause must be outside the k-loop because otherwise392 !-- runtime errors occur with -C hopt on NEC393 IF ( use_single_reference_value ) THEN394 395 !$acc kernels present( ddzu, ddzw, dd2zu, diss, e, km, l_grid ) &396 !$acc present( nzb_s_inner, tend, var, zu, zw )397 DO i = i_left, i_right398 DO j = j_south, j_north399 DO k = 1, nzt400 401 IF ( k > nzb_s_inner(j,i) ) THEN402 !403 !-- Calculate the mixing length (for dissipation)404 dvar_dz = atmos_ocean_sign * &405 ( var(k+1,j,i) - var(k-1,j,i) ) * dd2zu(k)406 IF ( dvar_dz > 0.0_wp ) THEN407 l_stable = 0.76_wp * SQRT( e(k,j,i) ) / &408 SQRT( g / var_reference * dvar_dz ) + 1E-5_wp409 ELSE410 l_stable = l_grid(k)411 ENDIF412 !413 !-- Adjustment of the mixing length414 IF ( wall_adjustment ) THEN415 l = MIN( wall_adjustment_factor * &416 ( zu(k) - zw(nzb_s_inner(j,i)) ), &417 l_grid(k), l_stable )418 ll = MIN( wall_adjustment_factor * &419 ( zu(k) - zw(nzb_s_inner(j,i)) ), &420 l_grid(k) )421 ELSE422 l = MIN( l_grid(k), l_stable )423 ll = l_grid(k)424 ENDIF425 !426 !-- Calculate the tendency terms427 dissipation = ( 0.19_wp + 0.74_wp * l / ll ) * &428 e(k,j,i) * SQRT( e(k,j,i) ) / l429 430 tend(k,j,i) = tend(k,j,i) &431 + ( &432 ( km(k,j,i)+km(k,j,i+1) ) * ( e(k,j,i+1)-e(k,j,i) ) &433 - ( km(k,j,i)+km(k,j,i-1) ) * ( e(k,j,i)-e(k,j,i-1) ) &434 ) * ddx2 &435 + ( &436 ( km(k,j,i)+km(k,j+1,i) ) * ( e(k,j+1,i)-e(k,j,i) ) &437 - ( km(k,j,i)+km(k,j-1,i) ) * ( e(k,j,i)-e(k,j-1,i) ) &438 ) * ddy2 &439 + ( &440 ( km(k,j,i)+km(k+1,j,i) ) * ( e(k+1,j,i)-e(k,j,i) ) * ddzu(k+1) &441 * rho_air_zw(k) &442 - ( km(k,j,i)+km(k-1,j,i) ) * ( e(k,j,i)-e(k-1,j,i) ) * ddzu(k) &443 * rho_air_zw(k-1) &444 ) * ddzw(k) * drho_air(k) &445 - dissipation446 447 !448 !-- Store dissipation if needed for calculating the sgs particle449 !-- velocities450 IF ( use_sgs_for_particles .OR. wang_kernel .OR. &451 collision_turbulence ) THEN452 diss(k,j,i) = dissipation453 ENDIF454 455 ENDIF456 457 ENDDO458 ENDDO459 ENDDO460 !$acc end kernels461 462 ELSE463 464 !$acc kernels present( ddzu, ddzw, dd2zu, diss, e, km, l_grid ) &465 !$acc present( nzb_s_inner, tend, var, zu, zw )466 DO i = i_left, i_right467 DO j = j_south, j_north468 DO k = 1, nzt469 470 IF ( k > nzb_s_inner(j,i) ) THEN471 !472 !-- Calculate the mixing length (for dissipation)473 dvar_dz = atmos_ocean_sign * &474 ( var(k+1,j,i) - var(k-1,j,i) ) * dd2zu(k)475 IF ( dvar_dz > 0.0_wp ) THEN476 l_stable = 0.76_wp * SQRT( e(k,j,i) ) / &477 SQRT( g / var(k,j,i) * dvar_dz ) + 1E-5_wp478 ELSE479 l_stable = l_grid(k)480 ENDIF481 !482 !-- Adjustment of the mixing length483 IF ( wall_adjustment ) THEN484 l = MIN( wall_adjustment_factor * &485 ( zu(k) - zw(nzb_s_inner(j,i)) ), &486 l_grid(k), l_stable )487 ll = MIN( wall_adjustment_factor * &488 ( zu(k) - zw(nzb_s_inner(j,i)) ), &489 l_grid(k) )490 ELSE491 l = MIN( l_grid(k), l_stable )492 ll = l_grid(k)493 ENDIF494 !495 !-- Calculate the tendency terms496 dissipation = ( 0.19_wp + 0.74_wp * l / ll ) * &497 e(k,j,i) * SQRT( e(k,j,i) ) / l498 499 tend(k,j,i) = tend(k,j,i) &500 + ( &501 ( km(k,j,i)+km(k,j,i+1) ) * ( e(k,j,i+1)-e(k,j,i) ) &502 - ( km(k,j,i)+km(k,j,i-1) ) * ( e(k,j,i)-e(k,j,i-1) ) &503 ) * ddx2 &504 + ( &505 ( km(k,j,i)+km(k,j+1,i) ) * ( e(k,j+1,i)-e(k,j,i) ) &506 - ( km(k,j,i)+km(k,j-1,i) ) * ( e(k,j,i)-e(k,j-1,i) ) &507 ) * ddy2 &508 + ( &509 ( km(k,j,i)+km(k+1,j,i) ) * ( e(k+1,j,i)-e(k,j,i) ) * ddzu(k+1) &510 * rho_air_zw(k) &511 - ( km(k,j,i)+km(k-1,j,i) ) * ( e(k,j,i)-e(k-1,j,i) ) * ddzu(k) &512 * rho_air_zw(k-1) &513 ) * ddzw(k) * drho_air(k) &514 - dissipation515 516 !517 !-- Store dissipation if needed for calculating the sgs518 !-- particle velocities519 IF ( use_sgs_for_particles .OR. wang_kernel .OR. &520 collision_turbulence ) THEN521 diss(k,j,i) = dissipation522 ENDIF523 524 ENDIF525 526 ENDDO527 ENDDO528 ENDDO529 !$acc end kernels530 531 ENDIF532 533 !534 !-- Boundary condition for dissipation535 IF ( use_sgs_for_particles .OR. wang_kernel .OR. &536 collision_turbulence ) THEN537 !$acc kernels present( diss, nzb_s_inner )538 DO i = i_left, i_right539 DO j = j_south, j_north540 diss(nzb_s_inner(j,i),j,i) = diss(nzb_s_inner(j,i)+1,j,i)541 ENDDO542 ENDDO543 !$acc end kernels544 ENDIF545 546 END SUBROUTINE diffusion_e_acc547 334 548 335 -
TabularUnified palm/trunk/SOURCE/diffusion_s.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ------------------ 22 ! 22 ! OpenACC version of subroutine removed 23 23 ! 24 24 ! Former revisions: … … 34 34 ! 1873 2016-04-18 14:50:06Z maronga 35 35 ! Module renamed (removed _mod) 36 ! 37 ! 36 ! 38 37 ! 1850 2016-04-08 13:29:27Z maronga 39 38 ! Module renamed 40 !41 39 ! 42 40 ! 1691 2015-10-26 16:17:44Z maronga … … 94 92 95 93 PRIVATE 96 PUBLIC diffusion_s , diffusion_s_acc94 PUBLIC diffusion_s 97 95 98 96 INTERFACE diffusion_s … … 100 98 MODULE PROCEDURE diffusion_s_ij 101 99 END INTERFACE diffusion_s 102 103 INTERFACE diffusion_s_acc104 MODULE PROCEDURE diffusion_s_acc105 END INTERFACE diffusion_s_acc106 100 107 101 CONTAINS … … 242 236 ! Description: 243 237 ! ------------ 244 !> Call for all grid points - accelerator version245 !------------------------------------------------------------------------------!246 SUBROUTINE diffusion_s_acc( s, s_flux_b, s_flux_t, wall_s_flux )247 248 USE arrays_3d, &249 ONLY: ddzu, ddzw, kh, tend, drho_air, rho_air_zw250 251 USE control_parameters, &252 ONLY: use_surface_fluxes, use_top_fluxes253 254 USE grid_variables, &255 ONLY: ddx2, ddy2, fwxm, fwxp, fwym, fwyp, wall_w_x, wall_w_y256 257 USE indices, &258 ONLY: i_left, i_right, j_north, j_south, nxlg, nxrg, nyng, nysg, &259 nzb, nzb_diff_s_inner, nzb_s_inner, nzb_s_outer, nzt, nzt_diff260 261 USE kinds262 263 IMPLICIT NONE264 265 INTEGER(iwp) :: i !<266 INTEGER(iwp) :: j !<267 INTEGER(iwp) :: k !<268 REAL(wp) :: wall_s_flux(0:4) !<269 REAL(wp), DIMENSION(nysg:nyng,nxlg:nxrg) :: s_flux_b !<270 REAL(wp), DIMENSION(nysg:nyng,nxlg:nxrg) :: s_flux_t !<271 #if defined( __nopointer )272 REAL(wp), DIMENSION(nzb:nzt+1,nysg:nyng,nxlg:nxrg) :: s !<273 #else274 REAL(wp), DIMENSION(:,:,:), POINTER :: s !<275 #endif276 277 !$acc kernels present( ddzu, ddzw, fwxm, fwxp, fwym, fwyp, kh ) &278 !$acc present( nzb_diff_s_inner, nzb_s_inner, nzb_s_outer, s ) &279 !$acc present( s_flux_b, s_flux_t, tend, wall_s_flux ) &280 !$acc present( wall_w_x, wall_w_y )281 DO i = i_left, i_right282 DO j = j_south, j_north283 !284 !-- Compute horizontal diffusion285 DO k = 1, nzt286 IF ( k > nzb_s_outer(j,i) ) THEN287 288 tend(k,j,i) = tend(k,j,i) &289 + 0.5_wp * ( &290 ( kh(k,j,i) + kh(k,j,i+1) ) * ( s(k,j,i+1)-s(k,j,i) ) &291 - ( kh(k,j,i) + kh(k,j,i-1) ) * ( s(k,j,i)-s(k,j,i-1) ) &292 ) * ddx2 &293 + 0.5_wp * ( &294 ( kh(k,j,i) + kh(k,j+1,i) ) * ( s(k,j+1,i)-s(k,j,i) ) &295 - ( kh(k,j,i) + kh(k,j-1,i) ) * ( s(k,j,i)-s(k,j-1,i) ) &296 ) * ddy2297 ENDIF298 ENDDO299 300 !301 !-- Apply prescribed horizontal wall heatflux where necessary302 DO k = 1, nzt303 IF ( k > nzb_s_inner(j,i) .AND. k <= nzb_s_outer(j,i) .AND. &304 ( wall_w_x(j,i) /= 0.0_wp .OR. wall_w_y(j,i) /= 0.0_wp ) ) &305 THEN306 tend(k,j,i) = tend(k,j,i) &307 + ( fwxp(j,i) * 0.5_wp * &308 ( kh(k,j,i) + kh(k,j,i+1) ) * ( s(k,j,i+1)-s(k,j,i) ) &309 + ( 1.0_wp - fwxp(j,i) ) * wall_s_flux(1) &310 -fwxm(j,i) * 0.5_wp * &311 ( kh(k,j,i) + kh(k,j,i-1) ) * ( s(k,j,i)-s(k,j,i-1) ) &312 + ( 1.0_wp - fwxm(j,i) ) * wall_s_flux(2) &313 ) * ddx2 &314 + ( fwyp(j,i) * 0.5_wp * &315 ( kh(k,j,i) + kh(k,j+1,i) ) * ( s(k,j+1,i)-s(k,j,i) ) &316 + ( 1.0_wp - fwyp(j,i) ) * wall_s_flux(3) &317 -fwym(j,i) * 0.5_wp * &318 ( kh(k,j,i) + kh(k,j-1,i) ) * ( s(k,j,i)-s(k,j-1,i) ) &319 + ( 1.0_wp - fwym(j,i) ) * wall_s_flux(4) &320 ) * ddy2321 ENDIF322 ENDDO323 324 !325 !-- Compute vertical diffusion. In case that surface fluxes have been326 !-- prescribed or computed at bottom and/or top, index k starts/ends at327 !-- nzb+2 or nzt-1, respectively.328 DO k = 1, nzt_diff329 IF ( k >= nzb_diff_s_inner(j,i) ) THEN330 tend(k,j,i) = tend(k,j,i) &331 + 0.5_wp * ( &332 ( kh(k,j,i) + kh(k+1,j,i) ) * ( s(k+1,j,i)-s(k,j,i) ) * ddzu(k+1) &333 * rho_air_zw(k) &334 - ( kh(k,j,i) + kh(k-1,j,i) ) * ( s(k,j,i)-s(k-1,j,i) ) * ddzu(k) &335 * rho_air_zw(k-1) &336 ) * ddzw(k) * drho_air(k)337 ENDIF338 ENDDO339 340 !341 !-- Vertical diffusion at the first computational gridpoint along342 !-- z-direction343 DO k = 1, nzt344 IF ( use_surface_fluxes .AND. k == nzb_s_inner(j,i)+1 ) THEN345 tend(k,j,i) = tend(k,j,i) &346 + ( 0.5_wp * ( kh(k,j,i)+kh(k+1,j,i) )&347 * ( s(k+1,j,i)-s(k,j,i) ) &348 * ddzu(k+1) &349 * rho_air_zw(k) &350 + s_flux_b(j,i) &351 ) * ddzw(k) * drho_air(k)352 ENDIF353 354 !355 !-- Vertical diffusion at the last computational gridpoint along356 !-- z-direction357 IF ( use_top_fluxes .AND. k == nzt ) THEN358 tend(k,j,i) = tend(k,j,i) &359 + ( - s_flux_t(j,i) &360 - 0.5_wp * ( kh(k-1,j,i)+kh(k,j,i) )&361 * ( s(k,j,i)-s(k-1,j,i) ) &362 * ddzu(k) &363 * rho_air_zw(k-1) &364 ) * ddzw(k) * drho_air(k)365 ENDIF366 ENDDO367 368 ENDDO369 ENDDO370 !$acc end kernels371 372 END SUBROUTINE diffusion_s_acc373 374 375 !------------------------------------------------------------------------------!376 ! Description:377 ! ------------378 238 !> Call for grid point i,j 379 239 !------------------------------------------------------------------------------! -
TabularUnified palm/trunk/SOURCE/diffusion_u.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC version of subroutine removed 23 23 ! 24 24 ! Former revisions: … … 35 35 ! Module renamed (removed _mod) 36 36 ! 37 !38 37 ! 1850 2016-04-08 13:29:27Z maronga 39 38 ! Module renamed 40 !41 39 ! 42 40 ! 1740 2016-01-13 08:19:40Z raasch … … 97 95 98 96 PRIVATE 99 PUBLIC diffusion_u , diffusion_u_acc97 PUBLIC diffusion_u 100 98 101 99 INTERFACE diffusion_u … … 103 101 MODULE PROCEDURE diffusion_u_ij 104 102 END INTERFACE diffusion_u 105 106 INTERFACE diffusion_u_acc107 MODULE PROCEDURE diffusion_u_acc108 END INTERFACE diffusion_u_acc109 103 110 104 CONTAINS … … 280 274 ! Description: 281 275 ! ------------ 282 !> Call for all grid points - accelerator version283 !------------------------------------------------------------------------------!284 SUBROUTINE diffusion_u_acc285 286 USE arrays_3d, &287 ONLY: ddzu, ddzw, km, tend, u, usws, uswst, v, w, &288 drho_air, rho_air_zw289 290 USE control_parameters, &291 ONLY: constant_top_momentumflux, topography, use_surface_fluxes, &292 use_top_fluxes293 294 USE grid_variables, &295 ONLY: ddx, ddx2, ddy, fym, fyp, wall_u296 297 USE indices, &298 ONLY: i_left, i_right, j_north, j_south, nxl, nxr, nyn, nys, nzb, &299 nzb_diff_u, nzb_u_inner, nzb_u_outer, nzt, nzt_diff300 301 USE kinds302 303 IMPLICIT NONE304 305 INTEGER(iwp) :: i !<306 INTEGER(iwp) :: j !<307 INTEGER(iwp) :: k !<308 REAL(wp) :: kmym !<309 REAL(wp) :: kmyp !<310 REAL(wp) :: kmzm !<311 REAL(wp) :: kmzp !<312 313 REAL(wp), DIMENSION(nzb:nzt+1,nys:nyn,nxl:nxr) :: usvs !<314 !$acc declare create ( usvs )315 316 !317 !-- First calculate horizontal momentum flux u'v' at vertical walls,318 !-- if neccessary319 IF ( topography /= 'flat' ) THEN320 CALL wall_fluxes_acc( usvs, 1.0_wp, 0.0_wp, 0.0_wp, 0.0_wp, &321 nzb_u_inner, nzb_u_outer, wall_u )322 ENDIF323 324 !$acc kernels present ( u, v, w, km, tend, usws, uswst ) &325 !$acc present ( ddzu, ddzw, fym, fyp, wall_u ) &326 !$acc present ( nzb_u_inner, nzb_u_outer, nzb_diff_u )327 DO i = i_left, i_right328 DO j = j_south, j_north329 !330 !-- Compute horizontal diffusion331 DO k = 1, nzt332 IF ( k > nzb_u_outer(j,i) ) THEN333 !334 !-- Interpolate eddy diffusivities on staggered gridpoints335 kmyp = 0.25_wp * &336 ( km(k,j,i)+km(k,j+1,i)+km(k,j,i-1)+km(k,j+1,i-1) )337 kmym = 0.25_wp * &338 ( km(k,j,i)+km(k,j-1,i)+km(k,j,i-1)+km(k,j-1,i-1) )339 340 tend(k,j,i) = tend(k,j,i) &341 & + 2.0_wp * ( &342 & km(k,j,i) * ( u(k,j,i+1) - u(k,j,i) ) &343 & - km(k,j,i-1) * ( u(k,j,i) - u(k,j,i-1) ) &344 & ) * ddx2 &345 & + ( kmyp * ( u(k,j+1,i) - u(k,j,i) ) * ddy &346 & + kmyp * ( v(k,j+1,i) - v(k,j+1,i-1) ) * ddx &347 & - kmym * ( u(k,j,i) - u(k,j-1,i) ) * ddy &348 & - kmym * ( v(k,j,i) - v(k,j,i-1) ) * ddx &349 & ) * ddy350 ENDIF351 ENDDO352 353 !354 !-- Wall functions at the north and south walls, respectively355 DO k = 1, nzt356 IF( k > nzb_u_inner(j,i) .AND. k <= nzb_u_outer(j,i) .AND. &357 wall_u(j,i) /= 0.0_wp ) THEN358 359 kmyp = 0.25_wp * &360 ( km(k,j,i)+km(k,j+1,i)+km(k,j,i-1)+km(k,j+1,i-1) )361 kmym = 0.25_wp * &362 ( km(k,j,i)+km(k,j-1,i)+km(k,j,i-1)+km(k,j-1,i-1) )363 364 tend(k,j,i) = tend(k,j,i) &365 + 2.0_wp * ( &366 km(k,j,i) * ( u(k,j,i+1) - u(k,j,i) ) &367 - km(k,j,i-1) * ( u(k,j,i) - u(k,j,i-1) ) &368 ) * ddx2 &369 + ( fyp(j,i) * ( &370 kmyp * ( u(k,j+1,i) - u(k,j,i) ) * ddy &371 + kmyp * ( v(k,j+1,i) - v(k,j+1,i-1) ) * ddx &372 ) &373 - fym(j,i) * ( &374 kmym * ( u(k,j,i) - u(k,j-1,i) ) * ddy &375 + kmym * ( v(k,j,i) - v(k,j,i-1) ) * ddx &376 ) &377 + wall_u(j,i) * usvs(k,j,i) &378 ) * ddy379 ENDIF380 ENDDO381 382 !383 !-- Compute vertical diffusion. In case of simulating a Prandtl layer,384 !-- index k starts at nzb_u_inner+2.385 DO k = 1, nzt_diff386 IF ( k >= nzb_diff_u(j,i) ) THEN387 !388 !-- Interpolate eddy diffusivities on staggered gridpoints389 kmzp = 0.25_wp * &390 ( km(k,j,i)+km(k+1,j,i)+km(k,j,i-1)+km(k+1,j,i-1) )391 kmzm = 0.25_wp * &392 ( km(k,j,i)+km(k-1,j,i)+km(k,j,i-1)+km(k-1,j,i-1) )393 394 tend(k,j,i) = tend(k,j,i) &395 & + ( kmzp * ( ( u(k+1,j,i) - u(k,j,i) ) * ddzu(k+1)&396 & + ( w(k,j,i) - w(k,j,i-1) ) * ddx &397 & ) * rho_air_zw(k) &398 & - kmzm * ( ( u(k,j,i) - u(k-1,j,i) ) * ddzu(k)&399 & + ( w(k-1,j,i) - w(k-1,j,i-1) ) * ddx &400 & ) * rho_air_zw(k-1) &401 & ) * ddzw(k) * drho_air(k)402 ENDIF403 ENDDO404 405 ENDDO406 ENDDO407 408 !409 !-- Vertical diffusion at the first grid point above the surface,410 !-- if the momentum flux at the bottom is given by the Prandtl law or411 !-- if it is prescribed by the user.412 !-- Difference quotient of the momentum flux is not formed over half413 !-- of the grid spacing (2.0*ddzw(k)) any more, since the comparison414 !-- with other (LES) models showed that the values of the momentum415 !-- flux becomes too large in this case.416 !-- The term containing w(k-1,..) (see above equation) is removed here417 !-- because the vertical velocity is assumed to be zero at the surface.418 IF ( use_surface_fluxes ) THEN419 420 DO i = i_left, i_right421 DO j = j_south, j_north422 423 k = nzb_u_inner(j,i)+1424 !425 !-- Interpolate eddy diffusivities on staggered gridpoints426 kmzp = 0.25_wp * &427 ( km(k,j,i)+km(k+1,j,i)+km(k,j,i-1)+km(k+1,j,i-1) )428 429 tend(k,j,i) = tend(k,j,i) &430 & + ( kmzp * ( ( u(k+1,j,i) - u(k,j,i) ) * ddzu(k+1) &431 & + ( w(k,j,i) - w(k,j,i-1) ) * ddx &432 & ) * rho_air_zw(k) &433 & - ( -usws(j,i) ) &434 & ) * ddzw(k) * drho_air(k)435 ENDDO436 ENDDO437 438 ENDIF439 440 !441 !-- Vertical diffusion at the first gridpoint below the top boundary,442 !-- if the momentum flux at the top is prescribed by the user443 IF ( use_top_fluxes .AND. constant_top_momentumflux ) THEN444 445 k = nzt446 447 DO i = i_left, i_right448 DO j = j_south, j_north449 450 !451 !-- Interpolate eddy diffusivities on staggered gridpoints452 kmzm = 0.25_wp * &453 ( km(k,j,i)+km(k-1,j,i)+km(k,j,i-1)+km(k-1,j,i-1) )454 455 tend(k,j,i) = tend(k,j,i) &456 & + ( ( -uswst(j,i) ) &457 & - kmzm * ( ( u(k,j,i) - u(k-1,j,i) ) * ddzu(k) &458 & + ( w(k-1,j,i) - w(k-1,j,i-1) ) * ddx &459 & ) * rho_air_zw(k-1) &460 & ) * ddzw(k) * drho_air(k)461 ENDDO462 ENDDO463 464 ENDIF465 !$acc end kernels466 467 END SUBROUTINE diffusion_u_acc468 469 470 !------------------------------------------------------------------------------!471 ! Description:472 ! ------------473 276 !> Call for grid point i,j 474 277 !------------------------------------------------------------------------------! -
TabularUnified palm/trunk/SOURCE/diffusion_v.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC version of subroutine removed 23 23 ! 24 24 ! Former revisions: … … 35 35 ! Module renamed (removed _mod) 36 36 ! 37 !38 37 ! 1850 2016-04-08 13:29:27Z maronga 39 38 ! Module renamed 40 !41 39 ! 42 40 ! 1740 2016-01-13 08:19:40Z raasch … … 92 90 93 91 PRIVATE 94 PUBLIC diffusion_v , diffusion_v_acc92 PUBLIC diffusion_v 95 93 96 94 INTERFACE diffusion_v … … 98 96 MODULE PROCEDURE diffusion_v_ij 99 97 END INTERFACE diffusion_v 100 101 INTERFACE diffusion_v_acc102 MODULE PROCEDURE diffusion_v_acc103 END INTERFACE diffusion_v_acc104 98 105 99 CONTAINS … … 275 269 ! Description: 276 270 ! ------------ 277 !> Call for all grid points - accelerator version278 !------------------------------------------------------------------------------!279 SUBROUTINE diffusion_v_acc280 281 USE arrays_3d, &282 ONLY: ddzu, ddzw, km, tend, u, v, vsws, vswst, w, &283 drho_air, rho_air_zw284 285 USE control_parameters, &286 ONLY: constant_top_momentumflux, topography, use_surface_fluxes, &287 use_top_fluxes288 289 USE grid_variables, &290 ONLY: ddx, ddy, ddy2, fxm, fxp, wall_v291 292 USE indices, &293 ONLY: i_left, i_right, j_north, j_south, nxl, nxr, nyn, nys, nzb, &294 nzb_diff_v, nzb_v_inner, nzb_v_outer, nzt, nzt_diff295 296 USE kinds297 298 IMPLICIT NONE299 300 INTEGER(iwp) :: i !<301 INTEGER(iwp) :: j !<302 INTEGER(iwp) :: k !<303 REAL(wp) :: kmxm !<304 REAL(wp) :: kmxp !<305 REAL(wp) :: kmzm !<306 REAL(wp) :: kmzp !<307 308 REAL(wp), DIMENSION(nzb:nzt+1,nys:nyn,nxl:nxr) :: vsus !<309 !$acc declare create ( vsus )310 311 !312 !-- First calculate horizontal momentum flux v'u' at vertical walls,313 !-- if neccessary314 IF ( topography /= 'flat' ) THEN315 CALL wall_fluxes_acc( vsus, 0.0_wp, 1.0_wp, 0.0_wp, 0.0_wp, &316 nzb_v_inner, nzb_v_outer, wall_v )317 ENDIF318 319 !$acc kernels present ( u, v, w, km, tend, vsws, vswst ) &320 !$acc present ( ddzu, ddzw, fxm, fxp, wall_v ) &321 !$acc present ( nzb_v_inner, nzb_v_outer, nzb_diff_v )322 DO i = i_left, i_right323 DO j = j_south, j_north324 !325 !-- Compute horizontal diffusion326 DO k = 1, nzt327 IF ( k > nzb_v_outer(j,i) ) THEN328 !329 !-- Interpolate eddy diffusivities on staggered gridpoints330 kmxp = 0.25_wp * &331 ( km(k,j,i)+km(k,j,i+1)+km(k,j-1,i)+km(k,j-1,i+1) )332 kmxm = 0.25_wp * &333 ( km(k,j,i)+km(k,j,i-1)+km(k,j-1,i)+km(k,j-1,i-1) )334 335 tend(k,j,i) = tend(k,j,i) &336 & + ( kmxp * ( v(k,j,i+1) - v(k,j,i) ) * ddx &337 & + kmxp * ( u(k,j,i+1) - u(k,j-1,i+1) ) * ddy &338 & - kmxm * ( v(k,j,i) - v(k,j,i-1) ) * ddx &339 & - kmxm * ( u(k,j,i) - u(k,j-1,i) ) * ddy &340 & ) * ddx &341 & + 2.0_wp * ( &342 & km(k,j,i) * ( v(k,j+1,i) - v(k,j,i) ) &343 & - km(k,j-1,i) * ( v(k,j,i) - v(k,j-1,i) ) &344 & ) * ddy2345 ENDIF346 ENDDO347 348 !349 !-- Wall functions at the left and right walls, respectively350 DO k = 1, nzt351 IF( k > nzb_v_inner(j,i) .AND. k <= nzb_v_outer(j,i) .AND. &352 wall_v(j,i) /= 0.0_wp ) THEN353 354 kmxp = 0.25_wp * &355 ( km(k,j,i)+km(k,j,i+1)+km(k,j-1,i)+km(k,j-1,i+1) )356 kmxm = 0.25_wp * &357 ( km(k,j,i)+km(k,j,i-1)+km(k,j-1,i)+km(k,j-1,i-1) )358 359 tend(k,j,i) = tend(k,j,i) &360 + 2.0_wp * ( &361 km(k,j,i) * ( v(k,j+1,i) - v(k,j,i) ) &362 - km(k,j-1,i) * ( v(k,j,i) - v(k,j-1,i) ) &363 ) * ddy2 &364 + ( fxp(j,i) * ( &365 kmxp * ( v(k,j,i+1) - v(k,j,i) ) * ddx &366 + kmxp * ( u(k,j,i+1) - u(k,j-1,i+1) ) * ddy &367 ) &368 - fxm(j,i) * ( &369 kmxm * ( v(k,j,i) - v(k,j,i-1) ) * ddx &370 + kmxm * ( u(k,j,i) - u(k,j-1,i) ) * ddy &371 ) &372 + wall_v(j,i) * vsus(k,j,i) &373 ) * ddx374 ENDIF375 ENDDO376 377 !378 !-- Compute vertical diffusion. In case of simulating a Prandtl379 !-- layer, index k starts at nzb_v_inner+2.380 DO k = 1, nzt_diff381 IF ( k >= nzb_diff_v(j,i) ) THEN382 !383 !-- Interpolate eddy diffusivities on staggered gridpoints384 kmzp = 0.25_wp * &385 ( km(k,j,i)+km(k+1,j,i)+km(k,j-1,i)+km(k+1,j-1,i) )386 kmzm = 0.25_wp * &387 ( km(k,j,i)+km(k-1,j,i)+km(k,j-1,i)+km(k-1,j-1,i) )388 389 tend(k,j,i) = tend(k,j,i) &390 & + ( kmzp * ( ( v(k+1,j,i) - v(k,j,i) ) * ddzu(k+1)&391 & + ( w(k,j,i) - w(k,j-1,i) ) * ddy &392 & ) * rho_air_zw(k) &393 & - kmzm * ( ( v(k,j,i) - v(k-1,j,i) ) * ddzu(k)&394 & + ( w(k-1,j,i) - w(k-1,j-1,i) ) * ddy &395 & ) * rho_air_zw(k-1) &396 & ) * ddzw(k) * drho_air(k)397 ENDIF398 ENDDO399 400 ENDDO401 ENDDO402 403 !404 !-- Vertical diffusion at the first grid point above the surface,405 !-- if the momentum flux at the bottom is given by the Prandtl law406 !-- or if it is prescribed by the user.407 !-- Difference quotient of the momentum flux is not formed over408 !-- half of the grid spacing (2.0*ddzw(k)) any more, since the409 !-- comparison with other (LES) models showed that the values of410 !-- the momentum flux becomes too large in this case.411 !-- The term containing w(k-1,..) (see above equation) is removed here412 !-- because the vertical velocity is assumed to be zero at the surface.413 IF ( use_surface_fluxes ) THEN414 415 DO i = i_left, i_right416 DO j = j_south, j_north417 418 k = nzb_v_inner(j,i)+1419 !420 !-- Interpolate eddy diffusivities on staggered gridpoints421 kmzp = 0.25_wp * &422 ( km(k,j,i)+km(k+1,j,i)+km(k,j-1,i)+km(k+1,j-1,i) )423 424 tend(k,j,i) = tend(k,j,i) &425 & + ( kmzp * ( ( v(k+1,j,i) - v(k,j,i) ) * ddzu(k+1) &426 & + ( w(k,j,i) - w(k,j-1,i) ) * ddy &427 & ) * rho_air_zw(k) &428 & - ( -vsws(j,i) ) &429 & ) * ddzw(k) * drho_air(k)430 ENDDO431 ENDDO432 433 ENDIF434 435 !436 !-- Vertical diffusion at the first gridpoint below the top boundary,437 !-- if the momentum flux at the top is prescribed by the user438 IF ( use_top_fluxes .AND. constant_top_momentumflux ) THEN439 440 k = nzt441 442 DO i = i_left, i_right443 DO j = j_south, j_north444 445 !446 !-- Interpolate eddy diffusivities on staggered gridpoints447 kmzm = 0.25_wp * &448 ( km(k,j,i)+km(k-1,j,i)+km(k,j-1,i)+km(k-1,j-1,i) )449 450 tend(k,j,i) = tend(k,j,i) &451 & + ( ( -vswst(j,i) ) &452 & - kmzm * ( ( v(k,j,i) - v(k-1,j,i) ) * ddzu(k) &453 & + ( w(k-1,j,i) - w(k-1,j-1,i) ) * ddy &454 & ) * rho_air_zw(k-1) &455 & ) * ddzw(k) * drho_air(k)456 ENDDO457 ENDDO458 459 ENDIF460 !$acc end kernels461 462 END SUBROUTINE diffusion_v_acc463 464 465 !------------------------------------------------------------------------------!466 ! Description:467 ! ------------468 271 !> Call for grid point i,j 469 272 !------------------------------------------------------------------------------! -
TabularUnified palm/trunk/SOURCE/diffusion_w.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC version of subroutine removed 23 23 ! 24 24 ! Former revisions: … … 35 35 ! Module renamed (removed _mod) 36 36 ! 37 !38 37 ! 1850 2016-04-08 13:29:27Z maronga 39 38 ! Module renamed 40 !41 39 ! 42 40 ! 1682 2015-10-07 23:56:08Z knoop … … 97 95 98 96 USE wall_fluxes_mod, & 99 ONLY : wall_fluxes , wall_fluxes_acc97 ONLY : wall_fluxes 100 98 101 99 PRIVATE 102 PUBLIC diffusion_w , diffusion_w_acc100 PUBLIC diffusion_w 103 101 104 102 INTERFACE diffusion_w … … 106 104 MODULE PROCEDURE diffusion_w_ij 107 105 END INTERFACE diffusion_w 108 109 INTERFACE diffusion_w_acc110 MODULE PROCEDURE diffusion_w_acc111 END INTERFACE diffusion_w_acc112 106 113 107 CONTAINS … … 248 242 ! Description: 249 243 ! ------------ 250 !> Call for all grid points - accelerator version251 !------------------------------------------------------------------------------!252 SUBROUTINE diffusion_w_acc253 254 USE arrays_3d, &255 ONLY : ddzu, ddzw, km, tend, u, v, w, drho_air_zw, rho_air256 257 USE control_parameters, &258 ONLY : topography259 260 USE grid_variables, &261 ONLY : ddx, ddy, fwxm, fwxp, fwym, fwyp, wall_w_x, wall_w_y262 263 USE indices, &264 ONLY : i_left, i_right, j_north, j_south, nxl, nxr, nyn, nys, nzb, &265 nzb_w_inner, nzb_w_outer, nzt266 267 USE kinds268 269 IMPLICIT NONE270 271 INTEGER(iwp) :: i !<272 INTEGER(iwp) :: j !<273 INTEGER(iwp) :: k !<274 275 REAL(wp) :: kmxm !<276 REAL(wp) :: kmxp !<277 REAL(wp) :: kmym !<278 REAL(wp) :: kmyp !<279 280 REAL(wp), DIMENSION(nzb:nzt+1,nys:nyn,nxl:nxr) :: wsus !<281 REAL(wp), DIMENSION(nzb:nzt+1,nys:nyn,nxl:nxr) :: wsvs !<282 !$acc declare create ( wsus, wsvs )283 284 !285 !-- First calculate horizontal momentum flux w'u' and/or w'v' at vertical286 !-- walls, if neccessary287 IF ( topography /= 'flat' ) THEN288 CALL wall_fluxes_acc( wsus, 0.0_wp, 0.0_wp, 0.0_wp, 1.0_wp, &289 nzb_w_inner, nzb_w_outer, wall_w_x )290 CALL wall_fluxes_acc( wsvs, 0.0_wp, 0.0_wp, 0.0_wp, 1.0_wp, &291 nzb_w_inner, nzb_w_outer, wall_w_y )292 ENDIF293 294 !$acc kernels present ( u, v, w, km, tend ) &295 !$acc present ( ddzu, ddzw, fwxm, fwxp, fwym, fwyp, wall_w_x, wall_w_y ) &296 !$acc present ( nzb_w_inner, nzb_w_outer )297 DO i = i_left, i_right298 DO j = j_south, j_north299 DO k = 1, nzt300 IF ( k > nzb_w_outer(j,i) ) THEN301 !302 !-- Interpolate eddy diffusivities on staggered gridpoints303 kmxp = 0.25_wp * &304 ( km(k,j,i)+km(k,j,i+1)+km(k+1,j,i)+km(k+1,j,i+1) )305 kmxm = 0.25_wp * &306 ( km(k,j,i)+km(k,j,i-1)+km(k+1,j,i)+km(k+1,j,i-1) )307 kmyp = 0.25_wp * &308 ( km(k,j,i)+km(k+1,j,i)+km(k,j+1,i)+km(k+1,j+1,i) )309 kmym = 0.25_wp * &310 ( km(k,j,i)+km(k+1,j,i)+km(k,j-1,i)+km(k+1,j-1,i) )311 312 tend(k,j,i) = tend(k,j,i) &313 & + ( kmxp * ( w(k,j,i+1) - w(k,j,i) ) * ddx &314 & + kmxp * ( u(k+1,j,i+1) - u(k,j,i+1) ) * ddzu(k+1) &315 & - kmxm * ( w(k,j,i) - w(k,j,i-1) ) * ddx &316 & - kmxm * ( u(k+1,j,i) - u(k,j,i) ) * ddzu(k+1) &317 & ) * ddx &318 & + ( kmyp * ( w(k,j+1,i) - w(k,j,i) ) * ddy &319 & + kmyp * ( v(k+1,j+1,i) - v(k,j+1,i) ) * ddzu(k+1) &320 & - kmym * ( w(k,j,i) - w(k,j-1,i) ) * ddy &321 & - kmym * ( v(k+1,j,i) - v(k,j,i) ) * ddzu(k+1) &322 & ) * ddy &323 & + 2.0_wp * ( &324 & km(k+1,j,i) * ( w(k+1,j,i) - w(k,j,i) ) * ddzw(k+1) &325 & * rho_air(k+1) &326 & - km(k,j,i) * ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) &327 & * rho_air(k) &328 & ) * ddzu(k+1) * drho_air_zw(k)329 ENDIF330 ENDDO331 332 !333 !-- Wall functions at all vertical walls, where necessary334 DO k = 1,nzt335 336 IF ( k > nzb_w_inner(j,i) .AND. k <= nzb_w_outer(j,i) .AND. &337 wall_w_x(j,i) /= 0.0_wp .AND. wall_w_y(j,i) /= 0.0_wp ) THEN338 !339 !-- Interpolate eddy diffusivities on staggered gridpoints340 kmxp = 0.25_wp * &341 ( km(k,j,i)+km(k,j,i+1)+km(k+1,j,i)+km(k+1,j,i+1) )342 kmxm = 0.25_wp * &343 ( km(k,j,i)+km(k,j,i-1)+km(k+1,j,i)+km(k+1,j,i-1) )344 kmyp = 0.25_wp * &345 ( km(k,j,i)+km(k+1,j,i)+km(k,j+1,i)+km(k+1,j+1,i) )346 kmym = 0.25_wp * &347 ( km(k,j,i)+km(k+1,j,i)+km(k,j-1,i)+km(k+1,j-1,i) )348 349 tend(k,j,i) = tend(k,j,i) &350 + ( fwxp(j,i) * ( &351 kmxp * ( w(k,j,i+1) - w(k,j,i) ) * ddx &352 + kmxp * ( u(k+1,j,i+1) - u(k,j,i+1) ) * ddzu(k+1) &353 ) &354 - fwxm(j,i) * ( &355 kmxm * ( w(k,j,i) - w(k,j,i-1) ) * ddx &356 + kmxm * ( u(k+1,j,i) - u(k,j,i) ) * ddzu(k+1) &357 ) &358 + wall_w_x(j,i) * wsus(k,j,i) &359 ) * ddx &360 + ( fwyp(j,i) * ( &361 kmyp * ( w(k,j+1,i) - w(k,j,i) ) * ddy &362 + kmyp * ( v(k+1,j+1,i) - v(k,j+1,i) ) * ddzu(k+1) &363 ) &364 - fwym(j,i) * ( &365 kmym * ( w(k,j,i) - w(k,j-1,i) ) * ddy &366 + kmym * ( v(k+1,j,i) - v(k,j,i) ) * ddzu(k+1) &367 ) &368 + wall_w_y(j,i) * wsvs(k,j,i) &369 ) * ddy &370 + 2.0_wp * ( &371 km(k+1,j,i) * ( w(k+1,j,i) - w(k,j,i) ) * ddzw(k+1) &372 * rho_air(k+1) &373 - km(k,j,i) * ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) &374 * rho_air(k) &375 ) * ddzu(k+1) * drho_air_zw(k)376 ENDIF377 ENDDO378 379 ENDDO380 ENDDO381 !$acc end kernels382 383 END SUBROUTINE diffusion_w_acc384 385 386 !------------------------------------------------------------------------------!387 ! Description:388 ! ------------389 244 !> Call for grid point i,j 390 245 !------------------------------------------------------------------------------! -
TabularUnified palm/trunk/SOURCE/diffusivities.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC directives removed 23 23 ! 24 24 ! Former revisions: … … 130 130 131 131 ! 132 !-- Data declerations for accelerators133 !$acc data present( dd2zu, e, km, kh, l_grid, l_wall, nzb_s_inner, var )134 !$acc kernels135 136 !137 132 !-- Introduce an optional minimum tke 138 133 IF ( e_min > 0.0_wp ) THEN 139 134 !$OMP DO 140 !$acc loop141 135 DO i = nxlg, nxrg 142 136 DO j = nysg, nyng 143 !$acc loop vector( 32 )144 137 DO k = 1, nzt 145 138 IF ( k > nzb_s_inner(j,i) ) THEN … … 152 145 153 146 !$OMP DO 154 !$acc loop155 147 DO i = nxlg, nxrg 156 148 DO j = nysg, nyng 157 !$acc loop vector( 32 )158 149 DO k = 1, nzt 159 150 … … 191 182 kh(k,j,i) = ( 1.0_wp + 2.0_wp * l / ll ) * km(k,j,i) 192 183 193 #if ! defined( __openacc ) 194 ! 195 !++ Statistics still have to be realized for accelerators 184 ! 196 185 !-- Summation for averaged profile (cf. flow_statistics) 197 186 DO sr = 0, statistic_regions 198 187 sums_l_l(k,sr,tn) = sums_l_l(k,sr,tn) + l * rmask(j,i,sr) 199 188 ENDDO 200 #endif 189 201 190 ENDIF 202 191 … … 205 194 ENDDO 206 195 207 #if ! defined( __openacc )208 !209 !++ Statistics still have to be realized for accelerators210 196 sums_l_l(nzt+1,:,tn) = sums_l_l(nzt,:,tn) ! quasi boundary-condition for 211 ! data output 212 #endif 213 !$OMP END PARALLEL 197 ! data output 198 !$OMP END PARALLEL 214 199 215 200 ! … … 219 204 !-- values of the diffusivities are not needed 220 205 !$OMP PARALLEL DO 221 !$acc loop222 206 DO i = nxlg, nxrg 223 207 DO j = nysg, nyng … … 249 233 ENDIF 250 234 251 !$acc end kernels252 !$acc end data253 254 235 END SUBROUTINE diffusivities -
TabularUnified palm/trunk/SOURCE/exchange_horiz.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC directives and related code removed 23 23 ! 24 24 ! Former revisions: … … 86 86 USE control_parameters, & 87 87 ONLY: bc_lr, bc_lr_cyc, bc_ns, bc_ns_cyc, grid_level, & 88 mg_switch_to_pe0, on_device,synchronous_exchange88 mg_switch_to_pe0, synchronous_exchange 89 89 90 90 USE cpulog, & … … 254 254 !-- with array syntax, explicit loops are used. 255 255 IF ( bc_lr == 'cyclic' ) THEN 256 IF ( on_device ) THEN 257 !$acc kernels present( ar ) 258 !$acc loop independent 259 DO i = 0, nbgp_local-1 260 DO j = nys-nbgp_local, nyn+nbgp_local 261 DO k = nzb, nzt+1 262 ar(k,j,nxl-nbgp_local+i) = ar(k,j,nxr-nbgp_local+1+i) 263 ar(k,j,nxr+1+i) = ar(k,j,nxl+i) 264 ENDDO 265 ENDDO 266 ENDDO 267 !$acc end kernels 268 ELSE 269 ar(:,:,nxl-nbgp_local:nxl-1) = ar(:,:,nxr-nbgp_local+1:nxr) 270 ar(:,:,nxr+1:nxr+nbgp_local) = ar(:,:,nxl:nxl+nbgp_local-1) 271 ENDIF 256 ar(:,:,nxl-nbgp_local:nxl-1) = ar(:,:,nxr-nbgp_local+1:nxr) 257 ar(:,:,nxr+1:nxr+nbgp_local) = ar(:,:,nxl:nxl+nbgp_local-1) 272 258 ENDIF 273 259 274 260 IF ( bc_ns == 'cyclic' ) THEN 275 IF ( on_device ) THEN 276 !$acc kernels present( ar ) 277 DO i = nxl-nbgp_local, nxr+nbgp_local 278 !$acc loop independent 279 DO j = 0, nbgp_local-1 280 !$acc loop independent 281 DO k = nzb, nzt+1 282 ar(k,nys-nbgp_local+j,i) = ar(k,nyn-nbgp_local+1+j,i) 283 ar(k,nyn+1+j,i) = ar(k,nys+j,i) 284 ENDDO 285 ENDDO 286 ENDDO 287 !$acc end kernels 288 ELSE 289 ar(:,nys-nbgp_local:nys-1,:) = ar(:,nyn-nbgp_local+1:nyn,:) 290 ar(:,nyn+1:nyn+nbgp_local,:) = ar(:,nys:nys+nbgp_local-1,:) 291 ENDIF 261 ar(:,nys-nbgp_local:nys-1,:) = ar(:,nyn-nbgp_local+1:nyn,:) 262 ar(:,nyn+1:nyn+nbgp_local,:) = ar(:,nys:nys+nbgp_local-1,:) 292 263 ENDIF 293 264 -
TabularUnified palm/trunk/SOURCE/fft_xy_mod.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC directives and CUDA-fft related code removed 23 23 ! 24 24 ! Former revisions: … … 31 31 ! 1850 2016-04-08 13:29:27Z maronga 32 32 ! Module renamed 33 !34 33 ! 35 34 ! 1815 2016-04-06 13:49:59Z raasch … … 139 138 ONLY: nx, ny, nz 140 139 141 #if defined( __cuda_fft ) 142 USE ISO_C_BINDING 143 #elif defined( __fftw ) 140 #if defined( __fftw ) 144 141 USE, INTRINSIC :: ISO_C_BINDING 145 142 #endif … … 192 189 REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE :: trig_yf !< 193 190 194 #elif defined( __cuda_fft )195 INTEGER(C_INT), SAVE :: plan_xf !<196 INTEGER(C_INT), SAVE :: plan_xi !<197 INTEGER(C_INT), SAVE :: plan_yf !<198 INTEGER(C_INT), SAVE :: plan_yi !<199 200 INTEGER(iwp), SAVE :: total_points_x_transpo !<201 INTEGER(iwp), SAVE :: total_points_y_transpo !<202 191 #endif 203 192 … … 261 250 SUBROUTINE fft_init 262 251 263 USE cuda_fft_interfaces264 265 252 IMPLICIT NONE 266 253 … … 338 325 CALL ZDFFTM( 0, ny+1, nz1, sqr_dny, work_y, ny+4, work_y, ny+4, & 339 326 trig_yb, worky, 0 ) 340 #elif defined( __cuda_fft )341 total_points_x_transpo = (nx+1) * (nyn_x-nys_x+1) * (nzt_x-nzb_x+1)342 total_points_y_transpo = (ny+1) * (nxr_y-nxl_y+1) * (nzt_y-nzb_y+1)343 CALL CUFFTPLAN1D( plan_xf, nx+1, CUFFT_D2Z, (nyn_x-nys_x+1) * (nzt_x-nzb_x+1) )344 CALL CUFFTPLAN1D( plan_xi, nx+1, CUFFT_Z2D, (nyn_x-nys_x+1) * (nzt_x-nzb_x+1) )345 CALL CUFFTPLAN1D( plan_yf, ny+1, CUFFT_D2Z, (nxr_y-nxl_y+1) * (nzt_y-nzb_y+1) )346 CALL CUFFTPLAN1D( plan_yi, ny+1, CUFFT_Z2D, (nxr_y-nxl_y+1) * (nzt_y-nzb_y+1) )347 327 #else 348 328 message_string = 'no system-specific fft-call available' … … 403 383 404 384 405 USE cuda_fft_interfaces406 #if defined( __cuda_fft )407 USE ISO_C_BINDING408 #endif409 410 385 IMPLICIT NONE 411 386 … … 429 404 #elif defined( __nec ) 430 405 REAL(wp), DIMENSION(6*(nx+1)) :: work2 !< 431 #elif defined( __cuda_fft )432 COMPLEX(dp), DIMENSION(0:(nx+1)/2,nys_x:nyn_x,nzb_x:nzt_x) :: &433 ar_tmp !<434 ! following does not work for PGI 14.1 -> to be removed later435 ! !$acc declare create( ar_tmp )436 406 #endif 437 407 … … 737 707 738 708 ENDIF 739 740 #elif defined( __cuda_fft )741 742 !$acc data create( ar_tmp )743 IF ( forward_fft ) THEN744 745 !$acc data present( ar )746 CALL CUFFTEXECD2Z( plan_xf, ar, ar_tmp )747 748 !$acc kernels749 DO k = nzb_x, nzt_x750 DO j = nys_x, nyn_x751 752 DO i = 0, (nx+1)/2753 ar(i,j,k) = REAL( ar_tmp(i,j,k), KIND=wp ) * dnx754 ENDDO755 756 DO i = 1, (nx+1)/2 - 1757 ar(nx+1-i,j,k) = AIMAG( ar_tmp(i,j,k) ) * dnx758 ENDDO759 760 ENDDO761 ENDDO762 !$acc end kernels763 !$acc end data764 765 ELSE766 767 !$acc data present( ar )768 !$acc kernels769 DO k = nzb_x, nzt_x770 DO j = nys_x, nyn_x771 772 ar_tmp(0,j,k) = CMPLX( ar(0,j,k), 0.0_wp, KIND=wp )773 774 DO i = 1, (nx+1)/2 - 1775 ar_tmp(i,j,k) = CMPLX( ar(i,j,k), ar(nx+1-i,j,k), &776 KIND=wp )777 ENDDO778 ar_tmp((nx+1)/2,j,k) = CMPLX( ar((nx+1)/2,j,k), 0.0_wp, &779 KIND=wp )780 781 ENDDO782 ENDDO783 !$acc end kernels784 785 CALL CUFFTEXECZ2D( plan_xi, ar_tmp, ar )786 !$acc end data787 788 ENDIF789 !$acc end data790 709 791 710 #else … … 1052 971 1053 972 1054 USE cuda_fft_interfaces1055 #if defined( __cuda_fft )1056 USE ISO_C_BINDING1057 #endif1058 1059 973 IMPLICIT NONE 1060 974 … … 1082 996 #elif defined( __nec ) 1083 997 REAL(wp), DIMENSION(6*(ny+1)) :: work2 !< 1084 #elif defined( __cuda_fft )1085 COMPLEX(dp), DIMENSION(0:(ny+1)/2,nxl_y:nxr_y,nzb_y:nzt_y) :: &1086 ar_tmp !<1087 ! following does not work for PGI 14.1 -> to be removed later1088 ! !$acc declare create( ar_tmp )1089 998 #endif 1090 999 … … 1364 1273 1365 1274 ENDIF 1366 #elif defined( __cuda_fft )1367 1368 !$acc data create( ar_tmp )1369 IF ( forward_fft ) THEN1370 1371 !$acc data present( ar )1372 CALL CUFFTEXECD2Z( plan_yf, ar, ar_tmp )1373 1374 !$acc kernels1375 DO k = nzb_y, nzt_y1376 DO i = nxl_y, nxr_y1377 1378 DO j = 0, (ny+1)/21379 ar(j,i,k) = REAL( ar_tmp(j,i,k), KIND=wp ) * dny1380 ENDDO1381 1382 DO j = 1, (ny+1)/2 - 11383 ar(ny+1-j,i,k) = AIMAG( ar_tmp(j,i,k) ) * dny1384 ENDDO1385 1386 ENDDO1387 ENDDO1388 !$acc end kernels1389 !$acc end data1390 1391 ELSE1392 1393 !$acc data present( ar )1394 !$acc kernels1395 DO k = nzb_y, nzt_y1396 DO i = nxl_y, nxr_y1397 1398 ar_tmp(0,i,k) = CMPLX( ar(0,i,k), 0.0_wp, KIND=wp )1399 1400 DO j = 1, (ny+1)/2 - 11401 ar_tmp(j,i,k) = CMPLX( ar(j,i,k), ar(ny+1-j,i,k), &1402 KIND=wp )1403 ENDDO1404 ar_tmp((ny+1)/2,i,k) = CMPLX( ar((ny+1)/2,i,k), 0.0_wp, &1405 KIND=wp )1406 1407 ENDDO1408 ENDDO1409 !$acc end kernels1410 1411 CALL CUFFTEXECZ2D( plan_yi, ar_tmp, ar )1412 !$acc end data1413 1414 ENDIF1415 !$acc end data1416 1417 1275 #else 1418 1276 message_string = 'no system-specific fft-call available' -
TabularUnified palm/trunk/SOURCE/flow_statistics.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC version of subroutine removed 23 23 ! 24 24 ! Former revisions: … … 216 216 !> are zero at the walls and inside buildings. 217 217 !------------------------------------------------------------------------------! 218 #if ! defined( __openacc )219 218 SUBROUTINE flow_statistics 220 219 … … 309 308 CALL cpu_log( log_point(10), 'flow_statistics', 'start' ) 310 309 311 !$acc update host( km, kh, e, ol, pt, qs, qsws, shf, ts, u, usws, v, vsws, w )312 310 313 311 ! … … 1756 1754 1757 1755 END SUBROUTINE flow_statistics 1758 1759 1760 #else1761 1762 1763 !------------------------------------------------------------------------------!1764 ! Description:1765 ! ------------1766 !> flow statistics - accelerator version1767 !------------------------------------------------------------------------------!1768 SUBROUTINE flow_statistics1769 1770 USE arrays_3d, &1771 ONLY: ddzu, ddzw, e, heatflux_output_conversion, hyp, km, kh, &1772 momentumflux_output_conversion, nr, p, prho, pt, q, qc, ql, qr, &1773 qs, qsws, qswst, rho_air, rho_air_zw, rho_ocean, s, sa, saswsb, &1774 saswst, shf, ss, ssws, sswst, td_lsa_lpt, td_lsa_q, td_sub_lpt, &1775 td_sub_q, time_vert, ts, tswst, u, ug, us, usws, uswst, vsws, &1776 v, vg, vpt, vswst, w, w_subs, waterflux_output_conversion, zw1777 1778 1779 USE cloud_parameters, &1780 ONLY: l_d_cp, prr, pt_d_t1781 1782 USE control_parameters, &1783 ONLY : average_count_pr, cloud_droplets, cloud_physics, do_sum, &1784 dt_3d, g, humidity, kappa, large_scale_forcing, &1785 large_scale_subsidence, max_pr_user, message_string, &1786 microphysics_seifert, neutral, ocean, passive_scalar, &1787 simulated_time, use_subsidence_tendencies, use_surface_fluxes, &1788 use_top_fluxes, ws_scheme_mom, ws_scheme_sca1789 1790 USE cpulog, &1791 ONLY: cpu_log, log_point1792 1793 USE grid_variables, &1794 ONLY: ddx, ddy1795 1796 USE indices, &1797 ONLY: ngp_2dh, ngp_2dh_s_inner, ngp_3d, ngp_3d_inner, ngp_sums, &1798 ngp_sums_ls, nxl, nxr, nyn, nys, nzb, nzb_diff_s_inner, &1799 nzb_s_inner, nzt, nzt_diff, rflags_invers1800 1801 USE kinds1802 1803 USE land_surface_model_mod, &1804 ONLY: ghf_eb, land_surface, m_soil, nzb_soil, nzt_soil, &1805 qsws_eb, qsws_liq_eb, qsws_soil_eb, qsws_veg_eb, r_a, r_s, &1806 shf_eb, t_soil1807 1808 USE netcdf_interface, &1809 ONLY: dots_rad, dots_soil1810 1811 USE pegrid1812 1813 USE radiation_model_mod, &1814 ONLY: radiation, radiation_scheme, rad_net, &1815 rad_lw_in, rad_lw_out, rad_sw_in, rad_sw_out1816 1817 #if defined ( __rrtmg )1818 USE radiation_model_mod, &1819 ONLY: rrtm_aldif, rrtm_aldir, rrtm_asdif, rrtm_asdir, rad_lw_cs_hr, &1820 rad_lw_hr, rad_sw_cs_hr, rad_sw_hr1821 #endif1822 1823 USE statistics1824 1825 IMPLICIT NONE1826 1827 INTEGER(iwp) :: i !<1828 INTEGER(iwp) :: j !<1829 INTEGER(iwp) :: k !<1830 INTEGER(iwp) :: k_surface_level !<1831 INTEGER(iwp) :: nt !<1832 INTEGER(iwp) :: omp_get_thread_num !<1833 INTEGER(iwp) :: sr !<1834 INTEGER(iwp) :: tn !<1835 1836 LOGICAL :: first !<1837 1838 REAL(wp) :: dptdz_threshold !<1839 REAL(wp) :: fac !<1840 REAL(wp) :: height !<1841 REAL(wp) :: pts !<1842 REAL(wp) :: sums_l_eper !<1843 REAL(wp) :: sums_l_etot !<1844 REAL(wp) :: s1 !<1845 REAL(wp) :: s2 !<1846 REAL(wp) :: s3 !<1847 REAL(wp) :: s4 !<1848 REAL(wp) :: s5 !<1849 REAL(wp) :: s6 !<1850 REAL(wp) :: s7 !<1851 REAL(wp) :: ust !<1852 REAL(wp) :: ust2 !<1853 REAL(wp) :: u2 !<1854 REAL(wp) :: vst !<1855 REAL(wp) :: vst2 !<1856 REAL(wp) :: v2 !<1857 REAL(wp) :: w2 !<1858 REAL(wp) :: z_i(2) !<1859 1860 REAL(wp) :: dptdz(nzb+1:nzt+1) !<1861 REAL(wp) :: sums_ll(nzb:nzt+1,2) !<1862 1863 CALL cpu_log( log_point(10), 'flow_statistics', 'start' )1864 1865 !1866 !-- To be on the safe side, check whether flow_statistics has already been1867 !-- called once after the current time step1868 IF ( flow_statistics_called ) THEN1869 1870 message_string = 'flow_statistics is called two times within one ' // &1871 'timestep'1872 CALL message( 'flow_statistics', 'PA0190', 1, 2, 0, 6, 0 )1873 1874 ENDIF1875 1876 !$acc data create( sums, sums_l )1877 !$acc update device( hom )1878 1879 !1880 !-- Compute statistics for each (sub-)region1881 DO sr = 0, statistic_regions1882 1883 !1884 !-- Initialize (local) summation array1885 sums_l = 0.0_wp1886 1887 !1888 !-- Store sums that have been computed in other subroutines in summation1889 !-- array1890 sums_l(:,11,:) = sums_l_l(:,sr,:) ! mixing length from diffusivities1891 !-- WARNING: next line still has to be adjusted for OpenMP1892 sums_l(:,21,0) = sums_wsts_bc_l(:,sr) * &1893 heatflux_output_conversion ! heat flux from advec_s_bc1894 sums_l(nzb+9,pr_palm,0) = sums_divold_l(sr) ! old divergence from pres1895 sums_l(nzb+10,pr_palm,0) = sums_divnew_l(sr) ! new divergence from pres1896 1897 !1898 !-- When calcuating horizontally-averaged total (resolved- plus subgrid-1899 !-- scale) vertical fluxes and velocity variances by using commonly-1900 !-- applied Reynolds-based methods ( e.g. <w'pt'> = (w-<w>)*(pt-<pt>) )1901 !-- in combination with the 5th order advection scheme, pronounced1902 !-- artificial kinks could be observed in the vertical profiles near the1903 !-- surface. Please note: these kinks were not related to the model truth,1904 !-- i.e. these kinks are just related to an evaluation problem.1905 !-- In order avoid these kinks, vertical fluxes and horizontal as well1906 !-- vertical velocity variances are calculated directly within the advection1907 !-- routines, according to the numerical discretization, to evaluate the1908 !-- statistical quantities as they will appear within the prognostic1909 !-- equations.1910 !-- Copy the turbulent quantities, evaluated in the advection routines to1911 !-- the local array sums_l() for further computations.1912 IF ( ws_scheme_mom .AND. sr == 0 ) THEN1913 1914 !1915 !-- According to the Neumann bc for the horizontal velocity components,1916 !-- the corresponding fluxes has to satisfiy the same bc.1917 IF ( ocean ) THEN1918 sums_us2_ws_l(nzt+1,:) = sums_us2_ws_l(nzt,:)1919 sums_vs2_ws_l(nzt+1,:) = sums_vs2_ws_l(nzt,:)1920 ENDIF1921 1922 DO i = 0, threads_per_task-11923 !1924 !-- Swap the turbulent quantities evaluated in advec_ws.1925 sums_l(:,13,i) = sums_wsus_ws_l(:,i) &1926 * momentumflux_output_conversion ! w*u*1927 sums_l(:,15,i) = sums_wsvs_ws_l(:,i) &1928 * momentumflux_output_conversion ! w*v*1929 sums_l(:,30,i) = sums_us2_ws_l(:,i) ! u*21930 sums_l(:,31,i) = sums_vs2_ws_l(:,i) ! v*21931 sums_l(:,32,i) = sums_ws2_ws_l(:,i) ! w*21932 sums_l(:,34,i) = sums_l(:,34,i) + 0.5_wp * &1933 ( sums_us2_ws_l(:,i) + sums_vs2_ws_l(:,i) + &1934 sums_ws2_ws_l(:,i) ) ! e*1935 DO k = nzb, nzt1936 sums_l(nzb+5,pr_palm,i) = sums_l(nzb+5,pr_palm,i) + 0.5_wp * ( &1937 sums_us2_ws_l(k,i) + &1938 sums_vs2_ws_l(k,i) + &1939 sums_ws2_ws_l(k,i) )1940 ENDDO1941 ENDDO1942 1943 ENDIF1944 1945 IF ( ws_scheme_sca .AND. sr == 0 ) THEN1946 1947 DO i = 0, threads_per_task-11948 sums_l(:,17,i) = sums_wspts_ws_l(:,i) &1949 * heatflux_output_conversion ! w*pt* from advec_s_ws1950 IF ( ocean ) sums_l(:,66,i) = sums_wssas_ws_l(:,i) ! w*sa*1951 IF ( humidity ) sums_l(:,49,i) = sums_wsqs_ws_l(:,i) &1952 * waterflux_output_conversion !w*q*1953 IF ( passive_scalar ) sums_l(:,116,i) = sums_wsss_ws_l(:,i) !w*s*1954 ENDDO1955 1956 ENDIF1957 !1958 !-- Horizontally averaged profiles of horizontal velocities and temperature.1959 !-- They must have been computed before, because they are already required1960 !-- for other horizontal averages.1961 tn = 01962 1963 !$OMP PARALLEL PRIVATE( i, j, k, tn )1964 !$ tn = omp_get_thread_num()1965 1966 !$acc update device( sums_l )1967 1968 !$OMP DO1969 !$acc parallel loop gang present( pt, rflags_invers, rmask, sums_l, u, v ) create( s1, s2, s3 )1970 DO k = nzb, nzt+11971 s1 = 01972 s2 = 01973 s3 = 01974 !$acc loop vector collapse( 2 ) reduction( +: s1, s2, s3 )1975 DO i = nxl, nxr1976 DO j = nys, nyn1977 !1978 !-- k+1 is used in rflags since rflags is set 0 at surface points1979 s1 = s1 + u(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)1980 s2 = s2 + v(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)1981 s3 = s3 + pt(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)1982 ENDDO1983 ENDDO1984 sums_l(k,1,tn) = s11985 sums_l(k,2,tn) = s21986 sums_l(k,4,tn) = s31987 ENDDO1988 !$acc end parallel loop1989 1990 !1991 !-- Horizontally averaged profile of salinity1992 IF ( ocean ) THEN1993 !$OMP DO1994 !$acc parallel loop gang present( rflags_invers, rmask, sums_l, sa ) create( s1 )1995 DO k = nzb, nzt+11996 s1 = 01997 !$acc loop vector collapse( 2 ) reduction( +: s1 )1998 DO i = nxl, nxr1999 DO j = nys, nyn2000 s1 = s1 + sa(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2001 ENDDO2002 ENDDO2003 sums_l(k,23,tn) = s12004 ENDDO2005 !$acc end parallel loop2006 ENDIF2007 2008 !2009 !-- Horizontally averaged profiles of virtual potential temperature,2010 !-- total water content, specific humidity and liquid water potential2011 !-- temperature2012 IF ( humidity ) THEN2013 2014 !$OMP DO2015 !$acc parallel loop gang present( q, rflags_invers, rmask, sums_l, vpt ) create( s1, s2 )2016 DO k = nzb, nzt+12017 s1 = 02018 s2 = 02019 !$acc loop vector collapse( 2 ) reduction( +: s1, s2 )2020 DO i = nxl, nxr2021 DO j = nys, nyn2022 s1 = s1 + q(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2023 s2 = s2 + vpt(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2024 ENDDO2025 ENDDO2026 sums_l(k,41,tn) = s12027 sums_l(k,44,tn) = s22028 ENDDO2029 !$acc end parallel loop2030 2031 IF ( cloud_physics ) THEN2032 !$OMP DO2033 !$acc parallel loop gang present( pt, q, ql, rflags_invers, rmask, sums_l ) create( s1, s2 )2034 DO k = nzb, nzt+12035 s1 = 02036 s2 = 02037 !$acc loop vector collapse( 2 ) reduction( +: s1, s2 )2038 DO i = nxl, nxr2039 DO j = nys, nyn2040 s1 = s1 + ( q(k,j,i) - ql(k,j,i) ) * &2041 rmask(j,i,sr) * rflags_invers(j,i,k+1)2042 s2 = s2 + ( pt(k,j,i) + l_d_cp*pt_d_t(k) * ql(k,j,i) ) * &2043 rmask(j,i,sr) * rflags_invers(j,i,k+1)2044 ENDDO2045 ENDDO2046 sums_l(k,42,tn) = s12047 sums_l(k,43,tn) = s22048 ENDDO2049 !$acc end parallel loop2050 ENDIF2051 ENDIF2052 2053 !2054 !-- Horizontally averaged profiles of passive scalar2055 IF ( passive_scalar ) THEN2056 !$OMP DO2057 !$acc parallel loop gang present( s, rflags_invers, rmask, sums_l ) create( s1 )2058 DO k = nzb, nzt+12059 s1 = 02060 !$acc loop vector collapse( 2 ) reduction( +: s1 )2061 DO i = nxl, nxr2062 DO j = nys, nyn2063 s1 = s1 + s(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2064 ENDDO2065 ENDDO2066 sums_l(k,117,tn) = s12067 ENDDO2068 !$acc end parallel loop2069 ENDIF2070 !$OMP END PARALLEL2071 2072 !2073 !-- Summation of thread sums2074 IF ( threads_per_task > 1 ) THEN2075 DO i = 1, threads_per_task-12076 !$acc parallel present( sums_l )2077 sums_l(:,1,0) = sums_l(:,1,0) + sums_l(:,1,i)2078 sums_l(:,2,0) = sums_l(:,2,0) + sums_l(:,2,i)2079 sums_l(:,4,0) = sums_l(:,4,0) + sums_l(:,4,i)2080 !$acc end parallel2081 IF ( ocean ) THEN2082 !$acc parallel present( sums_l )2083 sums_l(:,23,0) = sums_l(:,23,0) + sums_l(:,23,i)2084 !$acc end parallel2085 ENDIF2086 IF ( humidity ) THEN2087 !$acc parallel present( sums_l )2088 sums_l(:,41,0) = sums_l(:,41,0) + sums_l(:,41,i)2089 sums_l(:,44,0) = sums_l(:,44,0) + sums_l(:,44,i)2090 !$acc end parallel2091 IF ( cloud_physics ) THEN2092 !$acc parallel present( sums_l )2093 sums_l(:,42,0) = sums_l(:,42,0) + sums_l(:,42,i)2094 sums_l(:,43,0) = sums_l(:,43,0) + sums_l(:,43,i)2095 !$acc end parallel2096 ENDIF2097 ENDIF2098 IF ( passive_scalar ) THEN2099 !$acc parallel present( sums_l )2100 sums_l(:,117,0) = sums_l(:,117,0) + sums_l(:,117,i)2101 !$acc end parallel2102 ENDIF2103 ENDDO2104 ENDIF2105 2106 #if defined( __parallel )2107 !2108 !-- Compute total sum from local sums2109 !$acc update host( sums_l )2110 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )2111 CALL MPI_ALLREDUCE( sums_l(nzb,1,0), sums(nzb,1), nzt+2-nzb, MPI_REAL, &2112 MPI_SUM, comm2d, ierr )2113 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )2114 CALL MPI_ALLREDUCE( sums_l(nzb,2,0), sums(nzb,2), nzt+2-nzb, MPI_REAL, &2115 MPI_SUM, comm2d, ierr )2116 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )2117 CALL MPI_ALLREDUCE( sums_l(nzb,4,0), sums(nzb,4), nzt+2-nzb, MPI_REAL, &2118 MPI_SUM, comm2d, ierr )2119 IF ( ocean ) THEN2120 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )2121 CALL MPI_ALLREDUCE( sums_l(nzb,23,0), sums(nzb,23), nzt+2-nzb, &2122 MPI_REAL, MPI_SUM, comm2d, ierr )2123 ENDIF2124 IF ( humidity ) THEN2125 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )2126 CALL MPI_ALLREDUCE( sums_l(nzb,44,0), sums(nzb,44), nzt+2-nzb, &2127 MPI_REAL, MPI_SUM, comm2d, ierr )2128 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )2129 CALL MPI_ALLREDUCE( sums_l(nzb,41,0), sums(nzb,41), nzt+2-nzb, &2130 MPI_REAL, MPI_SUM, comm2d, ierr )2131 IF ( cloud_physics ) THEN2132 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )2133 CALL MPI_ALLREDUCE( sums_l(nzb,42,0), sums(nzb,42), nzt+2-nzb, &2134 MPI_REAL, MPI_SUM, comm2d, ierr )2135 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )2136 CALL MPI_ALLREDUCE( sums_l(nzb,43,0), sums(nzb,43), nzt+2-nzb, &2137 MPI_REAL, MPI_SUM, comm2d, ierr )2138 ENDIF2139 ENDIF2140 2141 IF ( passive_scalar ) THEN2142 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )2143 CALL MPI_ALLREDUCE( sums_l(nzb,117,0), sums(nzb,117), nzt+2-nzb, &2144 MPI_REAL, MPI_SUM, comm2d, ierr )2145 ENDIF2146 !$acc update device( sums )2147 #else2148 !$acc parallel present( sums, sums_l )2149 sums(:,1) = sums_l(:,1,0)2150 sums(:,2) = sums_l(:,2,0)2151 sums(:,4) = sums_l(:,4,0)2152 !$acc end parallel2153 IF ( ocean ) THEN2154 !$acc parallel present( sums, sums_l )2155 sums(:,23) = sums_l(:,23,0)2156 !$acc end parallel2157 ENDIF2158 IF ( humidity ) THEN2159 !$acc parallel present( sums, sums_l )2160 sums(:,44) = sums_l(:,44,0)2161 sums(:,41) = sums_l(:,41,0)2162 !$acc end parallel2163 IF ( cloud_physics ) THEN2164 !$acc parallel present( sums, sums_l )2165 sums(:,42) = sums_l(:,42,0)2166 sums(:,43) = sums_l(:,43,0)2167 !$acc end parallel2168 ENDIF2169 ENDIF2170 IF ( passive_scalar ) THEN2171 !$acc parallel present( sums, sums_l )2172 sums(:,117) = sums_l(:,117,0)2173 !$acc end parallel2174 ENDIF2175 #endif2176 2177 !2178 !-- Final values are obtained by division by the total number of grid points2179 !-- used for summation. After that store profiles.2180 !$acc parallel present( hom, ngp_2dh, ngp_2dh_s_inner, sums )2181 sums(:,1) = sums(:,1) / ngp_2dh(sr)2182 sums(:,2) = sums(:,2) / ngp_2dh(sr)2183 sums(:,4) = sums(:,4) / ngp_2dh_s_inner(:,sr)2184 hom(:,1,1,sr) = sums(:,1) ! u2185 hom(:,1,2,sr) = sums(:,2) ! v2186 hom(:,1,4,sr) = sums(:,4) ! pt2187 !$acc end parallel2188 2189 !2190 !-- Salinity2191 IF ( ocean ) THEN2192 !$acc parallel present( hom, ngp_2dh_s_inner, sums )2193 sums(:,23) = sums(:,23) / ngp_2dh_s_inner(:,sr)2194 hom(:,1,23,sr) = sums(:,23) ! sa2195 !$acc end parallel2196 ENDIF2197 2198 !2199 !-- Humidity and cloud parameters2200 IF ( humidity ) THEN2201 !$acc parallel present( hom, ngp_2dh_s_inner, sums )2202 sums(:,44) = sums(:,44) / ngp_2dh_s_inner(:,sr)2203 sums(:,41) = sums(:,41) / ngp_2dh_s_inner(:,sr)2204 hom(:,1,44,sr) = sums(:,44) ! vpt2205 hom(:,1,41,sr) = sums(:,41) ! qv (q)2206 !$acc end parallel2207 IF ( cloud_physics ) THEN2208 !$acc parallel present( hom, ngp_2dh_s_inner, sums )2209 sums(:,42) = sums(:,42) / ngp_2dh_s_inner(:,sr)2210 sums(:,43) = sums(:,43) / ngp_2dh_s_inner(:,sr)2211 hom(:,1,42,sr) = sums(:,42) ! qv2212 hom(:,1,43,sr) = sums(:,43) ! pt2213 !$acc end parallel2214 ENDIF2215 ENDIF2216 2217 !2218 !-- Passive scalar2219 IF ( passive_scalar ) THEN2220 !$acc parallel present( hom, ngp_2dh_s_inner, sums )2221 sums(:,117) = sums(:,117) / ngp_2dh_s_inner(:,sr)2222 hom(:,1,117,sr) = sums(:,117) ! s2223 !$acc end parallel2224 ENDIF2225 2226 !2227 !-- Horizontally averaged profiles of the remaining prognostic variables,2228 !-- variances, the total and the perturbation energy (single values in last2229 !-- column of sums_l) and some diagnostic quantities.2230 !-- NOTE: for simplicity, nzb_s_inner is used below, although strictly2231 !-- ---- speaking the following k-loop would have to be split up and2232 !-- rearranged according to the staggered grid.2233 !-- However, this implies no error since staggered velocity components2234 !-- are zero at the walls and inside buildings.2235 tn = 02236 !$OMP PARALLEL PRIVATE( i, j, k, pts, sums_ll, sums_l_eper, &2237 !$OMP sums_l_etot, tn, ust, ust2, u2, vst, vst2, v2, &2238 !$OMP w2 )2239 !$ tn = omp_get_thread_num()2240 2241 !$OMP DO2242 !$acc parallel loop gang present( e, hom, kh, km, p, pt, w, rflags_invers, rmask, sums_l ) create( s1, s2, s3, s4, s5, s6, s7 )2243 DO k = nzb, nzt+12244 s1 = 02245 s2 = 02246 s3 = 02247 s4 = 02248 s5 = 02249 s6 = 02250 s7 = 02251 !$acc loop vector collapse( 2 ) reduction( +: s1, s2, s3, s4, s5, s6, s7 )2252 DO i = nxl, nxr2253 DO j = nys, nyn2254 !2255 !-- Prognostic and diagnostic variables2256 s1 = s1 + w(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2257 s2 = s2 + e(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2258 s3 = s3 + km(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2259 s4 = s4 + kh(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2260 s5 = s5 + p(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2261 s6 = s6 + ( pt(k,j,i)-hom(k,1,4,sr) )**2 * rmask(j,i,sr) * &2262 rflags_invers(j,i,k+1)2263 !2264 !-- Higher moments2265 !-- (Computation of the skewness of w further below)2266 s7 = s7 + w(k,j,i)**3 * rmask(j,i,sr) * rflags_invers(j,i,k+1)2267 ENDDO2268 ENDDO2269 sums_l(k,3,tn) = s12270 sums_l(k,8,tn) = s22271 sums_l(k,9,tn) = s32272 sums_l(k,10,tn) = s42273 sums_l(k,40,tn) = s52274 sums_l(k,33,tn) = s62275 sums_l(k,38,tn) = s72276 ENDDO2277 !$acc end parallel loop2278 2279 IF ( humidity ) THEN2280 !$OMP DO2281 !$acc parallel loop gang present( hom, q, rflags_invers, rmask, sums_l ) create( s1 )2282 DO k = nzb, nzt+12283 s1 = 02284 !$acc loop vector collapse( 2 ) reduction( +: s1 )2285 DO i = nxl, nxr2286 DO j = nys, nyn2287 s1 = s1 + ( q(k,j,i)-hom(k,1,41,sr) )**2 * rmask(j,i,sr) * &2288 rflags_invers(j,i,k+1)2289 ENDDO2290 ENDDO2291 sums_l(k,70,tn) = s12292 ENDDO2293 !$acc end parallel loop2294 ENDIF2295 2296 !2297 !-- Total and perturbation energy for the total domain (being2298 !-- collected in the last column of sums_l).2299 s1 = 02300 !$OMP DO2301 !$acc parallel loop collapse(3) present( rflags_invers, rmask, u, v, w ) reduction(+:s1)2302 DO i = nxl, nxr2303 DO j = nys, nyn2304 DO k = nzb, nzt+12305 s1 = s1 + 0.5_wp * &2306 ( u(k,j,i)**2 + v(k,j,i)**2 + w(k,j,i)**2 ) * &2307 rmask(j,i,sr) * rflags_invers(j,i,k+1)2308 ENDDO2309 ENDDO2310 ENDDO2311 !$acc end parallel loop2312 !$acc parallel present( sums_l )2313 sums_l(nzb+4,pr_palm,tn) = s12314 !$acc end parallel2315 2316 !$OMP DO2317 !$acc parallel present( rmask, sums_l, us, usws, vsws, ts ) create( s1, s2, s3, s4 )2318 s1 = 02319 s2 = 02320 s3 = 02321 s4 = 02322 !$acc loop vector collapse( 2 ) reduction( +: s1, s2, s3, s4 )2323 DO i = nxl, nxr2324 DO j = nys, nyn2325 !2326 !-- 2D-arrays (being collected in the last column of sums_l)2327 s1 = s1 + us(j,i) * rmask(j,i,sr)2328 s2 = s2 + usws(j,i) * rmask(j,i,sr)2329 s3 = s3 + vsws(j,i) * rmask(j,i,sr)2330 s4 = s4 + ts(j,i) * rmask(j,i,sr)2331 ENDDO2332 ENDDO2333 sums_l(nzb,pr_palm,tn) = s12334 sums_l(nzb+1,pr_palm,tn) = s22335 sums_l(nzb+2,pr_palm,tn) = s32336 sums_l(nzb+3,pr_palm,tn) = s42337 !$acc end parallel2338 2339 IF ( humidity ) THEN2340 !$acc parallel present( qs, rmask, sums_l ) create( s1 )2341 s1 = 02342 !$acc loop vector collapse( 2 ) reduction( +: s1 )2343 DO i = nxl, nxr2344 DO j = nys, nyn2345 s1 = s1 + qs(j,i) * rmask(j,i,sr)2346 ENDDO2347 ENDDO2348 sums_l(nzb+12,pr_palm,tn) = s12349 !$acc end parallel2350 ENDIF2351 2352 IF ( passive_scalar ) THEN2353 !$acc parallel present( ss, rmask, sums_l ) create( s1 )2354 s1 = 02355 !$acc loop vector collapse( 2 ) reduction( +: s1 )2356 DO i = nxl, nxr2357 DO j = nys, nyn2358 s1 = s1 + ss(j,i) * rmask(j,i,sr)2359 ENDDO2360 ENDDO2361 sums_l(nzb+13,pr_palm,tn) = s12362 !$acc end parallel2363 ENDIF2364 2365 !2366 !-- Computation of statistics when ws-scheme is not used. Else these2367 !-- quantities are evaluated in the advection routines.2368 IF ( .NOT. ws_scheme_mom .OR. sr /= 0 .OR. simulated_time == 0.0_wp ) &2369 THEN2370 2371 !$OMP DO2372 !$acc parallel loop gang present( u, v, w, rflags_invers, rmask, sums_l ) create( s1, s2, s3, s4, ust2, vst2, w2 )2373 DO k = nzb, nzt+12374 s1 = 02375 s2 = 02376 s3 = 02377 s4 = 02378 !$acc loop vector collapse( 2 ) reduction( +: s1, s2, s3, s4 )2379 DO i = nxl, nxr2380 DO j = nys, nyn2381 ust2 = ( u(k,j,i) - hom(k,1,1,sr) )**22382 vst2 = ( v(k,j,i) - hom(k,1,2,sr) )**22383 w2 = w(k,j,i)**22384 2385 s1 = s1 + ust2 * rmask(j,i,sr) * rflags_invers(j,i,k+1)2386 s2 = s2 + vst2 * rmask(j,i,sr) * rflags_invers(j,i,k+1)2387 s3 = s3 + w2 * rmask(j,i,sr) * rflags_invers(j,i,k+1)2388 !2389 !-- Perturbation energy2390 s4 = s4 + 0.5_wp * ( ust2 + vst2 + w2 ) * rmask(j,i,sr) * &2391 rflags_invers(j,i,k+1)2392 ENDDO2393 ENDDO2394 sums_l(k,30,tn) = s12395 sums_l(k,31,tn) = s22396 sums_l(k,32,tn) = s32397 sums_l(k,34,tn) = s42398 ENDDO2399 !$acc end parallel loop2400 !2401 !-- Total perturbation TKE2402 !$OMP DO2403 !$acc parallel present( sums_l ) create( s1 )2404 s1 = 02405 !$acc loop reduction( +: s1 )2406 DO k = nzb, nzt+12407 s1 = s1 + sums_l(k,34,tn)2408 ENDDO2409 sums_l(nzb+5,pr_palm,tn) = s12410 !$acc end parallel2411 2412 ENDIF2413 2414 !2415 !-- Horizontally averaged profiles of the vertical fluxes2416 2417 !2418 !-- Subgridscale fluxes.2419 !-- WARNING: If a Prandtl-layer is used (k=nzb for flat terrain), the fluxes2420 !-- ------- should be calculated there in a different way. This is done2421 !-- in the next loop further below, where results from this loop are2422 !-- overwritten. However, THIS WORKS IN CASE OF FLAT TERRAIN ONLY!2423 !-- The non-flat case still has to be handled.2424 !-- NOTE: for simplicity, nzb_s_inner is used below, although2425 !-- ---- strictly speaking the following k-loop would have to be2426 !-- split up according to the staggered grid.2427 !-- However, this implies no error since staggered velocity2428 !-- components are zero at the walls and inside buildings.2429 !$OMP DO2430 !$acc parallel loop gang present( ddzu, kh, km, pt, u, v, w, rflags_invers, rmask, sums_l ) create( s1, s2, s3 )2431 DO k = nzb, nzt_diff2432 s1 = 02433 s2 = 02434 s3 = 02435 !$acc loop vector collapse( 2 ) reduction( +: s1, s2, s3 )2436 DO i = nxl, nxr2437 DO j = nys, nyn2438 2439 !2440 !-- Momentum flux w"u"2441 s1 = s1 - 0.25_wp * ( &2442 km(k,j,i)+km(k+1,j,i)+km(k,j,i-1)+km(k+1,j,i-1) &2443 ) * ( &2444 ( u(k+1,j,i) - u(k,j,i) ) * ddzu(k+1) &2445 + ( w(k,j,i) - w(k,j,i-1) ) * ddx &2446 ) &2447 * rmask(j,i,sr) * rflags_invers(j,i,k+1) &2448 * rho_air_zw(k) &2449 * momentumflux_output_conversion(k)2450 !2451 !-- Momentum flux w"v"2452 s2 = s2 - 0.25_wp * ( &2453 km(k,j,i)+km(k+1,j,i)+km(k,j-1,i)+km(k+1,j-1,i) &2454 ) * ( &2455 ( v(k+1,j,i) - v(k,j,i) ) * ddzu(k+1) &2456 + ( w(k,j,i) - w(k,j-1,i) ) * ddy &2457 ) &2458 * rmask(j,i,sr) * rflags_invers(j,i,k+1) &2459 * rho_air_zw(k) &2460 * momentumflux_output_conversion(k)2461 !2462 !-- Heat flux w"pt"2463 s3 = s3 - 0.5_wp * ( kh(k,j,i) + kh(k+1,j,i) ) &2464 * ( pt(k+1,j,i) - pt(k,j,i) ) &2465 * rho_air_zw(k) &2466 * heatflux_output_conversion(k) &2467 * ddzu(k+1) * rmask(j,i,sr) &2468 * rflags_invers(j,i,k+1)2469 ENDDO2470 ENDDO2471 sums_l(k,12,tn) = s12472 sums_l(k,14,tn) = s22473 sums_l(k,16,tn) = s32474 ENDDO2475 !$acc end parallel loop2476 2477 !2478 !-- Salinity flux w"sa"2479 IF ( ocean ) THEN2480 !$acc parallel loop gang present( ddzu, kh, sa, rflags_invers, rmask, sums_l ) create( s1 )2481 DO k = nzb, nzt_diff2482 s1 = 02483 !$acc loop vector collapse( 2 ) reduction( +: s1 )2484 DO i = nxl, nxr2485 DO j = nys, nyn2486 s1 = s1 - 0.5_wp * ( kh(k,j,i) + kh(k+1,j,i) ) &2487 * ( sa(k+1,j,i) - sa(k,j,i) ) &2488 * ddzu(k+1) * rmask(j,i,sr) &2489 * rflags_invers(j,i,k+1)2490 ENDDO2491 ENDDO2492 sums_l(k,65,tn) = s12493 ENDDO2494 !$acc end parallel loop2495 ENDIF2496 2497 !2498 !-- Buoyancy flux, water flux (humidity flux) w"q"2499 IF ( humidity ) THEN2500 2501 !$acc parallel loop gang present( ddzu, kh, q, vpt, rflags_invers, rmask, sums_l ) create( s1, s2 )2502 DO k = nzb, nzt_diff2503 s1 = 02504 s2 = 02505 !$acc loop vector collapse( 2 ) reduction( +: s1, s2 )2506 DO i = nxl, nxr2507 DO j = nys, nyn2508 s1 = s1 - 0.5_wp * ( kh(k,j,i) + kh(k+1,j,i) ) &2509 * ( vpt(k+1,j,i) - vpt(k,j,i) ) &2510 * rho_air_zw(k) &2511 * heatflux_output_conversion(k) &2512 * ddzu(k+1) * rmask(j,i,sr) &2513 * rflags_invers(j,i,k+1)2514 s2 = s2 - 0.5_wp * ( kh(k,j,i) + kh(k+1,j,i) ) &2515 * ( q(k+1,j,i) - q(k,j,i) ) &2516 * rho_air_zw(k) &2517 * waterflux_output_conversion(k) &2518 * ddzu(k+1) * rmask(j,i,sr) &2519 * rflags_invers(j,i,k+1)2520 ENDDO2521 ENDDO2522 sums_l(k,45,tn) = s12523 sums_l(k,48,tn) = s22524 ENDDO2525 !$acc end parallel loop2526 2527 IF ( cloud_physics ) THEN2528 2529 !$acc parallel loop gang present( ddzu, kh, q, ql, rflags_invers, rmask, sums_l ) create( s1 )2530 DO k = nzb, nzt_diff2531 s1 = 02532 !$acc loop vector collapse( 2 ) reduction( +: s1 )2533 DO i = nxl, nxr2534 DO j = nys, nyn2535 s1 = s1 - 0.5_wp * ( kh(k,j,i) + kh(k+1,j,i) ) &2536 * ( ( q(k+1,j,i) - ql(k+1,j,i) ) &2537 - ( q(k,j,i) - ql(k,j,i) ) ) &2538 * rho_air_zw(k) &2539 * waterflux_output_conversion(k) &2540 * ddzu(k+1) * rmask(j,i,sr) &2541 * rflags_invers(j,i,k+1)2542 ENDDO2543 ENDDO2544 sums_l(k,51,tn) = s12545 ENDDO2546 !$acc end parallel loop2547 2548 ENDIF2549 2550 ENDIF2551 !2552 !-- Passive scalar flux2553 IF ( passive_scalar ) THEN2554 2555 !$acc parallel loop gang present( ddzu, kh, s, rflags_invers, rmask, sums_l ) create( s1 )2556 DO k = nzb, nzt_diff2557 s1 = 02558 !$acc loop vector collapse( 2 ) reduction( +: s1 )2559 DO i = nxl, nxr2560 DO j = nys, nyn2561 s1 = s1 - 0.5_wp * ( kh(k,j,i) + kh(k+1,j,i) ) &2562 * ( s(k+1,j,i) - s(k,j,i) ) &2563 * ddzu(k+1) * rmask(j,i,sr) &2564 * rflags_invers(j,i,k+1)2565 ENDDO2566 ENDDO2567 sums_l(k,119,tn) = s12568 ENDDO2569 !$acc end parallel loop2570 2571 ENDIF2572 2573 IF ( use_surface_fluxes ) THEN2574 2575 !$OMP DO2576 !$acc parallel present( rmask, shf, sums_l, usws, vsws ) create( s1, s2, s3, s4, s5 )2577 s1 = 02578 s2 = 02579 s3 = 02580 s4 = 02581 s5 = 02582 !$acc loop vector collapse( 2 ) reduction( +: s1, s2, s3, s4, s5 )2583 DO i = nxl, nxr2584 DO j = nys, nyn2585 !2586 !-- Subgridscale fluxes in the Prandtl layer2587 s1 = s1 + usws(j,i) * momentumflux_output_conversion(nzb) &2588 * rmask(j,i,sr) ! w"u"2589 s2 = s2 + vsws(j,i) * momentumflux_output_conversion(nzb) &2590 * rmask(j,i,sr) ! w"v"2591 s3 = s3 + shf(j,i) * heatflux_output_conversion(nzb) &2592 * rmask(j,i,sr) ! w"pt"2593 s4 = s4 + 0.0_wp * rmask(j,i,sr) ! u"pt"2594 s5 = s5 + 0.0_wp * rmask(j,i,sr) ! v"pt"2595 ENDDO2596 ENDDO2597 sums_l(nzb,12,tn) = s12598 sums_l(nzb,14,tn) = s22599 sums_l(nzb,16,tn) = s32600 sums_l(nzb,58,tn) = s42601 sums_l(nzb,61,tn) = s52602 !$acc end parallel2603 2604 IF ( ocean ) THEN2605 2606 !$OMP DO2607 !$acc parallel present( rmask, saswsb, sums_l ) create( s1 )2608 s1 = 02609 !$acc loop vector collapse( 2 ) reduction( +: s1 )2610 DO i = nxl, nxr2611 DO j = nys, nyn2612 s1 = s1 + saswsb(j,i) * rmask(j,i,sr) ! w"sa"2613 ENDDO2614 ENDDO2615 sums_l(nzb,65,tn) = s12616 !$acc end parallel2617 2618 ENDIF2619 2620 IF ( humidity ) THEN2621 2622 !$OMP DO2623 !$acc parallel present( pt, q, qsws, rmask, shf, sums_l ) create( s1, s2 )2624 s1 = 02625 s2 = 02626 !$acc loop vector collapse( 2 ) reduction( +: s1, s2 )2627 DO i = nxl, nxr2628 DO j = nys, nyn2629 s1 = s1 + qsws(j,i) * waterflux_output_conversion(nzb) &2630 * rmask(j,i,sr) ! w"q" (w"qv")2631 s2 = s2 + ( ( 1.0_wp + 0.61_wp * q(nzb,j,i) ) * shf(j,i) &2632 + 0.61_wp * pt(nzb,j,i) * qsws(j,i) ) &2633 * heatflux_output_conversion(nzb)2634 ENDDO2635 ENDDO2636 sums_l(nzb,48,tn) = s12637 sums_l(nzb,45,tn) = s22638 !$acc end parallel2639 2640 IF ( cloud_droplets ) THEN2641 2642 !$OMP DO2643 !$acc parallel present( pt, q, ql, qsws, rmask, shf, sums_l ) create( s1 )2644 s1 = 02645 !$acc loop vector collapse( 2 ) reduction( +: s1 )2646 DO i = nxl, nxr2647 DO j = nys, nyn2648 s1 = s1 + ( ( 1.0_wp + &2649 0.61_wp * q(nzb,j,i) - ql(nzb,j,i) ) * &2650 shf(j,i) + 0.61_wp * pt(nzb,j,i) * qsws(j,i) )&2651 * heatflux_output_conversion(nzb)2652 ENDDO2653 ENDDO2654 sums_l(nzb,45,tn) = s12655 !$acc end parallel2656 2657 ENDIF2658 2659 IF ( cloud_physics ) THEN2660 2661 !$OMP DO2662 !$acc parallel present( qsws, rmask, sums_l ) create( s1 )2663 s1 = 02664 !$acc loop vector collapse( 2 ) reduction( +: s1 )2665 DO i = nxl, nxr2666 DO j = nys, nyn2667 !2668 !-- Formula does not work if ql(nzb) /= 0.02669 s1 = s1 + qsws(j,i) * waterflux_output_conversion(nzb) &2670 * rmask(j,i,sr) ! w"q" (w"qv")2671 ENDDO2672 ENDDO2673 sums_l(nzb,51,tn) = s12674 !$acc end parallel2675 2676 ENDIF2677 2678 ENDIF2679 2680 IF ( passive_scalar ) THEN2681 2682 !$OMP DO2683 !$acc parallel present( ssws, rmask, sums_l ) create( s1 )2684 s1 = 02685 !$acc loop vector collapse( 2 ) reduction( +: s1 )2686 DO i = nxl, nxr2687 DO j = nys, nyn2688 s1 = s1 + ssws(j,i) * rmask(j,i,sr) ! w"s"2689 ENDDO2690 ENDDO2691 sums_l(nzb,119,tn) = s12692 !$acc end parallel2693 2694 ENDIF2695 2696 ENDIF2697 2698 !2699 !-- Subgridscale fluxes at the top surface2700 IF ( use_top_fluxes ) THEN2701 2702 !$OMP DO2703 !$acc parallel present( rmask, sums_l, tswst, uswst, vswst ) create( s1, s2, s3, s4, s5 )2704 s1 = 02705 s2 = 02706 s3 = 02707 s4 = 02708 s5 = 02709 !$acc loop vector collapse( 2 ) reduction( +: s1, s2, s3, s4, s5 )2710 DO i = nxl, nxr2711 DO j = nys, nyn2712 s1 = s1 + uswst(j,i) * momentumflux_output_conversion(nzt:nzt+1) &2713 * rmask(j,i,sr) ! w"u"2714 s2 = s2 + vswst(j,i) * momentumflux_output_conversion(nzt:nzt+1) &2715 * rmask(j,i,sr) ! w"v"2716 s3 = s3 + tswst(j,i) * heatflux_output_conversion(nzt:nzt+1) &2717 * rmask(j,i,sr) ! w"pt"2718 s4 = s4 + 0.0_wp * rmask(j,i,sr) ! u"pt"2719 s5 = s5 + 0.0_wp * rmask(j,i,sr) ! v"pt"2720 ENDDO2721 ENDDO2722 sums_l(nzt:nzt+1,12,tn) = s12723 sums_l(nzt:nzt+1,14,tn) = s22724 sums_l(nzt:nzt+1,16,tn) = s32725 sums_l(nzt:nzt+1,58,tn) = s42726 sums_l(nzt:nzt+1,61,tn) = s52727 !$acc end parallel2728 2729 IF ( ocean ) THEN2730 2731 !$OMP DO2732 !$acc parallel present( rmask, saswst, sums_l ) create( s1 )2733 s1 = 02734 !$acc loop vector collapse( 2 ) reduction( +: s1 )2735 DO i = nxl, nxr2736 DO j = nys, nyn2737 s1 = s1 + saswst(j,i) * rmask(j,i,sr) ! w"sa"2738 ENDDO2739 ENDDO2740 sums_l(nzt,65,tn) = s12741 !$acc end parallel2742 2743 ENDIF2744 2745 IF ( humidity ) THEN2746 2747 !$OMP DO2748 !$acc parallel present( pt, q, qswst, rmask, tswst, sums_l ) create( s1, s2 )2749 s1 = 02750 s2 = 02751 !$acc loop vector collapse( 2 ) reduction( +: s1, s2 )2752 DO i = nxl, nxr2753 DO j = nys, nyn2754 s1 = s1 + qswst(j,i) * waterflux_output_conversion(nzt) &2755 * rmask(j,i,sr) ! w"q" (w"qv")2756 s2 = s2 + ( ( 1.0_wp + 0.61_wp * q(nzt,j,i) ) * tswst(j,i) +&2757 0.61_wp * pt(nzt,j,i) * qswst(j,i) ) &2758 * heatflux_output_conversion(nzt)2759 ENDDO2760 ENDDO2761 sums_l(nzt,48,tn) = s12762 sums_l(nzt,45,tn) = s22763 !$acc end parallel2764 2765 IF ( cloud_droplets ) THEN2766 2767 !$OMP DO2768 !$acc parallel present( pt, q, ql, qswst, rmask, tswst, sums_l ) create( s1 )2769 s1 = 02770 !$acc loop vector collapse( 2 ) reduction( +: s1 )2771 DO i = nxl, nxr2772 DO j = nys, nyn2773 s1 = s1 + ( ( 1.0_wp + &2774 0.61_wp * q(nzt,j,i) - ql(nzt,j,i) ) * &2775 tswst(j,i) + &2776 0.61_wp * pt(nzt,j,i) * qswst(j,i) ) &2777 * heatflux_output_conversion(nzt)2778 ENDDO2779 ENDDO2780 sums_l(nzt,45,tn) = s12781 !$acc end parallel2782 2783 ENDIF2784 2785 IF ( cloud_physics ) THEN2786 2787 !$OMP DO2788 !$acc parallel present( qswst, rmask, sums_l ) create( s1 )2789 s1 = 02790 !$acc loop vector collapse( 2 ) reduction( +: s1 )2791 DO i = nxl, nxr2792 DO j = nys, nyn2793 !2794 !-- Formula does not work if ql(nzb) /= 0.02795 s1 = s1 + qswst(j,i) * waterflux_output_conversion(nzt) &2796 * rmask(j,i,sr) ! w"q" (w"qv")2797 ENDDO2798 ENDDO2799 sums_l(nzt,51,tn) = s12800 !$acc end parallel2801 2802 ENDIF2803 2804 ENDIF2805 2806 IF ( passive_scalar ) THEN2807 2808 !$OMP DO2809 !$acc parallel present( sswst, rmask, sums_l ) create( s1 )2810 s1 = 02811 !$acc loop vector collapse( 2 ) reduction( +: s1 )2812 DO i = nxl, nxr2813 DO j = nys, nyn2814 s1 = s1 + sswst(j,i) * rmask(j,i,sr) ! w"s"2815 ENDDO2816 ENDDO2817 sums_l(nzt,119,tn) = s12818 !$acc end parallel2819 2820 ENDIF2821 2822 ENDIF2823 2824 !2825 !-- Resolved fluxes (can be computed for all horizontal points)2826 !-- NOTE: for simplicity, nzb_s_inner is used below, although strictly2827 !-- ---- speaking the following k-loop would have to be split up and2828 !-- rearranged according to the staggered grid.2829 !$acc parallel loop gang present( hom, pt, rflags_invers, rmask, sums_l, u, v, w ) create( s1, s2, s3 )2830 DO k = nzb, nzt_diff2831 s1 = 02832 s2 = 02833 s3 = 02834 !$acc loop vector collapse( 2 ) reduction( +: s1, s2, s3 )2835 DO i = nxl, nxr2836 DO j = nys, nyn2837 ust = 0.5_wp * ( u(k,j,i) - hom(k,1,1,sr) + &2838 u(k+1,j,i) - hom(k+1,1,1,sr) )2839 vst = 0.5_wp * ( v(k,j,i) - hom(k,1,2,sr) + &2840 v(k+1,j,i) - hom(k+1,1,2,sr) )2841 pts = 0.5_wp * ( pt(k,j,i) - hom(k,1,4,sr) + &2842 pt(k+1,j,i) - hom(k+1,1,4,sr) )2843 !2844 !-- Higher moments2845 s1 = s1 + pts * w(k,j,i)**2 * rmask(j,i,sr) * rflags_invers(j,i,k+1)2846 s2 = s2 + pts**2 * w(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2847 !2848 !-- Energy flux w*e* (has to be adjusted?)2849 s3 = s3 + w(k,j,i) * 0.5_wp * ( ust**2 + vst**2 + w(k,j,i)**2 )&2850 * rmask(j,i,sr) * rflags_invers(j,i,k+1) &2851 * momentumflux_output_conversion(k)2852 ENDDO2853 ENDDO2854 sums_l(k,35,tn) = s12855 sums_l(k,36,tn) = s22856 sums_l(k,37,tn) = s32857 ENDDO2858 !$acc end parallel loop2859 2860 !2861 !-- Salinity flux and density (density does not belong to here,2862 !-- but so far there is no other suitable place to calculate)2863 IF ( ocean ) THEN2864 2865 IF( .NOT. ws_scheme_sca .OR. sr /= 0 ) THEN2866 2867 !$acc parallel loop gang present( hom, rflags_invers, rmask, sa, sums_l, w ) create( s1 )2868 DO k = nzb, nzt_diff2869 s1 = 02870 !$acc loop vector collapse( 2 ) reduction( +: s1 )2871 DO i = nxl, nxr2872 DO j = nys, nyn2873 s1 = s1 + 0.5_wp * ( sa(k,j,i) - hom(k,1,23,sr) + &2874 sa(k+1,j,i) - hom(k+1,1,23,sr) ) &2875 * w(k,j,i) * rmask(j,i,sr) &2876 * rflags_invers(j,i,k+1)2877 ENDDO2878 ENDDO2879 sums_l(k,66,tn) = s12880 ENDDO2881 !$acc end parallel loop2882 2883 ENDIF2884 2885 !$acc parallel loop gang present( rflags_invers, rho_ocean, prho, rmask, sums_l ) create( s1, s2 )2886 DO k = nzb, nzt_diff2887 s1 = 02888 s2 = 02889 !$acc loop vector collapse( 2 ) reduction( +: s1, s2 )2890 DO i = nxl, nxr2891 DO j = nys, nyn2892 s1 = s1 + rho_ocean(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2893 s2 = s2 + prho(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2894 ENDDO2895 ENDDO2896 sums_l(k,64,tn) = s12897 sums_l(k,71,tn) = s22898 ENDDO2899 !$acc end parallel loop2900 2901 ENDIF2902 2903 !2904 !-- Buoyancy flux, water flux, humidity flux, liquid water2905 !-- content, rain drop concentration and rain water content2906 IF ( humidity ) THEN2907 2908 IF ( cloud_physics .OR. cloud_droplets ) THEN2909 2910 !$acc parallel loop gang present( hom, rflags_invers, rmask, sums_l, vpt, w ) create( s1 )2911 DO k = nzb, nzt_diff2912 s1 = 02913 !$acc loop vector collapse( 2 ) reduction( +: s1 )2914 DO i = nxl, nxr2915 DO j = nys, nyn2916 s1 = s1 + 0.5_wp * ( vpt(k,j,i) - hom(k,1,44,sr) + &2917 vpt(k+1,j,i) - hom(k+1,1,44,sr) ) * &2918 heatflux_output_conversion(k) * &2919 w(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2920 ENDDO2921 ENDDO2922 sums_l(k,46,tn) = s12923 ENDDO2924 !$acc end parallel loop2925 2926 IF ( .NOT. cloud_droplets ) THEN2927 2928 !$acc parallel loop gang present( hom, q, ql, rflags_invers, rmask, sums_l, w ) create( s1 )2929 DO k = nzb, nzt_diff2930 s1 = 02931 !$acc loop vector collapse( 2 ) reduction( +: s1 )2932 DO i = nxl, nxr2933 DO j = nys, nyn2934 s1 = s1 + 0.5_wp * ( ( q(k,j,i) - ql(k,j,i) ) - hom(k,1,42,sr) + &2935 ( q(k+1,j,i) - ql(k+1,j,i) ) - hom(k+1,1,42,sr) ) &2936 * waterflux_output_conversion(k) &2937 * w(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2938 ENDDO2939 ENDDO2940 sums_l(k,52,tn) = s12941 ENDDO2942 !$acc end parallel loop2943 2944 IF ( microphysics_seifert ) THEN2945 2946 !$acc parallel loop gang present( qc, ql, rflags_invers, rmask, sums_l ) create( s1, s2 )2947 DO k = nzb, nzt_diff2948 s1 = 02949 !$acc loop vector collapse( 2 ) reduction( +: s1, s2 )2950 DO i = nxl, nxr2951 DO j = nys, nyn2952 s1 = s1 + ql(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2953 s2 = s2 + qc(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2954 ENDDO2955 ENDDO2956 sums_l(k,54,tn) = s12957 sums_l(k,75,tn) = s22958 ENDDO2959 !$acc end parallel loop2960 2961 !$acc parallel loop gang present( nr, qr, prr, rflags_invers, rmask, sums_l ) create( s1, s2, s3 )2962 DO k = nzb, nzt_diff2963 s1 = 02964 s2 = 02965 s3 = 02966 !$acc loop vector collapse( 2 ) reduction( +: s1, s2, s3 )2967 DO i = nxl, nxr2968 DO j = nys, nyn2969 s1 = s1 + nr(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2970 s2 = s2 + qr(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2971 s3 = s3 + prr(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2972 ENDDO2973 ENDDO2974 sums_l(k,73,tn) = s12975 sums_l(k,74,tn) = s22976 sums_l(k,76,tn) = s32977 ENDDO2978 !$acc end parallel loop2979 2980 ELSE2981 2982 !$acc parallel loop gang present( ql, rflags_invers, rmask, sums_l ) create( s1 )2983 DO k = nzb, nzt_diff2984 s1 = 02985 !$acc loop vector collapse( 2 ) reduction( +: s1 )2986 DO i = nxl, nxr2987 DO j = nys, nyn2988 s1 = s1 + ql(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2989 ENDDO2990 ENDDO2991 sums_l(k,54,tn) = s12992 ENDDO2993 !$acc end parallel loop2994 2995 ENDIF2996 2997 ELSE2998 2999 !$acc parallel loop gang present( ql, rflags_invers, rmask, sums_l ) create( s1 )3000 DO k = nzb, nzt_diff3001 s1 = 03002 !$acc loop vector collapse( 2 ) reduction( +: s1 )3003 DO i = nxl, nxr3004 DO j = nys, nyn3005 s1 = s1 + ql(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)3006 ENDDO3007 ENDDO3008 sums_l(k,54,tn) = s13009 ENDDO3010 !$acc end parallel loop3011 3012 ENDIF3013 3014 ELSE3015 3016 IF( .NOT. ws_scheme_sca .OR. sr /= 0 ) THEN3017 3018 !$acc parallel loop gang present( hom, rflags_invers, rmask, sums_l, vpt, w ) create( s1 )3019 DO k = nzb, nzt_diff3020 s1 = 03021 !$acc loop vector collapse( 2 ) reduction( +: s1 )3022 DO i = nxl, nxr3023 DO j = nys, nyn3024 s1 = s1 + 0.5_wp * ( vpt(k,j,i) - hom(k,1,44,sr) + &3025 vpt(k+1,j,i) - hom(k+1,1,44,sr) ) &3026 * heatflux_output_conversion(k) &3027 * w(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)3028 ENDDO3029 ENDDO3030 sums_l(k,46,tn) = s13031 ENDDO3032 !$acc end parallel loop3033 3034 ELSEIF ( ws_scheme_sca .AND. sr == 0 ) THEN3035 3036 !$acc parallel loop present( hom, sums_l )3037 DO k = nzb, nzt_diff3038 sums_l(k,46,tn) = ( ( 1.0_wp + 0.61_wp * hom(k,1,41,sr) ) * &3039 sums_l(k,17,tn) + 0.61_wp * &3040 hom(k,1,4,sr) * sums_l(k,49,tn) &3041 ) * heatflux_output_conversion(k)3042 ENDDO3043 !$acc end parallel loop3044 3045 ENDIF3046 3047 ENDIF3048 3049 ENDIF3050 !3051 !-- Passive scalar flux3052 IF ( passive_scalar .AND. ( .NOT. ws_scheme_sca .OR. sr /= 0 ) ) THEN3053 3054 !$acc parallel loop gang present( hom, s, rflags_invers, rmask, sums_l, w ) create( s1 )3055 DO k = nzb, nzt_diff3056 s1 = 03057 !$acc loop vector collapse( 2 ) reduction( +: s1 )3058 DO i = nxl, nxr3059 DO j = nys, nyn3060 s1 = s1 + 0.5_wp * ( s(k,j,i) - hom(k,1,117,sr) + &3061 s(k+1,j,i) - hom(k+1,1,117,sr) ) &3062 * w(k,j,i) * rmask(j,i,sr) &3063 * rflags_invers(j,i,k+1)3064 ENDDO3065 ENDDO3066 sums_l(k,49,tn) = s13067 ENDDO3068 !$acc end parallel loop3069 3070 ENDIF3071 3072 !3073 !-- For speed optimization fluxes which have been computed in part directly3074 !-- inside the WS advection routines are treated seperatly3075 !-- Momentum fluxes first:3076 IF ( .NOT. ws_scheme_mom .OR. sr /= 0 ) THEN3077 3078 !$OMP DO3079 !$acc parallel loop gang present( hom, rflags_invers, rmask, sums_l, u, v, w ) create( s1, s2 )3080 DO k = nzb, nzt_diff3081 s1 = 03082 s2 = 03083 !$acc loop vector collapse( 2 ) reduction( +: s1, s2 )3084 DO i = nxl, nxr3085 DO j = nys, nyn3086 ust = 0.5_wp * ( u(k,j,i) - hom(k,1,1,sr) + &3087 u(k+1,j,i) - hom(k+1,1,1,sr) )3088 vst = 0.5_wp * ( v(k,j,i) - hom(k,1,2,sr) + &3089 v(k+1,j,i) - hom(k+1,1,2,sr) )3090 !3091 !-- Momentum flux w*u*3092 s1 = s1 + 0.5_wp * ( w(k,j,i-1) + w(k,j,i) ) &3093 * ust * rmask(j,i,sr) &3094 * momentumflux_output_conversion(k) &3095 * rflags_invers(j,i,k+1)3096 !3097 !-- Momentum flux w*v*3098 s2 = s2 + 0.5_wp * ( w(k,j-1,i) + w(k,j,i) ) &3099 * vst * rmask(j,i,sr) &3100 * momentumflux_output_conversion(k) &3101 * rflags_invers(j,i,k+1)3102 ENDDO3103 ENDDO3104 sums_l(k,13,tn) = s13105 sums_l(k,15,tn) = s23106 ENDDO3107 !$acc end parallel loop3108 3109 ENDIF3110 3111 IF ( .NOT. ws_scheme_sca .OR. sr /= 0 ) THEN3112 3113 !$OMP DO3114 !$acc parallel loop gang present( hom, pt, rflags_invers, rmask, sums_l, w ) create( s1 )3115 DO k = nzb, nzt_diff3116 s1 = 03117 !$acc loop vector collapse( 2 ) reduction( +: s1 )3118 DO i = nxl, nxr3119 DO j = nys, nyn3120 !3121 !-- Vertical heat flux3122 s1 = s1 + 0.5_wp * ( pt(k,j,i) - hom(k,1,4,sr) + &3123 pt(k+1,j,i) - hom(k+1,1,4,sr) ) &3124 * heatflux_output_conversion(k) &3125 * w(k,j,i) * rmask(j,i,sr) &3126 * rflags_invers(j,i,k+1)3127 ENDDO3128 ENDDO3129 sums_l(k,17,tn) = s13130 ENDDO3131 !$acc end parallel loop3132 3133 IF ( humidity ) THEN3134 3135 !$acc parallel loop gang present( hom, q, rflags_invers, rmask, sums_l, w ) create( s1 )3136 DO k = nzb, nzt_diff3137 s1 = 03138 !$acc loop vector collapse( 2 ) reduction( +: s1 )3139 DO i = nxl, nxr3140 DO j = nys, nyn3141 s1 = s1 + 0.5_wp * ( q(k,j,i) - hom(k,1,41,sr) + &3142 q(k+1,j,i) - hom(k+1,1,41,sr) ) &3143 * waterflux_output_conversion(k) &3144 * w(k,j,i) * rmask(j,i,sr) &3145 * rflags_invers(j,i,k+1)3146 ENDDO3147 ENDDO3148 sums_l(k,49,tn) = s13149 ENDDO3150 !$acc end parallel loop3151 3152 ENDIF3153 3154 IF ( passive_scalar ) THEN3155 3156 !$acc parallel loop gang present( hom, s, rflags_invers, rmask, sums_l, w ) create( s1 )3157 DO k = nzb, nzt_diff3158 s1 = 03159 !$acc loop vector collapse( 2 ) reduction( +: s1 )3160 DO i = nxl, nxr3161 DO j = nys, nyn3162 s1 = s1 + 0.5_wp * ( s(k,j,i) - hom(k,1,117,sr) + &3163 s(k+1,j,i) - hom(k+1,1,117,sr) ) &3164 * w(k,j,i) * rmask(j,i,sr) &3165 * rflags_invers(j,i,k+1)3166 ENDDO3167 ENDDO3168 sums_l(k,116,tn) = s13169 ENDDO3170 !$acc end parallel loop3171 3172 ENDIF3173 3174 ENDIF3175 3176 3177 !3178 !-- Density at top follows Neumann condition3179 IF ( ocean ) THEN3180 !$acc parallel present( sums_l )3181 sums_l(nzt+1,64,tn) = sums_l(nzt,64,tn)3182 sums_l(nzt+1,71,tn) = sums_l(nzt,71,tn)3183 !$acc end parallel3184 ENDIF3185 3186 !3187 !-- Divergence of vertical flux of resolved scale energy and pressure3188 !-- fluctuations as well as flux of pressure fluctuation itself (68).3189 !-- First calculate the products, then the divergence.3190 !-- Calculation is time consuming. Do it only, if profiles shall be plotted.3191 IF ( hom(nzb+1,2,55,0) /= 0.0_wp .OR. hom(nzb+1,2,68,0) /= 0.0_wp ) THEN3192 3193 STOP '+++ openACC porting for vertical flux div of resolved scale TKE in flow_statistics is still missing'3194 sums_ll = 0.0_wp ! local array3195 3196 !$OMP DO3197 DO i = nxl, nxr3198 DO j = nys, nyn3199 DO k = nzb_s_inner(j,i)+1, nzt3200 3201 sums_ll(k,1) = sums_ll(k,1) + 0.5_wp * w(k,j,i) * ( &3202 ( 0.25_wp * ( u(k,j,i)+u(k+1,j,i)+u(k,j,i+1)+u(k+1,j,i+1) ) &3203 - 0.5_wp * ( hom(k,1,1,sr) + hom(k+1,1,1,sr) ) )**2&3204 + ( 0.25_wp * ( v(k,j,i)+v(k+1,j,i)+v(k,j+1,i)+v(k+1,j+1,i) ) &3205 - 0.5_wp * ( hom(k,1,2,sr) + hom(k+1,1,2,sr) ) )**2&3206 + w(k,j,i)**2 )3207 3208 sums_ll(k,2) = sums_ll(k,2) + 0.5_wp * w(k,j,i) &3209 * ( p(k,j,i) + p(k+1,j,i) )3210 3211 ENDDO3212 ENDDO3213 ENDDO3214 sums_ll(0,1) = 0.0_wp ! because w is zero at the bottom3215 sums_ll(nzt+1,1) = 0.0_wp3216 sums_ll(0,2) = 0.0_wp3217 sums_ll(nzt+1,2) = 0.0_wp3218 3219 DO k = nzb+1, nzt3220 sums_l(k,55,tn) = ( sums_ll(k,1) - sums_ll(k-1,1) ) * ddzw(k)3221 sums_l(k,56,tn) = ( sums_ll(k,2) - sums_ll(k-1,2) ) * ddzw(k)3222 sums_l(k,68,tn) = sums_ll(k,2)3223 ENDDO3224 sums_l(nzb,55,tn) = sums_l(nzb+1,55,tn)3225 sums_l(nzb,56,tn) = sums_l(nzb+1,56,tn)3226 sums_l(nzb,68,tn) = 0.0_wp ! because w* = 0 at nzb3227 3228 ENDIF3229 3230 !3231 !-- Divergence of vertical flux of SGS TKE and the flux itself (69)3232 IF ( hom(nzb+1,2,57,0) /= 0.0_wp .OR. hom(nzb+1,2,69,0) /= 0.0_wp ) THEN3233 3234 STOP '+++ openACC porting for vertical flux div of SGS TKE in flow_statistics is still missing'3235 !$OMP DO3236 DO i = nxl, nxr3237 DO j = nys, nyn3238 DO k = nzb_s_inner(j,i)+1, nzt3239 3240 sums_l(k,57,tn) = sums_l(k,57,tn) - 0.5_wp * ( &3241 (km(k,j,i)+km(k+1,j,i)) * (e(k+1,j,i)-e(k,j,i)) * ddzu(k+1) &3242 - (km(k-1,j,i)+km(k,j,i)) * (e(k,j,i)-e(k-1,j,i)) * ddzu(k) &3243 ) * ddzw(k)3244 3245 sums_l(k,69,tn) = sums_l(k,69,tn) - 0.5_wp * ( &3246 (km(k,j,i)+km(k+1,j,i)) * (e(k+1,j,i)-e(k,j,i)) * ddzu(k+1) &3247 )3248 3249 ENDDO3250 ENDDO3251 ENDDO3252 sums_l(nzb,57,tn) = sums_l(nzb+1,57,tn)3253 sums_l(nzb,69,tn) = sums_l(nzb+1,69,tn)3254 3255 ENDIF3256 3257 !3258 !-- Horizontal heat fluxes (subgrid, resolved, total).3259 !-- Do it only, if profiles shall be plotted.3260 IF ( hom(nzb+1,2,58,0) /= 0.0_wp ) THEN3261 3262 STOP '+++ openACC porting for horizontal flux calculation in flow_statistics is still missing'3263 !$OMP DO3264 DO i = nxl, nxr3265 DO j = nys, nyn3266 DO k = nzb_s_inner(j,i)+1, nzt3267 !3268 !-- Subgrid horizontal heat fluxes u"pt", v"pt"3269 sums_l(k,58,tn) = sums_l(k,58,tn) - 0.5_wp * &3270 ( kh(k,j,i) + kh(k,j,i-1) ) &3271 * ( pt(k,j,i-1) - pt(k,j,i) ) &3272 * rho_air_zw(k) &3273 * heatflux_output_conversion(k) &3274 * ddx * rmask(j,i,sr)3275 sums_l(k,61,tn) = sums_l(k,61,tn) - 0.5_wp * &3276 ( kh(k,j,i) + kh(k,j-1,i) ) &3277 * ( pt(k,j-1,i) - pt(k,j,i) ) &3278 * rho_air_zw(k) &3279 * heatflux_output_conversion(k) &3280 * ddy * rmask(j,i,sr)3281 !3282 !-- Resolved horizontal heat fluxes u*pt*, v*pt*3283 sums_l(k,59,tn) = sums_l(k,59,tn) + &3284 ( u(k,j,i) - hom(k,1,1,sr) ) * 0.5_wp * &3285 ( pt(k,j,i-1) - hom(k,1,4,sr) + &3286 pt(k,j,i) - hom(k,1,4,sr) ) &3287 * heatflux_output_conversion(k)3288 pts = 0.5_wp * ( pt(k,j-1,i) - hom(k,1,4,sr) + &3289 pt(k,j,i) - hom(k,1,4,sr) )3290 sums_l(k,62,tn) = sums_l(k,62,tn) + &3291 ( v(k,j,i) - hom(k,1,2,sr) ) * 0.5_wp * &3292 ( pt(k,j-1,i) - hom(k,1,4,sr) + &3293 pt(k,j,i) - hom(k,1,4,sr) ) &3294 * heatflux_output_conversion(k)3295 ENDDO3296 ENDDO3297 ENDDO3298 !3299 !-- Fluxes at the surface must be zero (e.g. due to the Prandtl-layer)3300 sums_l(nzb,58,tn) = 0.0_wp3301 sums_l(nzb,59,tn) = 0.0_wp3302 sums_l(nzb,60,tn) = 0.0_wp3303 sums_l(nzb,61,tn) = 0.0_wp3304 sums_l(nzb,62,tn) = 0.0_wp3305 sums_l(nzb,63,tn) = 0.0_wp3306 3307 ENDIF3308 3309 !3310 !-- Collect current large scale advection and subsidence tendencies for3311 !-- data output3312 IF ( large_scale_forcing .AND. ( simulated_time > 0.0_wp ) ) THEN3313 !3314 !-- Interpolation in time of LSF_DATA3315 nt = 13316 DO WHILE ( simulated_time - dt_3d > time_vert(nt) )3317 nt = nt + 13318 ENDDO3319 IF ( simulated_time - dt_3d /= time_vert(nt) ) THEN3320 nt = nt - 13321 ENDIF3322 3323 fac = ( simulated_time - dt_3d - time_vert(nt) ) &3324 / ( time_vert(nt+1)-time_vert(nt) )3325 3326 3327 DO k = nzb, nzt3328 sums_ls_l(k,0) = td_lsa_lpt(k,nt) &3329 + fac * ( td_lsa_lpt(k,nt+1) - td_lsa_lpt(k,nt) )3330 sums_ls_l(k,1) = td_lsa_q(k,nt) &3331 + fac * ( td_lsa_q(k,nt+1) - td_lsa_q(k,nt) )3332 ENDDO3333 3334 sums_ls_l(nzt+1,0) = sums_ls_l(nzt,0)3335 sums_ls_l(nzt+1,1) = sums_ls_l(nzt,1)3336 3337 IF ( large_scale_subsidence .AND. use_subsidence_tendencies ) THEN3338 3339 DO k = nzb, nzt3340 sums_ls_l(k,2) = td_sub_lpt(k,nt) + fac * &3341 ( td_sub_lpt(k,nt+1) - td_sub_lpt(k,nt) )3342 sums_ls_l(k,3) = td_sub_q(k,nt) + fac * &3343 ( td_sub_q(k,nt+1) - td_sub_q(k,nt) )3344 ENDDO3345 3346 sums_ls_l(nzt+1,2) = sums_ls_l(nzt,2)3347 sums_ls_l(nzt+1,3) = sums_ls_l(nzt,3)3348 3349 ENDIF3350 3351 ENDIF3352 3353 3354 IF ( land_surface ) THEN3355 !$OMP DO3356 DO i = nxl, nxr3357 DO j = nys, nyn3358 DO k = nzb_soil, nzt_soil3359 sums_l(k,89,tn) = sums_l(k,89,tn) + t_soil(k,j,i) &3360 * rmask(j,i,sr)3361 sums_l(k,91,tn) = sums_l(k,91,tn) + m_soil(k,j,i) &3362 * rmask(j,i,sr)3363 ENDDO3364 ENDDO3365 ENDDO3366 ENDIF3367 3368 3369 IF ( radiation .AND. radiation_scheme == 'rrtmg' ) THEN3370 !$OMP DO3371 DO i = nxl, nxr3372 DO j = nys, nyn3373 DO k = nzb_s_inner(j,i)+1, nzt+13374 sums_l(k,102,tn) = sums_l(k,102,tn) + rad_lw_in(k,j,i) &3375 * rmask(j,i,sr)3376 sums_l(k,103,tn) = sums_l(k,103,tn) + rad_lw_out(k,j,i) &3377 * rmask(j,i,sr)3378 sums_l(k,104,tn) = sums_l(k,104,tn) + rad_sw_in(k,j,i) &3379 * rmask(j,i,sr)3380 sums_l(k,105,tn) = sums_l(k,105,tn) + rad_sw_out(k,j,i) &3381 * rmask(j,i,sr)3382 #if defined ( __rrtmg )3383 sums_l(k,106,tn) = sums_l(k,106,tn) + rad_lw_cs_hr(k,j,i) &3384 * rmask(j,i,sr)3385 sums_l(k,107,tn) = sums_l(k,107,tn) + rad_lw_hr(k,j,i) &3386 * rmask(j,i,sr)3387 sums_l(k,108,tn) = sums_l(k,108,tn) + rad_sw_cs_hr(k,j,i) &3388 * rmask(j,i,sr)3389 sums_l(k,109,tn) = sums_l(k,109,tn) + rad_sw_hr(k,j,i) &3390 * rmask(j,i,sr)3391 #endif3392 ENDDO3393 ENDDO3394 ENDDO3395 ENDIF3396 3397 !3398 !-- Calculate the user-defined profiles3399 CALL user_statistics( 'profiles', sr, tn )3400 !$OMP END PARALLEL3401 3402 !3403 !-- Summation of thread sums3404 IF ( threads_per_task > 1 ) THEN3405 STOP '+++ openACC porting for threads_per_task > 1 in flow_statistics is still missing'3406 DO i = 1, threads_per_task-13407 sums_l(:,3,0) = sums_l(:,3,0) + sums_l(:,3,i)3408 sums_l(:,4:40,0) = sums_l(:,4:40,0) + sums_l(:,4:40,i)3409 sums_l(:,45:pr_palm,0) = sums_l(:,45:pr_palm,0) + &3410 sums_l(:,45:pr_palm,i)3411 IF ( max_pr_user > 0 ) THEN3412 sums_l(:,pr_palm+1:pr_palm+max_pr_user,0) = &3413 sums_l(:,pr_palm+1:pr_palm+max_pr_user,0) + &3414 sums_l(:,pr_palm+1:pr_palm+max_pr_user,i)3415 ENDIF3416 ENDDO3417 ENDIF3418 3419 !$acc update host( hom, sums, sums_l )3420 3421 #if defined( __parallel )3422 3423 !3424 !-- Compute total sum from local sums3425 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )3426 CALL MPI_ALLREDUCE( sums_l(nzb,1,0), sums(nzb,1), ngp_sums, MPI_REAL, &3427 MPI_SUM, comm2d, ierr )3428 IF ( large_scale_forcing ) THEN3429 CALL MPI_ALLREDUCE( sums_ls_l(nzb,2), sums(nzb,83), ngp_sums_ls, &3430 MPI_REAL, MPI_SUM, comm2d, ierr )3431 ENDIF3432 #else3433 sums = sums_l(:,:,0)3434 IF ( large_scale_forcing ) THEN3435 sums(:,81:88) = sums_ls_l3436 ENDIF3437 #endif3438 3439 !3440 !-- Final values are obtained by division by the total number of grid points3441 !-- used for summation. After that store profiles.3442 !-- Check, if statistical regions do contain at least one grid point at the3443 !-- respective k-level, otherwise division by zero will lead to undefined3444 !-- values, which may cause e.g. problems with NetCDF output3445 !-- Profiles:3446 DO k = nzb, nzt+13447 sums(k,3) = sums(k,3) / ngp_2dh(sr)3448 sums(k,12:22) = sums(k,12:22) / ngp_2dh(sr)3449 sums(k,30:32) = sums(k,30:32) / ngp_2dh(sr)3450 sums(k,35:39) = sums(k,35:39) / ngp_2dh(sr)3451 sums(k,45:53) = sums(k,45:53) / ngp_2dh(sr)3452 sums(k,55:63) = sums(k,55:63) / ngp_2dh(sr)3453 sums(k,81:88) = sums(k,81:88) / ngp_2dh(sr)3454 sums(k,89:114) = sums(k,89:114) / ngp_2dh(sr)3455 IF ( ngp_2dh_s_inner(k,sr) /= 0 ) THEN3456 sums(k,8:11) = sums(k,8:11) / ngp_2dh_s_inner(k,sr)3457 sums(k,23:29) = sums(k,23:29) / ngp_2dh_s_inner(k,sr)3458 sums(k,33:34) = sums(k,33:34) / ngp_2dh_s_inner(k,sr)3459 sums(k,40) = sums(k,40) / ngp_2dh_s_inner(k,sr)3460 sums(k,54) = sums(k,54) / ngp_2dh_s_inner(k,sr)3461 sums(k,64) = sums(k,64) / ngp_2dh_s_inner(k,sr)3462 sums(k,70:80) = sums(k,70:80) / ngp_2dh_s_inner(k,sr)3463 sums(k,115:pr_palm-2) = sums(k,115:pr_palm-2) / ngp_2dh_s_inner(k,sr)3464 ENDIF3465 ENDDO3466 3467 !-- u* and so on3468 !-- As sums(nzb:nzb+3,pr_palm) are full 2D arrays (us, usws, vsws, ts) whose3469 !-- size is always ( nx + 1 ) * ( ny + 1 ), defined at the first grid layer3470 !-- above the topography, they are being divided by ngp_2dh(sr)3471 sums(nzb:nzb+3,pr_palm) = sums(nzb:nzb+3,pr_palm) / &3472 ngp_2dh(sr)3473 sums(nzb+12,pr_palm) = sums(nzb+12,pr_palm) / & ! qs3474 ngp_2dh(sr)3475 !-- eges, e*3476 sums(nzb+4:nzb+5,pr_palm) = sums(nzb+4:nzb+5,pr_palm) / &3477 ngp_3d(sr)3478 !-- Old and new divergence3479 sums(nzb+9:nzb+10,pr_palm) = sums(nzb+9:nzb+10,pr_palm) / &3480 ngp_3d_inner(sr)3481 3482 !-- User-defined profiles3483 IF ( max_pr_user > 0 ) THEN3484 DO k = nzb, nzt+13485 IF ( ngp_2dh_s_inner(k,sr) /= 0 ) THEN3486 sums(k,pr_palm+1:pr_palm+max_pr_user) = &3487 sums(k,pr_palm+1:pr_palm+max_pr_user) / &3488 ngp_2dh_s_inner(k,sr)3489 ENDIF3490 ENDDO3491 ENDIF3492 3493 !3494 !-- Collect horizontal average in hom.3495 !-- Compute deduced averages (e.g. total heat flux)3496 hom(:,1,3,sr) = sums(:,3) ! w3497 hom(:,1,8,sr) = sums(:,8) ! e profiles 5-7 are initial profiles3498 hom(:,1,9,sr) = sums(:,9) ! km3499 hom(:,1,10,sr) = sums(:,10) ! kh3500 hom(:,1,11,sr) = sums(:,11) ! l3501 hom(:,1,12,sr) = sums(:,12) ! w"u"3502 hom(:,1,13,sr) = sums(:,13) ! w*u*3503 hom(:,1,14,sr) = sums(:,14) ! w"v"3504 hom(:,1,15,sr) = sums(:,15) ! w*v*3505 hom(:,1,16,sr) = sums(:,16) ! w"pt"3506 hom(:,1,17,sr) = sums(:,17) ! w*pt*3507 hom(:,1,18,sr) = sums(:,16) + sums(:,17) ! wpt3508 hom(:,1,19,sr) = sums(:,12) + sums(:,13) ! wu3509 hom(:,1,20,sr) = sums(:,14) + sums(:,15) ! wv3510 hom(:,1,21,sr) = sums(:,21) ! w*pt*BC3511 hom(:,1,22,sr) = sums(:,16) + sums(:,21) ! wptBC3512 ! profile 24 is initial profile (sa)3513 ! profiles 25-29 left empty for initial3514 ! profiles3515 hom(:,1,30,sr) = sums(:,30) ! u*23516 hom(:,1,31,sr) = sums(:,31) ! v*23517 hom(:,1,32,sr) = sums(:,32) ! w*23518 hom(:,1,33,sr) = sums(:,33) ! pt*23519 hom(:,1,34,sr) = sums(:,34) ! e*3520 hom(:,1,35,sr) = sums(:,35) ! w*2pt*3521 hom(:,1,36,sr) = sums(:,36) ! w*pt*23522 hom(:,1,37,sr) = sums(:,37) ! w*e*3523 hom(:,1,38,sr) = sums(:,38) ! w*33524 hom(:,1,39,sr) = sums(:,38) / ( abs( sums(:,32) ) + 1E-20_wp )**1.5_wp ! Sw3525 hom(:,1,40,sr) = sums(:,40) ! p3526 hom(:,1,45,sr) = sums(:,45) ! w"vpt"3527 hom(:,1,46,sr) = sums(:,46) ! w*vpt*3528 hom(:,1,47,sr) = sums(:,45) + sums(:,46) ! wvpt3529 hom(:,1,48,sr) = sums(:,48) ! w"q" (w"qv")3530 hom(:,1,49,sr) = sums(:,49) ! w*q* (w*qv*)3531 hom(:,1,50,sr) = sums(:,48) + sums(:,49) ! wq (wqv)3532 hom(:,1,51,sr) = sums(:,51) ! w"qv"3533 hom(:,1,52,sr) = sums(:,52) ! w*qv*3534 hom(:,1,53,sr) = sums(:,52) + sums(:,51) ! wq (wqv)3535 hom(:,1,54,sr) = sums(:,54) ! ql3536 hom(:,1,55,sr) = sums(:,55) ! w*u*u*/dz3537 hom(:,1,56,sr) = sums(:,56) ! w*p*/dz3538 hom(:,1,57,sr) = sums(:,57) ! ( w"e + w"p"/rho_ocean )/dz3539 hom(:,1,58,sr) = sums(:,58) ! u"pt"3540 hom(:,1,59,sr) = sums(:,59) ! u*pt*3541 hom(:,1,60,sr) = sums(:,58) + sums(:,59) ! upt_t3542 hom(:,1,61,sr) = sums(:,61) ! v"pt"3543 hom(:,1,62,sr) = sums(:,62) ! v*pt*3544 hom(:,1,63,sr) = sums(:,61) + sums(:,62) ! vpt_t3545 hom(:,1,64,sr) = sums(:,64) ! rho_ocean3546 hom(:,1,65,sr) = sums(:,65) ! w"sa"3547 hom(:,1,66,sr) = sums(:,66) ! w*sa*3548 hom(:,1,67,sr) = sums(:,65) + sums(:,66) ! wsa3549 hom(:,1,68,sr) = sums(:,68) ! w*p*3550 hom(:,1,69,sr) = sums(:,69) ! w"e + w"p"/rho_ocean3551 hom(:,1,70,sr) = sums(:,70) ! q*23552 hom(:,1,71,sr) = sums(:,71) ! prho3553 hom(:,1,72,sr) = hyp * 1E-4_wp ! hyp in dbar3554 hom(:,1,73,sr) = sums(:,73) ! nr3555 hom(:,1,74,sr) = sums(:,74) ! qr3556 hom(:,1,75,sr) = sums(:,75) ! qc3557 hom(:,1,76,sr) = sums(:,76) ! prr (precipitation rate)3558 ! 77 is initial density profile3559 hom(:,1,78,sr) = ug ! ug3560 hom(:,1,79,sr) = vg ! vg3561 hom(:,1,80,sr) = w_subs ! w_subs3562 3563 IF ( large_scale_forcing ) THEN3564 hom(:,1,81,sr) = sums_ls_l(:,0) ! td_lsa_lpt3565 hom(:,1,82,sr) = sums_ls_l(:,1) ! td_lsa_q3566 IF ( use_subsidence_tendencies ) THEN3567 hom(:,1,83,sr) = sums_ls_l(:,2) ! td_sub_lpt3568 hom(:,1,84,sr) = sums_ls_l(:,3) ! td_sub_q3569 ELSE3570 hom(:,1,83,sr) = sums(:,83) ! td_sub_lpt3571 hom(:,1,84,sr) = sums(:,84) ! td_sub_q3572 ENDIF3573 hom(:,1,85,sr) = sums(:,85) ! td_nud_lpt3574 hom(:,1,86,sr) = sums(:,86) ! td_nud_q3575 hom(:,1,87,sr) = sums(:,87) ! td_nud_u3576 hom(:,1,88,sr) = sums(:,88) ! td_nud_v3577 END IF3578 3579 hom(:,1,121,sr) = rho_air ! rho_air in Kg/m^33580 hom(:,1,122,sr) = rho_air_zw ! rho_air_zw in Kg/m^33581 3582 hom(:,1,pr_palm,sr) = sums(:,pr_palm)3583 ! u*, w'u', w'v', t* (in last profile)3584 3585 IF ( max_pr_user > 0 ) THEN ! user-defined profiles3586 hom(:,1,pr_palm+1:pr_palm+max_pr_user,sr) = &3587 sums(:,pr_palm+1:pr_palm+max_pr_user)3588 ENDIF3589 3590 !3591 !-- Determine the boundary layer height using two different schemes.3592 !-- First scheme: Starting from the Earth's (Ocean's) surface, look for the3593 !-- first relative minimum (maximum) of the total heat flux.3594 !-- The corresponding height is assumed as the boundary layer height, if it3595 !-- is less than 1.5 times the height where the heat flux becomes negative3596 !-- (positive) for the first time.3597 z_i(1) = 0.0_wp3598 first = .TRUE.3599 3600 IF ( ocean ) THEN3601 DO k = nzt, nzb+1, -13602 IF ( first .AND. hom(k,1,18,sr) < -1.0E-8_wp ) THEN3603 first = .FALSE.3604 height = zw(k)3605 ENDIF3606 IF ( hom(k,1,18,sr) < -1.0E-8_wp .AND. &3607 hom(k-1,1,18,sr) > hom(k,1,18,sr) ) THEN3608 IF ( zw(k) < 1.5_wp * height ) THEN3609 z_i(1) = zw(k)3610 ELSE3611 z_i(1) = height3612 ENDIF3613 EXIT3614 ENDIF3615 ENDDO3616 ELSE3617 DO k = nzb, nzt-13618 IF ( first .AND. hom(k,1,18,sr) < -1.0E-8_wp ) THEN3619 first = .FALSE.3620 height = zw(k)3621 ENDIF3622 IF ( hom(k,1,18,sr) < -1.0E-8_wp .AND. &3623 hom(k+1,1,18,sr) > hom(k,1,18,sr) ) THEN3624 IF ( zw(k) < 1.5_wp * height ) THEN3625 z_i(1) = zw(k)3626 ELSE3627 z_i(1) = height3628 ENDIF3629 EXIT3630 ENDIF3631 ENDDO3632 ENDIF3633 3634 !3635 !-- Second scheme: Gradient scheme from Sullivan et al. (1998), modified3636 !-- by Uhlenbrock(2006). The boundary layer height is the height with the3637 !-- maximal local temperature gradient: starting from the second (the last3638 !-- but one) vertical gridpoint, the local gradient must be at least3639 !-- 0.2K/100m and greater than the next four gradients.3640 !-- WARNING: The threshold value of 0.2K/100m must be adjusted for the3641 !-- ocean case!3642 z_i(2) = 0.0_wp3643 DO k = nzb+1, nzt+13644 dptdz(k) = ( hom(k,1,4,sr) - hom(k-1,1,4,sr) ) * ddzu(k)3645 ENDDO3646 dptdz_threshold = 0.2_wp / 100.0_wp3647 3648 IF ( ocean ) THEN3649 DO k = nzt+1, nzb+5, -13650 IF ( dptdz(k) > dptdz_threshold .AND. &3651 dptdz(k) > dptdz(k-1) .AND. dptdz(k) > dptdz(k-2) .AND. &3652 dptdz(k) > dptdz(k-3) .AND. dptdz(k) > dptdz(k-4) ) THEN3653 z_i(2) = zw(k-1)3654 EXIT3655 ENDIF3656 ENDDO3657 ELSE3658 DO k = nzb+1, nzt-33659 IF ( dptdz(k) > dptdz_threshold .AND. &3660 dptdz(k) > dptdz(k+1) .AND. dptdz(k) > dptdz(k+2) .AND. &3661 dptdz(k) > dptdz(k+3) .AND. dptdz(k) > dptdz(k+4) ) THEN3662 z_i(2) = zw(k-1)3663 EXIT3664 ENDIF3665 ENDDO3666 ENDIF3667 3668 hom(nzb+6,1,pr_palm,sr) = z_i(1)3669 hom(nzb+7,1,pr_palm,sr) = z_i(2)3670 3671 !3672 !-- Determine vertical index which is nearest to the mean surface level3673 !-- height of the respective statistic region3674 DO k = nzb, nzt3675 IF ( zw(k) >= mean_surface_level_height(sr) ) THEN3676 k_surface_level = k3677 EXIT3678 ENDIF3679 ENDDO3680 3681 !3682 !-- Computation of both the characteristic vertical velocity and3683 !-- the characteristic convective boundary layer temperature.3684 !-- The inversion height entering into the equation is defined with respect3685 !-- to the mean surface level height of the respective statistic region.3686 !-- The horizontal average at surface level index + 1 is input for the3687 !-- average temperature.3688 IF ( hom(nzb,1,18,sr) > 1.0E-8_wp .AND. z_i(1) /= 0.0_wp ) THEN3689 hom(nzb+8,1,pr_palm,sr) = &3690 ( g / hom(k_surface_level+1,1,4,sr) * &3691 ( hom(k_surface_level,1,18,sr) / heatflux_output_conversion(nzb) )&3692 * ABS( z_i(1) - mean_surface_level_height(sr) ) )**0.333333333_wp3693 ELSE3694 hom(nzb+8,1,pr_palm,sr) = 0.0_wp3695 ENDIF3696 3697 !3698 !-- Collect the time series quantities3699 ts_value(1,sr) = hom(nzb+4,1,pr_palm,sr) ! E3700 ts_value(2,sr) = hom(nzb+5,1,pr_palm,sr) ! E*3701 ts_value(3,sr) = dt_3d3702 ts_value(4,sr) = hom(nzb,1,pr_palm,sr) ! u*3703 ts_value(5,sr) = hom(nzb+3,1,pr_palm,sr) ! th*3704 ts_value(6,sr) = u_max3705 ts_value(7,sr) = v_max3706 ts_value(8,sr) = w_max3707 ts_value(9,sr) = hom(nzb+10,1,pr_palm,sr) ! new divergence3708 ts_value(10,sr) = hom(nzb+9,1,pr_palm,sr) ! old Divergence3709 ts_value(11,sr) = hom(nzb+6,1,pr_palm,sr) ! z_i(1)3710 ts_value(12,sr) = hom(nzb+7,1,pr_palm,sr) ! z_i(2)3711 ts_value(13,sr) = hom(nzb+8,1,pr_palm,sr) ! w*3712 ts_value(14,sr) = hom(nzb,1,16,sr) ! w'pt' at k=03713 ts_value(15,sr) = hom(nzb+1,1,16,sr) ! w'pt' at k=13714 ts_value(16,sr) = hom(nzb+1,1,18,sr) ! wpt at k=13715 ts_value(17,sr) = hom(nzb,1,4,sr) ! pt(0)3716 ts_value(18,sr) = hom(nzb+1,1,4,sr) ! pt(zp)3717 ts_value(19,sr) = hom(nzb+1,1,pr_palm,sr) ! u'w' at k=03718 ts_value(20,sr) = hom(nzb+2,1,pr_palm,sr) ! v'w' at k=03719 ts_value(21,sr) = hom(nzb,1,48,sr) ! w"q" at k=03720 3721 IF ( .NOT. neutral ) THEN3722 ts_value(22,sr) = hom(nzb,1,114,sr) ! L3723 ELSE3724 ts_value(22,sr) = 1.0E10_wp3725 ENDIF3726 3727 ts_value(23,sr) = hom(nzb+12,1,pr_palm,sr) ! q*3728 3729 !3730 !-- Collect land surface model timeseries3731 IF ( land_surface ) THEN3732 ts_value(dots_soil ,sr) = hom(nzb,1,93,sr) ! ghf_eb3733 ts_value(dots_soil+1,sr) = hom(nzb,1,94,sr) ! shf_eb3734 ts_value(dots_soil+2,sr) = hom(nzb,1,95,sr) ! qsws_eb3735 ts_value(dots_soil+3,sr) = hom(nzb,1,96,sr) ! qsws_liq_eb3736 ts_value(dots_soil+4,sr) = hom(nzb,1,97,sr) ! qsws_soil_eb3737 ts_value(dots_soil+5,sr) = hom(nzb,1,98,sr) ! qsws_veg_eb3738 ts_value(dots_soil+6,sr) = hom(nzb,1,99,sr) ! r_a3739 ts_value(dots_soil+7,sr) = hom(nzb,1,100,sr) ! r_s3740 ENDIF3741 !3742 !-- Collect radiation model timeseries3743 IF ( radiation ) THEN3744 ts_value(dots_rad,sr) = hom(nzb,1,101,sr) ! rad_net3745 ts_value(dots_rad+1,sr) = hom(nzb,1,102,sr) ! rad_lw_in3746 ts_value(dots_rad+2,sr) = hom(nzb,1,103,sr) ! rad_lw_out3747 ts_value(dots_rad+3,sr) = hom(nzb,1,104,sr) ! rad_sw_in3748 ts_value(dots_rad+4,sr) = hom(nzb,1,105,sr) ! rad_sw_out3749 3750 IF ( radiation_scheme == 'rrtmg' ) THEN3751 ts_value(dots_rad+5,sr) = hom(nzb,1,106,sr) ! rrtm_aldif3752 ts_value(dots_rad+6,sr) = hom(nzb,1,107,sr) ! rrtm_aldir3753 ts_value(dots_rad+7,sr) = hom(nzb,1,108,sr) ! rrtm_asdif3754 ts_value(dots_rad+8,sr) = hom(nzb,1,109,sr) ! rrtm_asdir3755 ENDIF3756 3757 ENDIF3758 3759 !3760 !-- Calculate additional statistics provided by the user interface3761 CALL user_statistics( 'time_series', sr, 0 )3762 3763 ENDDO ! loop of the subregions3764 3765 !$acc end data3766 3767 !3768 !-- If required, sum up horizontal averages for subsequent time averaging3769 !-- Do not sum, if flow statistics is called before the first initial time step.3770 IF ( do_sum .AND. simulated_time /= 0.0_wp ) THEN3771 IF ( average_count_pr == 0 ) hom_sum = 0.0_wp3772 hom_sum = hom_sum + hom(:,1,:,:)3773 average_count_pr = average_count_pr + 13774 do_sum = .FALSE.3775 ENDIF3776 3777 !3778 !-- Set flag for other UPs (e.g. output routines, but also buoyancy).3779 !-- This flag is reset after each time step in time_integration.3780 flow_statistics_called = .TRUE.3781 3782 CALL cpu_log( log_point(10), 'flow_statistics', 'stop' )3783 3784 3785 END SUBROUTINE flow_statistics3786 #endif -
TabularUnified palm/trunk/SOURCE/header.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC relatec code removed 23 23 ! 24 24 ! Former revisions: … … 513 513 threads_per_task, pdims(1), pdims(2), TRIM( char1 ) 514 514 ENDIF 515 IF ( num_acc_per_node /= 0 ) WRITE ( io, 117 ) num_acc_per_node516 515 IF ( ( host(1:3) == 'ibm' .OR. host(1:3) == 'nec' .OR. & 517 516 host(1:2) == 'lc' .OR. host(1:3) == 'dec' ) .AND. & … … 528 527 WRITE ( io, 108 ) maximum_parallel_io_streams 529 528 ENDIF 530 #else531 IF ( num_acc_per_node /= 0 ) WRITE ( io, 120 ) num_acc_per_node532 529 #endif 533 530 … … 1915 1912 35X,'independent precursor runs'/ & 1916 1913 35X,42('-')) 1917 117 FORMAT (' Accelerator boards / node: ',I2)1918 1914 #endif 1919 1915 110 FORMAT (/' Numerical Schemes:'/ & … … 1932 1928 ' translation velocity = ',A/ & 1933 1929 ' distance advected ',A,': ',F8.3,' km(x) ',F8.3,' km(y)') 1934 120 FORMAT (' Accelerator boards: ',8X,I2)1935 1930 122 FORMAT (' --> Time differencing scheme: ',A) 1936 1931 123 FORMAT (' --> Rayleigh-Damping active, starts ',A,' z = ',F8.2,' m'/ & -
TabularUnified palm/trunk/SOURCE/init_3d_model.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ------------------ 22 ! 22 ! OpenACC directives removed 23 23 ! 24 24 ! Former revisions: … … 800 800 !-- 3D-array for storing the dissipation, needed for calculating the sgs 801 801 !-- particle velocities 802 IF ( use_sgs_for_particles .OR. wang_kernel .OR. collision_turbulence 803 .OR. num_acc_per_node > 0 )THEN802 IF ( use_sgs_for_particles .OR. wang_kernel .OR. collision_turbulence )& 803 THEN 804 804 ALLOCATE( diss(nzb:nzt+1,nysg:nyng,nxlg:nxrg) ) 805 805 ENDIF … … 1920 1920 CALL location_message( 'calling pressure solver', .FALSE. ) 1921 1921 n_sor = nsor_ini 1922 !$acc data copyin( d, ddzu, ddzw, nzb_s_inner, nzb_u_inner ) &1923 !$acc copyin( nzb_v_inner, nzb_w_inner, p, rflags_s_inner, tend ) &1924 !$acc copyin( weight_pres, weight_substep ) &1925 !$acc copy( tri, tric, u, v, w )1926 1922 CALL pres 1927 !$acc end data1928 1923 n_sor = nsor 1929 1924 CALL location_message( 'finished', .TRUE. ) -
TabularUnified palm/trunk/SOURCE/modules.f90 ¶
r2108 r2118 20 20 ! Current revisions: 21 21 ! ------------------ 22 ! 22 ! -acc_rank, background_communication, i_left, i_right, j_south, j_north, 23 ! num_acc_per_node, on_device 23 24 ! 24 25 ! Former revisions: … … 1142 1143 LOGICAL :: nudging = .FALSE. !< 1143 1144 LOGICAL :: ocean = .FALSE. !< 1144 LOGICAL :: on_device = .FALSE. !<1145 1145 LOGICAL :: outflow_l = .FALSE. !< 1146 1146 LOGICAL :: outflow_n = .FALSE. !< … … 1557 1557 USE kinds 1558 1558 1559 INTEGER(iwp) :: i_left !<1560 INTEGER(iwp) :: i_right !<1561 INTEGER(iwp) :: j_north !<1562 INTEGER(iwp) :: j_south !<1563 1559 INTEGER(iwp) :: nbgp = 3 !< 1564 1560 INTEGER(iwp) :: ngp_sums !< … … 1801 1797 CHARACTER(LEN=7) :: myid_char = '' 1802 1798 1803 INTEGER(iwp) :: acc_rank !<1804 1799 INTEGER(iwp) :: comm1dx !< 1805 1800 INTEGER(iwp) :: comm1dy !< … … 1824 1819 INTEGER(iwp) :: numprocs = 1 !< 1825 1820 INTEGER(iwp) :: numprocs_previous_run = -1 !< 1826 INTEGER(iwp) :: num_acc_per_node = 0 !<1827 1821 INTEGER(iwp) :: pleft !< 1828 1822 INTEGER(iwp) :: pnorth !< … … 1848 1842 INTEGER(iwp), DIMENSION(:,:), ALLOCATABLE :: hor_index_bounds_previous_run !< 1849 1843 1850 LOGICAL :: background_communication =.FALSE. !<1851 1844 LOGICAL :: collective_wait = .FALSE. !< 1852 1845 LOGICAL :: sendrecv_in_background = .FALSE. !< -
TabularUnified palm/trunk/SOURCE/palm.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC directives and related code removed 23 23 ! 24 24 ! Former revisions: … … 213 213 ONLY: usm_write_restart_data 214 214 215 #if defined( __openacc )216 USE OPENACC217 #endif218 219 215 IMPLICIT NONE 220 216 … … 226 222 INTEGER(iwp) :: i !< 227 223 INTEGER(iwp) :: myid_openmpi !< OpenMPI local rank for CUDA aware MPI 228 #if defined( __openacc )229 REAL(wp), DIMENSION(100) :: acc_dum !<230 #endif231 224 232 225 version = 'PALM 4.0' … … 265 258 ENDIF 266 259 #endif 267 268 #if defined( __openacc )269 !270 !-- Get the local MPI rank in case of CUDA aware OpenMPI. Important, if there271 !-- is more than one accelerator board on the node272 CALL GET_ENVIRONMENT_VARIABLE('OMPI_COMM_WORLD_LOCAL_RANK', &273 VALUE=env_string, STATUS=env_stat )274 READ( env_string, '(I1)' ) myid_openmpi275 PRINT*, '### local_rank = ', myid_openmpi, ' status=',env_stat276 !277 !-- Get the number of accelerator boards per node and assign the MPI processes278 !-- to these boards279 PRINT*, '*** ACC_DEVICE_NVIDIA = ', ACC_DEVICE_NVIDIA280 num_acc_per_node = ACC_GET_NUM_DEVICES( ACC_DEVICE_NVIDIA )281 IF ( numprocs == 1 .AND. num_acc_per_node > 0 ) num_acc_per_node = 1282 PRINT*, '*** myid = ', myid_openmpi, ' num_acc_per_node = ', num_acc_per_node283 acc_rank = MOD( myid_openmpi, num_acc_per_node )284 CALL ACC_SET_DEVICE_NUM ( acc_rank, ACC_DEVICE_NVIDIA )285 !286 !-- Test output (to be removed later)287 WRITE (*,'(A,I6,A,I3,A,I3,A,I3)') '*** Connect MPI-Task ', myid_openmpi, &288 ' to CPU ', acc_rank, ' Devices: ', &289 num_acc_per_node, ' connected to:', &290 ACC_GET_DEVICE_NUM( ACC_DEVICE_NVIDIA )291 #endif292 293 !294 !-- Ensure that OpenACC first attaches the GPU devices by copying a dummy data295 !-- region296 !$acc data copyin( acc_dum )297 260 298 261 ! … … 422 385 ENDIF 423 386 424 !425 !-- Declare and initialize variables in the accelerator memory with their426 !-- host values427 !$acc data copyin( d, diss, e, e_p, kh, km, p, pt, pt_p, q, ql, tend, te_m, tpt_m, tu_m, tv_m, tw_m, u, u_p, v, vpt, v_p, w, w_p ) &428 !$acc copyin( tri, tric, dzu, ddzu, ddzw, dd2zu, l_grid, l_wall, ptdf_x, ptdf_y, pt_init, rdf, rdf_sc, ref_state, ug, u_init, vg, v_init, zu, zw ) &429 !$acc copyin( hom, ol, pt1, qs, qsws, qswst, qv1, rif_wall, shf, ts, tswst, us, usws, uswst, uv_total, vsws, vswst, z0, z0h ) &430 !$acc copyin( fxm, fxp, fym, fyp, fwxm, fwxp, fwym, fwyp, nzb_diff_s_inner, nzb_diff_s_outer, nzb_diff_u ) &431 !$acc copyin( nzb_diff_v, nzb_s_inner, nzb_s_outer, nzb_u_inner ) &432 !$acc copyin( nzb_u_outer, nzb_v_inner, nzb_v_outer, nzb_w_inner ) &433 !$acc copyin( nzb_w_outer, rflags_invers, rflags_s_inner, rmask, wall_heatflux, wall_e_x, wall_e_y, wall_u, wall_v, wall_w_x, wall_w_y, wall_flags_0, wall_flags_00 ) &434 !$acc copyin( ngp_2dh, ngp_2dh_s_inner ) &435 !$acc copyin( weight_pres, weight_substep )436 387 ! 437 388 !-- Integration of the model equations using timestep-scheme … … 513 464 514 465 ! 515 !-- Close the OpenACC dummy data region516 !$acc end data517 !$acc end data518 519 !520 466 !-- Take final CPU-time for CPU-time analysis 521 467 CALL cpu_log( log_point(1), 'total', 'stop' ) -
TabularUnified palm/trunk/SOURCE/parin.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! -background_communication from inipar 23 23 ! 24 24 ! Former revisions: … … 319 319 320 320 321 NAMELIST /inipar/ alpha_surface, approximation, & 322 background_communication, bc_e_b, bc_lr, & 321 NAMELIST /inipar/ alpha_surface, approximation, bc_e_b, bc_lr, & 323 322 bc_ns, bc_p_b, bc_p_t, bc_pt_b, bc_pt_t, bc_q_b, & 324 323 bc_q_t,bc_s_b, bc_s_t, bc_sa_t, bc_uv_b, bc_uv_t, & -
TabularUnified palm/trunk/SOURCE/poisfft_mod.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC directives and related code removed 23 23 ! 24 24 ! Former revisions: … … 242 242 REAL(wp), DIMENSION(1:nz,nys:nyn,nxl:nxr) :: ar !< 243 243 REAL(wp), DIMENSION(nys:nyn,nxl:nxr,1:nz) :: ar_inv !< 244 !$acc declare create( ar_inv )245 244 246 245 REAL(wp), DIMENSION(:,:,:), ALLOCATABLE :: ar1 !< … … 257 256 ! 258 257 !-- Two-dimensional Fourier Transformation in x- and y-direction. 259 IF ( pdims(2) == 1 .AND. pdims(1) > 1 .AND. num_acc_per_node == 0 ) & 260 THEN 258 IF ( pdims(2) == 1 .AND. pdims(1) > 1 ) THEN 261 259 262 260 ! … … 273 271 CALL tr_xy_ffty( ar, ar ) 274 272 275 ELSEIF ( pdims(1) == 1 .AND. pdims(2) > 1 .AND. num_acc_per_node == 0 ) & 276 THEN 273 ELSEIF ( pdims(1) == 1 .AND. pdims(2) > 1 ) THEN 277 274 278 275 ! … … 300 297 301 298 CALL cpu_log( log_point_s(4), 'fft_x', 'start' ) 302 IF ( fft_method /= 'system-specific' ) THEN303 !$acc update host( ar )304 ENDIF305 299 CALL fft_x( ar, 'forward' ) 306 IF ( fft_method /= 'system-specific' ) THEN307 !$acc update device( ar )308 ENDIF309 300 CALL cpu_log( log_point_s(4), 'fft_x', 'pause' ) 310 301 … … 317 308 318 309 CALL cpu_log( log_point_s(7), 'fft_y', 'start' ) 319 IF ( fft_method /= 'system-specific' ) THEN320 !$acc update host( ar )321 ENDIF322 310 CALL fft_y( ar, 'forward', ar_tr = ar, & 323 311 nxl_y_bound = nxl_y, nxr_y_bound = nxr_y, & 324 312 nxl_y_l = nxl_y, nxr_y_l = nxr_y ) 325 IF ( fft_method /= 'system-specific' ) THEN326 !$acc update device( ar )327 ENDIF328 313 CALL cpu_log( log_point_s(7), 'fft_y', 'pause' ) 329 314 … … 350 335 351 336 CALL cpu_log( log_point_s(7), 'fft_y', 'continue' ) 352 IF ( fft_method /= 'system-specific' ) THEN353 !$acc update host( ar )354 ENDIF355 337 CALL fft_y( ar, 'backward', ar_tr = ar, & 356 338 nxl_y_bound = nxl_y, nxr_y_bound = nxr_y, & 357 339 nxl_y_l = nxl_y, nxr_y_l = nxr_y ) 358 IF ( fft_method /= 'system-specific' ) THEN359 !$acc update device( ar )360 ENDIF361 340 CALL cpu_log( log_point_s(7), 'fft_y', 'stop' ) 362 341 … … 369 348 370 349 CALL cpu_log( log_point_s(4), 'fft_x', 'continue' ) 371 IF ( fft_method /= 'system-specific' ) THEN372 !$acc update host( ar )373 ENDIF374 350 CALL fft_x( ar, 'backward' ) 375 IF ( fft_method /= 'system-specific' ) THEN376 !$acc update device( ar )377 ENDIF378 351 CALL cpu_log( log_point_s(4), 'fft_x', 'stop' ) 379 352 -
TabularUnified palm/trunk/SOURCE/pres.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ------------------ 22 ! 22 ! OpenACC directives and related code removed 23 23 ! 24 24 ! Former revisions: … … 141 141 gathered_size, ibc_p_b, ibc_p_t, intermediate_timestep_count, & 142 142 intermediate_timestep_count_max, mg_switch_to_pe0_level, & 143 nest_domain, o n_device, outflow_l, outflow_n, outflow_r,&143 nest_domain, outflow_l, outflow_n, outflow_r, & 144 144 outflow_s, psolver, subdomain_size, topography, volume_flow, & 145 145 volume_flow_area, volume_flow_initial … … 386 386 ELSE 387 387 !$OMP PARALLEL DO SCHEDULE( STATIC ) 388 !$acc kernels present( d )389 388 DO i = nxl, nxr 390 389 DO j = nys, nyn … … 394 393 ENDDO 395 394 ENDDO 396 !$acc end kernels397 395 ENDIF 398 396 … … 430 428 !$OMP PARALLEL PRIVATE (i,j,k) 431 429 !$OMP DO SCHEDULE( STATIC ) 432 !$acc kernels present( d, ddzw, rflags_s_inner, u, v, w )433 !$acc loop collapse( 3 )434 430 DO i = nxl, nxr 435 431 DO j = nys, nyn … … 443 439 ENDDO 444 440 ENDDO 445 !$acc end kernels446 441 !$OMP END PARALLEL 447 442 … … 453 448 !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum) 454 449 !$OMP DO SCHEDULE( STATIC ) 455 !$acc parallel loop collapse(3) present( d ) reduction(+:threadsum)456 450 DO i = nxl, nxr 457 451 DO j = nys, nyn … … 461 455 ENDDO 462 456 ENDDO 463 !$acc end parallel loop464 457 localsum = localsum + threadsum * dt_3d * weight_pres_l 465 458 !$OMP END PARALLEL … … 489 482 !-- z-direction 490 483 !$OMP PARALLEL DO 491 !$acc kernels present( d, tend )492 484 DO i = nxl, nxr 493 485 DO j = nys, nyn … … 497 489 ENDDO 498 490 ENDDO 499 !$acc end kernels500 491 501 492 ! … … 507 498 !-- Neumann (dp/dz = 0) 508 499 !$OMP PARALLEL DO 509 !$acc kernels present( nzb_s_inner, tend )510 500 DO i = nxlg, nxrg 511 501 DO j = nysg, nyng … … 513 503 ENDDO 514 504 ENDDO 515 !$acc end kernels516 505 517 506 ELSE … … 519 508 !-- Dirichlet 520 509 !$OMP PARALLEL DO 521 !$acc kernels present( tend )522 510 DO i = nxlg, nxrg 523 511 DO j = nysg, nyng … … 525 513 ENDDO 526 514 ENDDO 527 !$acc end kernels528 515 529 516 ENDIF … … 535 522 !-- Neumann 536 523 !$OMP PARALLEL DO 537 !$acc kernels present( tend )538 524 DO i = nxlg, nxrg 539 525 DO j = nysg, nyng … … 541 527 ENDDO 542 528 ENDDO 543 !$acc end kernels544 529 545 530 ELSE … … 547 532 !-- Dirichlet 548 533 !$OMP PARALLEL DO 549 !$acc kernels present( tend )550 534 DO i = nxlg, nxrg 551 535 DO j = nysg, nyng … … 553 537 ENDDO 554 538 ENDDO 555 !$acc end kernels556 539 557 540 ENDIF … … 559 542 ! 560 543 !-- Exchange boundaries for p 561 IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs562 on_device = .TRUE. ! to be removed after complete porting563 ELSE ! of ghost point exchange564 !$acc update host( tend )565 ENDIF566 544 CALL exchange_horiz( tend, nbgp ) 567 IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs568 on_device = .FALSE. ! to be removed after complete porting569 ELSE ! of ghost point exchange570 !$acc update device( tend )571 ENDIF572 545 573 546 ELSEIF ( psolver == 'sor' ) THEN … … 628 601 !$OMP PARALLEL PRIVATE (i,j,k) 629 602 !$OMP DO 630 !$acc kernels present( p, tend, weight_substep_l )631 !$acc loop independent632 603 DO i = nxl-1, nxr+1 633 !$acc loop independent634 604 DO j = nys-1, nyn+1 635 !$acc loop independent636 605 DO k = nzb, nzt+1 637 606 p(k,j,i) = tend(k,j,i) * & … … 640 609 ENDDO 641 610 ENDDO 642 !$acc end kernels643 611 !$OMP END PARALLEL 644 612 … … 646 614 !$OMP PARALLEL PRIVATE (i,j,k) 647 615 !$OMP DO 648 !$acc kernels present( p, tend, weight_substep_l )649 !$acc loop independent650 616 DO i = nxl-1, nxr+1 651 !$acc loop independent652 617 DO j = nys-1, nyn+1 653 !$acc loop independent654 618 DO k = nzb, nzt+1 655 619 p(k,j,i) = p(k,j,i) + tend(k,j,i) * & … … 658 622 ENDDO 659 623 ENDDO 660 !$acc end kernels661 624 !$OMP END PARALLEL 662 625 … … 677 640 !$OMP PARALLEL PRIVATE (i,j,k) 678 641 !$OMP DO 679 !$acc kernels present( ddzu, nzb_u_inner, nzb_v_inner, nzb_w_inner, tend, u, v, w )680 !$acc loop independent681 642 DO i = nxl, nxr 682 !$acc loop independent683 643 DO j = nys, nyn 684 !$acc loop independent 644 685 645 DO k = 1, nzt 686 646 IF ( k > nzb_w_inner(j,i) ) THEN … … 690 650 ENDIF 691 651 ENDDO 692 !$acc loop independent 652 693 653 DO k = 1, nzt 694 654 IF ( k > nzb_u_inner(j,i) ) THEN … … 698 658 ENDIF 699 659 ENDDO 700 !$acc loop independent 660 701 661 DO k = 1, nzt 702 662 IF ( k > nzb_v_inner(j,i) ) THEN … … 709 669 ENDDO 710 670 ENDDO 711 !$acc end kernels712 671 !$OMP END PARALLEL 713 672 … … 780 739 ! 781 740 !-- Exchange of boundaries for the velocities 782 IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs783 on_device = .TRUE. ! to be removed after complete porting784 ELSE ! of ghost point exchange785 !$acc update host( u, v, w )786 ENDIF787 741 CALL exchange_horiz( u, nbgp ) 788 742 CALL exchange_horiz( v, nbgp ) 789 743 CALL exchange_horiz( w, nbgp ) 790 IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs791 on_device = .FALSE. ! to be removed after complete porting792 ELSE ! of ghost point exchange793 !$acc update device( u, v, w )794 ENDIF795 744 796 745 ! … … 829 778 #else 830 779 !$OMP DO SCHEDULE( STATIC ) 831 !$acc kernels present( d, ddzw, rflags_s_inner, u, v, w )832 !$acc loop collapse( 3 )833 780 DO i = nxl, nxr 834 781 DO j = nys, nyn … … 842 789 ENDDO 843 790 ENDDO 844 !$acc end kernels845 791 ! 846 792 !-- Compute possible PE-sum of divergences for flow_statistics 847 793 !$OMP DO SCHEDULE( STATIC ) 848 !$acc parallel loop collapse(3) present( d ) reduction(+:threadsum)849 794 DO i = nxl, nxr 850 795 DO j = nys, nyn … … 854 799 ENDDO 855 800 ENDDO 856 !$acc end parallel loop857 801 #endif 858 802 -
TabularUnified palm/trunk/SOURCE/production_e.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC version of subroutine removed 23 23 ! 24 24 ! Former revisions: … … 103 103 104 104 USE wall_fluxes_mod, & 105 ONLY: wall_fluxes_e , wall_fluxes_e_acc105 ONLY: wall_fluxes_e 106 106 107 107 USE kinds 108 108 109 109 PRIVATE 110 PUBLIC production_e, production_e_ acc, production_e_init110 PUBLIC production_e, production_e_init 111 111 112 112 LOGICAL, SAVE :: first_call = .TRUE. !< … … 120 120 END INTERFACE production_e 121 121 122 INTERFACE production_e_acc123 MODULE PROCEDURE production_e_acc124 END INTERFACE production_e_acc125 126 122 INTERFACE production_e_init 127 123 MODULE PROCEDURE production_e_init … … 740 736 ! Description: 741 737 ! ------------ 742 !> Call for all grid points - accelerator version743 !------------------------------------------------------------------------------!744 SUBROUTINE production_e_acc745 746 USE arrays_3d, &747 ONLY: ddzw, dd2zu, kh, km, pt, q, ql, qsws, qswst, rho_ocean, shf, &748 tend, tswst, u, v, vpt, w749 750 USE cloud_parameters, &751 ONLY: l_d_cp, l_d_r, pt_d_t, t_d_pt752 753 USE control_parameters, &754 ONLY: cloud_droplets, cloud_physics, constant_flux_layer, g, &755 humidity, kappa, neutral, ocean, pt_reference, &756 rho_reference, topography, use_single_reference_value, &757 use_surface_fluxes, use_top_fluxes758 759 USE grid_variables, &760 ONLY: ddx, dx, ddy, dy, wall_e_x, wall_e_y761 762 USE indices, &763 ONLY: i_left, i_right, j_north, j_south, nxl, nxr, nys, nyn, nzb, &764 nzb_diff_s_inner, nzb_diff_s_outer, nzb_s_inner, nzt, &765 nzt_diff766 767 IMPLICIT NONE768 769 INTEGER(iwp) :: i !<770 INTEGER(iwp) :: j !<771 INTEGER(iwp) :: k !<772 773 REAL(wp) :: def !<774 REAL(wp) :: dudx !<775 REAL(wp) :: dudy !<776 REAL(wp) :: dudz !<777 REAL(wp) :: dvdx !<778 REAL(wp) :: dvdy !<779 REAL(wp) :: dvdz !<780 REAL(wp) :: dwdx !<781 REAL(wp) :: dwdy !<782 REAL(wp) :: dwdz !<783 REAL(wp) :: k1 !<784 REAL(wp) :: k2 !<785 REAL(wp) :: km_neutral !<786 REAL(wp) :: theta !<787 REAL(wp) :: temp !<788 789 REAL(wp), DIMENSION(nzb:nzt+1,nys:nyn,nxl:nxr) :: usvs !<790 REAL(wp), DIMENSION(nzb:nzt+1,nys:nyn,nxl:nxr) :: vsus !<791 REAL(wp), DIMENSION(nzb:nzt+1,nys:nyn,nxl:nxr) :: wsus !<792 REAL(wp), DIMENSION(nzb:nzt+1,nys:nyn,nxl:nxr) :: wsvs !<793 !$acc declare create ( usvs, vsus, wsus, wsvs )794 795 !796 !-- First calculate horizontal momentum flux u'v', w'v', v'u', w'u' at797 !-- vertical walls, if neccessary798 !-- CAUTION: results are slightly different from the ij-version!!799 !-- ij-version should be called further below within the ij-loops!!800 IF ( topography /= 'flat' ) THEN801 CALL wall_fluxes_e_acc( usvs, 1.0_wp, 0.0_wp, 0.0_wp, 0.0_wp, wall_e_y )802 CALL wall_fluxes_e_acc( wsvs, 0.0_wp, 0.0_wp, 1.0_wp, 0.0_wp, wall_e_y )803 CALL wall_fluxes_e_acc( vsus, 0.0_wp, 1.0_wp, 0.0_wp, 0.0_wp, wall_e_x )804 CALL wall_fluxes_e_acc( wsus, 0.0_wp, 0.0_wp, 0.0_wp, 1.0_wp, wall_e_x )805 ENDIF806 807 808 !809 !-- Calculate TKE production by shear810 !$acc kernels present( ddzw, dd2zu, kh, km, nzb_diff_s_inner, nzb_diff_s_outer ) &811 !$acc present( nzb_s_inner, pt, q, ql, qsws, qswst, rho_ocean ) &812 !$acc present( shf, tend, tswst, u, v, vpt, w, wall_e_x, wall_e_y ) &813 !$acc copyin( u_0, v_0 )814 DO i = i_left, i_right815 DO j = j_south, j_north816 DO k = 1, nzt817 818 IF ( k >= nzb_diff_s_outer(j,i) ) THEN819 820 dudx = ( u(k,j,i+1) - u(k,j,i) ) * ddx821 dudy = 0.25_wp * ( u(k,j+1,i) + u(k,j+1,i+1) - &822 u(k,j-1,i) - u(k,j-1,i+1) ) * ddy823 dudz = 0.5_wp * ( u(k+1,j,i) + u(k+1,j,i+1) - &824 u(k-1,j,i) - u(k-1,j,i+1) ) * dd2zu(k)825 826 dvdx = 0.25_wp * ( v(k,j,i+1) + v(k,j+1,i+1) - &827 v(k,j,i-1) - v(k,j+1,i-1) ) * ddx828 dvdy = ( v(k,j+1,i) - v(k,j,i) ) * ddy829 dvdz = 0.5_wp * ( v(k+1,j,i) + v(k+1,j+1,i) - &830 v(k-1,j,i) - v(k-1,j+1,i) ) * dd2zu(k)831 832 dwdx = 0.25_wp * ( w(k,j,i+1) + w(k-1,j,i+1) - &833 w(k,j,i-1) - w(k-1,j,i-1) ) * ddx834 dwdy = 0.25_wp * ( w(k,j+1,i) + w(k-1,j+1,i) - &835 w(k,j-1,i) - w(k-1,j-1,i) ) * ddy836 dwdz = ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k)837 838 def = 2.0_wp * ( dudx**2 + dvdy**2 + dwdz**2 ) + &839 dudy**2 + dvdx**2 + dwdx**2 + dwdy**2 + dudz**2 + &840 dvdz**2 + 2.0_wp * ( dvdx*dudy + dwdx*dudz + dwdy*dvdz )841 842 IF ( def < 0.0_wp ) def = 0.0_wp843 844 tend(k,j,i) = tend(k,j,i) + km(k,j,i) * def845 846 ENDIF847 848 ENDDO849 ENDDO850 ENDDO851 852 IF ( constant_flux_layer ) THEN853 854 !855 !-- Position beneath wall856 !-- (2) - Will allways be executed.857 !-- 'bottom and wall: use u_0,v_0 and wall functions'858 DO i = i_left, i_right859 DO j = j_south, j_north860 DO k = 1, nzt861 862 IF ( ( wall_e_x(j,i) /= 0.0_wp ).OR.( wall_e_y(j,i) /= 0.0_wp ) ) &863 THEN864 865 IF ( k == nzb_diff_s_inner(j,i) - 1 ) THEN866 dudx = ( u(k,j,i+1) - u(k,j,i) ) * ddx867 dudz = 0.5_wp * ( u(k+1,j,i) + u(k+1,j,i+1) - &868 u_0(j,i) - u_0(j,i+1) ) * dd2zu(k)869 dvdy = ( v(k,j+1,i) - v(k,j,i) ) * ddy870 dvdz = 0.5_wp * ( v(k+1,j,i) + v(k+1,j+1,i) - &871 v_0(j,i) - v_0(j+1,i) ) * dd2zu(k)872 dwdz = ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k)873 874 IF ( wall_e_y(j,i) /= 0.0_wp ) THEN875 !876 !-- Inconsistency removed: as the thermal stratification is877 !-- not taken into account for the evaluation of the wall878 !-- fluxes at vertical walls, the eddy viscosity km must not879 !-- be used for the evaluation of the velocity gradients dudy880 !-- and dwdy881 !-- Note: The validity of the new method has not yet been882 !-- shown, as so far no suitable data for a validation883 !-- has been available884 ! CALL wall_fluxes_e( i, j, k, nzb_diff_s_outer(j,i)-2, &885 ! usvs, 1.0_wp, 0.0_wp, 0.0_wp, 0.0_wp )886 ! CALL wall_fluxes_e( i, j, k, nzb_diff_s_outer(j,i)-2, &887 ! wsvs, 0.0_wp, 0.0_wp, 1.0_wp, 0.0_wp )888 km_neutral = kappa * &889 ( usvs(k,j,i)**2 + wsvs(k,j,i)**2 )**0.25_wp * &890 0.5_wp * dy891 IF ( km_neutral > 0.0_wp ) THEN892 dudy = - wall_e_y(j,i) * usvs(k,j,i) / km_neutral893 dwdy = - wall_e_y(j,i) * wsvs(k,j,i) / km_neutral894 ELSE895 dudy = 0.0_wp896 dwdy = 0.0_wp897 ENDIF898 ELSE899 dudy = 0.25_wp * ( u(k,j+1,i) + u(k,j+1,i+1) - &900 u(k,j-1,i) - u(k,j-1,i+1) ) * ddy901 dwdy = 0.25_wp * ( w(k,j+1,i) + w(k-1,j+1,i) - &902 w(k,j-1,i) - w(k-1,j-1,i) ) * ddy903 ENDIF904 905 IF ( wall_e_x(j,i) /= 0.0_wp ) THEN906 !907 !-- Inconsistency removed: as the thermal stratification is908 !-- not taken into account for the evaluation of the wall909 !-- fluxes at vertical walls, the eddy viscosity km must not910 !-- be used for the evaluation of the velocity gradients dvdx911 !-- and dwdx912 !-- Note: The validity of the new method has not yet been913 !-- shown, as so far no suitable data for a validation914 !-- has been available915 ! CALL wall_fluxes_e( i, j, k, nzb_diff_s_outer(j,i)-2, &916 ! vsus, 0.0_wp, 1.0_wp, 0.0_wp, 0.0_wp )917 ! CALL wall_fluxes_e( i, j, k, nzb_diff_s_outer(j,i)-2, &918 ! wsus, 0.0_wp, 0.0_wp, 0.0_wp, 1.0_wp )919 km_neutral = kappa * &920 ( vsus(k,j,i)**2 + wsus(k,j,i)**2 )**0.25_wp * &921 0.5_wp * dx922 IF ( km_neutral > 0.0_wp ) THEN923 dvdx = - wall_e_x(j,i) * vsus(k,j,i) / km_neutral924 dwdx = - wall_e_x(j,i) * wsus(k,j,i) / km_neutral925 ELSE926 dvdx = 0.0_wp927 dwdx = 0.0_wp928 ENDIF929 ELSE930 dvdx = 0.25_wp * ( v(k,j,i+1) + v(k,j+1,i+1) - &931 v(k,j,i-1) - v(k,j+1,i-1) ) * ddx932 dwdx = 0.25_wp * ( w(k,j,i+1) + w(k-1,j,i+1) - &933 w(k,j,i-1) - w(k-1,j,i-1) ) * ddx934 ENDIF935 936 def = 2.0_wp * ( dudx**2 + dvdy**2 + dwdz**2 ) + &937 dudy**2 + dvdx**2 + dwdx**2 + dwdy**2 + dudz**2 + &938 dvdz**2 + 2.0_wp * ( dvdx*dudy + dwdx*dudz + dwdy*dvdz )939 940 IF ( def < 0.0_wp ) def = 0.0_wp941 942 tend(k,j,i) = tend(k,j,i) + km(k,j,i) * def943 944 ENDIF945 !946 !-- (3) - will be executed only, if there is at least one level947 !-- between (2) and (4), i.e. the topography must have a948 !-- minimum height of 2 dz. Wall fluxes for this case have949 !-- already been calculated for (2).950 !-- 'wall only: use wall functions'951 952 IF ( k >= nzb_diff_s_inner(j,i) .AND. &953 k <= nzb_diff_s_outer(j,i)-2 ) THEN954 955 dudx = ( u(k,j,i+1) - u(k,j,i) ) * ddx956 dudz = 0.5_wp * ( u(k+1,j,i) + u(k+1,j,i+1) - &957 u(k-1,j,i) - u(k-1,j,i+1) ) * dd2zu(k)958 dvdy = ( v(k,j+1,i) - v(k,j,i) ) * ddy959 dvdz = 0.5_wp * ( v(k+1,j,i) + v(k+1,j+1,i) - &960 v(k-1,j,i) - v(k-1,j+1,i) ) * dd2zu(k)961 dwdz = ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k)962 963 IF ( wall_e_y(j,i) /= 0.0_wp ) THEN964 !965 !-- Inconsistency removed: as the thermal stratification966 !-- is not taken into account for the evaluation of the967 !-- wall fluxes at vertical walls, the eddy viscosity km968 !-- must not be used for the evaluation of the velocity969 !-- gradients dudy and dwdy970 !-- Note: The validity of the new method has not yet971 !-- been shown, as so far no suitable data for a972 !-- validation has been available973 km_neutral = kappa * ( usvs(k,j,i)**2 + &974 wsvs(k,j,i)**2 )**0.25_wp * 0.5_wp * dy975 IF ( km_neutral > 0.0_wp ) THEN976 dudy = - wall_e_y(j,i) * usvs(k,j,i) / km_neutral977 dwdy = - wall_e_y(j,i) * wsvs(k,j,i) / km_neutral978 ELSE979 dudy = 0.0_wp980 dwdy = 0.0_wp981 ENDIF982 ELSE983 dudy = 0.25_wp * ( u(k,j+1,i) + u(k,j+1,i+1) - &984 u(k,j-1,i) - u(k,j-1,i+1) ) * ddy985 dwdy = 0.25_wp * ( w(k,j+1,i) + w(k-1,j+1,i) - &986 w(k,j-1,i) - w(k-1,j-1,i) ) * ddy987 ENDIF988 989 IF ( wall_e_x(j,i) /= 0.0_wp ) THEN990 !991 !-- Inconsistency removed: as the thermal stratification992 !-- is not taken into account for the evaluation of the993 !-- wall fluxes at vertical walls, the eddy viscosity km994 !-- must not be used for the evaluation of the velocity995 !-- gradients dvdx and dwdx996 !-- Note: The validity of the new method has not yet997 !-- been shown, as so far no suitable data for a998 !-- validation has been available999 km_neutral = kappa * ( vsus(k,j,i)**2 + &1000 wsus(k,j,i)**2 )**0.25_wp * 0.5_wp * dx1001 IF ( km_neutral > 0.0_wp ) THEN1002 dvdx = - wall_e_x(j,i) * vsus(k,j,i) / km_neutral1003 dwdx = - wall_e_x(j,i) * wsus(k,j,i) / km_neutral1004 ELSE1005 dvdx = 0.0_wp1006 dwdx = 0.0_wp1007 ENDIF1008 ELSE1009 dvdx = 0.25_wp * ( v(k,j,i+1) + v(k,j+1,i+1) - &1010 v(k,j,i-1) - v(k,j+1,i-1) ) * ddx1011 dwdx = 0.25_wp * ( w(k,j,i+1) + w(k-1,j,i+1) - &1012 w(k,j,i-1) - w(k-1,j,i-1) ) * ddx1013 ENDIF1014 1015 def = 2.0_wp * ( dudx**2 + dvdy**2 + dwdz**2 ) + &1016 dudy**2 + dvdx**2 + dwdx**2 + dwdy**2 + dudz**2 + &1017 dvdz**2 + 2.0_wp * ( dvdx*dudy + dwdx*dudz + dwdy*dvdz )1018 1019 IF ( def < 0.0_wp ) def = 0.0_wp1020 1021 tend(k,j,i) = tend(k,j,i) + km(k,j,i) * def1022 1023 ENDIF1024 1025 !1026 !-- (4) - will allways be executed.1027 !-- 'special case: free atmosphere' (as for case (0))1028 IF ( k == nzb_diff_s_outer(j,i)-1 ) THEN1029 1030 dudx = ( u(k,j,i+1) - u(k,j,i) ) * ddx1031 dudy = 0.25_wp * ( u(k,j+1,i) + u(k,j+1,i+1) - &1032 u(k,j-1,i) - u(k,j-1,i+1) ) * ddy1033 dudz = 0.5_wp * ( u(k+1,j,i) + u(k+1,j,i+1) - &1034 u(k-1,j,i) - u(k-1,j,i+1) ) * dd2zu(k)1035 1036 dvdx = 0.25_wp * ( v(k,j,i+1) + v(k,j+1,i+1) - &1037 v(k,j,i-1) - v(k,j+1,i-1) ) * ddx1038 dvdy = ( v(k,j+1,i) - v(k,j,i) ) * ddy1039 dvdz = 0.5_wp * ( v(k+1,j,i) + v(k+1,j+1,i) - &1040 v(k-1,j,i) - v(k-1,j+1,i) ) * dd2zu(k)1041 1042 dwdx = 0.25_wp * ( w(k,j,i+1) + w(k-1,j,i+1) - &1043 w(k,j,i-1) - w(k-1,j,i-1) ) * ddx1044 dwdy = 0.25_wp * ( w(k,j+1,i) + w(k-1,j+1,i) - &1045 w(k,j-1,i) - w(k-1,j-1,i) ) * ddy1046 dwdz = ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k)1047 1048 def = 2.0_wp * ( dudx**2 + dvdy**2 + dwdz**2 ) + &1049 dudy**2 + dvdx**2 + dwdx**2 + dwdy**2 + dudz**2 + &1050 dvdz**2 + 2.0_wp * ( dvdx*dudy + dwdx*dudz + dwdy*dvdz )1051 1052 IF ( def < 0.0_wp ) def = 0.0_wp1053 1054 tend(k,j,i) = tend(k,j,i) + km(k,j,i) * def1055 1056 ENDIF1057 1058 ENDIF1059 1060 ENDDO1061 ENDDO1062 ENDDO1063 1064 !1065 !-- Position without adjacent wall1066 !-- (1) - will allways be executed.1067 !-- 'bottom only: use u_0,v_0'1068 DO i = i_left, i_right1069 DO j = j_south, j_north1070 DO k = 1, nzt1071 1072 IF ( ( wall_e_x(j,i) == 0.0_wp ) .AND. ( wall_e_y(j,i) == 0.0_wp ) ) &1073 THEN1074 1075 IF ( k == nzb_diff_s_inner(j,i)-1 ) THEN1076 1077 dudx = ( u(k,j,i+1) - u(k,j,i) ) * ddx1078 dudy = 0.25_wp * ( u(k,j+1,i) + u(k,j+1,i+1) - &1079 u(k,j-1,i) - u(k,j-1,i+1) ) * ddy1080 dudz = 0.5_wp * ( u(k+1,j,i) + u(k+1,j,i+1) - &1081 u_0(j,i) - u_0(j,i+1) ) * dd2zu(k)1082 1083 dvdx = 0.25_wp * ( v(k,j,i+1) + v(k,j+1,i+1) - &1084 v(k,j,i-1) - v(k,j+1,i-1) ) * ddx1085 dvdy = ( v(k,j+1,i) - v(k,j,i) ) * ddy1086 dvdz = 0.5_wp * ( v(k+1,j,i) + v(k+1,j+1,i) - &1087 v_0(j,i) - v_0(j+1,i) ) * dd2zu(k)1088 1089 dwdx = 0.25_wp * ( w(k,j,i+1) + w(k-1,j,i+1) - &1090 w(k,j,i-1) - w(k-1,j,i-1) ) * ddx1091 dwdy = 0.25_wp * ( w(k,j+1,i) + w(k-1,j+1,i) - &1092 w(k,j-1,i) - w(k-1,j-1,i) ) * ddy1093 dwdz = ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k)1094 1095 def = 2.0_wp * ( dudx**2 + dvdy**2 + dwdz**2 ) + &1096 dudy**2 + dvdx**2 + dwdx**2 + dwdy**2 + dudz**2 + &1097 dvdz**2 + 2.0_wp * ( dvdx*dudy + dwdx*dudz + dwdy*dvdz )1098 1099 IF ( def < 0.0_wp ) def = 0.0_wp1100 1101 tend(k,j,i) = tend(k,j,i) + km(k,j,i) * def1102 1103 ENDIF1104 1105 ENDIF1106 1107 ENDDO1108 ENDDO1109 ENDDO1110 1111 ELSEIF ( use_surface_fluxes ) THEN1112 1113 DO i = i_left, i_right1114 DO j = j_south, j_north1115 DO k = 1, nzt1116 1117 IF ( k == nzb_diff_s_outer(j,i)-1 ) THEN1118 1119 dudx = ( u(k,j,i+1) - u(k,j,i) ) * ddx1120 dudy = 0.25_wp * ( u(k,j+1,i) + u(k,j+1,i+1) - &1121 u(k,j-1,i) - u(k,j-1,i+1) ) * ddy1122 dudz = 0.5_wp * ( u(k+1,j,i) + u(k+1,j,i+1) - &1123 u(k-1,j,i) - u(k-1,j,i+1) ) * dd2zu(k)1124 1125 dvdx = 0.25_wp * ( v(k,j,i+1) + v(k,j+1,i+1) - &1126 v(k,j,i-1) - v(k,j+1,i-1) ) * ddx1127 dvdy = ( v(k,j+1,i) - v(k,j,i) ) * ddy1128 dvdz = 0.5_wp * ( v(k+1,j,i) + v(k+1,j+1,i) - &1129 v(k-1,j,i) - v(k-1,j+1,i) ) * dd2zu(k)1130 1131 dwdx = 0.25_wp * ( w(k,j,i+1) + w(k-1,j,i+1) - &1132 w(k,j,i-1) - w(k-1,j,i-1) ) * ddx1133 dwdy = 0.25_wp * ( w(k,j+1,i) + w(k-1,j+1,i) - &1134 w(k,j-1,i) - w(k-1,j-1,i) ) * ddy1135 dwdz = ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k)1136 1137 def = 2.0_wp * ( dudx**2 + dvdy**2 + dwdz**2 ) + &1138 dudy**2 + dvdx**2 + dwdx**2 + dwdy**2 + dudz**2 + &1139 dvdz**2 + 2.0_wp * ( dvdx*dudy + dwdx*dudz + dwdy*dvdz )1140 1141 IF ( def < 0.0_wp ) def = 0.0_wp1142 1143 tend(k,j,i) = tend(k,j,i) + km(k,j,i) * def1144 1145 ENDIF1146 1147 ENDDO1148 ENDDO1149 ENDDO1150 1151 ENDIF1152 1153 !1154 !-- If required, calculate TKE production by buoyancy1155 IF ( .NOT. neutral ) THEN1156 1157 IF ( .NOT. humidity ) THEN1158 1159 IF ( use_single_reference_value ) THEN1160 1161 IF ( ocean ) THEN1162 !1163 !-- So far in the ocean no special treatment of density flux1164 !-- in the bottom and top surface layer1165 DO i = i_left, i_right1166 DO j = j_south, j_north1167 DO k = 1, nzt1168 IF ( k > nzb_s_inner(j,i) ) THEN1169 tend(k,j,i) = tend(k,j,i) + &1170 kh(k,j,i) * g / rho_reference * &1171 ( rho_ocean(k+1,j,i) - rho_ocean(k-1,j,i) ) * &1172 dd2zu(k)1173 ENDIF1174 ENDDO1175 ENDDO1176 ENDDO1177 1178 ELSE1179 1180 DO i = i_left, i_right1181 DO j = j_south, j_north1182 DO k = 1, nzt_diff1183 IF ( k >= nzb_diff_s_inner(j,i) ) THEN1184 tend(k,j,i) = tend(k,j,i) - &1185 kh(k,j,i) * g / pt_reference * &1186 ( pt(k+1,j,i) - pt(k-1,j,i) ) * &1187 dd2zu(k)1188 ENDIF1189 1190 IF ( k == nzb_diff_s_inner(j,i)-1 .AND. &1191 use_surface_fluxes ) THEN1192 tend(k,j,i) = tend(k,j,i) + g / pt_reference * &1193 shf(j,i)1194 ENDIF1195 1196 IF ( k == nzt .AND. use_top_fluxes ) THEN1197 tend(k,j,i) = tend(k,j,i) + g / pt_reference * &1198 tswst(j,i)1199 ENDIF1200 ENDDO1201 ENDDO1202 ENDDO1203 1204 ENDIF1205 1206 ELSE1207 1208 IF ( ocean ) THEN1209 !1210 !-- So far in the ocean no special treatment of density flux1211 !-- in the bottom and top surface layer1212 DO i = i_left, i_right1213 DO j = j_south, j_north1214 DO k = 1, nzt1215 IF ( k > nzb_s_inner(j,i) ) THEN1216 tend(k,j,i) = tend(k,j,i) + &1217 kh(k,j,i) * g / rho_ocean(k,j,i) * &1218 ( rho_ocean(k+1,j,i) - rho_ocean(k-1,j,i) ) * &1219 dd2zu(k)1220 ENDIF1221 ENDDO1222 ENDDO1223 ENDDO1224 1225 ELSE1226 1227 DO i = i_left, i_right1228 DO j = j_south, j_north1229 DO k = 1, nzt_diff1230 IF( k >= nzb_diff_s_inner(j,i) ) THEN1231 tend(k,j,i) = tend(k,j,i) - &1232 kh(k,j,i) * g / pt(k,j,i) * &1233 ( pt(k+1,j,i) - pt(k-1,j,i) ) * &1234 dd2zu(k)1235 ENDIF1236 1237 IF ( k == nzb_diff_s_inner(j,i)-1 .AND. &1238 use_surface_fluxes ) THEN1239 tend(k,j,i) = tend(k,j,i) + g / pt(k,j,i) * &1240 shf(j,i)1241 ENDIF1242 1243 IF ( k == nzt .AND. use_top_fluxes ) THEN1244 tend(k,j,i) = tend(k,j,i) + g / pt(k,j,i) * &1245 tswst(j,i)1246 ENDIF1247 ENDDO1248 ENDDO1249 ENDDO1250 1251 ENDIF1252 1253 ENDIF1254 1255 ELSE1256 !1257 !++ This part gives the PGI compiler problems in the previous loop1258 !++ even without any acc statements????1259 ! STOP '+++ production_e problems with acc-directives'1260 ! !acc loop1261 ! DO i = nxl, nxr1262 ! DO j = nys, nyn1263 ! !acc loop vector1264 ! DO k = 1, nzt_diff1265 !1266 ! IF ( k >= nzb_diff_s_inner(j,i) ) THEN1267 !1268 ! IF ( .NOT. cloud_physics .AND. .NOT. cloud_droplets ) THEN1269 ! k1 = 1.0_wp + 0.61_wp * q(k,j,i)1270 ! k2 = 0.61_wp * pt(k,j,i)1271 ! tend(k,j,i) = tend(k,j,i) - kh(k,j,i) * &1272 ! g / vpt(k,j,i) * &1273 ! ( k1 * ( pt(k+1,j,i)-pt(k-1,j,i) ) + &1274 ! k2 * ( q(k+1,j,i) - q(k-1,j,i) ) &1275 ! ) * dd2zu(k)1276 ! ELSE IF ( cloud_physics ) THEN1277 ! IF ( ql(k,j,i) == 0.0_wp ) THEN1278 ! k1 = 1.0_wp + 0.61_wp * q(k,j,i)1279 ! k2 = 0.61_wp * pt(k,j,i)1280 ! ELSE1281 ! theta = pt(k,j,i) + pt_d_t(k) * l_d_cp * ql(k,j,i)1282 ! temp = theta * t_d_pt(k)1283 ! k1 = ( 1.0_wp - q(k,j,i) + 1.61_wp * &1284 ! ( q(k,j,i) - ql(k,j,i) ) * &1285 ! ( 1.0_wp + 0.622_wp * l_d_r / temp ) ) / &1286 ! ( 1.0_wp + 0.622_wp * l_d_r * l_d_cp * &1287 ! ( q(k,j,i) - ql(k,j,i) ) / ( temp * temp ) )1288 ! k2 = theta * ( l_d_cp / temp * k1 - 1.0_wp )1289 ! ENDIF1290 ! tend(k,j,i) = tend(k,j,i) - kh(k,j,i) * &1291 ! g / vpt(k,j,i) * &1292 ! ( k1 * ( pt(k+1,j,i)-pt(k-1,j,i) ) + &1293 ! k2 * ( q(k+1,j,i) - q(k-1,j,i) ) &1294 ! ) * dd2zu(k)1295 ! ELSE IF ( cloud_droplets ) THEN1296 ! k1 = 1.0_wp + 0.61_wp * q(k,j,i) - ql(k,j,i)1297 ! k2 = 0.61_wp * pt(k,j,i)1298 ! tend(k,j,i) = tend(k,j,i) - &1299 ! kh(k,j,i) * g / vpt(k,j,i) * &1300 ! ( k1 * ( pt(k+1,j,i)- pt(k-1,j,i) ) + &1301 ! k2 * ( q(k+1,j,i) - q(k-1,j,i) ) - &1302 ! pt(k,j,i) * ( ql(k+1,j,i) - &1303 ! ql(k-1,j,i) ) ) * dd2zu(k)1304 ! ENDIF1305 !1306 ! ENDIF1307 !1308 ! ENDDO1309 ! ENDDO1310 ! ENDDO1311 !1312 1313 !!++ Next two loops are probably very inefficiently parallellized1314 !!++ and will require better optimization1315 ! IF ( use_surface_fluxes ) THEN1316 !1317 ! !acc loop1318 ! DO i = nxl, nxr1319 ! DO j = nys, nyn1320 ! !acc loop vector1321 ! DO k = 1, nzt_diff1322 !1323 ! IF ( k == nzb_diff_s_inner(j,i)-1 ) THEN1324 !1325 ! IF ( .NOT. cloud_physics .AND. .NOT. cloud_droplets ) THEN1326 ! k1 = 1.0_wp + 0.61_wp * q(k,j,i)1327 ! k2 = 0.61_wp * pt(k,j,i)1328 ! ELSE IF ( cloud_physics ) THEN1329 ! IF ( ql(k,j,i) == 0.0_wp ) THEN1330 ! k1 = 1.0_wp + 0.61_wp * q(k,j,i)1331 ! k2 = 0.61_wp * pt(k,j,i)1332 ! ELSE1333 ! theta = pt(k,j,i) + pt_d_t(k) * l_d_cp * ql(k,j,i)1334 ! temp = theta * t_d_pt(k)1335 ! k1 = ( 1.0_wp - q(k,j,i) + 1.61_wp * &1336 ! ( q(k,j,i) - ql(k,j,i) ) * &1337 ! ( 1.0_wp + 0.622_wp * l_d_r / temp ) ) /&1338 ! ( 1.0_wp + 0.622_wp * l_d_r * l_d_cp * &1339 ! ( q(k,j,i) - ql(k,j,i) ) / ( temp * temp ) )1340 ! k2 = theta * ( l_d_cp / temp * k1 - 1.0_wp )1341 ! ENDIF1342 ! ELSE IF ( cloud_droplets ) THEN1343 ! k1 = 1.0_wp + 0.61_wp * q(k,j,i) - ql(k,j,i)1344 ! k2 = 0.61_wp * pt(k,j,i)1345 ! ENDIF1346 !1347 ! tend(k,j,i) = tend(k,j,i) + g / vpt(k,j,i) * &1348 ! ( k1* shf(j,i) + k2 * qsws(j,i) )1349 ! ENDIF1350 !1351 ! ENDDO1352 ! ENDDO1353 ! ENDDO1354 !1355 ! ENDIF1356 !1357 ! IF ( use_top_fluxes ) THEN1358 !1359 ! !acc loop1360 ! DO i = nxl, nxr1361 ! DO j = nys, nyn1362 ! !acc loop vector1363 ! DO k = 1, nzt1364 ! IF ( k == nzt ) THEN1365 !1366 ! IF ( .NOT. cloud_physics .AND. .NOT. cloud_droplets ) THEN1367 ! k1 = 1.0_wp + 0.61_wp * q(k,j,i)1368 ! k2 = 0.61_wp * pt(k,j,i)1369 ! ELSE IF ( cloud_physics ) THEN1370 ! IF ( ql(k,j,i) == 0.0_wp ) THEN1371 ! k1 = 1.0_wp + 0.61_wp * q(k,j,i)1372 ! k2 = 0.61_wp * pt(k,j,i)1373 ! ELSE1374 ! theta = pt(k,j,i) + pt_d_t(k) * l_d_cp * ql(k,j,i)1375 ! temp = theta * t_d_pt(k)1376 ! k1 = ( 1.0_wp - q(k,j,i) + 1.61_wp * &1377 ! ( q(k,j,i) - ql(k,j,i) ) * &1378 ! ( 1.0_wp + 0.622_wp * l_d_r / temp ) ) /&1379 ! ( 1.0_wp + 0.622_wp * l_d_r * l_d_cp * &1380 ! ( q(k,j,i) - ql(k,j,i) ) / ( temp * temp ) )1381 ! k2 = theta * ( l_d_cp / temp * k1 - 1.0_wp )1382 ! ENDIF1383 ! ELSE IF ( cloud_droplets ) THEN1384 ! k1 = 1.0_wp + 0.61_wp * q(k,j,i) - ql(k,j,i)1385 ! k2 = 0.61_wp * pt(k,j,i)1386 ! ENDIF1387 !1388 ! tend(k,j,i) = tend(k,j,i) + g / vpt(k,j,i) * &1389 ! ( k1* tswst(j,i) + k2 * qswst(j,i) )1390 !1391 ! ENDIF1392 !1393 ! ENDDO1394 ! ENDDO1395 ! ENDDO1396 !1397 ! ENDIF1398 1399 ENDIF1400 1401 ENDIF1402 !$acc end kernels1403 1404 END SUBROUTINE production_e_acc1405 1406 1407 !------------------------------------------------------------------------------!1408 ! Description:1409 ! ------------1410 738 !> Call for grid point i,j 1411 739 !------------------------------------------------------------------------------! -
TabularUnified palm/trunk/SOURCE/prognostic_equations.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ------------------ 22 ! 22 ! OpenACC version of subroutine removed 23 23 ! 24 24 ! Former revisions: … … 246 246 247 247 USE indices, & 248 ONLY: i_left, i_right, j_north, j_south, nxl, nxlu, nxr, nyn, nys,&249 n ysv, nzb_s_inner, nzb_u_inner, nzb_v_inner, nzb_w_inner, nzt248 ONLY: nxl, nxlu, nxr, nyn, nys, nysv, nzb_s_inner, nzb_u_inner, & 249 nzb_v_inner, nzb_w_inner, nzt 250 250 251 251 USE advec_ws, & 252 ONLY: advec_s_ws, advec_s_ws_acc, advec_u_ws, advec_u_ws_acc, & 253 advec_v_ws, advec_v_ws_acc, advec_w_ws, advec_w_ws_acc 252 ONLY: advec_s_ws, advec_u_ws, advec_v_ws, advec_w_ws 254 253 255 254 USE advec_s_bc_mod, & … … 281 280 282 281 USE buoyancy_mod, & 283 ONLY: buoyancy , buoyancy_acc282 ONLY: buoyancy 284 283 285 284 USE calc_radiation_mod, & … … 287 286 288 287 USE coriolis_mod, & 289 ONLY: coriolis , coriolis_acc288 ONLY: coriolis 290 289 291 290 USE diffusion_e_mod, & 292 ONLY: diffusion_e , diffusion_e_acc291 ONLY: diffusion_e 293 292 294 293 USE diffusion_s_mod, & 295 ONLY: diffusion_s , diffusion_s_acc294 ONLY: diffusion_s 296 295 297 296 USE diffusion_u_mod, & 298 ONLY: diffusion_u , diffusion_u_acc297 ONLY: diffusion_u 299 298 300 299 USE diffusion_v_mod, & 301 ONLY: diffusion_v , diffusion_v_acc300 ONLY: diffusion_v 302 301 303 302 USE diffusion_w_mod, & 304 ONLY: diffusion_w , diffusion_w_acc303 ONLY: diffusion_w 305 304 306 305 USE kinds … … 319 318 320 319 USE production_e_mod, & 321 ONLY: production_e , production_e_acc320 ONLY: production_e 322 321 323 322 USE radiation_model_mod, & … … 342 341 343 342 PRIVATE 344 PUBLIC prognostic_equations_cache, prognostic_equations_vector, & 345 prognostic_equations_acc 343 PUBLIC prognostic_equations_cache, prognostic_equations_vector 346 344 347 345 INTERFACE prognostic_equations_cache … … 352 350 MODULE PROCEDURE prognostic_equations_vector 353 351 END INTERFACE prognostic_equations_vector 354 355 INTERFACE prognostic_equations_acc356 MODULE PROCEDURE prognostic_equations_acc357 END INTERFACE prognostic_equations_acc358 352 359 353 … … 2001 1995 2002 1996 2003 !------------------------------------------------------------------------------!2004 ! Description:2005 ! ------------2006 !> Version for accelerator boards2007 !------------------------------------------------------------------------------!2008 2009 SUBROUTINE prognostic_equations_acc2010 2011 2012 IMPLICIT NONE2013 2014 INTEGER(iwp) :: i !<2015 INTEGER(iwp) :: j !<2016 INTEGER(iwp) :: k !<2017 INTEGER(iwp) :: runge_step !<2018 2019 REAL(wp) :: sbt !<2020 2021 !2022 !-- Set switch for intermediate Runge-Kutta step2023 runge_step = 02024 IF ( timestep_scheme(1:5) == 'runge' ) THEN2025 IF ( intermediate_timestep_count == 1 ) THEN2026 runge_step = 12027 ELSEIF ( intermediate_timestep_count < &2028 intermediate_timestep_count_max ) THEN2029 runge_step = 22030 ENDIF2031 ENDIF2032 2033 !2034 !-- If required, calculate cloud microphysical impacts (two-moment scheme)2035 IF ( cloud_physics .AND. .NOT. microphysics_sat_adjust .AND. &2036 ( intermediate_timestep_count == 1 .OR. &2037 call_microphysics_at_all_substeps ) &2038 ) THEN2039 CALL cpu_log( log_point(51), 'microphysics', 'start' )2040 CALL microphysics_control2041 CALL cpu_log( log_point(51), 'microphysics', 'stop' )2042 ENDIF2043 2044 !2045 !-- u-velocity component2046 !++ Statistics still not completely ported to accelerators2047 !$acc update device( hom, ref_state )2048 CALL cpu_log( log_point(5), 'u-equation', 'start' )2049 2050 IF ( timestep_scheme(1:5) == 'runge' ) THEN2051 IF ( ws_scheme_mom ) THEN2052 CALL advec_u_ws_acc2053 ELSE2054 tend = 0.0_wp ! to be removed later??2055 CALL advec_u_pw2056 ENDIF2057 ELSE2058 CALL advec_u_up2059 ENDIF2060 CALL diffusion_u_acc2061 CALL coriolis_acc( 1 )2062 IF ( sloping_surface .AND. .NOT. neutral ) THEN2063 CALL buoyancy( pt, 1 )2064 ENDIF2065 2066 !2067 !-- Drag by plant canopy2068 IF ( plant_canopy ) CALL pcm_tendency( 1 )2069 2070 !2071 !-- External pressure gradient2072 IF ( dp_external ) THEN2073 DO i = i_left, i_right2074 DO j = j_south, j_north2075 DO k = dp_level_ind_b+1, nzt2076 tend(k,j,i) = tend(k,j,i) - dpdxy(1) * dp_smooth_factor(k)2077 ENDDO2078 ENDDO2079 ENDDO2080 ENDIF2081 2082 !2083 !-- Nudging2084 IF ( nudging ) CALL nudge( simulated_time, 'u' )2085 2086 !2087 !-- Forces by wind turbines2088 IF ( wind_turbine ) CALL wtm_tendencies( 1 )2089 2090 CALL user_actions( 'u-tendency' )2091 2092 !2093 !-- Prognostic equation for u-velocity component2094 !$acc kernels present( nzb_u_inner, rdf, tend, tu_m, u, u_init, u_p )2095 !$acc loop independent2096 DO i = i_left, i_right2097 !$acc loop independent2098 DO j = j_south, j_north2099 !$acc loop independent2100 DO k = 1, nzt2101 IF ( k > nzb_u_inner(j,i) ) THEN2102 u_p(k,j,i) = u(k,j,i) + dt_3d * ( tsc(2) * tend(k,j,i) + &2103 tsc(3) * tu_m(k,j,i) ) &2104 - tsc(5) * rdf(k) * ( u(k,j,i) - u_init(k) )2105 !2106 !-- Tendencies for the next Runge-Kutta step2107 IF ( runge_step == 1 ) THEN2108 tu_m(k,j,i) = tend(k,j,i)2109 ELSEIF ( runge_step == 2 ) THEN2110 tu_m(k,j,i) = -9.5625_wp * tend(k,j,i) + 5.3125_wp * tu_m(k,j,i)2111 ENDIF2112 ENDIF2113 ENDDO2114 ENDDO2115 ENDDO2116 !$acc end kernels2117 2118 CALL cpu_log( log_point(5), 'u-equation', 'stop' )2119 2120 !2121 !-- v-velocity component2122 CALL cpu_log( log_point(6), 'v-equation', 'start' )2123 2124 IF ( timestep_scheme(1:5) == 'runge' ) THEN2125 IF ( ws_scheme_mom ) THEN2126 CALL advec_v_ws_acc2127 ELSE2128 tend = 0.0_wp ! to be removed later??2129 CALL advec_v_pw2130 END IF2131 ELSE2132 CALL advec_v_up2133 ENDIF2134 CALL diffusion_v_acc2135 CALL coriolis_acc( 2 )2136 2137 !2138 !-- Drag by plant canopy2139 IF ( plant_canopy ) CALL pcm_tendency( 2 )2140 2141 !2142 !-- External pressure gradient2143 IF ( dp_external ) THEN2144 DO i = i_left, i_right2145 DO j = j_south, j_north2146 DO k = dp_level_ind_b+1, nzt2147 tend(k,j,i) = tend(k,j,i) - dpdxy(2) * dp_smooth_factor(k)2148 ENDDO2149 ENDDO2150 ENDDO2151 ENDIF2152 2153 !2154 !-- Nudging2155 IF ( nudging ) CALL nudge( simulated_time, 'v' )2156 2157 !2158 !-- Forces by wind turbines2159 IF ( wind_turbine ) CALL wtm_tendencies( 2 )2160 2161 CALL user_actions( 'v-tendency' )2162 2163 !2164 !-- Prognostic equation for v-velocity component2165 !$acc kernels present( nzb_v_inner, rdf, tend, tv_m, v, v_init, v_p )2166 !$acc loop independent2167 DO i = i_left, i_right2168 !$acc loop independent2169 DO j = j_south, j_north2170 !$acc loop independent2171 DO k = 1, nzt2172 IF ( k > nzb_v_inner(j,i) ) THEN2173 v_p(k,j,i) = v(k,j,i) + dt_3d * ( tsc(2) * tend(k,j,i) + &2174 tsc(3) * tv_m(k,j,i) ) &2175 - tsc(5) * rdf(k) * ( v(k,j,i) - v_init(k) )2176 !2177 !-- Tendencies for the next Runge-Kutta step2178 IF ( runge_step == 1 ) THEN2179 tv_m(k,j,i) = tend(k,j,i)2180 ELSEIF ( runge_step == 2 ) THEN2181 tv_m(k,j,i) = -9.5625_wp * tend(k,j,i) + 5.3125_wp * tv_m(k,j,i)2182 ENDIF2183 ENDIF2184 ENDDO2185 ENDDO2186 ENDDO2187 !$acc end kernels2188 2189 CALL cpu_log( log_point(6), 'v-equation', 'stop' )2190 2191 !2192 !-- w-velocity component2193 CALL cpu_log( log_point(7), 'w-equation', 'start' )2194 2195 IF ( timestep_scheme(1:5) == 'runge' ) THEN2196 IF ( ws_scheme_mom ) THEN2197 CALL advec_w_ws_acc2198 ELSE2199 tend = 0.0_wp ! to be removed later??2200 CALL advec_w_pw2201 ENDIF2202 ELSE2203 CALL advec_w_up2204 ENDIF2205 CALL diffusion_w_acc2206 CALL coriolis_acc( 3 )2207 2208 IF ( .NOT. neutral ) THEN2209 IF ( ocean ) THEN2210 CALL buoyancy( rho_ocean, 3 )2211 ELSE2212 IF ( .NOT. humidity ) THEN2213 CALL buoyancy_acc( pt, 3 )2214 ELSE2215 CALL buoyancy( vpt, 3 )2216 ENDIF2217 ENDIF2218 ENDIF2219 2220 !2221 !-- Drag by plant canopy2222 IF ( plant_canopy ) CALL pcm_tendency( 3 )2223 2224 !2225 !-- Forces by wind turbines2226 IF ( wind_turbine ) CALL wtm_tendencies( 3 )2227 2228 CALL user_actions( 'w-tendency' )2229 2230 !2231 !-- Prognostic equation for w-velocity component2232 !$acc kernels present( nzb_w_inner, rdf, tend, tw_m, w, w_p )2233 !$acc loop independent2234 DO i = i_left, i_right2235 !$acc loop independent2236 DO j = j_south, j_north2237 !$acc loop independent2238 DO k = 1, nzt-12239 IF ( k > nzb_w_inner(j,i) ) THEN2240 w_p(k,j,i) = w(k,j,i) + dt_3d * ( tsc(2) * tend(k,j,i) + &2241 tsc(3) * tw_m(k,j,i) ) &2242 - tsc(5) * rdf(k) * w(k,j,i)2243 !2244 !-- Tendencies for the next Runge-Kutta step2245 IF ( runge_step == 1 ) THEN2246 tw_m(k,j,i) = tend(k,j,i)2247 ELSEIF ( runge_step == 2 ) THEN2248 tw_m(k,j,i) = -9.5625_wp * tend(k,j,i) + 5.3125_wp * tw_m(k,j,i)2249 ENDIF2250 ENDIF2251 ENDDO2252 ENDDO2253 ENDDO2254 !$acc end kernels2255 2256 CALL cpu_log( log_point(7), 'w-equation', 'stop' )2257 2258 2259 !2260 !-- If required, compute prognostic equation for potential temperature2261 IF ( .NOT. neutral ) THEN2262 2263 CALL cpu_log( log_point(13), 'pt-equation', 'start' )2264 2265 !2266 !-- pt-tendency terms with communication2267 sbt = tsc(2)2268 IF ( scalar_advec == 'bc-scheme' ) THEN2269 2270 IF ( timestep_scheme(1:5) /= 'runge' ) THEN2271 !2272 !-- Bott-Chlond scheme always uses Euler time step. Thus:2273 sbt = 1.0_wp2274 ENDIF2275 tend = 0.0_wp2276 CALL advec_s_bc( pt, 'pt' )2277 2278 ENDIF2279 2280 !2281 !-- pt-tendency terms with no communication2282 IF ( scalar_advec /= 'bc-scheme' ) THEN2283 tend = 0.0_wp2284 IF ( timestep_scheme(1:5) == 'runge' ) THEN2285 IF ( ws_scheme_sca ) THEN2286 CALL advec_s_ws_acc( pt, 'pt' )2287 ELSE2288 tend = 0.0_wp ! to be removed later??2289 CALL advec_s_pw( pt )2290 ENDIF2291 ELSE2292 CALL advec_s_up( pt )2293 ENDIF2294 ENDIF2295 2296 CALL diffusion_s_acc( pt, shf, tswst, wall_heatflux )2297 2298 !2299 !-- Tendency pt from wall heat flux from urban surface2300 IF ( urban_surface ) THEN2301 CALL usm_wall_heat_flux2302 ENDIF2303 2304 !2305 !-- If required compute heating/cooling due to long wave radiation processes2306 IF ( cloud_top_radiation ) THEN2307 CALL calc_radiation2308 ENDIF2309 2310 !2311 !-- Consideration of heat sources within the plant canopy2312 IF ( plant_canopy .AND. ( cthf /= 0.0_wp ) ) THEN2313 CALL pcm_tendency( 4 )2314 ENDIF2315 2316 !2317 !-- Large scale advection2318 IF ( large_scale_forcing ) THEN2319 CALL ls_advec( simulated_time, 'pt' )2320 ENDIF2321 2322 !2323 !-- Nudging2324 IF ( nudging ) CALL nudge( simulated_time, 'pt' )2325 2326 !2327 !-- If required compute influence of large-scale subsidence/ascent2328 IF ( large_scale_subsidence .AND. &2329 .NOT. use_subsidence_tendencies ) THEN2330 CALL subsidence( tend, pt, pt_init, 2 )2331 ENDIF2332 2333 IF ( radiation .AND. &2334 simulated_time > skip_time_do_radiation ) THEN2335 CALL radiation_tendency ( tend )2336 ENDIF2337 2338 CALL user_actions( 'pt-tendency' )2339 2340 !2341 !-- Prognostic equation for potential temperature2342 !$acc kernels present( nzb_s_inner, rdf_sc, ptdf_x, ptdf_y, pt_init ) &2343 !$acc present( tend, tpt_m, pt, pt_p )2344 !$acc loop independent2345 DO i = i_left, i_right2346 !$acc loop independent2347 DO j = j_south, j_north2348 !$acc loop independent2349 DO k = 1, nzt2350 IF ( k > nzb_s_inner(j,i) ) THEN2351 pt_p(k,j,i) = pt(k,j,i) + dt_3d * ( sbt * tend(k,j,i) + &2352 tsc(3) * tpt_m(k,j,i) ) &2353 - tsc(5) * ( pt(k,j,i) - pt_init(k) ) *&2354 ( rdf_sc(k) + ptdf_x(i) + ptdf_y(j) )2355 !2356 !-- Tendencies for the next Runge-Kutta step2357 IF ( runge_step == 1 ) THEN2358 tpt_m(k,j,i) = tend(k,j,i)2359 ELSEIF ( runge_step == 2 ) THEN2360 tpt_m(k,j,i) = -9.5625_wp * tend(k,j,i) + 5.3125_wp * tpt_m(k,j,i)2361 ENDIF2362 ENDIF2363 ENDDO2364 ENDDO2365 ENDDO2366 !$acc end kernels2367 2368 CALL cpu_log( log_point(13), 'pt-equation', 'stop' )2369 2370 ENDIF2371 2372 !2373 !-- If required, compute prognostic equation for salinity2374 IF ( ocean ) THEN2375 2376 CALL cpu_log( log_point(37), 'sa-equation', 'start' )2377 2378 !2379 !-- sa-tendency terms with communication2380 sbt = tsc(2)2381 IF ( scalar_advec == 'bc-scheme' ) THEN2382 2383 IF ( timestep_scheme(1:5) /= 'runge' ) THEN2384 !2385 !-- Bott-Chlond scheme always uses Euler time step. Thus:2386 sbt = 1.0_wp2387 ENDIF2388 tend = 0.0_wp2389 CALL advec_s_bc( sa, 'sa' )2390 2391 ENDIF2392 2393 !2394 !-- sa-tendency terms with no communication2395 IF ( scalar_advec /= 'bc-scheme' ) THEN2396 tend = 0.0_wp2397 IF ( timestep_scheme(1:5) == 'runge' ) THEN2398 IF ( ws_scheme_sca ) THEN2399 CALL advec_s_ws( sa, 'sa' )2400 ELSE2401 CALL advec_s_pw( sa )2402 ENDIF2403 ELSE2404 CALL advec_s_up( sa )2405 ENDIF2406 ENDIF2407 2408 CALL diffusion_s( sa, saswsb, saswst, wall_salinityflux )2409 2410 CALL user_actions( 'sa-tendency' )2411 2412 !2413 !-- Prognostic equation for salinity2414 DO i = i_left, i_right2415 DO j = j_south, j_north2416 DO k = nzb_s_inner(j,i)+1, nzt2417 sa_p(k,j,i) = sa(k,j,i) + dt_3d * ( sbt * tend(k,j,i) + &2418 tsc(3) * tsa_m(k,j,i) ) &2419 - tsc(5) * rdf_sc(k) * &2420 ( sa(k,j,i) - sa_init(k) )2421 IF ( sa_p(k,j,i) < 0.0_wp ) sa_p(k,j,i) = 0.1_wp * sa(k,j,i)2422 !2423 !-- Tendencies for the next Runge-Kutta step2424 IF ( runge_step == 1 ) THEN2425 tsa_m(k,j,i) = tend(k,j,i)2426 ELSEIF ( runge_step == 2 ) THEN2427 tsa_m(k,j,i) = -9.5625_wp * tend(k,j,i) + 5.3125_wp * tsa_m(k,j,i)2428 ENDIF2429 ENDDO2430 ENDDO2431 ENDDO2432 2433 CALL cpu_log( log_point(37), 'sa-equation', 'stop' )2434 2435 !2436 !-- Calculate density by the equation of state for seawater2437 CALL cpu_log( log_point(38), 'eqns-seawater', 'start' )2438 CALL eqn_state_seawater2439 CALL cpu_log( log_point(38), 'eqns-seawater', 'stop' )2440 2441 ENDIF2442 2443 !2444 !-- If required, compute prognostic equation for total water content2445 IF ( humidity ) THEN2446 2447 CALL cpu_log( log_point(29), 'q-equation', 'start' )2448 2449 !2450 !-- Scalar/q-tendency terms with communication2451 sbt = tsc(2)2452 IF ( scalar_advec == 'bc-scheme' ) THEN2453 2454 IF ( timestep_scheme(1:5) /= 'runge' ) THEN2455 !2456 !-- Bott-Chlond scheme always uses Euler time step. Thus:2457 sbt = 1.0_wp2458 ENDIF2459 tend = 0.0_wp2460 CALL advec_s_bc( q, 'q' )2461 2462 ENDIF2463 2464 !2465 !-- Scalar/q-tendency terms with no communication2466 IF ( scalar_advec /= 'bc-scheme' ) THEN2467 tend = 0.0_wp2468 IF ( timestep_scheme(1:5) == 'runge' ) THEN2469 IF ( ws_scheme_sca ) THEN2470 CALL advec_s_ws( q, 'q' )2471 ELSE2472 CALL advec_s_pw( q )2473 ENDIF2474 ELSE2475 CALL advec_s_up( q )2476 ENDIF2477 ENDIF2478 2479 CALL diffusion_s( q, qsws, qswst, wall_qflux )2480 2481 !2482 !-- Sink or source of scalar concentration due to canopy elements2483 IF ( plant_canopy ) CALL pcm_tendency( 5 )2484 2485 !2486 !-- Large scale advection2487 IF ( large_scale_forcing ) THEN2488 CALL ls_advec( simulated_time, 'q' )2489 ENDIF2490 2491 !2492 !-- Nudging2493 IF ( nudging ) CALL nudge( simulated_time, 'q' )2494 2495 !2496 !-- If required compute influence of large-scale subsidence/ascent2497 IF ( large_scale_subsidence .AND. &2498 .NOT. use_subsidence_tendencies ) THEN2499 CALL subsidence( tend, q, q_init, 3 )2500 ENDIF2501 2502 CALL user_actions( 'q-tendency' )2503 2504 !2505 !-- Prognostic equation for total water content / scalar2506 DO i = i_left, i_right2507 DO j = j_south, j_north2508 DO k = nzb_s_inner(j,i)+1, nzt2509 q_p(k,j,i) = q(k,j,i) + dt_3d * ( sbt * tend(k,j,i) + &2510 tsc(3) * tq_m(k,j,i) ) &2511 - tsc(5) * rdf_sc(k) * &2512 ( q(k,j,i) - q_init(k) )2513 IF ( q_p(k,j,i) < 0.0_wp ) q_p(k,j,i) = 0.1_wp * q(k,j,i)2514 !2515 !-- Tendencies for the next Runge-Kutta step2516 IF ( runge_step == 1 ) THEN2517 tq_m(k,j,i) = tend(k,j,i)2518 ELSEIF ( runge_step == 2 ) THEN2519 tq_m(k,j,i) = -9.5625_wp * tend(k,j,i) + 5.3125_wp * tq_m(k,j,i)2520 ENDIF2521 ENDDO2522 ENDDO2523 ENDDO2524 2525 CALL cpu_log( log_point(29), 'q-equation', 'stop' )2526 2527 !2528 !-- If required, calculate prognostic equations for rain water content2529 !-- and rain drop concentration2530 IF ( cloud_physics .AND. microphysics_seifert ) THEN2531 2532 CALL cpu_log( log_point(52), 'qr-equation', 'start' )2533 !2534 !-- qr-tendency terms with communication2535 sbt = tsc(2)2536 IF ( scalar_advec == 'bc-scheme' ) THEN2537 2538 IF ( timestep_scheme(1:5) /= 'runge' ) THEN2539 !2540 !-- Bott-Chlond scheme always uses Euler time step. Thus:2541 sbt = 1.0_wp2542 ENDIF2543 tend = 0.0_wp2544 CALL advec_s_bc( qr, 'qr' )2545 2546 ENDIF2547 2548 !2549 !-- qr-tendency terms with no communication2550 IF ( scalar_advec /= 'bc-scheme' ) THEN2551 tend = 0.0_wp2552 IF ( timestep_scheme(1:5) == 'runge' ) THEN2553 IF ( ws_scheme_sca ) THEN2554 CALL advec_s_ws( qr, 'qr' )2555 ELSE2556 CALL advec_s_pw( qr )2557 ENDIF2558 ELSE2559 CALL advec_s_up( qr )2560 ENDIF2561 ENDIF2562 2563 CALL diffusion_s( qr, qrsws, qrswst, wall_qrflux )2564 2565 !2566 !-- Prognostic equation for rain water content2567 DO i = i_left, i_right2568 DO j = j_south, j_north2569 DO k = nzb_s_inner(j,i)+1, nzt2570 qr_p(k,j,i) = qr(k,j,i) + dt_3d * ( sbt * tend(k,j,i) + &2571 tsc(3) * tqr_m(k,j,i) ) &2572 - tsc(5) * rdf_sc(k) * qr(k,j,i)2573 IF ( qr_p(k,j,i) < 0.0_wp ) qr_p(k,j,i) = 0.0_wp2574 !2575 !-- Tendencies for the next Runge-Kutta step2576 IF ( runge_step == 1 ) THEN2577 tqr_m(k,j,i) = tend(k,j,i)2578 ELSEIF ( runge_step == 2 ) THEN2579 tqr_m(k,j,i) = -9.5625_wp * tend(k,j,i) + 5.3125_wp * &2580 tqr_m(k,j,i)2581 ENDIF2582 ENDDO2583 ENDDO2584 ENDDO2585 2586 CALL cpu_log( log_point(52), 'qr-equation', 'stop' )2587 CALL cpu_log( log_point(53), 'nr-equation', 'start' )2588 2589 !2590 !-- nr-tendency terms with communication2591 sbt = tsc(2)2592 IF ( scalar_advec == 'bc-scheme' ) THEN2593 2594 IF ( timestep_scheme(1:5) /= 'runge' ) THEN2595 !2596 !-- Bott-Chlond scheme always uses Euler time step. Thus:2597 sbt = 1.0_wp2598 ENDIF2599 tend = 0.0_wp2600 CALL advec_s_bc( nr, 'nr' )2601 2602 ENDIF2603 2604 !2605 !-- nr-tendency terms with no communication2606 IF ( scalar_advec /= 'bc-scheme' ) THEN2607 tend = 0.0_wp2608 IF ( timestep_scheme(1:5) == 'runge' ) THEN2609 IF ( ws_scheme_sca ) THEN2610 CALL advec_s_ws( nr, 'nr' )2611 ELSE2612 CALL advec_s_pw( nr )2613 ENDIF2614 ELSE2615 CALL advec_s_up( nr )2616 ENDIF2617 ENDIF2618 2619 CALL diffusion_s( nr, nrsws, nrswst, wall_nrflux )2620 2621 !2622 !-- Prognostic equation for rain drop concentration2623 DO i = i_left, i_right2624 DO j = j_south, j_north2625 DO k = nzb_s_inner(j,i)+1, nzt2626 nr_p(k,j,i) = nr(k,j,i) + dt_3d * ( sbt * tend(k,j,i) + &2627 tsc(3) * tnr_m(k,j,i) ) &2628 - tsc(5) * rdf_sc(k) * nr(k,j,i)2629 IF ( nr_p(k,j,i) < 0.0_wp ) nr_p(k,j,i) = 0.0_wp2630 !2631 !-- Tendencies for the next Runge-Kutta step2632 IF ( runge_step == 1 ) THEN2633 tnr_m(k,j,i) = tend(k,j,i)2634 ELSEIF ( runge_step == 2 ) THEN2635 tnr_m(k,j,i) = -9.5625_wp * tend(k,j,i) + 5.3125_wp * &2636 tnr_m(k,j,i)2637 ENDIF2638 ENDDO2639 ENDDO2640 ENDDO2641 2642 CALL cpu_log( log_point(53), 'nr-equation', 'stop' )2643 2644 ENDIF2645 2646 ENDIF2647 2648 !2649 !-- If required, compute prognostic equation for scalar2650 IF ( passive_scalar ) THEN2651 2652 CALL cpu_log( log_point(66), 's-equation', 'start' )2653 2654 !2655 !-- Scalar/q-tendency terms with communication2656 sbt = tsc(2)2657 IF ( scalar_advec == 'bc-scheme' ) THEN2658 2659 IF ( timestep_scheme(1:5) /= 'runge' ) THEN2660 !2661 !-- Bott-Chlond scheme always uses Euler time step. Thus:2662 sbt = 1.0_wp2663 ENDIF2664 tend = 0.0_wp2665 CALL advec_s_bc( s, 's' )2666 2667 ENDIF2668 2669 !2670 !-- Scalar/q-tendency terms with no communication2671 IF ( scalar_advec /= 'bc-scheme' ) THEN2672 tend = 0.0_wp2673 IF ( timestep_scheme(1:5) == 'runge' ) THEN2674 IF ( ws_scheme_sca ) THEN2675 CALL advec_s_ws( s, 's' )2676 ELSE2677 CALL advec_s_pw( s )2678 ENDIF2679 ELSE2680 CALL advec_s_up( s )2681 ENDIF2682 ENDIF2683 2684 CALL diffusion_s( s, ssws, sswst, wall_sflux )2685 2686 !2687 !-- Sink or source of scalar concentration due to canopy elements2688 IF ( plant_canopy ) CALL pcm_tendency( 7 )2689 2690 !2691 !-- Large scale advection. Not implemented so far.2692 ! IF ( large_scale_forcing ) THEN2693 ! CALL ls_advec( simulated_time, 's' )2694 ! ENDIF2695 2696 !2697 !-- Nudging. Not implemented so far.2698 ! IF ( nudging ) CALL nudge( simulated_time, 's' )2699 2700 !2701 !-- If required compute influence of large-scale subsidence/ascent.2702 !-- Not implemented so far.2703 IF ( large_scale_subsidence .AND. &2704 .NOT. use_subsidence_tendencies .AND. &2705 .NOT. large_scale_forcing ) THEN2706 CALL subsidence( tend, s, s_init, 3 )2707 ENDIF2708 2709 CALL user_actions( 's-tendency' )2710 2711 !2712 !-- Prognostic equation for total water content / scalar2713 DO i = i_left, i_right2714 DO j = j_south, j_north2715 DO k = nzb_s_inner(j,i)+1, nzt2716 s_p(k,j,i) = s(k,j,i) + dt_3d * ( sbt * tend(k,j,i) + &2717 tsc(3) * ts_m(k,j,i) ) &2718 - tsc(5) * rdf_sc(k) * &2719 ( s(k,j,i) - s_init(k) )2720 IF ( s_p(k,j,i) < 0.0_wp ) s_p(k,j,i) = 0.1_wp * s(k,j,i)2721 !2722 !-- Tendencies for the next Runge-Kutta step2723 IF ( runge_step == 1 ) THEN2724 ts_m(k,j,i) = tend(k,j,i)2725 ELSEIF ( runge_step == 2 ) THEN2726 ts_m(k,j,i) = -9.5625_wp * tend(k,j,i) + 5.3125_wp * ts_m(k,j,i)2727 ENDIF2728 ENDDO2729 ENDDO2730 ENDDO2731 2732 CALL cpu_log( log_point(66), 's-equation', 'stop' )2733 2734 ENDIF2735 !2736 !-- If required, compute prognostic equation for turbulent kinetic2737 !-- energy (TKE)2738 IF ( .NOT. constant_diffusion ) THEN2739 2740 CALL cpu_log( log_point(16), 'tke-equation', 'start' )2741 2742 sbt = tsc(2)2743 IF ( .NOT. use_upstream_for_tke ) THEN2744 IF ( scalar_advec == 'bc-scheme' ) THEN2745 2746 IF ( timestep_scheme(1:5) /= 'runge' ) THEN2747 !2748 !-- Bott-Chlond scheme always uses Euler time step. Thus:2749 sbt = 1.0_wp2750 ENDIF2751 tend = 0.0_wp2752 CALL advec_s_bc( e, 'e' )2753 2754 ENDIF2755 ENDIF2756 2757 !2758 !-- TKE-tendency terms with no communication2759 IF ( scalar_advec /= 'bc-scheme' .OR. use_upstream_for_tke ) THEN2760 IF ( use_upstream_for_tke ) THEN2761 tend = 0.0_wp2762 CALL advec_s_up( e )2763 ELSE2764 IF ( timestep_scheme(1:5) == 'runge' ) THEN2765 IF ( ws_scheme_sca ) THEN2766 CALL advec_s_ws_acc( e, 'e' )2767 ELSE2768 tend = 0.0_wp ! to be removed later??2769 CALL advec_s_pw( e )2770 ENDIF2771 ELSE2772 tend = 0.0_wp ! to be removed later??2773 CALL advec_s_up( e )2774 ENDIF2775 ENDIF2776 ENDIF2777 2778 IF ( .NOT. humidity ) THEN2779 IF ( ocean ) THEN2780 CALL diffusion_e( prho, prho_reference )2781 ELSE2782 CALL diffusion_e_acc( pt, pt_reference )2783 ENDIF2784 ELSE2785 CALL diffusion_e( vpt, pt_reference )2786 ENDIF2787 2788 CALL production_e_acc2789 2790 !2791 !-- Additional sink term for flows through plant canopies2792 IF ( plant_canopy ) CALL pcm_tendency( 6 )2793 CALL user_actions( 'e-tendency' )2794 2795 !2796 !-- Prognostic equation for TKE.2797 !-- Eliminate negative TKE values, which can occur due to numerical2798 !-- reasons in the course of the integration. In such cases the old TKE2799 !-- value is reduced by 90%.2800 !$acc kernels present( e, e_p, nzb_s_inner, tend, te_m )2801 !$acc loop independent2802 DO i = i_left, i_right2803 !$acc loop independent2804 DO j = j_south, j_north2805 !$acc loop independent2806 DO k = 1, nzt2807 IF ( k > nzb_s_inner(j,i) ) THEN2808 e_p(k,j,i) = e(k,j,i) + dt_3d * ( sbt * tend(k,j,i) + &2809 tsc(3) * te_m(k,j,i) )2810 IF ( e_p(k,j,i) < 0.0_wp ) e_p(k,j,i) = 0.1_wp * e(k,j,i)2811 !2812 !-- Tendencies for the next Runge-Kutta step2813 IF ( runge_step == 1 ) THEN2814 te_m(k,j,i) = tend(k,j,i)2815 ELSEIF ( runge_step == 2 ) THEN2816 te_m(k,j,i) = -9.5625_wp * tend(k,j,i) + 5.3125_wp * te_m(k,j,i)2817 ENDIF2818 ENDIF2819 ENDDO2820 ENDDO2821 ENDDO2822 !$acc end kernels2823 2824 CALL cpu_log( log_point(16), 'tke-equation', 'stop' )2825 2826 ENDIF2827 2828 END SUBROUTINE prognostic_equations_acc2829 2830 2831 1997 END MODULE prognostic_equations_mod -
TabularUnified palm/trunk/SOURCE/surface_layer_fluxes_mod.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ------------------ 22 ! 22 ! OpenACC directives and related code removed 23 23 ! 24 24 ! Former revisions: … … 163 163 !> 164 164 !> @todo (re)move large_scale_forcing actions 165 !> @todo check/optimize OpenMP and OpenACCdirectives165 !> @todo check/optimize OpenMP directives 166 166 !------------------------------------------------------------------------------! 167 167 MODULE surface_layer_fluxes_mod … … 472 472 473 473 !$OMP PARALLEL DO PRIVATE( k ) 474 !$acc kernels loop present( nzb_s_inner, u, uv_total, v ) private( j, k )475 474 DO i = nxl, nxr 476 475 DO j = nys, nyn … … 492 491 ! 493 492 !-- Values of uv_total need to be exchanged at the ghost boundaries 494 !$acc update host( uv_total )495 493 CALL exchange_horiz_2d( uv_total ) 496 !$acc update device( uv_total )497 494 498 495 END SUBROUTINE calc_uv_total … … 522 519 IF ( TRIM( most_method ) /= 'circular' ) THEN 523 520 524 !$acc data present( nzb_s_inner, pt, q, qsws, rib, shf, uv_total, vpt, zu, zw )525 526 521 !$OMP PARALLEL DO PRIVATE( k, z_mo ) 527 !$acc kernels loop private( j, k, z_mo )528 522 DO i = nxl, nxr 529 523 DO j = nys, nyn … … 564 558 ENDDO 565 559 ENDDO 566 !$acc end data567 560 568 561 ENDIF … … 574 567 575 568 !$OMP PARALLEL DO PRIVATE( k, z_mo ) 576 !# WARNING: does not work on GPU so far because of DO-loop with577 !# undetermined iterations578 !!!!!!$acc kernels loop579 569 DO i = nxl, nxr 580 570 DO j = nys, nyn … … 695 685 696 686 !$OMP PARALLEL DO PRIVATE( k, l, z_mo ) FIRSTPRIVATE( l_bnd ) LASTPRIVATE( l_bnd ) 697 !# WARNING: does not work on GPU so far because of DO WHILE construct698 !!!!!!$acc kernels loop699 687 DO i = nxl, nxr 700 688 DO j = nys, nyn … … 736 724 737 725 !$OMP PARALLEL DO PRIVATE( k, z_mo ) 738 !$acc kernels loop present( nzb_s_inner, ol, pt, pt1, q, ql, qs, qv1, ts, us, vpt, zu, zw ) private( j, k, z_mo )739 726 DO i = nxl, nxr 740 727 DO j = nys, nyn … … 775 762 !-- Values of ol at ghost point locations are needed for the evaluation 776 763 !-- of usws and vsws. 777 !$acc update host( ol )778 764 CALL exchange_horiz_2d( ol ) 779 !$acc update device( ol )780 765 781 766 END SUBROUTINE calc_ol … … 788 773 789 774 !$OMP PARALLEL DO PRIVATE( k, z_mo ) 790 !$acc kernels loop present( nzb_s_inner, ol, us, uv_total, zu, zw, z0 ) private( j, k, z_mo )791 775 DO i = nxlg, nxrg 792 776 DO j = nysg, nyng … … 811 795 IMPLICIT NONE 812 796 813 !$acc kernels loop present( nzb_s_inner, pt, pt1, pt_d_t, q, ql, qv1 ) private( j, k )814 797 DO i = nxlg, nxrg 815 798 DO j = nysg, nyng … … 828 811 IMPLICIT NONE 829 812 830 !831 !-- Data information for accelerators832 !$acc data present( e, nrsws, nzb_u_inner, nzb_v_inner, nzb_s_inner, pt ) &833 !$acc present( q, qs, qsws, qrsws, shf, ts, u, us, usws, v ) &834 !$acc present( vpt, vsws, zu, zw, z0, z0h )835 813 ! 836 814 !-- Compute theta* … … 840 818 !-- For a given heat flux in the surface layer: 841 819 !$OMP PARALLEL DO 842 !$acc kernels loop private( j, k )843 820 DO i = nxlg, nxrg 844 821 DO j = nysg, nyng … … 858 835 IF ( large_scale_forcing .AND. lsf_surf ) THEN 859 836 !$OMP PARALLEL DO 860 !$acc kernels loop private( j, k )861 837 DO i = nxlg, nxrg 862 838 DO j = nysg, nyng … … 868 844 869 845 !$OMP PARALLEL DO PRIVATE( k, z_mo ) 870 !$acc kernels loop present( nzb_s_inner, ol, pt, pt1, ts, zu, zw, z0h ) private( j, k, z_mo )871 846 DO i = nxlg, nxrg 872 847 DO j = nysg, nyng … … 898 873 !-- For a given water flux in the surface layer 899 874 !$OMP PARALLEL DO 900 !$acc kernels loop private( j )901 875 DO i = nxlg, nxrg 902 876 DO j = nysg, nyng … … 912 886 IF ( large_scale_forcing .AND. lsf_surf ) THEN 913 887 !$OMP PARALLEL DO 914 !$acc kernels loop private( j, k )915 888 DO i = nxlg, nxrg 916 889 DO j = nysg, nyng … … 922 895 923 896 !$OMP PARALLEL DO PRIVATE( e_s, k, z_mo ) 924 !$acc kernels loop independent present( nzb_s_inner, ol, pt, q, qs, qv1, zu, zw, z0q ) private( e_s, j, k, z_mo )925 897 DO i = nxlg, nxrg 926 !$acc loop independent927 898 DO j = nysg, nyng 928 899 … … 965 936 !-- For a given water flux in the surface layer 966 937 !$OMP PARALLEL DO 967 !$acc kernels loop private( j )968 938 DO i = nxlg, nxrg 969 939 DO j = nysg, nyng … … 981 951 982 952 !$OMP PARALLEL DO PRIVATE( k, z_mo ) 983 !$acc kernels loop independent present( nr, nrs, nzb_s_inner, ol, qr, qrs, zu, zw, z0q ) private( j, k, z_mo )984 953 DO i = nxlg, nxrg 985 !$acc loop independent986 954 DO j = nysg, nyng 987 955 … … 1002 970 1003 971 ENDIF 1004 !$acc end data1005 972 1006 973 END SUBROUTINE calc_scaling_parameters … … 1020 987 !-- First compute the corresponding component of u* and square it. 1021 988 !$OMP PARALLEL DO PRIVATE( k, ol_mid, z_mo ) 1022 !$acc kernels loop present( nzb_u_inner, ol, u, us, usws, zu, zw, z0 ) private( j, k, z_mo )1023 989 DO i = nxl, nxr 1024 990 DO j = nys, nyn … … 1048 1014 !-- First compute the corresponding component of u* and square it. 1049 1015 !$OMP PARALLEL DO PRIVATE( k, ol_mid, z_mo ) 1050 !$acc kernels loop present( nzb_v_inner, ol, v, us, vsws, zu, zw, z0 ) private( j, k, ol_mid, z_mo )1051 1016 DO i = nxl, nxr 1052 1017 DO j = nys, nyn … … 1075 1040 ! 1076 1041 !-- Exchange the boundaries for the momentum fluxes (is this still required?) 1077 !$acc update host( usws, vsws )1078 1042 CALL exchange_horiz_2d( usws ) 1079 1043 CALL exchange_horiz_2d( vsws ) 1080 !$acc update device( usws, vsws )1081 1044 1082 1045 ! … … 1086 1049 .NOT. urban_surface ) THEN 1087 1050 !$OMP PARALLEL DO 1088 !$acc kernels loop independent present( shf, ts, us )1089 1051 DO i = nxlg, nxrg 1090 !$acc loop independent1091 1052 DO j = nysg, nyng 1092 1053 k = nzb_s_inner(j,i) … … 1103 1064 .OR. .NOT. land_surface ) ) THEN 1104 1065 !$OMP PARALLEL DO 1105 !$acc kernels loop independent present( qs, qsws, us )1106 1066 DO i = nxlg, nxrg 1107 !$acc loop independent1108 1067 DO j = nysg, nyng 1109 1068 k = nzb_s_inner(j,i) … … 1120 1079 .OR. .NOT. land_surface ) ) THEN 1121 1080 !$OMP PARALLEL DO 1122 !$acc kernels loop independent present( qs, qsws, us )1123 1081 DO i = nxlg, nxrg 1124 !$acc loop independent1125 1082 DO j = nysg, nyng 1126 1083 ssws(j,i) = -ss(j,i) * us(j,i) … … 1134 1091 IF ( cloud_physics .AND. microphysics_seifert ) THEN 1135 1092 !$OMP PARALLEL DO 1136 !$acc kernels loop independent present( nrs, nrsws, qrs, qrsws, us )1137 1093 DO i = nxlg, nxrg 1138 !$acc loop independent1139 1094 DO j = nysg, nyng 1140 1095 qrsws(j,i) = -qrs(j,i) * us(j,i) … … 1148 1103 IF ( ibc_e_b == 2 ) THEN 1149 1104 !$OMP PARALLEL DO 1150 !$acc kernels loop independent present( e, nzb_s_inner, us )1151 1105 DO i = nxlg, nxrg 1152 !$acc loop independent1153 1106 DO j = nysg, nyng 1154 1107 k = nzb_s_inner(j,i) -
TabularUnified palm/trunk/SOURCE/swap_timelevel.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC directives removed 23 23 ! 24 24 ! Former revisions: … … 142 142 CALL cpu_log( log_point(28), 'swap_timelevel (nop)', 'start' ) 143 143 144 !$acc kernels present( pt, pt_p, u, u_p, v, v_p, w, w_p )145 !$acc loop independent146 144 DO i = nxlg, nxrg 147 !$acc loop independent148 145 DO j = nysg, nyng 149 !$acc loop independent150 146 DO k = nzb, nzt+1 151 147 u(k,j,i) = u_p(k,j,i) … … 156 152 ENDDO 157 153 ENDDO 158 ! u = u_p 159 ! v = v_p 160 ! w = w_p 161 ! pt = pt_p 162 !$acc end kernels 154 163 155 IF ( .NOT. constant_diffusion ) THEN 164 !$acc kernels present( e, e_p )165 !$acc loop independent166 156 DO i = nxlg, nxrg 167 !$acc loop independent168 157 DO j = nysg, nyng 169 !$acc loop independent170 158 DO k = nzb, nzt+1 171 159 e(k,j,i) = e_p(k,j,i) … … 173 161 ENDDO 174 162 ENDDO 175 ! e = e_p 176 !$acc end kernels 177 ENDIF 163 ENDIF 164 178 165 IF ( ocean ) THEN 179 166 sa = sa_p 180 167 ENDIF 168 181 169 IF ( humidity ) THEN 182 170 q = q_p … … 186 174 ENDIF 187 175 ENDIF 176 188 177 IF ( passive_scalar ) s = s_p 189 178 -
TabularUnified palm/trunk/SOURCE/time_integration.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ------------------ 22 ! 22 ! OpenACC directives and related code removed 23 23 ! 24 24 ! Former revisions: … … 273 273 microphysics_seifert, mid, nest_domain, & 274 274 neutral, nr_timesteps_this_run, nudging, & 275 ocean, on_device, passive_scalar,&275 ocean, passive_scalar, & 276 276 prho_reference, pt_reference, pt_slope_offset, random_heatflux, & 277 277 run_coupled, simulated_time, simulated_time_chr, & … … 298 298 299 299 USE indices, & 300 ONLY: i_left, i_right, j_north, j_south, nbgp, nx, nxl, nxlg, nxr,&301 n xrg, nyn, nyng, nys, nysg, nzb, nzt, nzb_u_inner, nzb_v_inner300 ONLY: nbgp, nx, nxl, nxlg, nxr, nxrg, nyn, nyng, nys, nysg, nzb, nzt, & 301 nzb_u_inner, nzb_v_inner 302 302 303 303 USE interaction_droplets_ptq_mod, & … … 338 338 339 339 USE prognostic_equations_mod, & 340 ONLY: prognostic_equations_acc, prognostic_equations_cache, & 341 prognostic_equations_vector 340 ONLY: prognostic_equations_cache, prognostic_equations_vector 342 341 343 342 USE radiation_model_mod, & … … 522 521 ELSEIF ( loop_optimization == 'vector' ) THEN 523 522 CALL prognostic_equations_vector 524 ELSEIF ( loop_optimization == 'acc' ) THEN525 i_left = nxl; i_right = nxr526 j_south = nys; j_north = nyn527 CALL prognostic_equations_acc528 529 ! i_left = nxl; i_right = nxl+nbgp-1530 ! j_south = nys; j_north = nyn531 ! CALL prognostic_equations_acc532 ! i_left = nxr-nbgp+1; i_right = nxr533 ! j_south = nys; j_north = nyn534 ! CALL prognostic_equations_acc535 536 !537 !-- Exchange of ghost points (lateral boundary conditions)538 IF ( background_communication ) THEN539 540 CALL cpu_log( log_point(26), 'exchange-horiz-progn', 'start' )541 542 send_receive = 'lr'543 sendrecv_in_background = .TRUE.544 req = 0545 req_count = 0546 547 IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs548 on_device = .TRUE. ! to be removed after complete porting549 ELSE ! of ghost point exchange550 !$acc update host( e_p, pt_p, u_p, v_p, w_p )551 ENDIF552 553 CALL exchange_horiz( u_p, nbgp )554 CALL exchange_horiz( v_p, nbgp )555 CALL exchange_horiz( w_p, nbgp )556 CALL exchange_horiz( pt_p, nbgp )557 IF ( .NOT. constant_diffusion ) CALL exchange_horiz( e_p, nbgp )558 IF ( ocean ) THEN559 CALL exchange_horiz( sa_p, nbgp )560 CALL exchange_horiz( rho_ocean, nbgp )561 CALL exchange_horiz( prho, nbgp )562 ENDIF563 IF ( humidity ) THEN564 CALL exchange_horiz( q_p, nbgp )565 IF ( cloud_physics .AND. microphysics_seifert ) THEN566 CALL exchange_horiz( qr_p, nbgp )567 CALL exchange_horiz( nr_p, nbgp )568 ENDIF569 ENDIF570 IF ( cloud_droplets ) THEN571 CALL exchange_horiz( ql, nbgp )572 CALL exchange_horiz( ql_c, nbgp )573 CALL exchange_horiz( ql_v, nbgp )574 CALL exchange_horiz( ql_vp, nbgp )575 ENDIF576 IF ( wang_kernel .OR. collision_turbulence .OR. &577 use_sgs_for_particles ) THEN578 CALL exchange_horiz( diss, nbgp )579 ENDIF580 IF ( passive_scalar ) CALL exchange_horiz( s_p, nbgp )581 582 IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs583 on_device = .FALSE. ! to be removed after complete porting584 ELSE ! of ghost point exchange585 !$acc update device( e_p, pt_p, u_p, v_p, w_p )586 ENDIF587 588 sendrecv_in_background = .FALSE.589 590 CALL cpu_log( log_point(26), 'exchange-horiz-progn', 'pause' )591 592 ENDIF593 594 ! i_left = nxl+nbgp; i_right = nxr-nbgp595 ! j_south = nys; j_north = nys+nbgp-1596 ! CALL prognostic_equations_acc597 ! i_left = nxl+nbgp; i_right = nxr-nbgp598 ! j_south = nyn-nbgp+1; j_north = nyn599 ! CALL prognostic_equations_acc600 601 IF ( background_communication ) THEN602 CALL cpu_log( log_point(41), 'exchange-horiz-wait', 'start' )603 #if defined( __parallel )604 CALL MPI_WAITALL( req_count, req, wait_stat, ierr )605 #endif606 CALL cpu_log( log_point(41), 'exchange-horiz-wait', 'pause' )607 608 CALL cpu_log( log_point(26), 'exchange-horiz-progn', 'continue' )609 610 send_receive = 'ns'611 sendrecv_in_background = .TRUE.612 req = 0613 req_count = 0614 615 IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs616 on_device = .TRUE. ! to be removed after complete porting617 ELSE ! of ghost point exchange618 !$acc update host( e_p, pt_p, u_p, v_p, w_p )619 ENDIF620 621 CALL exchange_horiz( u_p, nbgp )622 CALL exchange_horiz( v_p, nbgp )623 CALL exchange_horiz( w_p, nbgp )624 CALL exchange_horiz( pt_p, nbgp )625 IF ( .NOT. constant_diffusion ) CALL exchange_horiz( e_p, nbgp )626 IF ( ocean ) THEN627 CALL exchange_horiz( sa_p, nbgp )628 CALL exchange_horiz( rho_ocean, nbgp )629 CALL exchange_horiz( prho, nbgp )630 ENDIF631 IF ( humidity ) THEN632 CALL exchange_horiz( q_p, nbgp )633 IF ( cloud_physics .AND. microphysics_seifert ) THEN634 CALL exchange_horiz( qr_p, nbgp )635 CALL exchange_horiz( nr_p, nbgp )636 ENDIF637 ENDIF638 IF ( cloud_droplets ) THEN639 CALL exchange_horiz( ql, nbgp )640 CALL exchange_horiz( ql_c, nbgp )641 CALL exchange_horiz( ql_v, nbgp )642 CALL exchange_horiz( ql_vp, nbgp )643 ENDIF644 IF ( wang_kernel .OR. collision_turbulence .OR. &645 use_sgs_for_particles ) THEN646 CALL exchange_horiz( diss, nbgp )647 ENDIF648 IF ( passive_scalar ) CALL exchange_horiz( s_p, nbgp )649 650 IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs651 on_device = .FALSE. ! to be removed after complete porting652 ELSE ! of ghost point exchange653 !$acc update device( e_p, pt_p, u_p, v_p, w_p )654 ENDIF655 656 sendrecv_in_background = .FALSE.657 658 CALL cpu_log( log_point(26), 'exchange-horiz-progn', 'stop' )659 660 ENDIF661 662 ! i_left = nxl+nbgp; i_right = nxr-nbgp663 ! j_south = nys+nbgp; j_north = nyn-nbgp664 ! CALL prognostic_equations_acc665 666 IF ( background_communication ) THEN667 CALL cpu_log( log_point(41), 'exchange-horiz-wait', 'continue' )668 #if defined( __parallel )669 CALL MPI_WAITALL( req_count, req, wait_stat, ierr )670 #endif671 send_receive = 'al'672 CALL cpu_log( log_point(41), 'exchange-horiz-wait', 'stop' )673 ENDIF674 675 523 ENDIF 676 524 … … 699 547 ! 700 548 !-- Exchange of ghost points (lateral boundary conditions) 701 IF ( .NOT. background_communication ) THEN 702 703 CALL cpu_log( log_point(26), 'exchange-horiz-progn', 'start' ) 704 705 IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs 706 on_device = .TRUE. ! to be removed after complete porting 707 ELSE ! of ghost point exchange 708 !$acc update host( e_p, pt_p, u_p, v_p, w_p ) 709 ENDIF 710 711 CALL exchange_horiz( u_p, nbgp ) 712 CALL exchange_horiz( v_p, nbgp ) 713 CALL exchange_horiz( w_p, nbgp ) 714 CALL exchange_horiz( pt_p, nbgp ) 715 IF ( .NOT. constant_diffusion ) CALL exchange_horiz( e_p, nbgp ) 716 IF ( ocean ) THEN 717 CALL exchange_horiz( sa_p, nbgp ) 718 CALL exchange_horiz( rho_ocean, nbgp ) 719 CALL exchange_horiz( prho, nbgp ) 720 ENDIF 721 IF ( humidity ) THEN 722 CALL exchange_horiz( q_p, nbgp ) 723 IF ( cloud_physics .AND. microphysics_seifert ) THEN 724 CALL exchange_horiz( qr_p, nbgp ) 725 CALL exchange_horiz( nr_p, nbgp ) 726 ENDIF 727 ENDIF 728 IF ( cloud_droplets ) THEN 729 CALL exchange_horiz( ql, nbgp ) 730 CALL exchange_horiz( ql_c, nbgp ) 731 CALL exchange_horiz( ql_v, nbgp ) 732 CALL exchange_horiz( ql_vp, nbgp ) 733 ENDIF 734 IF ( wang_kernel .OR. collision_turbulence .OR. & 735 use_sgs_for_particles ) THEN 736 CALL exchange_horiz( diss, nbgp ) 737 ENDIF 738 IF ( passive_scalar ) CALL exchange_horiz( s_p, nbgp ) 739 740 IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs 741 on_device = .FALSE. ! to be removed after complete porting 742 ELSE ! of ghost point exchange 743 !$acc update device( e_p, pt_p, u_p, v_p, w_p ) 744 ENDIF 745 746 CALL cpu_log( log_point(26), 'exchange-horiz-progn', 'stop' ) 747 748 ENDIF 549 CALL cpu_log( log_point(26), 'exchange-horiz-progn', 'start' ) 550 551 CALL exchange_horiz( u_p, nbgp ) 552 CALL exchange_horiz( v_p, nbgp ) 553 CALL exchange_horiz( w_p, nbgp ) 554 CALL exchange_horiz( pt_p, nbgp ) 555 IF ( .NOT. constant_diffusion ) CALL exchange_horiz( e_p, nbgp ) 556 IF ( ocean ) THEN 557 CALL exchange_horiz( sa_p, nbgp ) 558 CALL exchange_horiz( rho_ocean, nbgp ) 559 CALL exchange_horiz( prho, nbgp ) 560 ENDIF 561 IF ( humidity ) THEN 562 CALL exchange_horiz( q_p, nbgp ) 563 IF ( cloud_physics .AND. microphysics_seifert ) THEN 564 CALL exchange_horiz( qr_p, nbgp ) 565 CALL exchange_horiz( nr_p, nbgp ) 566 ENDIF 567 ENDIF 568 IF ( cloud_droplets ) THEN 569 CALL exchange_horiz( ql, nbgp ) 570 CALL exchange_horiz( ql_c, nbgp ) 571 CALL exchange_horiz( ql_v, nbgp ) 572 CALL exchange_horiz( ql_vp, nbgp ) 573 ENDIF 574 IF ( wang_kernel .OR. collision_turbulence .OR. & 575 use_sgs_for_particles ) THEN 576 CALL exchange_horiz( diss, nbgp ) 577 ENDIF 578 IF ( passive_scalar ) CALL exchange_horiz( s_p, nbgp ) 579 580 CALL cpu_log( log_point(26), 'exchange-horiz-progn', 'stop' ) 749 581 750 582 ! … … 821 653 time_disturb = time_disturb + dt_3d 822 654 IF ( time_disturb >= dt_disturb ) THEN 823 !$acc update host( u, v )824 IF ( numprocs == 1 ) on_device = .FALSE. ! workaround, remove later825 655 IF ( disturbance_energy_limit /= 0.0_wp .AND. & 826 656 hom(nzb+5,1,pr_palm,0) < disturbance_energy_limit ) THEN … … 836 666 dist_range = 0 837 667 ENDIF 838 IF ( numprocs == 1 ) on_device = .TRUE. ! workaround, remove later839 !$acc update device( u, v )840 668 time_disturb = time_disturb - dt_disturb 841 669 ENDIF … … 854 682 IF ( cloud_physics ) THEN 855 683 CALL calc_liquid_water_content 856 !$acc update device( ql )857 684 ENDIF 858 685 ! … … 860 687 IF ( humidity ) THEN 861 688 CALL compute_vpt 862 !$acc update device( vpt )863 689 ENDIF 864 690 -
TabularUnified palm/trunk/SOURCE/timestep.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ------------------ 22 ! 22 ! OpenACC directives and related part of code removed 23 23 ! 24 24 ! Former revisions: … … 182 182 u_gtrans_l = 0.0_wp 183 183 v_gtrans_l = 0.0_wp 184 !$acc parallel present( u, v )185 184 DO i = nxl, nxr 186 185 DO j = nys, nyn … … 191 190 ENDDO 192 191 ENDDO 193 !$acc end parallel194 192 uv_gtrans_l(1) = u_gtrans_l / REAL( (nxr-nxl+1)*(nyn-nys+1)*(nzt-nzb), KIND=wp ) 195 193 uv_gtrans_l(2) = v_gtrans_l / REAL( (nxr-nxl+1)*(nyn-nys+1)*(nzt-nzb), KIND=wp ) … … 210 208 !-- Determine the maxima of the velocity components, including their 211 209 !-- grid index positions. 212 #if defined( __openacc )213 IF ( dt_fixed ) THEN ! otherwise do it further below for better cache usage214 u_max_l = -999999.9_wp215 u_min_l = 999999.9_wp216 v_max_l = -999999.9_wp217 v_min_l = 999999.9_wp218 w_max_l = -999999.9_wp219 w_min_l = 999999.9_wp220 !$acc parallel present( u, v, w )221 DO i = nxl, nxr222 DO j = nys, nyn223 DO k = nzb+1, nzt224 u_max_l = MAX( u_max_l, u(k,j,i) )225 u_min_l = MIN( u_min_l, u(k,j,i) )226 v_max_l = MAX( v_max_l, v(k,j,i) )227 v_min_l = MIN( v_min_l, v(k,j,i) )228 w_max_l = MAX( w_max_l, w(k,j,i) )229 w_min_l = MIN( w_min_l, w(k,j,i) )230 ENDDO231 ENDDO232 ENDDO233 !$acc end parallel234 #if defined( __parallel )235 reduce_l(1) = u_max_l236 reduce_l(2) = v_max_l237 reduce_l(3) = w_max_l238 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )239 CALL MPI_ALLREDUCE( reduce_l, reduce, 3, MPI_REAL, MPI_MAX, comm2d, ierr )240 u_max = reduce(1)241 v_max = reduce(2)242 w_max = reduce(3)243 reduce_l(1) = u_min_l244 reduce_l(2) = v_min_l245 reduce_l(3) = w_min_l246 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )247 CALL MPI_ALLREDUCE( reduce_l, reduce, 3, MPI_REAL, MPI_MIN, comm2d, ierr )248 IF ( ABS( reduce(1) ) > u_max ) u_max = reduce(1)249 IF ( ABS( reduce(2) ) > v_max ) v_max = reduce(2)250 IF ( ABS( reduce(3) ) > w_max ) w_max = reduce(3)251 #else252 IF ( ABS( u_min_l ) > u_max_l ) THEN253 u_max = u_min_l254 ELSE255 u_max = u_max_l256 ENDIF257 IF ( ABS( v_min_l ) > v_max_l ) THEN258 v_max = v_min_l259 ELSE260 v_max = v_max_l261 ENDIF262 IF ( ABS( w_min_l ) > w_max_l ) THEN263 w_max = w_min_l264 ELSE265 w_max = w_max_l266 ENDIF267 #endif268 ENDIF269 #else270 210 CALL global_min_max( nzb, nzt+1, nysg, nyng, nxlg, nxrg, u, 'abs', 0.0_wp, & 271 211 u_max, u_max_ijk ) … … 274 214 CALL global_min_max( nzb, nzt+1, nysg, nyng, nxlg, nxrg, w, 'abs', 0.0_wp, & 275 215 w_max, w_max_ijk ) 276 #endif277 216 278 217 IF ( .NOT. dt_fixed ) THEN 279 #if defined( __openacc )280 !281 !-- Variable time step:282 !-- Calculate the maximum time step according to the CFL-criterion,283 !-- individually for each velocity component284 dt_u_l = 999999.9_wp285 dt_v_l = 999999.9_wp286 dt_w_l = 999999.9_wp287 u_max_l = -999999.9_wp288 u_min_l = 999999.9_wp289 v_max_l = -999999.9_wp290 v_min_l = 999999.9_wp291 w_max_l = -999999.9_wp292 w_min_l = 999999.9_wp293 !$acc parallel loop collapse(3) present( u, v, w )294 DO i = nxl, nxr295 DO j = nys, nyn296 DO k = nzb+1, nzt297 dt_u_l = MIN( dt_u_l, ( dx / ( ABS( u(k,j,i) - u_gtrans ) + 1.0E-10_wp ) ) )298 dt_v_l = MIN( dt_v_l, ( dy / ( ABS( v(k,j,i) - v_gtrans ) + 1.0E-10_wp ) ) )299 dt_w_l = MIN( dt_w_l, ( dzu(k) / ( ABS( w(k,j,i) ) + 1.0E-10_wp ) ) )300 u_max_l = MAX( u_max_l, u(k,j,i) )301 u_min_l = MIN( u_min_l, u(k,j,i) )302 v_max_l = MAX( v_max_l, v(k,j,i) )303 v_min_l = MIN( v_min_l, v(k,j,i) )304 w_max_l = MAX( w_max_l, w(k,j,i) )305 w_min_l = MIN( w_min_l, w(k,j,i) )306 ENDDO307 ENDDO308 ENDDO309 !$acc end parallel310 311 #if defined( __parallel )312 reduce_l(1) = dt_u_l313 reduce_l(2) = dt_v_l314 reduce_l(3) = dt_w_l315 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )316 CALL MPI_ALLREDUCE( reduce_l, reduce, 3, MPI_REAL, MPI_MIN, comm2d, ierr )317 dt_u = reduce(1)318 dt_v = reduce(2)319 dt_w = reduce(3)320 321 reduce_l(1) = u_max_l322 reduce_l(2) = v_max_l323 reduce_l(3) = w_max_l324 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )325 CALL MPI_ALLREDUCE( reduce_l, reduce, 3, MPI_REAL, MPI_MAX, comm2d, ierr )326 u_max = reduce(1)327 v_max = reduce(2)328 w_max = reduce(3)329 reduce_l(1) = u_min_l330 reduce_l(2) = v_min_l331 reduce_l(3) = w_min_l332 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )333 CALL MPI_ALLREDUCE( reduce_l, reduce, 3, MPI_REAL, MPI_MIN, comm2d, ierr )334 IF ( ABS( reduce(1) ) > u_max ) u_max = reduce(1)335 IF ( ABS( reduce(2) ) > v_max ) v_max = reduce(2)336 IF ( ABS( reduce(3) ) > w_max ) w_max = reduce(3)337 #else338 dt_u = dt_u_l339 dt_v = dt_v_l340 dt_w = dt_w_l341 342 IF ( ABS( u_min_l ) > u_max_l ) THEN343 u_max = u_min_l344 ELSE345 u_max = u_max_l346 ENDIF347 IF ( ABS( v_min_l ) > v_max_l ) THEN348 v_max = v_min_l349 ELSE350 v_max = v_max_l351 ENDIF352 IF ( ABS( w_min_l ) > w_max_l ) THEN353 w_max = w_min_l354 ELSE355 w_max = w_max_l356 ENDIF357 #endif358 359 #else360 218 ! 361 219 !-- Variable time step: … … 390 248 #endif 391 249 392 #endif393 394 250 ! 395 251 !-- Compute time step according to the diffusion criterion. … … 404 260 ENDDO 405 261 406 !$OMP PARALLEL private(i,j,k,value) reduction(MIN: dt_diff_l) 407 !$OMP DO 408 !$acc parallel loop collapse(3) present( kh, km ) 262 !$OMP PARALLEL private(i,j,k,value) reduction(MIN: dt_diff_l) 263 !$OMP DO 409 264 DO i = nxl, nxr 410 265 DO j = nys, nyn … … 415 270 ENDDO 416 271 ENDDO 417 !$acc end parallel 418 !$OMP END PARALLEL 272 !$OMP END PARALLEL 419 273 #if defined( __parallel ) 420 274 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) -
TabularUnified palm/trunk/SOURCE/transpose.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC directives removed 23 23 ! 24 24 ! Former revisions: … … 107 107 !$OMP PARALLEL PRIVATE ( i, j, k ) 108 108 !$OMP DO 109 !$acc kernels present( f_in, f_inv )110 109 DO i = 0, nx 111 110 DO k = nzb_x, nzt_x … … 115 114 ENDDO 116 115 ENDDO 117 !$acc end kernels118 116 !$OMP END PARALLEL 119 117 … … 165 163 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait ) 166 164 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 167 !$acc update host( f_inv )168 165 CALL MPI_ALLTOALL( f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, & 169 166 work(1,nzb_y,nxl_y,0), sendrecvcount_xy, MPI_REAL, & … … 175 172 !$OMP PARALLEL PRIVATE ( i, j, k, l, ys ) 176 173 !$OMP DO 177 !$acc data copyin( work )178 174 DO l = 0, pdims(2) - 1 179 175 ys = 0 + l * ( nyn_x - nys_x + 1 ) 180 !$acc kernels present( f_out, work )181 176 DO i = nxl_y, nxr_y 182 177 DO k = nzb_y, nzt_y … … 186 181 ENDDO 187 182 ENDDO 188 !$acc end kernels 189 ENDDO 190 !$acc end data 183 ENDDO 191 184 !$OMP END PARALLEL 192 185 #endif … … 198 191 !$OMP PARALLEL PRIVATE ( i, j, k ) 199 192 !$OMP DO 200 !$acc kernels present( f_inv, f_out )201 193 DO k = nzb_y, nzt_y 202 194 DO i = nxl_y, nxr_y … … 206 198 ENDDO 207 199 ENDDO 208 !$acc end kernels209 200 !$OMP END PARALLEL 210 201 … … 243 234 !$OMP PARALLEL PRIVATE ( i, j, k ) 244 235 !$OMP DO 245 !$acc kernels present( f_inv, f_out )246 236 DO k = 1, nz 247 237 DO i = nxl, nxr … … 251 241 ENDDO 252 242 ENDDO 253 !$acc end kernels254 243 !$OMP END PARALLEL 255 244 … … 304 293 !$OMP PARALLEL PRIVATE ( i, j, k, l, xs ) 305 294 !$OMP DO 306 !$acc data copyout( work )307 295 DO l = 0, pdims(1) - 1 308 296 xs = 0 + l * nnx 309 !$acc kernels present( f_in, work )310 297 DO k = nzb_x, nzt_x 311 298 DO i = xs, xs + nnx - 1 … … 315 302 ENDDO 316 303 ENDDO 317 !$acc end kernels 318 ENDDO 319 !$acc end data 304 ENDDO 320 305 !$OMP END PARALLEL 321 306 … … 327 312 f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, & 328 313 comm1dx, ierr ) 329 !$acc update device( f_inv )330 314 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 331 315 #endif … … 337 321 !$OMP PARALLEL PRIVATE ( i, j, k ) 338 322 !$OMP DO 339 !$acc kernels present( f_in, f_inv )340 323 DO i = nxl, nxr 341 324 DO j = nys, nyn … … 345 328 ENDDO 346 329 ENDDO 347 !$acc end kernels348 330 !$OMP END PARALLEL 349 331 … … 384 366 !$OMP PARALLEL PRIVATE ( i, j, k ) 385 367 !$OMP DO 386 !$acc kernels present( f_inv, f_out )387 368 DO i = 0, nx 388 369 DO k = nzb_x, nzt_x … … 392 373 ENDDO 393 374 ENDDO 394 !$acc end kernels395 375 !$OMP END PARALLEL 396 376 … … 442 422 !$OMP PARALLEL PRIVATE ( i, j, k, l, ys ) 443 423 !$OMP DO 444 !$acc data copyout( work )445 424 DO l = 0, pdims(2) - 1 446 425 ys = 0 + l * ( nyn_x - nys_x + 1 ) 447 !$acc kernels present( f_in, work )448 426 DO i = nxl_y, nxr_y 449 427 DO k = nzb_y, nzt_y … … 453 431 ENDDO 454 432 ENDDO 455 !$acc end kernels 456 ENDDO 457 !$acc end data 433 ENDDO 458 434 !$OMP END PARALLEL 459 435 … … 465 441 f_inv(nys_x,nzb_x,0), sendrecvcount_xy, MPI_REAL, & 466 442 comm1dy, ierr ) 467 !$acc update device( f_inv )468 443 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 469 444 #endif … … 475 450 !$OMP PARALLEL PRIVATE ( i, j, k ) 476 451 !$OMP DO 477 !$acc kernels present( f_in, f_inv )478 452 DO i = nxl_y, nxr_y 479 453 DO k = nzb_y, nzt_y … … 483 457 ENDDO 484 458 ENDDO 485 !$acc end kernels486 459 !$OMP END PARALLEL 487 460 … … 602 575 !$OMP PARALLEL PRIVATE ( i, j, k ) 603 576 !$OMP DO 604 !$acc kernels present( f_in, f_inv )605 577 DO j = 0, ny 606 578 DO k = nzb_y, nzt_y … … 610 582 ENDDO 611 583 ENDDO 612 !$acc end kernels613 584 !$OMP END PARALLEL 614 585 … … 660 631 !$OMP PARALLEL PRIVATE ( i, j, k ) 661 632 !$OMP DO 662 !$acc kernels present( f_inv, f_out )663 633 DO j = 0, ny 664 634 DO k = nzb_y, nzt_y … … 668 638 ENDDO 669 639 ENDDO 670 !$acc end kernels671 640 !$OMP END PARALLEL 672 641 … … 678 647 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait ) 679 648 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 680 !$acc update host( f_inv )681 649 CALL MPI_ALLTOALL( f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, & 682 650 work(nxl_z,1,nys_z,0), sendrecvcount_yz, MPI_REAL, & … … 688 656 !$OMP PARALLEL PRIVATE ( i, j, k, l, zs ) 689 657 !$OMP DO 690 !$acc data copyin( work )691 658 DO l = 0, pdims(1) - 1 692 659 zs = 1 + l * ( nzt_y - nzb_y + 1 ) 693 !$acc kernels present( f_out )694 660 DO j = nys_z, nyn_z 695 661 DO k = zs, zs + nzt_y - nzb_y … … 699 665 ENDDO 700 666 ENDDO 701 !$acc end kernels 702 ENDDO 703 !$acc end data 667 ENDDO 704 668 !$OMP END PARALLEL 705 669 #endif … … 738 702 !$OMP PARALLEL PRIVATE ( i, j, k ) 739 703 !$OMP DO 740 !$acc kernels present( f_in, f_inv )741 704 DO k = 1,nz 742 705 DO i = nxl, nxr … … 746 709 ENDDO 747 710 ENDDO 748 !$acc end kernels749 711 !$OMP END PARALLEL 750 712 … … 796 758 !$OMP PARALLEL PRIVATE ( i, j, k ) 797 759 !$OMP DO 798 !$acc kernels present( f_inv, f_out )799 760 DO k = 1, nz 800 761 DO i = nxl, nxr … … 804 765 ENDDO 805 766 ENDDO 806 !$acc end kernels807 767 !$OMP END PARALLEL 808 768 … … 814 774 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'start', cpu_log_nowait ) 815 775 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr ) 816 !$acc update host( f_inv )817 776 CALL MPI_ALLTOALL( f_inv(nys,nxl,1), sendrecvcount_zx, MPI_REAL, & 818 777 work(nys_x,1,nzb_x,0), sendrecvcount_zx, MPI_REAL, & … … 824 783 !$OMP PARALLEL PRIVATE ( i, j, k, l, xs ) 825 784 !$OMP DO 826 !$acc data copyin( work )827 785 DO l = 0, pdims(1) - 1 828 786 xs = 0 + l * nnx 829 !$acc kernels present( f_out )830 787 DO k = nzb_x, nzt_x 831 788 DO i = xs, xs + nnx - 1 … … 835 792 ENDDO 836 793 ENDDO 837 !$acc end kernels 838 ENDDO 839 !$acc end data 794 ENDDO 840 795 !$OMP END PARALLEL 841 796 #endif … … 878 833 !$OMP PARALLEL PRIVATE ( i, j, k ) 879 834 !$OMP DO 880 !$acc kernels present( f_inv, f_out )881 835 DO k = nzb_y, nzt_y 882 836 DO j = 0, ny … … 886 840 ENDDO 887 841 ENDDO 888 !$acc end kernels889 842 !$OMP END PARALLEL 890 843 … … 938 891 !$OMP PARALLEL PRIVATE ( i, j, k, l, zs ) 939 892 !$OMP DO 940 !$acc data copyout( work )941 893 DO l = 0, pdims(1) - 1 942 894 zs = 1 + l * ( nzt_y - nzb_y + 1 ) 943 !$acc kernels present( f_in, work )944 895 DO j = nys_z, nyn_z 945 896 DO k = zs, zs + nzt_y - nzb_y … … 949 900 ENDDO 950 901 ENDDO 951 !$acc end kernels 952 ENDDO 953 !$acc end data 902 ENDDO 954 903 !$OMP END PARALLEL 955 904 … … 961 910 f_inv(nxl_y,nzb_y,0), sendrecvcount_yz, MPI_REAL, & 962 911 comm1dx, ierr ) 963 !$acc update device( f_inv )964 912 CALL cpu_log( log_point_s(32), 'mpi_alltoall', 'stop' ) 965 913 #endif … … 970 918 !$OMP PARALLEL PRIVATE ( i, j, k ) 971 919 !$OMP DO 972 !$acc kernels present( f_in, f_inv )973 920 DO k = nzb_y, nzt_y 974 921 DO j = 0, ny … … 978 925 ENDDO 979 926 ENDDO 980 !$acc end kernels981 927 !$OMP END PARALLEL 982 928 -
TabularUnified palm/trunk/SOURCE/tridia_solver_mod.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ------------------ 22 ! 22 ! OpenACC directives removed 23 23 ! 24 24 ! Former revisions: … … 195 195 196 196 REAL(wp) :: ll(nxl_z:nxr_z,nys_z:nyn_z) !< 197 !$acc declare create( ll )198 197 199 198 … … 201 200 nnyh = ( ny + 1 ) / 2 202 201 203 !$acc kernels present( tric )204 202 DO j = nys_z, nyn_z 205 203 DO i = nxl_z, nxr_z … … 239 237 ENDDO 240 238 ENDDO 241 !$acc end kernels242 239 243 240 IF ( ibc_p_b == 1 ) THEN 244 !$acc kernels present( tric )245 241 DO j = nys_z, nyn_z 246 242 DO i = nxl_z, nxr_z … … 248 244 ENDDO 249 245 ENDDO 250 !$acc end kernels251 246 ENDIF 252 247 IF ( ibc_p_t == 1 ) THEN 253 !$acc kernels present( tric )254 248 DO j = nys_z, nyn_z 255 249 DO i = nxl_z, nxr_z … … 257 251 ENDDO 258 252 ENDDO 259 !$acc end kernels260 253 ENDIF 261 254 … … 288 281 289 282 REAL(wp), DIMENSION(nxl_z:nxr_z,nys_z:nyn_z,0:nz-1) :: ar1 !< 290 !$acc declare create( ar1 )291 283 292 284 ! 293 285 !-- Forward substitution 294 286 DO k = 0, nz - 1 295 !$acc kernels present( ar, tri )296 287 DO j = nys_z, nyn_z 297 288 DO i = nxl_z, nxr_z … … 305 296 ENDDO 306 297 ENDDO 307 !$acc end kernels308 298 ENDDO 309 299 … … 314 304 !-- the model domain. 315 305 DO k = nz-1, 0, -1 316 !$acc kernels present( ar, tri )317 306 DO j = nys_z, nyn_z 318 307 DO i = nxl_z, nxr_z … … 326 315 ENDDO 327 316 ENDDO 328 !$acc end kernels329 317 ENDDO 330 318 … … 335 323 IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN 336 324 IF ( nys_z == 0 .AND. nxl_z == 0 ) THEN 337 !$acc kernels loop present( ar )338 325 DO k = 1, nz 339 326 ar(nxl_z,nys_z,k) = 0.0_wp 340 327 ENDDO 341 !$acc end kernels loop342 328 ENDIF 343 329 ENDIF … … 372 358 373 359 REAL(wp), DIMENSION(nxl_z:nxr_z,nys_z:nyn_z,0:nz-1) :: ar1 !< 374 !$acc declare create( ar1 )375 360 376 361 ! 377 362 !-- Forward substitution 378 363 DO k = 0, nz - 1 379 !$acc kernels present( ar, tri )380 !$acc loop381 364 DO j = nys_z, nyn_z 382 365 DO i = nxl_z, nxr_z … … 390 373 ENDDO 391 374 ENDDO 392 !$acc end kernels393 375 ENDDO 394 376 … … 399 381 !-- the model domain. 400 382 DO k = nz-1, 0, -1 401 !$acc kernels present( ar, tri )402 !$acc loop403 383 DO j = nys_z, nyn_z 404 384 DO i = nxl_z, nxr_z … … 412 392 ENDDO 413 393 ENDDO 414 !$acc end kernels415 394 ENDDO 416 395 … … 421 400 IF ( ibc_p_b == 1 .AND. ibc_p_t == 1 ) THEN 422 401 IF ( nys_z == 0 .AND. nxl_z == 0 ) THEN 423 !$acc kernels loop present( ar )424 402 DO k = 1, nz 425 403 ar(nxl_z,nys_z,k) = 0.0_wp … … 451 429 ! 452 430 !-- Splitting 453 !$acc kernels present( tri, tric )454 !$acc loop455 431 DO j = nys_z, nyn_z 456 !$acc loop vector( 32 )457 432 DO i = nxl_z, nxr_z 458 433 tri(i,j,0,1) = tric(i,j,0) 459 434 ENDDO 460 435 ENDDO 461 !$acc end kernels462 436 463 437 DO k = 1, nz-1 464 !$acc kernels present( tri, tric ) 465 !$acc loop 466 DO j = nys_z, nyn_z 467 !$acc loop vector( 32 ) 438 DO j = nys_z, nyn_z 468 439 DO i = nxl_z, nxr_z 469 440 tri(i,j,k,2) = ddzuw(k,1) / tri(i,j,k-1,1) … … 471 442 ENDDO 472 443 ENDDO 473 !$acc end kernels474 444 ENDDO 475 445 -
TabularUnified palm/trunk/SOURCE/wall_fluxes.f90 ¶
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC versions of subroutines removed 23 23 ! 24 24 ! Former revisions: … … 90 90 91 91 PRIVATE 92 PUBLIC wall_fluxes, wall_fluxes_ acc, wall_fluxes_e, wall_fluxes_e_acc92 PUBLIC wall_fluxes, wall_fluxes_e 93 93 94 94 INTERFACE wall_fluxes … … 97 97 END INTERFACE wall_fluxes 98 98 99 INTERFACE wall_fluxes_acc100 MODULE PROCEDURE wall_fluxes_acc101 END INTERFACE wall_fluxes_acc102 103 99 INTERFACE wall_fluxes_e 104 100 MODULE PROCEDURE wall_fluxes_e … … 106 102 END INTERFACE wall_fluxes_e 107 103 108 INTERFACE wall_fluxes_e_acc109 MODULE PROCEDURE wall_fluxes_e_acc110 END INTERFACE wall_fluxes_e_acc111 112 104 CONTAINS 113 105 … … 299 291 ! Description: 300 292 ! ------------ 301 !> Call for all grid points - accelerator version 302 !------------------------------------------------------------------------------! 303 SUBROUTINE wall_fluxes_acc( wall_flux, a, b, c1, c2, nzb_uvw_inner, & 304 nzb_uvw_outer, wall ) 293 !> Call for all grid point i,j 294 !------------------------------------------------------------------------------! 295 SUBROUTINE wall_fluxes_ij( i, j, nzb_w, nzt_w, wall_flux, a, b, c1, c2 ) 305 296 306 297 USE arrays_3d, & … … 314 305 315 306 USE indices, & 316 ONLY: i_left, i_right, j_north, j_south, nxl, nxlg, nxr, nxrg, & 317 nyn, nyng, nys, nysg, nzb, nzt 307 ONLY: nzb, nzt 318 308 319 309 USE kinds … … 327 317 INTEGER(iwp) :: j !< 328 318 INTEGER(iwp) :: k !< 329 INTEGER(iwp) :: max_outer!<330 INTEGER(iwp) :: min_inner!<319 INTEGER(iwp) :: nzb_w !< 320 INTEGER(iwp) :: nzt_w !< 331 321 INTEGER(iwp) :: wall_index !< 332 333 INTEGER(iwp), &334 DIMENSION(nysg:nyng,nxlg:nxrg) :: &335 nzb_uvw_inner !<336 INTEGER(iwp), &337 DIMENSION(nysg:nyng,nxlg:nxrg) :: &338 nzb_uvw_outer !<339 322 340 323 REAL(wp) :: a !< … … 355 338 REAL(wp) :: wspts !< 356 339 357 REAL(wp), &358 DIMENSION(nysg:nyng,nxlg:nxrg) :: &359 wall !<360 361 REAL(wp), &362 DIMENSION(nzb:nzt+1,nys:nyn,nxl:nxr) :: &363 wall_flux !<364 365 366 zp = 0.5_wp * ( (a+c1) * dy + (b+c2) * dx )367 wall_flux = 0.0_wp368 wall_index = NINT( a+ 2*b + 3*c1 + 4*c2 )369 370 min_inner = MINVAL( nzb_uvw_inner(nys:nyn,nxl:nxr) ) + 1371 max_outer = MINVAL( nzb_uvw_outer(nys:nyn,nxl:nxr) )372 373 !$acc kernels present( hom, nzb_uvw_inner, nzb_uvw_outer, pt, rif_wall ) &374 !$acc present( u, v, w, wall, wall_flux, z0 )375 !$acc loop independent376 DO i = i_left, i_right377 DO j = j_south, j_north378 379 IF ( wall(j,i) /= 0.0_wp ) THEN380 !381 !-- All subsequent variables are computed for the respective382 !-- location where the respective flux is defined.383 !$acc loop independent384 DO k = nzb_uvw_inner(j,i)+1, nzb_uvw_outer(j,i)385 386 !387 !-- (1) Compute rifs, u_i, v_i, ws, pt' and w'pt'388 rifs = rif_wall(k,j,i,wall_index)389 390 u_i = a * u(k,j,i) + c1 * 0.25_wp * &391 ( u(k+1,j,i+1) + u(k+1,j,i) + u(k,j,i+1) + u(k,j,i) )392 393 v_i = b * v(k,j,i) + c2 * 0.25_wp * &394 ( v(k+1,j+1,i) + v(k+1,j,i) + v(k,j+1,i) + v(k,j,i) )395 396 ws = ( c1 + c2 ) * w(k,j,i) + 0.25_wp * ( &397 a * ( w(k-1,j,i-1) + w(k-1,j,i) + w(k,j,i-1) + w(k,j,i) ) &398 + b * ( w(k-1,j-1,i) + w(k-1,j,i) + w(k,j-1,i) + w(k,j,i) ) &399 )400 pt_i = 0.5_wp * ( pt(k,j,i) + a * pt(k,j,i-1) + &401 b * pt(k,j-1,i) + ( c1 + c2 ) * pt(k+1,j,i) )402 403 pts = pt_i - hom(k,1,4,0)404 wspts = ws * pts405 406 !407 !-- (2) Compute wall-parallel absolute velocity vel_total408 vel_total = SQRT( ws**2 + (a+c1) * u_i**2 + (b+c2) * v_i**2 )409 410 !411 !-- (3) Compute wall friction velocity us_wall412 IF ( rifs >= 0.0_wp ) THEN413 414 !415 !-- Stable stratification (and neutral)416 us_wall = kappa * vel_total / ( LOG( zp / z0(j,i) ) + &417 5.0_wp * rifs * ( zp - z0(j,i) ) / zp &418 )419 ELSE420 421 !422 !-- Unstable stratification423 h1 = SQRT( SQRT( 1.0_wp - 16.0_wp * rifs ) )424 h2 = SQRT( SQRT( 1.0_wp - 16.0_wp * rifs * z0(j,i) / zp ) )425 426 us_wall = kappa * vel_total / ( &427 LOG( zp / z0(j,i) ) - &428 LOG( ( 1.0_wp + h1 )**2 * ( 1.0_wp + h1**2 ) / ( &429 ( 1.0_wp + h2 )**2 * ( 1.0_wp + h2**2 ) ) ) +&430 2.0_wp * ( ATAN( h1 ) - ATAN( h2 ) ) &431 )432 ENDIF433 434 !435 !-- (4) Compute zp/L (corresponds to neutral Richardson flux436 !-- number rifs)437 rifs = -1.0_wp * zp * kappa * g * wspts / &438 ( pt_i * ( us_wall**3 + 1E-30 ) )439 440 !441 !-- Limit the value range of the Richardson numbers.442 !-- This is necessary for very small velocities (u,w --> 0),443 !-- because the absolute value of rif can then become very444 !-- large, which in consequence would result in very large445 !-- shear stresses and very small momentum fluxes (both are446 !-- generally unrealistic).447 IF ( rifs < zeta_min ) rifs = zeta_min448 IF ( rifs > zeta_max ) rifs = zeta_max449 450 !451 !-- (5) Compute wall_flux (u'v', v'u', w'v', or w'u')452 IF ( rifs >= 0.0_wp ) THEN453 454 !455 !-- Stable stratification (and neutral)456 wall_flux(k,j,i) = kappa * &457 ( a*u(k,j,i) + b*v(k,j,i) + (c1+c2)*w(k,j,i) ) / &458 ( LOG( zp / z0(j,i) ) + &459 5.0_wp * rifs * ( zp - z0(j,i) ) / zp &460 )461 ELSE462 463 !464 !-- Unstable stratification465 h1 = SQRT( SQRT( 1.0_wp - 16.0_wp * rifs ) )466 h2 = SQRT( SQRT( 1.0_wp - 16.0_wp * rifs * z0(j,i) / zp ) )467 468 wall_flux(k,j,i) = kappa * &469 ( a*u(k,j,i) + b*v(k,j,i) + (c1+c2)*w(k,j,i) ) / ( &470 LOG( zp / z0(j,i) ) - &471 LOG( ( 1.0_wp + h1 )**2 * ( 1.0_wp + h1**2 ) / ( &472 ( 1.0_wp + h2 )**2 * ( 1.0_wp + h2**2 ) ) ) +&473 2.0_wp * ( ATAN( h1 ) - ATAN( h2 ) ) &474 )475 ENDIF476 wall_flux(k,j,i) = -wall_flux(k,j,i) * us_wall477 478 !479 !-- store rifs for next time step480 rif_wall(k,j,i,wall_index) = rifs481 482 ENDDO483 484 ENDIF485 486 ENDDO487 ENDDO488 !$acc end kernels489 490 END SUBROUTINE wall_fluxes_acc491 492 493 !------------------------------------------------------------------------------!494 ! Description:495 ! ------------496 !> Call for all grid point i,j497 !------------------------------------------------------------------------------!498 SUBROUTINE wall_fluxes_ij( i, j, nzb_w, nzt_w, wall_flux, a, b, c1, c2 )499 500 USE arrays_3d, &501 ONLY: rif_wall, pt, u, v, w, z0502 503 USE control_parameters, &504 ONLY: g, kappa, zeta_max, zeta_min505 506 USE grid_variables, &507 ONLY: dx, dy508 509 USE indices, &510 ONLY: nzb, nzt511 512 USE kinds513 514 USE statistics, &515 ONLY: hom516 517 IMPLICIT NONE518 519 INTEGER(iwp) :: i !<520 INTEGER(iwp) :: j !<521 INTEGER(iwp) :: k !<522 INTEGER(iwp) :: nzb_w !<523 INTEGER(iwp) :: nzt_w !<524 INTEGER(iwp) :: wall_index !<525 526 REAL(wp) :: a !<527 REAL(wp) :: b !<528 REAL(wp) :: c1 !<529 REAL(wp) :: c2 !<530 REAL(wp) :: h1 !<531 REAL(wp) :: h2 !<532 REAL(wp) :: zp !<533 REAL(wp) :: pts !<534 REAL(wp) :: pt_i !<535 REAL(wp) :: rifs !<536 REAL(wp) :: u_i !<537 REAL(wp) :: v_i !<538 REAL(wp) :: us_wall !<539 REAL(wp) :: vel_total !<540 REAL(wp) :: ws !<541 REAL(wp) :: wspts !<542 543 340 REAL(wp), DIMENSION(nzb:nzt+1) :: wall_flux !< 544 341 … … 811 608 ! Description: 812 609 ! ------------ 813 !> Call for all grid points - accelerator version 814 !> Calculates momentum fluxes at vertical walls for routine production_e 815 !> assuming Monin-Obukhov similarity. 816 !> Indices: usvs a=1, vsus b=1, wsvs c1=1, wsus c2=1 (other=0). 817 !------------------------------------------------------------------------------! 818 SUBROUTINE wall_fluxes_e_acc( wall_flux, a, b, c1, c2, wall ) 819 610 !> Call for grid point i,j 611 !------------------------------------------------------------------------------! 612 SUBROUTINE wall_fluxes_e_ij( i, j, nzb_w, nzt_w, wall_flux, a, b, c1, c2 ) 820 613 821 614 USE arrays_3d, & … … 829 622 830 623 USE indices, & 831 ONLY: i_left, i_right, j_north, j_south, nxl, nxlg, nxr, nxrg, & 832 nyn, nyng, nys, nysg, nzb, nzb_diff_s_inner, & 833 nzb_diff_s_outer, nzt 624 ONLY: nzb, nzt 834 625 835 626 USE kinds … … 841 632 INTEGER(iwp) :: k !< 842 633 INTEGER(iwp) :: kk !< 843 INTEGER(iwp) :: max_outer!<844 INTEGER(iwp) :: min_inner!<634 INTEGER(iwp) :: nzb_w !< 635 INTEGER(iwp) :: nzt_w !< 845 636 INTEGER(iwp) :: wall_index !< 846 637 … … 860 651 REAL(wp) :: rifs !< 861 652 862 REAL(wp), &863 DIMENSION(nysg:nyng,nxlg:nxrg) :: &864 wall !<865 866 REAL(wp), &867 DIMENSION(nzb:nzt+1,nys:nyn,nxl:nxr) :: &868 wall_flux !<869 870 871 zp = 0.5_wp * ( (a+c1) * dy + (b+c2) * dx )872 wall_flux = 0.0_wp873 wall_index = NINT( a+ 2*b + 3*c1 + 4*c2 )874 875 min_inner = MINVAL( nzb_diff_s_inner(nys:nyn,nxl:nxr) ) - 1876 max_outer = MAXVAL( nzb_diff_s_outer(nys:nyn,nxl:nxr) ) - 2877 878 !$acc kernels present( nzb_diff_s_inner, nzb_diff_s_outer, rif_wall ) &879 !$acc present( u, v, w, wall, wall_flux, z0 )880 DO i = i_left, i_right881 DO j = j_south, j_north882 DO k = min_inner, max_outer883 !884 !-- All subsequent variables are computed for scalar locations885 IF ( k >= nzb_diff_s_inner(j,i)-1 .AND. &886 k <= nzb_diff_s_outer(j,i)-2 .AND. &887 wall(j,i) /= 0.0_wp ) THEN888 !889 !-- (1) Compute rifs, u_i, v_i, and ws890 IF ( k == nzb_diff_s_inner(j,i)-1 ) THEN891 kk = nzb_diff_s_inner(j,i)-1892 ELSE893 kk = k-1894 ENDIF895 rifs = 0.5_wp * ( rif_wall(k,j,i,wall_index) + &896 a * rif_wall(k,j,i+1,1) + &897 b * rif_wall(k,j+1,i,2) + &898 c1 * rif_wall(kk,j,i,3) + &899 c2 * rif_wall(kk,j,i,4) &900 )901 902 u_i = 0.5_wp * ( u(k,j,i) + u(k,j,i+1) )903 v_i = 0.5_wp * ( v(k,j,i) + v(k,j+1,i) )904 ws = 0.5_wp * ( w(k,j,i) + w(k-1,j,i) )905 !906 !-- (2) Compute wall-parallel absolute velocity vel_total and907 !-- interpolate appropriate velocity component vel_zp.908 vel_total = SQRT( ws**2 + (a+c1) * u_i**2 + (b+c2) * v_i**2 )909 vel_zp = 0.5_wp * ( a * u_i + b * v_i + (c1+c2) * ws )910 !911 !-- (3) Compute wall friction velocity us_wall912 IF ( rifs >= 0.0_wp ) THEN913 914 !915 !-- Stable stratification (and neutral)916 us_wall = kappa * vel_total / ( LOG( zp / z0(j,i) ) + &917 5.0_wp * rifs * ( zp - z0(j,i) ) / zp &918 )919 ELSE920 921 !922 !-- Unstable stratification923 h1 = SQRT( SQRT( 1.0_wp - 16.0_wp * rifs ) )924 h2 = SQRT( SQRT( 1.0_wp - 16.0_wp * rifs * z0(j,i) / zp ) )925 926 us_wall = kappa * vel_total / ( &927 LOG( zp / z0(j,i) ) - &928 LOG( ( 1.0_wp + h1 )**2 * ( 1.0_wp + h1**2 ) / ( &929 ( 1.0_wp + h2 )**2 * ( 1.0_wp + h2**2 ) ) ) +&930 2.0_wp * ( ATAN( h1 ) - ATAN( h2 ) ) &931 )932 ENDIF933 934 !935 !-- Skip step (4) of wall_fluxes, because here rifs is already936 !-- available from (1)937 !938 !-- (5) Compute wall_flux (u'v', v'u', w'v', or w'u')939 940 IF ( rifs >= 0.0_wp ) THEN941 942 !943 !-- Stable stratification (and neutral)944 wall_flux(k,j,i) = kappa * vel_zp / ( &945 LOG( zp/z0(j,i) ) + &946 5.0_wp * rifs * ( zp-z0(j,i) ) / zp &947 )948 ELSE949 950 !951 !-- Unstable stratification952 h1 = SQRT( SQRT( 1.0_wp - 16.0_wp * rifs ) )953 h2 = SQRT( SQRT( 1.0_wp - 16.0_wp * rifs * z0(j,i) / zp ) )954 955 wall_flux(k,j,i) = kappa * vel_zp / ( &956 LOG( zp / z0(j,i) ) - &957 LOG( ( 1.0_wp + h1 )**2 * ( 1.0_wp + h1**2 ) / ( &958 ( 1.0_wp + h2 )**2 * ( 1.0_wp + h2**2 ) ) ) +&959 2.0_wp * ( ATAN( h1 ) - ATAN( h2 ) ) &960 )961 ENDIF962 wall_flux(k,j,i) = - wall_flux(k,j,i) * us_wall963 964 ENDIF965 966 ENDDO967 ENDDO968 ENDDO969 !$acc end kernels970 971 END SUBROUTINE wall_fluxes_e_acc972 973 974 !------------------------------------------------------------------------------!975 ! Description:976 ! ------------977 !> Call for grid point i,j978 !------------------------------------------------------------------------------!979 SUBROUTINE wall_fluxes_e_ij( i, j, nzb_w, nzt_w, wall_flux, a, b, c1, c2 )980 981 USE arrays_3d, &982 ONLY: rif_wall, u, v, w, z0983 984 USE control_parameters, &985 ONLY: kappa986 987 USE grid_variables, &988 ONLY: dx, dy989 990 USE indices, &991 ONLY: nzb, nzt992 993 USE kinds994 995 IMPLICIT NONE996 997 INTEGER(iwp) :: i !<998 INTEGER(iwp) :: j !<999 INTEGER(iwp) :: k !<1000 INTEGER(iwp) :: kk !<1001 INTEGER(iwp) :: nzb_w !<1002 INTEGER(iwp) :: nzt_w !<1003 INTEGER(iwp) :: wall_index !<1004 1005 REAL(wp) :: a !<1006 REAL(wp) :: b !<1007 REAL(wp) :: c1 !<1008 REAL(wp) :: c2 !<1009 REAL(wp) :: h1 !<1010 REAL(wp) :: h2 !<1011 REAL(wp) :: u_i !<1012 REAL(wp) :: v_i !<1013 REAL(wp) :: us_wall !<1014 REAL(wp) :: vel_total !<1015 REAL(wp) :: vel_zp !<1016 REAL(wp) :: ws !<1017 REAL(wp) :: zp !<1018 REAL(wp) :: rifs !<1019 1020 653 REAL(wp), DIMENSION(nzb:nzt+1) :: wall_flux !< 1021 654
Note: See TracChangeset
for help on using the changeset viewer.