Changeset 2118
- Timestamp:
- Jan 17, 2017 4:38:49 PM (6 years ago)
- Location:
- palm/trunk/SOURCE
- Files:
-
- 1 deleted
- 32 edited
Legend:
- Unmodified
- Added
- Removed
-
palm/trunk/SOURCE/Makefile
r2051 r2118 20 20 # Current revisions: 21 21 # ------------------ 22 # 22 # -cuda_fft_interfaces_mod 23 23 # 24 24 # Former revisions: … … 312 312 check_for_restart.f90 check_open.f90 check_parameters.f90 \ 313 313 close_file.f90 compute_vpt.f90 coriolis.f90 cpulog_mod.f90 \ 314 cuda_fft_interfaces_mod.f90data_log.f90 data_output_dvrp.f90 \314 data_log.f90 data_output_dvrp.f90 \ 315 315 data_output_mask.f90 data_output_profiles.f90 \ 316 316 data_output_ptseries.f90 data_output_spectra.f90 data_output_flight.f90\ … … 426 426 cpulog_mod.o: modules.o mod_kinds.o 427 427 cpu_statistics.o: modules.o mod_kinds.o 428 cuda_fft_interfaces_mod.o: cuda_fft_interfaces_mod.f90 modules.o mod_kinds.o429 428 data_log.o: modules.o mod_kinds.o 430 429 data_output_dvrp.o: modules.o cpulog_mod.o mod_kinds.o … … 456 455 exchange_horiz.o: modules.o cpulog_mod.o mod_kinds.o 457 456 exchange_horiz_2d.o: modules.o cpulog_mod.o mod_kinds.o pmc_interface_mod.o 458 fft_xy_mod.o: cuda_fft_interfaces_mod.omodules.o mod_kinds.o singleton_mod.o temperton_fft_mod.o457 fft_xy_mod.o: modules.o mod_kinds.o singleton_mod.o temperton_fft_mod.o 459 458 flow_statistics.o: modules.o cpulog_mod.o mod_kinds.o land_surface_model_mod.o \ 460 459 netcdf_interface_mod.o radiation_model_mod.o -
palm/trunk/SOURCE/advec_ws.f90
r2101 r2118 20 20 ! Current revisions: 21 21 ! ------------------ 22 ! 22 ! OpenACC version of subroutines removed 23 23 ! 24 24 ! Former revisions: … … 181 181 182 182 PRIVATE 183 PUBLIC advec_s_ws, advec_s_ws_acc, advec_u_ws, advec_u_ws_acc, & 184 advec_v_ws, advec_v_ws_acc, advec_w_ws, advec_w_ws_acc, & 185 ws_init, ws_init_flags, ws_statistics 183 PUBLIC advec_s_ws, advec_u_ws, advec_v_ws, advec_w_ws, ws_init, & 184 ws_init_flags, ws_statistics 186 185 187 186 INTERFACE ws_init … … 207 206 END INTERFACE advec_u_ws 208 207 209 INTERFACE advec_u_ws_acc210 MODULE PROCEDURE advec_u_ws_acc211 END INTERFACE advec_u_ws_acc212 213 208 INTERFACE advec_v_ws 214 209 MODULE PROCEDURE advec_v_ws … … 216 211 END INTERFACE advec_v_ws 217 212 218 INTERFACE advec_v_ws_acc219 MODULE PROCEDURE advec_v_ws_acc220 END INTERFACE advec_v_ws_acc221 222 213 INTERFACE advec_w_ws 223 214 MODULE PROCEDURE advec_w_ws 224 215 MODULE PROCEDURE advec_w_ws_ij 225 216 END INTERFACE advec_w_ws 226 227 INTERFACE advec_w_ws_acc228 MODULE PROCEDURE advec_w_ws_acc229 END INTERFACE advec_w_ws_acc230 217 231 218 CONTAINS … … 4029 4016 4030 4017 4031 !------------------------------------------------------------------------------!4032 ! Description:4033 ! ------------4034 !> Scalar advection - Call for all grid points - accelerator version4035 !------------------------------------------------------------------------------!4036 SUBROUTINE advec_s_ws_acc ( sk, sk_char )4037 4038 USE arrays_3d, &4039 ONLY: ddzw, drho_air, tend, u, v, w, rho_air, rho_air_zw4040 4041 USE constants, &4042 ONLY: adv_sca_1, adv_sca_3, adv_sca_54043 4044 USE control_parameters, &4045 ONLY: intermediate_timestep_count, monotonic_adjustment, u_gtrans,&4046 v_gtrans4047 4048 USE grid_variables, &4049 ONLY: ddx, ddy4050 4051 USE indices, &4052 ONLY: i_left, i_right, j_north, j_south, nxlg, nxrg, nyng, nysg, &4053 nzb, nzb_max, nzt, wall_flags_04054 4055 USE kinds4056 4057 ! USE statistics, &4058 ! ONLY: sums_wspts_ws_l, sums_wsqs_ws_l, sums_wssas_ws_l, &4059 ! sums_wsqrs_ws_l, sums_wsnrs_ws_l, weight_substep4060 4061 IMPLICIT NONE4062 4063 CHARACTER (LEN = *), INTENT(IN) :: sk_char !<4064 4065 INTEGER(iwp) :: i !<4066 INTEGER(iwp) :: ibit0 !<4067 INTEGER(iwp) :: ibit1 !<4068 INTEGER(iwp) :: ibit2 !<4069 INTEGER(iwp) :: ibit3 !<4070 INTEGER(iwp) :: ibit4 !<4071 INTEGER(iwp) :: ibit5 !<4072 INTEGER(iwp) :: ibit6 !<4073 INTEGER(iwp) :: ibit7 !<4074 INTEGER(iwp) :: ibit8 !<4075 INTEGER(iwp) :: j !<4076 INTEGER(iwp) :: k !<4077 INTEGER(iwp) :: k_mm !<4078 INTEGER(iwp) :: k_mmm !<4079 INTEGER(iwp) :: k_pp !<4080 INTEGER(iwp) :: k_ppp !<4081 INTEGER(iwp) :: tn = 0 !<4082 4083 REAL(wp) :: diss_d !<4084 REAL(wp) :: diss_l !<4085 REAL(wp) :: diss_n !<4086 REAL(wp) :: diss_r !<4087 REAL(wp) :: diss_s !<4088 REAL(wp) :: diss_t !<4089 REAL(wp) :: div !<4090 REAL(wp) :: flux_d !<4091 REAL(wp) :: flux_l !<4092 REAL(wp) :: flux_n !<4093 REAL(wp) :: flux_r !<4094 REAL(wp) :: flux_s !<4095 REAL(wp) :: flux_t !<4096 REAL(wp) :: fd_1 !<4097 REAL(wp) :: fl_1 !<4098 REAL(wp) :: fn_1 !<4099 REAL(wp) :: fr_1 !<4100 REAL(wp) :: fs_1 !<4101 REAL(wp) :: ft_1 !<4102 REAL(wp) :: phi_d !<4103 REAL(wp) :: phi_l !<4104 REAL(wp) :: phi_n !<4105 REAL(wp) :: phi_r !<4106 REAL(wp) :: phi_s !<4107 REAL(wp) :: phi_t !<4108 REAL(wp) :: rd !<4109 REAL(wp) :: rl !<4110 REAL(wp) :: rn !<4111 REAL(wp) :: rr !<4112 REAL(wp) :: rs !<4113 REAL(wp) :: rt !<4114 REAL(wp) :: u_comp !<4115 REAL(wp) :: v_comp !<4116 4117 REAL(wp), INTENT(IN), DIMENSION(nzb:nzt+1,nysg:nyng,nxlg:nxrg) :: sk !<4118 4119 !4120 !-- Computation of fluxes and tendency terms4121 !$acc kernels present( ddzw, sk, tend, u, v, w, wall_flags_0 )4122 DO i = i_left, i_right4123 DO j = j_south, j_north4124 DO k = nzb+1, nzt4125 4126 ibit2 = IBITS(wall_flags_0(k,j,i-1),2,1)4127 ibit1 = IBITS(wall_flags_0(k,j,i-1),1,1)4128 ibit0 = IBITS(wall_flags_0(k,j,i-1),0,1)4129 4130 u_comp = u(k,j,i) - u_gtrans4131 flux_l = u_comp * ( &4132 ( 37.0_wp * ibit2 * adv_sca_5 &4133 + 7.0_wp * ibit1 * adv_sca_3 &4134 + ibit0 * adv_sca_1 &4135 ) * &4136 ( sk(k,j,i) + sk(k,j,i-1) ) &4137 - ( 8.0_wp * ibit2 * adv_sca_5 &4138 + ibit1 * adv_sca_3 &4139 ) * &4140 ( sk(k,j,i+1) + sk(k,j,i-2) ) &4141 + ( ibit2 * adv_sca_5 &4142 ) * &4143 ( sk(k,j,i+2) + sk(k,j,i-3) ) &4144 )4145 4146 diss_l = -ABS( u_comp ) * ( &4147 ( 10.0_wp * ibit2 * adv_sca_5 &4148 + 3.0_wp * ibit1 * adv_sca_3 &4149 + ibit0 * adv_sca_1 &4150 ) * &4151 ( sk(k,j,i) - sk(k,j,i-1) ) &4152 - ( 5.0_wp * ibit2 * adv_sca_5 &4153 + ibit1 * adv_sca_3 &4154 ) * &4155 ( sk(k,j,i+1) - sk(k,j,i-2) ) &4156 + ( ibit2 * adv_sca_5 &4157 ) * &4158 ( sk(k,j,i+2) - sk(k,j,i-3) ) &4159 )4160 4161 ibit2 = IBITS(wall_flags_0(k,j,i),2,1)4162 ibit1 = IBITS(wall_flags_0(k,j,i),1,1)4163 ibit0 = IBITS(wall_flags_0(k,j,i),0,1)4164 4165 u_comp = u(k,j,i+1) - u_gtrans4166 flux_r = u_comp * ( &4167 ( 37.0_wp * ibit2 * adv_sca_5 &4168 + 7.0_wp * ibit1 * adv_sca_3 &4169 + ibit0 * adv_sca_1 &4170 ) * &4171 ( sk(k,j,i+1) + sk(k,j,i) ) &4172 - ( 8.0_wp * ibit2 * adv_sca_5 &4173 + ibit1 * adv_sca_3 &4174 ) * &4175 ( sk(k,j,i+2) + sk(k,j,i-1) ) &4176 + ( ibit2 * adv_sca_5 &4177 ) * &4178 ( sk(k,j,i+3) + sk(k,j,i-2) ) &4179 )4180 4181 diss_r = -ABS( u_comp ) * ( &4182 ( 10.0_wp * ibit2 * adv_sca_5 &4183 + 3.0_wp * ibit1 * adv_sca_3 &4184 + ibit0 * adv_sca_1 &4185 ) * &4186 ( sk(k,j,i+1) - sk(k,j,i) ) &4187 - ( 5.0_wp * ibit2 * adv_sca_5 &4188 + ibit1 * adv_sca_3 &4189 ) * &4190 ( sk(k,j,i+2) - sk(k,j,i-1) ) &4191 + ( ibit2 * adv_sca_5 &4192 ) * &4193 ( sk(k,j,i+3) - sk(k,j,i-2) ) &4194 )4195 4196 ibit5 = IBITS(wall_flags_0(k,j-1,i),5,1)4197 ibit4 = IBITS(wall_flags_0(k,j-1,i),4,1)4198 ibit3 = IBITS(wall_flags_0(k,j-1,i),3,1)4199 4200 v_comp = v(k,j,i) - v_gtrans4201 flux_s = v_comp * ( &4202 ( 37.0_wp * ibit5 * adv_sca_5 &4203 + 7.0_wp * ibit4 * adv_sca_3 &4204 + ibit3 * adv_sca_1 &4205 ) * &4206 ( sk(k,j,i) + sk(k,j-1,i) ) &4207 - ( 8.0_wp * ibit5 * adv_sca_5 &4208 + ibit4 * adv_sca_3 &4209 ) * &4210 ( sk(k,j+1,i) + sk(k,j-2,i) ) &4211 + ( ibit5 * adv_sca_5 &4212 ) * &4213 ( sk(k,j+2,i) + sk(k,j-3,i) ) &4214 )4215 4216 diss_s = -ABS( v_comp ) * ( &4217 ( 10.0_wp * ibit5 * adv_sca_5 &4218 + 3.0_wp * ibit4 * adv_sca_3 &4219 + ibit3 * adv_sca_1 &4220 ) * &4221 ( sk(k,j,i) - sk(k,j-1,i) ) &4222 - ( 5.0_wp * ibit5 * adv_sca_5 &4223 + ibit4 * adv_sca_3 &4224 ) * &4225 ( sk(k,j+1,i) - sk(k,j-2,i) ) &4226 + ( ibit5 * adv_sca_5 &4227 ) * &4228 ( sk(k,j+2,i) - sk(k,j-3,i) ) &4229 )4230 4231 ibit5 = IBITS(wall_flags_0(k,j,i),5,1)4232 ibit4 = IBITS(wall_flags_0(k,j,i),4,1)4233 ibit3 = IBITS(wall_flags_0(k,j,i),3,1)4234 4235 v_comp = v(k,j+1,i) - v_gtrans4236 flux_n = v_comp * ( &4237 ( 37.0_wp * ibit5 * adv_sca_5 &4238 + 7.0_wp * ibit4 * adv_sca_3 &4239 + ibit3 * adv_sca_1 &4240 ) * &4241 ( sk(k,j+1,i) + sk(k,j,i) ) &4242 - ( 8.0_wp * ibit5 * adv_sca_5 &4243 + ibit4 * adv_sca_3 &4244 ) * &4245 ( sk(k,j+2,i) + sk(k,j-1,i) ) &4246 + ( ibit5 * adv_sca_5 &4247 ) * &4248 ( sk(k,j+3,i) + sk(k,j-2,i) ) &4249 )4250 4251 diss_n = -ABS( v_comp ) * ( &4252 ( 10.0_wp * ibit5 * adv_sca_5 &4253 + 3.0_wp * ibit4 * adv_sca_3 &4254 + ibit3 * adv_sca_1 &4255 ) * &4256 ( sk(k,j+1,i) - sk(k,j,i) ) &4257 - ( 5.0_wp * ibit5 * adv_sca_5 &4258 + ibit4 * adv_sca_3 &4259 ) * &4260 ( sk(k,j+2,i) - sk(k,j-1,i) ) &4261 + ( ibit5 * adv_sca_5 &4262 ) * &4263 ( sk(k,j+3,i) - sk(k,j-2,i) ) &4264 )4265 4266 !4267 !-- indizes k_m, k_mm, ... should be known at these point4268 ibit8 = IBITS(wall_flags_0(k-1,j,i),8,1)4269 ibit7 = IBITS(wall_flags_0(k-1,j,i),7,1)4270 ibit6 = IBITS(wall_flags_0(k-1,j,i),6,1)4271 4272 k_pp = k + 2 * ibit84273 k_mm = k - 2 * ( ibit7 + ibit8 )4274 k_mmm = k - 3 * ibit84275 4276 flux_d = w(k-1,j,i) * ( &4277 ( 37.0_wp * ibit8 * adv_sca_5 &4278 + 7.0_wp * ibit7 * adv_sca_3 &4279 + ibit6 * adv_sca_1 &4280 ) * &4281 ( sk(k,j,i) + sk(k-1,j,i) ) &4282 - ( 8.0_wp * ibit8 * adv_sca_5 &4283 + ibit7 * adv_sca_3 &4284 ) * &4285 ( sk(k+1,j,i) + sk(k_mm,j,i) ) &4286 + ( ibit8 * adv_sca_5 &4287 ) * ( sk(k_pp,j,i)+ sk(k_mmm,j,i) ) &4288 )4289 4290 diss_d = -ABS( w(k-1,j,i) ) * ( &4291 ( 10.0_wp * ibit8 * adv_sca_5 &4292 + 3.0_wp * ibit7 * adv_sca_3 &4293 + ibit6 * adv_sca_1 &4294 ) * &4295 ( sk(k,j,i) - sk(k-1,j,i) ) &4296 - ( 5.0_wp * ibit8 * adv_sca_5 &4297 + ibit7 * adv_sca_3 &4298 ) * &4299 ( sk(k+1,j,i) - sk(k_mm,j,i) ) &4300 + ( ibit8 * adv_sca_5 &4301 ) * &4302 ( sk(k_pp,j,i) - sk(k_mmm,j,i) ) &4303 )4304 4305 ibit8 = IBITS(wall_flags_0(k,j,i),8,1)4306 ibit7 = IBITS(wall_flags_0(k,j,i),7,1)4307 ibit6 = IBITS(wall_flags_0(k,j,i),6,1)4308 4309 k_ppp = k + 3 * ibit84310 k_pp = k + 2 * ( 1 - ibit6 )4311 k_mm = k - 2 * ibit84312 4313 flux_t = w(k,j,i) * rho_air_zw(k) * ( &4314 ( 37.0_wp * ibit8 * adv_sca_5 &4315 + 7.0_wp * ibit7 * adv_sca_3 &4316 + ibit6 * adv_sca_1 &4317 ) * &4318 ( sk(k+1,j,i) + sk(k,j,i) ) &4319 - ( 8.0_wp * ibit8 * adv_sca_5 &4320 + ibit7 * adv_sca_3 &4321 ) * &4322 ( sk(k_pp,j,i) + sk(k-1,j,i) ) &4323 + ( ibit8 * adv_sca_5 &4324 ) * ( sk(k_ppp,j,i)+ sk(k_mm,j,i) ) &4325 )4326 4327 diss_t = -ABS( w(k,j,i) ) * rho_air_zw(k) * ( &4328 ( 10.0_wp * ibit8 * adv_sca_5 &4329 + 3.0_wp * ibit7 * adv_sca_3 &4330 + ibit6 * adv_sca_1 &4331 ) * &4332 ( sk(k+1,j,i) - sk(k,j,i) ) &4333 - ( 5.0_wp * ibit8 * adv_sca_5 &4334 + ibit7 * adv_sca_3 &4335 ) * &4336 ( sk(k_pp,j,i) - sk(k-1,j,i) ) &4337 + ( ibit8 * adv_sca_5 &4338 ) * &4339 ( sk(k_ppp,j,i) - sk(k_mm,j,i) ) &4340 )4341 !4342 !-- Apply monotonic adjustment.4343 IF ( monotonic_adjustment ) THEN4344 !4345 !-- At first, calculate first order fluxes.4346 u_comp = u(k,j,i) - u_gtrans4347 fl_1 = ( u_comp * ( sk(k,j,i) + sk(k,j,i-1) ) &4348 -ABS( u_comp ) * ( sk(k,j,i) - sk(k,j,i-1) ) &4349 ) * adv_sca_14350 4351 u_comp = u(k,j,i+1) - u_gtrans4352 fr_1 = ( u_comp * ( sk(k,j,i+1) + sk(k,j,i) ) &4353 -ABS( u_comp ) * ( sk(k,j,i+1) - sk(k,j,i) ) &4354 ) * adv_sca_14355 4356 v_comp = v(k,j,i) - v_gtrans4357 fs_1 = ( v_comp * ( sk(k,j,i) + sk(k,j-1,i) ) &4358 -ABS( v_comp ) * ( sk(k,j,i) - sk(k,j-1,i) ) &4359 ) * adv_sca_14360 4361 v_comp = v(k,j+1,i) - v_gtrans4362 fn_1 = ( v_comp * ( sk(k,j+1,i) + sk(k,j,i) ) &4363 -ABS( v_comp ) * ( sk(k,j+1,i) - sk(k,j,i) ) &4364 ) * adv_sca_14365 4366 fd_1 = ( w(k-1,j,i) * ( sk(k,j,i) + sk(k-1,j,i) ) &4367 -ABS( w(k-1,j,i) ) * ( sk(k,j,i) - sk(k-1,j,i) ) &4368 ) * adv_sca_1 * rho_air_zw(k)4369 4370 ft_1 = ( w(k,j,i) * ( sk(k+1,j,i) + sk(k,j,i) ) &4371 -ABS( w(k,j,i) ) * ( sk(k+1,j,i) - sk(k,j,i) ) &4372 ) * adv_sca_1 * rho_air_zw(k)4373 !4374 !-- Calculate ratio of upwind gradients. Note, Min/Max is just4375 !-- to avoid if statements.4376 rl = ( MAX( 0.0_wp, u(k,j,i) - u_gtrans ) * &4377 ABS( ( sk(k,j,i-1) - sk(k,j,i-2) ) /&4378 ( sk(k,j,i) - sk(k,j,i-1) + 1E-20_wp ) &4379 ) + &4380 MIN( 0.0_wp, u(k,j,i) - u_gtrans ) * &4381 ABS( ( sk(k,j,i) - sk(k,j,i+1) ) /&4382 ( sk(k,j,i-1) - sk(k,j,i) + 1E-20_wp ) &4383 ) &4384 ) / ABS( u(k,j,i) - u_gtrans + 1E-20_wp )4385 4386 rr = ( MAX( 0.0_wp, u(k,j,i+1) - u_gtrans ) * &4387 ABS( ( sk(k,j,i) - sk(k,j,i-1) ) /&4388 ( sk(k,j,i+1) - sk(k,j,i) + 1E-20_wp ) &4389 ) + &4390 MIN( 0.0_wp, u(k,j,i+1) - u_gtrans ) * &4391 ABS( ( sk(k,j,i+1) - sk(k,j,i+2) ) /&4392 ( sk(k,j,i) - sk(k,j,i+1) + 1E-20_wp ) &4393 ) &4394 ) / ABS( u(k,j,i+1) - u_gtrans + 1E-20_wp )4395 4396 rs = ( MAX( 0.0_wp, v(k,j,i) - v_gtrans ) * &4397 ABS( ( sk(k,j-1,i) - sk(k,j-2,i) ) /&4398 ( sk(k,j,i) - sk(k,j-1,i) + 1E-20_wp ) &4399 ) + &4400 MIN( 0.0_wp, v(k,j,i) - v_gtrans ) * &4401 ABS( ( sk(k,j,i) - sk(k,j+1,i) ) /&4402 ( sk(k,j-1,i) - sk(k,j,i) + 1E-20_wp ) &4403 ) &4404 ) / ABS( v(k,j,i) - v_gtrans + 1E-20_wp )4405 4406 rn = ( MAX( 0.0_wp, v(k,j+1,i) - v_gtrans ) * &4407 ABS( ( sk(k,j,i) - sk(k,j-1,i) ) /&4408 ( sk(k,j+1,i) - sk(k,j,i) + 1E-20_wp ) &4409 ) + &4410 MIN( 0.0_wp, v(k,j+1,i) - v_gtrans ) * &4411 ABS( ( sk(k,j+1,i) - sk(k,j+2,i) ) /&4412 ( sk(k,j,i) - sk(k,j+1,i) + 1E-20_wp ) &4413 ) &4414 ) / ABS( v(k,j+1,i) - v_gtrans + 1E-20_wp )4415 !4416 !-- Reuse k_mm and compute k_mmm for the vertical gradient ratios.4417 !-- Note, for vertical advection below the third grid point above4418 !-- surface ( or below the model top) rd and rt are set to 0, i.e.4419 !-- use of first order scheme is enforced.4420 k_mmm = k - 3 * ibit84421 4422 rd = ( MAX( 0.0_wp, w(k-1,j,i) ) * &4423 ABS( ( sk(k_mm,j,i) - sk(k_mmm,j,i) ) / &4424 ( sk(k-1,j,i) - sk(k_mm,j,i) + 1E-20_wp ) &4425 ) + &4426 MIN( 0.0_wp, w(k-1,j,i) ) * &4427 ABS( ( sk(k-1,j,i) - sk(k,j,i) ) / &4428 ( sk(k_mm,j,i) - sk(k-1,j,i) + 1E-20_wp ) &4429 ) &4430 ) * ibit8 / ABS( w(k-1,j,i) + 1E-20_wp )4431 4432 rt = ( MAX( 0.0_wp, w(k,j,i) ) * &4433 ABS( ( sk(k,j,i) - sk(k-1,j,i) ) / &4434 ( sk(k+1,j,i) - sk(k,j,i) + 1E-20_wp ) &4435 ) + &4436 MIN( 0.0_wp, w(k,j,i) ) * &4437 ABS( ( sk(k+1,j,i) - sk(k_pp,j,i) ) / &4438 ( sk(k,j,i) - sk(k+1,j,i) + 1E-20_wp ) &4439 ) &4440 ) * ibit8 / ABS( w(k,j,i) + 1E-20_wp )4441 !4442 !-- Calculate empirical limiter function (van Albada2 limiter).4443 phi_l = MIN( 1.0_wp, ( 2.0_wp * ABS( rl ) ) / &4444 ( rl**2 + 1.0_wp ) )4445 phi_r = MIN( 1.0_wp, ( 2.0_wp * ABS( rr ) ) / &4446 ( rr**2 + 1.0_wp ) )4447 phi_s = MIN( 1.0_wp, ( 2.0_wp * ABS( rs ) ) / &4448 ( rs**2 + 1.0_wp ) )4449 phi_n = MIN( 1.0_wp, ( 2.0_wp * ABS( rn ) ) / &4450 ( rn**2 + 1.0_wp ) )4451 phi_d = MIN( 1.0_wp, ( 2.0_wp * ABS( rd ) ) / &4452 ( rd**2 + 1.0_wp ) )4453 phi_t = MIN( 1.0_wp, ( 2.0_wp * ABS( rt ) ) / &4454 ( rt**2 + 1.0_wp ) )4455 !4456 !-- Calculate the resulting monotone flux.4457 flux_l = fl_1 - phi_l * ( fl_1 - flux_l )4458 flux_r = fr_1 - phi_r * ( fr_1 - flux_r )4459 flux_s = fs_1 - phi_s * ( fs_1 - flux_s )4460 flux_n = fn_1 - phi_n * ( fn_1 - flux_n )4461 flux_d = fd_1 - phi_d * ( fd_1 - flux_d )4462 flux_t = ft_1 - phi_t * ( ft_1 - flux_t )4463 !4464 !-- Moreover, modify dissipation flux according to the limiter.4465 diss_l = diss_l * phi_l4466 diss_r = diss_r * phi_r4467 diss_s = diss_s * phi_s4468 diss_n = diss_n * phi_n4469 diss_d = diss_d * phi_d4470 diss_t = diss_t * phi_t4471 4472 ENDIF4473 !4474 !-- Calculate the divergence of the velocity field. A respective4475 !-- correction is needed to overcome numerical instabilities caused4476 !-- by a not sufficient reduction of divergences near topography.4477 div = ( u(k,j,i+1) * ( ibit0 + ibit1 + ibit2 ) &4478 - u(k,j,i) * ( IBITS(wall_flags_0(k,j,i-1),0,1) &4479 + IBITS(wall_flags_0(k,j,i-1),1,1) &4480 + IBITS(wall_flags_0(k,j,i-1),2,1) &4481 ) &4482 ) * rho_air(k) * ddx &4483 + ( v(k,j+1,i) * ( ibit3 + ibit4 + ibit5 ) &4484 - v(k,j,i) * ( IBITS(wall_flags_0(k,j-1,i),3,1) &4485 + IBITS(wall_flags_0(k,j-1,i),4,1) &4486 + IBITS(wall_flags_0(k,j-1,i),5,1) &4487 ) &4488 ) * rho_air(k) * ddy &4489 + ( w(k,j,i) * rho_air_zw(k) * &4490 ( ibit6 + ibit7 + ibit8 ) &4491 - w(k-1,j,i) * rho_air_zw(k-1) * &4492 ( IBITS(wall_flags_0(k-1,j,i),6,1) &4493 + IBITS(wall_flags_0(k-1,j,i),7,1) &4494 + IBITS(wall_flags_0(k-1,j,i),8,1) &4495 ) &4496 ) * ddzw(k)4497 4498 4499 tend(k,j,i) = - ( &4500 ( flux_r + diss_r - flux_l - diss_l ) * ddx &4501 + ( flux_n + diss_n - flux_s - diss_s ) * ddy &4502 + ( ( flux_t + diss_t ) - &4503 ( flux_d + diss_d ) &4504 ) * drho_air(k) * ddzw(k) &4505 ) + div * sk(k,j,i)4506 4507 !++4508 !-- Evaluation of statistics4509 ! SELECT CASE ( sk_char )4510 !4511 ! CASE ( 'pt' )4512 ! sums_wspts_ws_l(k,tn) = sums_wspts_ws_l(k,tn) &4513 ! + ( flux_t + diss_t ) &4514 ! * weight_substep(intermediate_timestep_count)4515 ! CASE ( 'sa' )4516 ! sums_wssas_ws_l(k,tn) = sums_wssas_ws_l(k,tn) &4517 ! + ( flux_t + diss_t ) &4518 ! * weight_substep(intermediate_timestep_count)4519 ! CASE ( 'q' )4520 ! sums_wsqs_ws_l(k,tn) = sums_wsqs_ws_l(k,tn) &4521 ! + ( flux_t + diss_t ) &4522 ! * weight_substep(intermediate_timestep_count)4523 ! CASE ( 'qr' )4524 ! sums_wsqrs_ws_l(k,tn) = sums_wsqrs_ws_l(k,tn) &4525 ! + ( flux_t + diss_t ) &4526 ! * weight_substep(intermediate_timestep_count)4527 ! CASE ( 'nr' )4528 ! sums_wsnrs_ws_l(k,tn) = sums_wsnrs_ws_l(k,tn) &4529 ! + ( flux_t + diss_t ) &4530 ! * weight_substep(intermediate_timestep_count)4531 !4532 ! END SELECT4533 4534 ENDDO4535 ENDDO4536 ENDDO4537 !$acc end kernels4538 4539 END SUBROUTINE advec_s_ws_acc4540 4541 4018 4542 4019 !------------------------------------------------------------------------------! … … 5039 4516 ! Description: 5040 4517 ! ------------ 5041 !> Advection of u - Call for all grid points - accelerator version5042 !------------------------------------------------------------------------------!5043 SUBROUTINE advec_u_ws_acc5044 5045 USE arrays_3d, &5046 ONLY: ddzw, drho_air, tend, u, v, w, rho_air, rho_air_zw5047 5048 USE constants, &5049 ONLY: adv_mom_1, adv_mom_3, adv_mom_55050 5051 USE control_parameters, &5052 ONLY: intermediate_timestep_count, u_gtrans, v_gtrans5053 5054 USE grid_variables, &5055 ONLY: ddx, ddy5056 5057 USE indices, &5058 ONLY: i_left, i_right, j_north, j_south, nxl, nxr, nyn, nys, nzb, &5059 nzb_max, nzt, wall_flags_05060 5061 USE kinds5062 5063 ! USE statistics, &5064 ! ONLY: hom, sums_us2_ws_l, sums_wsus_ws_l, weight_substep5065 5066 IMPLICIT NONE5067 5068 INTEGER(iwp) :: i !<5069 INTEGER(iwp) :: ibit9 !<5070 INTEGER(iwp) :: ibit10 !<5071 INTEGER(iwp) :: ibit11 !<5072 INTEGER(iwp) :: ibit12 !<5073 INTEGER(iwp) :: ibit13 !<5074 INTEGER(iwp) :: ibit14 !<5075 INTEGER(iwp) :: ibit15 !<5076 INTEGER(iwp) :: ibit16 !<5077 INTEGER(iwp) :: ibit17 !<5078 INTEGER(iwp) :: j !<5079 INTEGER(iwp) :: k !<5080 INTEGER(iwp) :: k_mmm !<5081 INTEGER(iwp) :: k_mm !<5082 INTEGER(iwp) :: k_pp !<5083 INTEGER(iwp) :: k_ppp !<5084 INTEGER(iwp) :: tn = 0 !<5085 5086 REAL(wp) :: diss_d !<5087 REAL(wp) :: diss_l !<5088 REAL(wp) :: diss_n !<5089 REAL(wp) :: diss_r !<5090 REAL(wp) :: diss_s !<5091 REAL(wp) :: diss_t !<5092 REAL(wp) :: div !<5093 REAL(wp) :: flux_d !<5094 REAL(wp) :: flux_l !<5095 REAL(wp) :: flux_n !<5096 REAL(wp) :: flux_r !<5097 REAL(wp) :: flux_s !<5098 REAL(wp) :: flux_t !<5099 REAL(wp) :: gu !<5100 REAL(wp) :: gv !<5101 REAL(wp) :: u_comp !<5102 REAL(wp) :: u_comp_l !<5103 REAL(wp) :: v_comp !<5104 REAL(wp) :: v_comp_s !<5105 REAL(wp) :: w_comp !<5106 5107 5108 gu = 2.0_wp * u_gtrans5109 gv = 2.0_wp * v_gtrans5110 5111 !5112 !-- Computation of fluxes and tendency terms5113 !$acc kernels present( ddzw, tend, u, v, w, wall_flags_0 )5114 DO i = i_left, i_right5115 DO j = j_south, j_north5116 DO k = nzb+1, nzt5117 5118 ibit11 = IBITS(wall_flags_0(k,j,i-1),11,1)5119 ibit10 = IBITS(wall_flags_0(k,j,i-1),10,1)5120 ibit9 = IBITS(wall_flags_0(k,j,i-1),9,1)5121 5122 u_comp_l = u(k,j,i) + u(k,j,i-1) - gu5123 flux_l = u_comp_l * ( &5124 ( 37.0_wp * ibit11 * adv_mom_5 &5125 + 7.0_wp * ibit10 * adv_mom_3 &5126 + ibit9 * adv_mom_1 &5127 ) * &5128 ( u(k,j,i) + u(k,j,i-1) ) &5129 - ( 8.0_wp * ibit11 * adv_mom_5 &5130 + ibit10 * adv_mom_3 &5131 ) * &5132 ( u(k,j,i+1) + u(k,j,i-2) ) &5133 + ( ibit11 * adv_mom_5 &5134 ) * &5135 ( u(k,j,i+2) + u(k,j,i-3) ) &5136 )5137 5138 diss_l = - ABS( u_comp_l ) * ( &5139 ( 10.0_wp * ibit11 * adv_mom_5 &5140 + 3.0_wp * ibit10 * adv_mom_3 &5141 + ibit9 * adv_mom_1 &5142 ) * &5143 ( u(k,j,i) - u(k,j,i-1) ) &5144 - ( 5.0_wp * ibit11 * adv_mom_5 &5145 + ibit10 * adv_mom_3 &5146 ) * &5147 ( u(k,j,i+1) - u(k,j,i-2) ) &5148 + ( ibit11 * adv_mom_5 &5149 ) * &5150 ( u(k,j,i+2) - u(k,j,i-3) ) &5151 )5152 5153 ibit11 = IBITS(wall_flags_0(k,j,i),11,1)5154 ibit10 = IBITS(wall_flags_0(k,j,i),10,1)5155 ibit9 = IBITS(wall_flags_0(k,j,i),9,1)5156 5157 u_comp = u(k,j,i+1) + u(k,j,i)5158 flux_r = ( u_comp - gu ) * ( &5159 ( 37.0_wp * ibit11 * adv_mom_5 &5160 + 7.0_wp * ibit10 * adv_mom_3 &5161 + ibit9 * adv_mom_1 &5162 ) * &5163 ( u(k,j,i+1) + u(k,j,i) ) &5164 - ( 8.0_wp * ibit11 * adv_mom_5 &5165 + ibit10 * adv_mom_3 &5166 ) * &5167 ( u(k,j,i+2) + u(k,j,i-1) ) &5168 + ( ibit11 * adv_mom_5 &5169 ) * &5170 ( u(k,j,i+3) + u(k,j,i-2) ) &5171 )5172 5173 diss_r = - ABS( u_comp - gu ) * ( &5174 ( 10.0_wp * ibit11 * adv_mom_5 &5175 + 3.0_wp * ibit10 * adv_mom_3 &5176 + ibit9 * adv_mom_1 &5177 ) * &5178 ( u(k,j,i+1) - u(k,j,i) ) &5179 - ( 5.0_wp * ibit11 * adv_mom_5 &5180 + ibit10 * adv_mom_3 &5181 ) * &5182 ( u(k,j,i+2) - u(k,j,i-1) ) &5183 + ( ibit11 * adv_mom_5 &5184 ) * &5185 ( u(k,j,i+3) - u(k,j,i-2) ) &5186 )5187 5188 ibit14 = IBITS(wall_flags_0(k,j-1,i),14,1)5189 ibit13 = IBITS(wall_flags_0(k,j-1,i),13,1)5190 ibit12 = IBITS(wall_flags_0(k,j-1,i),12,1)5191 5192 v_comp_s = v(k,j,i) + v(k,j,i-1) - gv5193 flux_s = v_comp_s * ( &5194 ( 37.0_wp * ibit14 * adv_mom_5 &5195 + 7.0_wp * ibit13 * adv_mom_3 &5196 + ibit12 * adv_mom_1 &5197 ) * &5198 ( u(k,j,i) + u(k,j-1,i) ) &5199 - ( 8.0_wp * ibit14 * adv_mom_5 &5200 + ibit13 * adv_mom_3 &5201 ) * &5202 ( u(k,j+1,i) + u(k,j-2,i) ) &5203 + ( ibit14 * adv_mom_5 &5204 ) * &5205 ( u(k,j+2,i) + u(k,j-3,i) ) &5206 )5207 5208 diss_s = - ABS ( v_comp_s ) * ( &5209 ( 10.0_wp * ibit14 * adv_mom_5 &5210 + 3.0_wp * ibit13 * adv_mom_3 &5211 + ibit12 * adv_mom_1 &5212 ) * &5213 ( u(k,j,i) - u(k,j-1,i) ) &5214 - ( 5.0_wp * ibit14 * adv_mom_5 &5215 + ibit13 * adv_mom_3 &5216 ) * &5217 ( u(k,j+1,i) - u(k,j-2,i) ) &5218 + ( ibit14 * adv_mom_5 &5219 ) * &5220 ( u(k,j+2,i) - u(k,j-3,i) ) &5221 )5222 5223 5224 ibit14 = IBITS(wall_flags_0(k,j,i),14,1)5225 ibit13 = IBITS(wall_flags_0(k,j,i),13,1)5226 ibit12 = IBITS(wall_flags_0(k,j,i),12,1)5227 5228 v_comp = v(k,j+1,i) + v(k,j+1,i-1) - gv5229 flux_n = v_comp * ( &5230 ( 37.0_wp * ibit14 * adv_mom_5 &5231 + 7.0_wp * ibit13 * adv_mom_3 &5232 + ibit12 * adv_mom_1 &5233 ) * &5234 ( u(k,j+1,i) + u(k,j,i) ) &5235 - ( 8.0_wp * ibit14 * adv_mom_5 &5236 + ibit13 * adv_mom_3 &5237 ) * &5238 ( u(k,j+2,i) + u(k,j-1,i) ) &5239 + ( ibit14 * adv_mom_5 &5240 ) * &5241 ( u(k,j+3,i) + u(k,j-2,i) ) &5242 )5243 5244 diss_n = - ABS ( v_comp ) * ( &5245 ( 10.0_wp * ibit14 * adv_mom_5 &5246 + 3.0_wp * ibit13 * adv_mom_3 &5247 + ibit12 * adv_mom_1 &5248 ) * &5249 ( u(k,j+1,i) - u(k,j,i) ) &5250 - ( 5.0_wp * ibit14 * adv_mom_5 &5251 + ibit13 * adv_mom_3 &5252 ) * &5253 ( u(k,j+2,i) - u(k,j-1,i) ) &5254 + ( ibit14 * adv_mom_5 &5255 ) * &5256 ( u(k,j+3,i) - u(k,j-2,i) ) &5257 )5258 5259 ibit17 = IBITS(wall_flags_0(k-1,j,i),17,1)5260 ibit16 = IBITS(wall_flags_0(k-1,j,i),16,1)5261 ibit15 = IBITS(wall_flags_0(k-1,j,i),15,1)5262 5263 k_pp = k + 2 * ibit175264 k_mm = k - 2 * ( ibit16 + ibit17 )5265 k_mmm = k - 3 * ibit175266 5267 w_comp = w(k-1,j,i) + w(k-1,j,i-1)5268 flux_d = w_comp * ( &5269 ( 37.0_wp * ibit17 * adv_mom_5 &5270 + 7.0_wp * ibit16 * adv_mom_3 &5271 + ibit15 * adv_mom_1 &5272 ) * &5273 ( u(k,j,i) + u(k-1,j,i) ) &5274 - ( 8.0_wp * ibit17 * adv_mom_5 &5275 + ibit16 * adv_mom_3 &5276 ) * &5277 ( u(k+1,j,i) + u(k_mm,j,i) ) &5278 + ( ibit17 * adv_mom_5 &5279 ) * &5280 ( u(k_pp,j,i) + u(k_mmm,j,i) ) &5281 )5282 5283 diss_d = - ABS( w_comp ) * ( &5284 ( 10.0_wp * ibit17 * adv_mom_5 &5285 + 3.0_wp * ibit16 * adv_mom_3 &5286 + ibit15 * adv_mom_1 &5287 ) * &5288 ( u(k,j,i) - u(k-1,j,i) ) &5289 - ( 5.0_wp * ibit17 * adv_mom_5 &5290 + ibit16 * adv_mom_3 &5291 ) * &5292 ( u(k+1,j,i) - u(k_mm,j,i) ) &5293 + ( ibit17 * adv_mom_5 &5294 ) * &5295 ( u(k_pp,j,i) - u(k_mmm,j,i) ) &5296 )5297 !5298 !-- k index has to be modified near bottom and top, else array5299 !-- subscripts will be exceeded.5300 ibit17 = IBITS(wall_flags_0(k,j,i),17,1)5301 ibit16 = IBITS(wall_flags_0(k,j,i),16,1)5302 ibit15 = IBITS(wall_flags_0(k,j,i),15,1)5303 5304 k_ppp = k + 3 * ibit175305 k_pp = k + 2 * ( 1 - ibit15 )5306 k_mm = k - 2 * ibit175307 5308 w_comp = w(k,j,i) + w(k,j,i-1)5309 flux_t = w_comp * rho_air_zw(k) * ( &5310 ( 37.0_wp * ibit17 * adv_mom_5 &5311 + 7.0_wp * ibit16 * adv_mom_3 &5312 + ibit15 * adv_mom_1 &5313 ) * &5314 ( u(k+1,j,i) + u(k,j,i) ) &5315 - ( 8.0_wp * ibit17 * adv_mom_5 &5316 + ibit16 * adv_mom_3 &5317 ) * &5318 ( u(k_pp,j,i) + u(k-1,j,i) ) &5319 + ( ibit17 * adv_mom_5 &5320 ) * &5321 ( u(k_ppp,j,i) + u(k_mm,j,i) ) &5322 )5323 5324 diss_t = - ABS( w_comp ) * rho_air_zw(k) * ( &5325 ( 10.0_wp * ibit17 * adv_mom_5 &5326 + 3.0_wp * ibit16 * adv_mom_3 &5327 + ibit15 * adv_mom_1 &5328 ) * &5329 ( u(k+1,j,i) - u(k,j,i) ) &5330 - ( 5.0_wp * ibit17 * adv_mom_5 &5331 + ibit16 * adv_mom_3 &5332 ) * &5333 ( u(k_pp,j,i) - u(k-1,j,i) ) &5334 + ( ibit17 * adv_mom_5 &5335 ) * &5336 ( u(k_ppp,j,i) - u(k_mm,j,i) ) &5337 )5338 !5339 !-- Calculate the divergence of the velocity field. A respective5340 !-- correction is needed to overcome numerical instabilities caused5341 !-- by a not sufficient reduction of divergences near topography.5342 div = ( ( u_comp * ( ibit9 + ibit10 + ibit11 ) &5343 - ( u(k,j,i) + u(k,j,i-1) ) &5344 * ( IBITS(wall_flags_0(k,j,i-1),9,1) &5345 + IBITS(wall_flags_0(k,j,i-1),10,1) &5346 + IBITS(wall_flags_0(k,j,i-1),11,1) &5347 ) &5348 ) * rho_air(k) * ddx &5349 + ( ( v_comp + gv ) * ( ibit12 + ibit13 + ibit14 ) &5350 - ( v(k,j,i) + v(k,j,i-1 ) ) &5351 * ( IBITS(wall_flags_0(k,j-1,i),12,1) &5352 + IBITS(wall_flags_0(k,j-1,i),13,1) &5353 + IBITS(wall_flags_0(k,j-1,i),14,1) &5354 ) &5355 ) * rho_air(k) * ddy &5356 + ( w_comp * rho_air_zw(k) * ( ibit15 + ibit16 + ibit17 ) &5357 - ( w(k-1,j,i) + w(k-1,j,i-1) ) * rho_air_zw(k-1) &5358 * ( IBITS(wall_flags_0(k-1,j,i),15,1) &5359 + IBITS(wall_flags_0(k-1,j,i),16,1) &5360 + IBITS(wall_flags_0(k-1,j,i),17,1) &5361 ) &5362 ) * ddzw(k) &5363 ) * 0.5_wp5364 5365 5366 tend(k,j,i) = - ( &5367 ( flux_r + diss_r - flux_l - diss_l ) * ddx &5368 + ( flux_n + diss_n - flux_s - diss_s ) * ddy &5369 + ( ( flux_t + diss_t ) - &5370 ( flux_d + diss_d ) &5371 ) * drho_air(k) * ddzw(k) &5372 ) + div * u(k,j,i)5373 5374 !++5375 !-- Statistical Evaluation of u'u'. The factor has to be applied5376 !-- for right evaluation when gallilei_trans = .T. .5377 ! sums_us2_ws_l(k,tn) = sums_us2_ws_l(k,tn) &5378 ! + ( flux_r * &5379 ! ( u_comp - 2.0_wp * hom(k,1,1,0) ) &5380 ! / ( u_comp - gu + 1.0E-20_wp ) &5381 ! + diss_r * &5382 ! ABS( u_comp - 2.0_wp * hom(k,1,1,0) ) &5383 ! / ( ABS( u_comp - gu ) + 1.0E-20_wp ) ) &5384 ! * weight_substep(intermediate_timestep_count)5385 !5386 !-- Statistical Evaluation of w'u'.5387 ! sums_wsus_ws_l(k,tn) = sums_wsus_ws_l(k,tn) &5388 ! + ( flux_t + diss_t ) &5389 ! * weight_substep(intermediate_timestep_count)5390 ENDDO5391 ENDDO5392 ENDDO5393 !$acc end kernels5394 5395 !++5396 ! sums_us2_ws_l(nzb,tn) = sums_us2_ws_l(nzb+1,tn)5397 5398 END SUBROUTINE advec_u_ws_acc5399 5400 5401 !------------------------------------------------------------------------------!5402 ! Description:5403 ! ------------5404 4518 !> Advection of v - Call for all grid points 5405 4519 !------------------------------------------------------------------------------! … … 5906 5020 5907 5021 5908 !------------------------------------------------------------------------------!5909 ! Description:5910 ! ------------5911 !> Advection of v - Call for all grid points - accelerator version5912 !------------------------------------------------------------------------------!5913 SUBROUTINE advec_v_ws_acc5914 5915 USE arrays_3d, &5916 ONLY: ddzw, drho_air, tend, u, v, w, rho_air, rho_air_zw5917 5918 USE constants, &5919 ONLY: adv_mom_1, adv_mom_3, adv_mom_55920 5921 USE control_parameters, &5922 ONLY: intermediate_timestep_count, u_gtrans, v_gtrans5923 5924 USE grid_variables, &5925 ONLY: ddx, ddy5926 5927 USE indices, &5928 ONLY: i_left, i_right, j_north, j_south, nxl, nxr, nyn, nys, nzb, &5929 nzb_max, nzt, wall_flags_05930 5931 USE kinds5932 5933 ! USE statistics, &5934 ! ONLY: hom, sums_vs2_ws_l, sums_wsvs_ws_l, weight_substep5935 5936 IMPLICIT NONE5937 5938 5939 INTEGER(iwp) :: i !<5940 INTEGER(iwp) :: ibit18 !<5941 INTEGER(iwp) :: ibit19 !<5942 INTEGER(iwp) :: ibit20 !<5943 INTEGER(iwp) :: ibit21 !<5944 INTEGER(iwp) :: ibit22 !<5945 INTEGER(iwp) :: ibit23 !<5946 INTEGER(iwp) :: ibit24 !<5947 INTEGER(iwp) :: ibit25 !<5948 INTEGER(iwp) :: ibit26 !<5949 INTEGER(iwp) :: j !<5950 INTEGER(iwp) :: k !<5951 INTEGER(iwp) :: k_mm !<5952 INTEGER(iwp) :: k_mmm !<5953 INTEGER(iwp) :: k_pp !<5954 INTEGER(iwp) :: k_ppp !<5955 INTEGER(iwp) :: tn = 0 !<5956 5957 REAL(wp) :: diss_d !<5958 REAL(wp) :: diss_l !<5959 REAL(wp) :: diss_n !<5960 REAL(wp) :: diss_r !<5961 REAL(wp) :: diss_s !<5962 REAL(wp) :: diss_t !<5963 REAL(wp) :: div !<5964 REAL(wp) :: flux_d !<5965 REAL(wp) :: flux_l !<5966 REAL(wp) :: flux_n !<5967 REAL(wp) :: flux_r !<5968 REAL(wp) :: flux_s !<5969 REAL(wp) :: flux_t !<5970 REAL(wp) :: gu !<5971 REAL(wp) :: gv !<5972 REAL(wp) :: u_comp !<5973 REAL(wp) :: u_comp_l !<5974 REAL(wp) :: v_comp !<5975 REAL(wp) :: v_comp_s !<5976 REAL(wp) :: w_comp !<5977 5978 gu = 2.0_wp * u_gtrans5979 gv = 2.0_wp * v_gtrans5980 5981 !5982 !-- Computation of fluxes and tendency terms5983 !$acc kernels present( ddzw, tend, u, v, w, wall_flags_0 )5984 DO i = i_left, i_right5985 DO j = j_south, j_north5986 DO k = nzb+1, nzt5987 5988 ibit20 = IBITS(wall_flags_0(k,j,i-1),20,1)5989 ibit19 = IBITS(wall_flags_0(k,j,i-1),19,1)5990 ibit18 = IBITS(wall_flags_0(k,j,i-1),18,1)5991 5992 u_comp_l = u(k,j-1,i) + u(k,j,i) - gu5993 flux_l = u_comp_l * ( &5994 ( 37.0_wp * ibit20 * adv_mom_5 &5995 + 7.0_wp * ibit19 * adv_mom_3 &5996 + ibit18 * adv_mom_1 &5997 ) * &5998 ( v(k,j,i) + v(k,j,i-1) ) &5999 - ( 8.0_wp * ibit20 * adv_mom_5 &6000 + ibit19 * adv_mom_3 &6001 ) * &6002 ( v(k,j,i+1) + v(k,j,i-2) ) &6003 + ( ibit20 * adv_mom_5 &6004 ) * &6005 ( v(k,j,i+2) + v(k,j,i-3) ) &6006 )6007 6008 diss_l = - ABS( u_comp_l ) * ( &6009 ( 10.0_wp * ibit20 * adv_mom_5 &6010 + 3.0_wp * ibit19 * adv_mom_3 &6011 + ibit18 * adv_mom_1 &6012 ) * &6013 ( v(k,j,i) - v(k,j,i-1) ) &6014 - ( 5.0_wp * ibit20 * adv_mom_5 &6015 + ibit19 * adv_mom_3 &6016 ) * &6017 ( v(k,j,i+1) - v(k,j,i-2) ) &6018 + ( ibit20 * adv_mom_5 &6019 ) * &6020 ( v(k,j,i+2) - v(k,j,i-3) ) &6021 )6022 6023 ibit20 = IBITS(wall_flags_0(k,j,i),20,1)6024 ibit19 = IBITS(wall_flags_0(k,j,i),19,1)6025 ibit18 = IBITS(wall_flags_0(k,j,i),18,1)6026 6027 u_comp = u(k,j-1,i+1) + u(k,j,i+1) - gu6028 flux_r = u_comp * ( &6029 ( 37.0_wp * ibit20 * adv_mom_5 &6030 + 7.0_wp * ibit19 * adv_mom_3 &6031 + ibit18 * adv_mom_1 &6032 ) * &6033 ( v(k,j,i+1) + v(k,j,i) ) &6034 - ( 8.0_wp * ibit20 * adv_mom_5 &6035 + ibit19 * adv_mom_3 &6036 ) * &6037 ( v(k,j,i+2) + v(k,j,i-1) ) &6038 + ( ibit20 * adv_mom_5 &6039 ) * &6040 ( v(k,j,i+3) + v(k,j,i-2) ) &6041 )6042 6043 diss_r = - ABS( u_comp ) * ( &6044 ( 10.0_wp * ibit20 * adv_mom_5 &6045 + 3.0_wp * ibit19 * adv_mom_3 &6046 + ibit18 * adv_mom_1 &6047 ) * &6048 ( v(k,j,i+1) - v(k,j,i) ) &6049 - ( 5.0_wp * ibit20 * adv_mom_5 &6050 + ibit19 * adv_mom_3 &6051 ) * &6052 ( v(k,j,i+2) - v(k,j,i-1) ) &6053 + ( ibit20 * adv_mom_5 &6054 ) * &6055 ( v(k,j,i+3) - v(k,j,i-2) ) &6056 )6057 6058 ibit23 = IBITS(wall_flags_0(k,j-1,i),23,1)6059 ibit22 = IBITS(wall_flags_0(k,j-1,i),22,1)6060 ibit21 = IBITS(wall_flags_0(k,j-1,i),21,1)6061 6062 6063 v_comp_s = v(k,j,i) + v(k,j-1,i) - gv6064 flux_s = v_comp_s * ( &6065 ( 37.0_wp * ibit23 * adv_mom_5 &6066 + 7.0_wp * ibit22 * adv_mom_3 &6067 + ibit21 * adv_mom_1 &6068 ) * &6069 ( v(k,j,i) + v(k,j-1,i) ) &6070 - ( 8.0_wp * ibit23 * adv_mom_5 &6071 + ibit22 * adv_mom_3 &6072 ) * &6073 ( v(k,j+1,i) + v(k,j-2,i) ) &6074 + ( ibit23 * adv_mom_5 &6075 ) * &6076 ( v(k,j+2,i) + v(k,j-3,i) ) &6077 )6078 6079 diss_s = - ABS( v_comp_s ) * ( &6080 ( 10.0_wp * ibit23 * adv_mom_5 &6081 + 3.0_wp * ibit22 * adv_mom_3 &6082 + ibit21 * adv_mom_1 &6083 ) * &6084 ( v(k,j,i) - v(k,j-1,i) ) &6085 - ( 5.0_wp * ibit23 * adv_mom_5 &6086 + ibit22 * adv_mom_3 &6087 ) * &6088 ( v(k,j+1,i) - v(k,j-2,i) ) &6089 + ( ibit23 * adv_mom_5 &6090 ) * &6091 ( v(k,j+2,i) - v(k,j-3,i) ) &6092 )6093 6094 ibit23 = IBITS(wall_flags_0(k,j,i),23,1)6095 ibit22 = IBITS(wall_flags_0(k,j,i),22,1)6096 ibit21 = IBITS(wall_flags_0(k,j,i),21,1)6097 6098 v_comp = v(k,j+1,i) + v(k,j,i)6099 flux_n = ( v_comp - gv ) * ( &6100 ( 37.0_wp * ibit23 * adv_mom_5 &6101 + 7.0_wp * ibit22 * adv_mom_3 &6102 + ibit21 * adv_mom_1 &6103 ) * &6104 ( v(k,j+1,i) + v(k,j,i) ) &6105 - ( 8.0_wp * ibit23 * adv_mom_5 &6106 + ibit22 * adv_mom_3 &6107 ) * &6108 ( v(k,j+2,i) + v(k,j-1,i) ) &6109 + ( ibit23 * adv_mom_5 &6110 ) * &6111 ( v(k,j+3,i) + v(k,j-2,i) ) &6112 )6113 6114 diss_n = - ABS( v_comp - gv ) * ( &6115 ( 10.0_wp * ibit23 * adv_mom_5 &6116 + 3.0_wp * ibit22 * adv_mom_3 &6117 + ibit21 * adv_mom_1 &6118 ) * &6119 ( v(k,j+1,i) - v(k,j,i) ) &6120 - ( 5.0_wp * ibit23 * adv_mom_5 &6121 + ibit22 * adv_mom_3 &6122 ) * &6123 ( v(k,j+2,i) - v(k,j-1,i) ) &6124 + ( ibit23 * adv_mom_5 &6125 ) * &6126 ( v(k,j+3,i) - v(k,j-2,i) ) &6127 )6128 6129 ibit26 = IBITS(wall_flags_0(k-1,j,i),26,1)6130 ibit25 = IBITS(wall_flags_0(k-1,j,i),25,1)6131 ibit24 = IBITS(wall_flags_0(k-1,j,i),24,1)6132 6133 k_pp = k + 2 * ibit266134 k_mm = k - 2 * ( ibit25 + ibit26 )6135 k_mmm = k - 3 * ibit266136 6137 w_comp = w(k-1,j-1,i) + w(k-1,j,i)6138 flux_d = w_comp * ( &6139 ( 37.0_wp * ibit26 * adv_mom_5 &6140 + 7.0_wp * ibit25 * adv_mom_3 &6141 + ibit24 * adv_mom_1 &6142 ) * &6143 ( v(k,j,i) + v(k-1,j,i) ) &6144 - ( 8.0_wp * ibit26 * adv_mom_5 &6145 + ibit25 * adv_mom_3 &6146 ) * &6147 ( v(k+1,j,i) + v(k_mm,j,i) ) &6148 + ( ibit26 * adv_mom_5 &6149 ) * &6150 ( v(k_pp,j,i) + v(k_mmm,j,i) ) &6151 )6152 6153 diss_d = - ABS( w_comp ) * ( &6154 ( 10.0_wp * ibit26 * adv_mom_5 &6155 + 3.0_wp * ibit25 * adv_mom_3 &6156 + ibit24 * adv_mom_1 &6157 ) * &6158 ( v(k,j,i) - v(k-1,j,i) ) &6159 - ( 5.0_wp * ibit26 * adv_mom_5 &6160 + ibit25 * adv_mom_3 &6161 ) * &6162 ( v(k+1,j,i) - v(k_mm,j,i) ) &6163 + ( ibit26 * adv_mom_5 &6164 ) * &6165 ( v(k_pp,j,i) - v(k_mmm,j,i) ) &6166 )6167 !6168 !-- k index has to be modified near bottom and top, else array6169 !-- subscripts will be exceeded.6170 ibit26 = IBITS(wall_flags_0(k,j,i),26,1)6171 ibit25 = IBITS(wall_flags_0(k,j,i),25,1)6172 ibit24 = IBITS(wall_flags_0(k,j,i),24,1)6173 6174 k_ppp = k + 3 * ibit266175 k_pp = k + 2 * ( 1 - ibit24 )6176 k_mm = k - 2 * ibit266177 6178 w_comp = w(k,j-1,i) + w(k,j,i)6179 flux_t = w_comp * rho_air_zw(k) * ( &6180 ( 37.0_wp * ibit26 * adv_mom_5 &6181 + 7.0_wp * ibit25 * adv_mom_3 &6182 + ibit24 * adv_mom_1 &6183 ) * &6184 ( v(k+1,j,i) + v(k,j,i) ) &6185 - ( 8.0_wp * ibit26 * adv_mom_5 &6186 + ibit25 * adv_mom_3 &6187 ) * &6188 ( v(k_pp,j,i) + v(k-1,j,i) ) &6189 + ( ibit26 * adv_mom_5 &6190 ) * &6191 ( v(k_ppp,j,i) + v(k_mm,j,i) ) &6192 )6193 6194 diss_t = - ABS( w_comp ) * rho_air_zw(k) * ( &6195 ( 10.0_wp * ibit26 * adv_mom_5 &6196 + 3.0_wp * ibit25 * adv_mom_3 &6197 + ibit24 * adv_mom_1 &6198 ) * &6199 ( v(k+1,j,i) - v(k,j,i) ) &6200 - ( 5.0_wp * ibit26 * adv_mom_5 &6201 + ibit25 * adv_mom_3 &6202 ) * &6203 ( v(k_pp,j,i) - v(k-1,j,i) ) &6204 + ( ibit26 * adv_mom_5 &6205 ) * &6206 ( v(k_ppp,j,i) - v(k_mm,j,i) ) &6207 )6208 !6209 !-- Calculate the divergence of the velocity field. A respective6210 !-- correction is needed to overcome numerical instabilities caused6211 !-- by a not sufficient reduction of divergences near topography.6212 div = ( ( ( u_comp + gu ) &6213 * ( ibit18 + ibit19 + ibit20 ) &6214 - ( u(k,j-1,i) + u(k,j,i) ) &6215 * ( IBITS(wall_flags_0(k,j,i-1),18,1) &6216 + IBITS(wall_flags_0(k,j,i-1),19,1) &6217 + IBITS(wall_flags_0(k,j,i-1),20,1) &6218 ) &6219 ) * rho_air(k) * ddx &6220 + ( v_comp &6221 * ( ibit21 + ibit22 + ibit23 ) &6222 - ( v(k,j,i) + v(k,j-1,i) ) &6223 * ( IBITS(wall_flags_0(k,j-1,i),21,1) &6224 + IBITS(wall_flags_0(k,j-1,i),22,1) &6225 + IBITS(wall_flags_0(k,j-1,i),23,1) &6226 ) &6227 ) * rho_air(k) * ddy &6228 + ( w_comp * rho_air_zw(k) &6229 * ( ibit24 + ibit25 + ibit26 ) &6230 - ( w(k-1,j-1,i) + w(k-1,j,i) ) * rho_air_zw(k-1) &6231 * ( IBITS(wall_flags_0(k-1,j,i),24,1) &6232 + IBITS(wall_flags_0(k-1,j,i),25,1) &6233 + IBITS(wall_flags_0(k-1,j,i),26,1) &6234 ) &6235 ) * ddzw(k) &6236 ) * 0.5_wp6237 6238 6239 tend(k,j,i) = - ( &6240 ( flux_r + diss_r - flux_l - diss_l ) * ddx &6241 + ( flux_n + diss_n - flux_s - diss_s ) * ddy &6242 + ( ( flux_t + diss_t ) - &6243 ( flux_d + diss_d ) &6244 ) * drho_air(k) * ddzw(k) &6245 ) + div * v(k,j,i)6246 6247 6248 !++6249 !-- Statistical Evaluation of v'v'. The factor has to be applied6250 !-- for right evaluation when gallilei_trans = .T. .6251 ! sums_vs2_ws_l(k,tn) = sums_vs2_ws_l(k,tn) &6252 ! + ( flux_n &6253 ! * ( v_comp - 2.0_wp * hom(k,1,2,0) ) &6254 ! / ( v_comp - gv + 1.0E-20_wp ) &6255 ! + diss_n &6256 ! * ABS( v_comp - 2.0_wp * hom(k,1,2,0) ) &6257 ! / ( ABS( v_comp - gv ) +1.0E-20_wp ) ) &6258 ! * weight_substep(intermediate_timestep_count)6259 !6260 !-- Statistical Evaluation of w'v'.6261 ! sums_wsvs_ws_l(k,tn) = sums_wsvs_ws_l(k,tn) &6262 ! + ( flux_t + diss_t ) &6263 ! * weight_substep(intermediate_timestep_count)6264 6265 ENDDO6266 ENDDO6267 ENDDO6268 !$acc end kernels6269 6270 !++6271 ! sums_vs2_ws_l(nzb,tn) = sums_vs2_ws_l(nzb+1,tn)6272 6273 END SUBROUTINE advec_v_ws_acc6274 5022 6275 5023 … … 6756 5504 6757 5505 6758 !------------------------------------------------------------------------------!6759 ! Description:6760 ! ------------6761 !> Advection of w - Call for all grid points - accelerator version6762 !------------------------------------------------------------------------------!6763 SUBROUTINE advec_w_ws_acc6764 6765 USE arrays_3d, &6766 ONLY: ddzu, drho_air_zw, tend, u, v, w, rho_air, rho_air_zw6767 6768 USE constants, &6769 ONLY: adv_mom_1, adv_mom_3, adv_mom_56770 6771 USE control_parameters, &6772 ONLY: intermediate_timestep_count, u_gtrans, v_gtrans6773 6774 USE grid_variables, &6775 ONLY: ddx, ddy6776 6777 USE indices, &6778 ONLY: i_left, i_right, j_north, j_south, nxl, nxr, nyn, nys, nzb, &6779 nzb_max, nzt, wall_flags_0, wall_flags_006780 6781 USE kinds6782 6783 ! USE statistics, &6784 ! ONLY: hom, sums_ws2_ws_l, weight_substep6785 6786 IMPLICIT NONE6787 6788 INTEGER(iwp) :: i !<6789 INTEGER(iwp) :: ibit27 !<6790 INTEGER(iwp) :: ibit28 !<6791 INTEGER(iwp) :: ibit29 !<6792 INTEGER(iwp) :: ibit30 !<6793 INTEGER(iwp) :: ibit31 !<6794 INTEGER(iwp) :: ibit32 !<6795 INTEGER(iwp) :: ibit33 !<6796 INTEGER(iwp) :: ibit34 !<6797 INTEGER(iwp) :: ibit35 !<6798 INTEGER(iwp) :: j !<6799 INTEGER(iwp) :: k !<6800 INTEGER(iwp) :: k_mmm !<6801 INTEGER(iwp) :: k_mm !<6802 INTEGER(iwp) :: k_pp !<6803 INTEGER(iwp) :: k_ppp !<6804 INTEGER(iwp) :: tn = 0 !<6805 6806 REAL(wp) :: diss_d !<6807 REAL(wp) :: diss_l !<6808 REAL(wp) :: diss_n !<6809 REAL(wp) :: diss_r !<6810 REAL(wp) :: diss_s !<6811 REAL(wp) :: diss_t !<6812 REAL(wp) :: div !<6813 REAL(wp) :: flux_d !<6814 REAL(wp) :: flux_l !<6815 REAL(wp) :: flux_n !<6816 REAL(wp) :: flux_r !<6817 REAL(wp) :: flux_s !<6818 REAL(wp) :: flux_t !<6819 REAL(wp) :: gu !<6820 REAL(wp) :: gv !<6821 REAL(wp) :: u_comp !<6822 REAL(wp) :: u_comp_l !<6823 REAL(wp) :: v_comp !<6824 REAL(wp) :: v_comp_s !<6825 REAL(wp) :: w_comp !<6826 6827 gu = 2.0_wp * u_gtrans6828 gv = 2.0_wp * v_gtrans6829 6830 6831 !6832 !-- Computation of fluxes and tendency terms6833 !$acc kernels present( ddzu, tend, u, v, w, wall_flags_0, wall_flags_00 )6834 DO i = i_left, i_right6835 DO j = j_south, j_north6836 DO k = nzb+1, nzt6837 6838 ibit27 = IBITS(wall_flags_0(k,j,i-1),27,1)6839 ibit28 = IBITS(wall_flags_0(k,j,i-1),28,1)6840 ibit29 = IBITS(wall_flags_0(k,j,i-1),29,1)6841 6842 u_comp_l = u(k+1,j,i) + u(k,j,i) - gu6843 flux_l = u_comp_l * ( &6844 ( 37.0_wp * ibit29 * adv_mom_5 &6845 + 7.0_wp * ibit28 * adv_mom_3 &6846 + ibit27 * adv_mom_1 &6847 ) * &6848 ( w(k,j,i) + w(k,j,i-1) ) &6849 - ( 8.0_wp * ibit29 * adv_mom_5 &6850 + ibit28 * adv_mom_3 &6851 ) * &6852 ( w(k,j,i+1) + w(k,j,i-2) ) &6853 + ( ibit29 * adv_mom_5 &6854 ) * &6855 ( w(k,j,i+2) + w(k,j,i-3) ) &6856 )6857 6858 diss_l = - ABS( u_comp_l ) * ( &6859 ( 10.0_wp * ibit29 * adv_mom_5 &6860 + 3.0_wp * ibit28 * adv_mom_3 &6861 + ibit27 * adv_mom_1 &6862 ) * &6863 ( w(k,j,i) - w(k,j,i-1) ) &6864 - ( 5.0_wp * ibit29 * adv_mom_5 &6865 + ibit28 * adv_mom_3 &6866 ) * &6867 ( w(k,j,i+1) - w(k,j,i-2) ) &6868 + ( ibit29 * adv_mom_5 &6869 ) * &6870 ( w(k,j,i+2) - w(k,j,i-3) ) &6871 )6872 6873 ibit27 = IBITS(wall_flags_0(k,j,i),27,1)6874 ibit28 = IBITS(wall_flags_0(k,j,i),28,1)6875 ibit29 = IBITS(wall_flags_0(k,j,i),29,1)6876 6877 u_comp = u(k+1,j,i+1) + u(k,j,i+1) - gu6878 flux_r = u_comp * ( &6879 ( 37.0_wp * ibit29 * adv_mom_5 &6880 + 7.0_wp * ibit28 * adv_mom_3 &6881 + ibit27 * adv_mom_1 &6882 ) * &6883 ( w(k,j,i+1) + w(k,j,i) ) &6884 - ( 8.0_wp * ibit29 * adv_mom_5 &6885 + ibit28 * adv_mom_3 &6886 ) * &6887 ( w(k,j,i+2) + w(k,j,i-1) ) &6888 + ( ibit29 * adv_mom_5 &6889 ) * &6890 ( w(k,j,i+3) + w(k,j,i-2) ) &6891 )6892 6893 diss_r = - ABS( u_comp ) * ( &6894 ( 10.0_wp * ibit29 * adv_mom_5 &6895 + 3.0_wp * ibit28 * adv_mom_3 &6896 + ibit27 * adv_mom_1 &6897 ) * &6898 ( w(k,j,i+1) - w(k,j,i) ) &6899 - ( 5.0_wp * ibit29 * adv_mom_5 &6900 + ibit28 * adv_mom_3 &6901 ) * &6902 ( w(k,j,i+2) - w(k,j,i-1) ) &6903 + ( ibit29 * adv_mom_5 &6904 ) * &6905 ( w(k,j,i+3) - w(k,j,i-2) ) &6906 )6907 ibit32 = IBITS(wall_flags_00(k,j-1,i),0,1)6908 ibit31 = IBITS(wall_flags_0(k,j-1,i),31,1)6909 ibit30 = IBITS(wall_flags_0(k,j-1,i),30,1)6910 6911 v_comp_s = v(k+1,j,i) + v(k,j,i) - gv6912 flux_s = v_comp_s * ( &6913 ( 37.0_wp * ibit32 * adv_mom_5 &6914 + 7.0_wp * ibit31 * adv_mom_3 &6915 + ibit30 * adv_mom_1 &6916 ) * &6917 ( w(k,j,i) + w(k,j-1,i) ) &6918 - ( 8.0_wp * ibit32 * adv_mom_5 &6919 + ibit31 * adv_mom_3 &6920 ) * &6921 ( w(k,j+1,i) + w(k,j-2,i) ) &6922 + ( ibit32 * adv_mom_5 &6923 ) * &6924 ( w(k,j+2,i) + w(k,j-3,i) ) &6925 )6926 6927 diss_s = - ABS( v_comp_s ) * ( &6928 ( 10.0_wp * ibit32 * adv_mom_5 &6929 + 3.0_wp * ibit31 * adv_mom_3 &6930 + ibit30 * adv_mom_1 &6931 ) * &6932 ( w(k,j,i) - w(k,j-1,i) ) &6933 - ( 5.0_wp * ibit32 * adv_mom_5 &6934 + ibit31 * adv_mom_3 &6935 ) * &6936 ( w(k,j+1,i) - w(k,j-2,i) ) &6937 + ( ibit32 * adv_mom_5 &6938 ) * &6939 ( w(k,j+2,i) - w(k,j-3,i) ) &6940 )6941 6942 ibit32 = IBITS(wall_flags_00(k,j,i),0,1)6943 ibit31 = IBITS(wall_flags_0(k,j,i),31,1)6944 ibit30 = IBITS(wall_flags_0(k,j,i),30,1)6945 6946 v_comp = v(k+1,j+1,i) + v(k,j+1,i) - gv6947 flux_n = v_comp * ( &6948 ( 37.0_wp * ibit32 * adv_mom_5 &6949 + 7.0_wp * ibit31 * adv_mom_3 &6950 + ibit30 * adv_mom_1 &6951 ) * &6952 ( w(k,j+1,i) + w(k,j,i) ) &6953 - ( 8.0_wp * ibit32 * adv_mom_5 &6954 + ibit31 * adv_mom_3 &6955 ) * &6956 ( w(k,j+2,i) + w(k,j-1,i) ) &6957 + ( ibit32 * adv_mom_5 &6958 ) * &6959 ( w(k,j+3,i) + w(k,j-2,i) ) &6960 )6961 6962 diss_n = - ABS( v_comp ) * ( &6963 ( 10.0_wp * ibit32 * adv_mom_5 &6964 + 3.0_wp * ibit31 * adv_mom_3 &6965 + ibit30 * adv_mom_1 &6966 ) * &6967 ( w(k,j+1,i) - w(k,j,i) ) &6968 - ( 5.0_wp * ibit32 * adv_mom_5 &6969 + ibit31 * adv_mom_3 &6970 ) * &6971 ( w(k,j+2,i) - w(k,j-1,i) ) &6972 + ( ibit32 * adv_mom_5 &6973 ) * &6974 ( w(k,j+3,i) - w(k,j-2,i) ) &6975 )6976 6977 ibit35 = IBITS(wall_flags_00(k-1,j,i),3,1)6978 ibit34 = IBITS(wall_flags_00(k-1,j,i),2,1)6979 ibit33 = IBITS(wall_flags_00(k-1,j,i),1,1)6980 6981 k_pp = k + 2 * ibit356982 k_mm = k - 2 * ( ibit34 + ibit35 )6983 k_mmm = k - 3 * ibit356984 6985 w_comp = w(k,j,i) + w(k-1,j,i)6986 flux_d = w_comp * ( &6987 ( 37.0_wp * ibit35 * adv_mom_5 &6988 + 7.0_wp * ibit34 * adv_mom_3 &6989 + ibit33 * adv_mom_1 &6990 ) * &6991 ( w(k,j,i) + w(k-1,j,i) ) &6992 - ( 8.0_wp * ibit35 * adv_mom_5 &6993 + ibit34 * adv_mom_3 &6994 ) * &6995 ( w(k+1,j,i) + w(k_mm,j,i) ) &6996 + ( ibit35 * adv_mom_5 &6997 ) * &6998 ( w(k_pp,j,i) + w(k_mmm,j,i) ) &6999 )7000 7001 diss_d = - ABS( w_comp ) * ( &7002 ( 10.0_wp * ibit35 * adv_mom_5 &7003 + 3.0_wp * ibit34 * adv_mom_3 &7004 + ibit33 * adv_mom_1 &7005 ) * &7006 ( w(k,j,i) - w(k-1,j,i) ) &7007 - ( 5.0_wp * ibit35 * adv_mom_5 &7008 + ibit34 * adv_mom_3 &7009 ) * &7010 ( w(k+1,j,i) - w(k_mm,j,i) ) &7011 + ( ibit35 * adv_mom_5 &7012 ) * &7013 ( w(k_pp,j,i) - w(k_mmm,j,i) ) &7014 )7015 7016 !7017 !-- k index has to be modified near bottom and top, else array7018 !-- subscripts will be exceeded.7019 ibit35 = IBITS(wall_flags_00(k,j,i),3,1)7020 ibit34 = IBITS(wall_flags_00(k,j,i),2,1)7021 ibit33 = IBITS(wall_flags_00(k,j,i),1,1)7022 7023 k_ppp = k + 3 * ibit357024 k_pp = k + 2 * ( 1 - ibit33 )7025 k_mm = k - 2 * ibit357026 7027 w_comp = w(k+1,j,i) + w(k,j,i)7028 flux_t = w_comp * rho_air(k+1) * ( &7029 ( 37.0_wp * ibit35 * adv_mom_5 &7030 + 7.0_wp * ibit34 * adv_mom_3 &7031 + ibit33 * adv_mom_1 &7032 ) * &7033 ( w(k+1,j,i) + w(k,j,i) ) &7034 - ( 8.0_wp * ibit35 * adv_mom_5 &7035 + ibit34 * adv_mom_3 &7036 ) * &7037 ( w(k_pp,j,i) + w(k-1,j,i) ) &7038 + ( ibit35 * adv_mom_5 &7039 ) * &7040 ( w(k_ppp,j,i) + w(k_mm,j,i) ) &7041 )7042 7043 diss_t = - ABS( w_comp ) * rho_air(k+1) * ( &7044 ( 10.0_wp * ibit35 * adv_mom_5 &7045 + 3.0_wp * ibit34 * adv_mom_3 &7046 + ibit33 * adv_mom_1 &7047 ) * &7048 ( w(k+1,j,i) - w(k,j,i) ) &7049 - ( 5.0_wp * ibit35 * adv_mom_5 &7050 + ibit34 * adv_mom_3 &7051 ) * &7052 ( w(k_pp,j,i) - w(k-1,j,i) ) &7053 + ( ibit35 * adv_mom_5 &7054 ) * &7055 ( w(k_ppp,j,i) - w(k_mm,j,i) ) &7056 )7057 !7058 !-- Calculate the divergence of the velocity field. A respective7059 !-- correction is needed to overcome numerical instabilities caused7060 !-- by a not sufficient reduction of divergences near topography.7061 div = ( ( ( u_comp + gu ) * ( ibit27 + ibit28 + ibit29 ) &7062 - ( u(k+1,j,i) + u(k,j,i) ) &7063 * ( IBITS(wall_flags_0(k,j,i-1),27,1) &7064 + IBITS(wall_flags_0(k,j,i-1),28,1) &7065 + IBITS(wall_flags_0(k,j,i-1),29,1) &7066 ) &7067 ) * rho_air_zw(k) * ddx &7068 + ( ( v_comp + gv ) * ( ibit30 + ibit31 + ibit32 ) &7069 - ( v(k+1,j,i) + v(k,j,i) ) &7070 * ( IBITS(wall_flags_0(k,j-1,i),30,1) &7071 + IBITS(wall_flags_0(k,j-1,i),31,1) &7072 + IBITS(wall_flags_00(k,j-1,i),0,1) &7073 ) &7074 ) * rho_air_zw(k) * ddy &7075 + ( w_comp * rho_air(k+1) * ( ibit33 + ibit34 + ibit35 ) &7076 - ( w(k,j,i) + w(k-1,j,i) ) * rho_air(k) &7077 * ( IBITS(wall_flags_00(k-1,j,i),1,1) &7078 + IBITS(wall_flags_00(k-1,j,i),2,1) &7079 + IBITS(wall_flags_00(k-1,j,i),3,1) &7080 ) &7081 ) * ddzu(k+1) &7082 ) * 0.5_wp7083 7084 7085 tend(k,j,i) = - ( &7086 ( flux_r + diss_r - flux_l - diss_l ) * ddx &7087 + ( flux_n + diss_n - flux_s - diss_s ) * ddy &7088 + ( ( flux_t + diss_t ) - &7089 ( flux_d + diss_d ) * rho_air(k) &7090 ) * drho_air_zw(k) * ddzu(k+1) &7091 ) + div * w(k,j,i)7092 7093 7094 !++7095 !-- Statistical Evaluation of w'w'.7096 ! sums_ws2_ws_l(k,tn) = sums_ws2_ws_l(k,tn) &7097 ! + ( flux_t + diss_t ) &7098 ! * weight_substep(intermediate_timestep_count)7099 7100 ENDDO7101 ENDDO7102 ENDDO7103 !$acc end kernels7104 7105 END SUBROUTINE advec_w_ws_acc7106 7107 5506 END MODULE advec_ws -
palm/trunk/SOURCE/boundary_conds.f90
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC directives removed 23 23 ! 24 24 ! Former revisions: … … 188 188 !-- Bottom boundary 189 189 IF ( ibc_uv_b == 1 ) THEN 190 !$acc kernels present( u_p, v_p )191 190 u_p(nzb,:,:) = u_p(nzb+1,:,:) 192 191 v_p(nzb,:,:) = v_p(nzb+1,:,:) 193 !$acc end kernels 194 ENDIF 195 196 !$acc kernels present( nzb_w_inner, w_p ) 192 ENDIF 193 197 194 DO i = nxlg, nxrg 198 195 DO j = nysg, nyng … … 200 197 ENDDO 201 198 ENDDO 202 !$acc end kernels203 199 204 200 ! 205 201 !-- Top boundary. A nested domain ( ibc_uv_t = 3 ) does not require settings. 206 202 IF ( ibc_uv_t == 0 ) THEN 207 !$acc kernels present( u_init, u_p, v_init, v_p )208 203 u_p(nzt+1,:,:) = u_init(nzt+1) 209 204 v_p(nzt+1,:,:) = v_init(nzt+1) 210 !$acc end kernels211 205 ELSEIF ( ibc_uv_t == 1 ) THEN 212 !$acc kernels present( u_p, v_p )213 206 u_p(nzt+1,:,:) = u_p(nzt,:,:) 214 207 v_p(nzt+1,:,:) = v_p(nzt,:,:) 215 !$acc end kernels216 208 ENDIF 217 209 218 210 IF ( .NOT. nest_domain ) THEN 219 !$acc kernels present( w_p )220 211 w_p(nzt:nzt+1,:,:) = 0.0_wp ! nzt is not a prognostic level (but cf. pres) 221 !$acc end kernels222 212 ENDIF 223 213 … … 227 217 !-- the sea surface temperature of the coupled ocean model. 228 218 IF ( ibc_pt_b == 0 ) THEN 229 !$acc kernels present( nzb_s_inner, pt, pt_p )230 !$acc loop independent231 219 DO i = nxlg, nxrg 232 !$acc loop independent233 220 DO j = nysg, nyng 234 221 pt_p(nzb_s_inner(j,i),j,i) = pt(nzb_s_inner(j,i),j,i) 235 222 ENDDO 236 223 ENDDO 237 !$acc end kernels238 224 ELSEIF ( ibc_pt_b == 1 ) THEN 239 !$acc kernels present( nzb_s_inner, pt_p )240 !$acc loop independent241 225 DO i = nxlg, nxrg 242 !$acc loop independent243 226 DO j = nysg, nyng 244 227 pt_p(nzb_s_inner(j,i),j,i) = pt_p(nzb_s_inner(j,i)+1,j,i) 245 228 ENDDO 246 229 ENDDO 247 !$acc end kernels248 230 ENDIF 249 231 … … 251 233 !-- Temperature at top boundary 252 234 IF ( ibc_pt_t == 0 ) THEN 253 !$acc kernels present( pt, pt_p )254 235 pt_p(nzt+1,:,:) = pt(nzt+1,:,:) 255 236 ! … … 259 240 pt_p(nzt+1,:,:) = pt_init(nzt+1) 260 241 ENDIF 261 !$acc end kernels262 242 ELSEIF ( ibc_pt_t == 1 ) THEN 263 !$acc kernels present( pt_p )264 243 pt_p(nzt+1,:,:) = pt_p(nzt,:,:) 265 !$acc end kernels266 244 ELSEIF ( ibc_pt_t == 2 ) THEN 267 !$acc kernels present( dzu, pt_p )268 245 pt_p(nzt+1,:,:) = pt_p(nzt,:,:) + bc_pt_t_val * dzu(nzt+1) 269 !$acc end kernels270 246 ENDIF 271 247 … … 274 250 !-- Generally Neumann conditions with de/dz=0 are assumed 275 251 IF ( .NOT. constant_diffusion ) THEN 276 !$acc kernels present( e_p, nzb_s_inner )277 !$acc loop independent278 252 DO i = nxlg, nxrg 279 !$acc loop independent280 253 DO j = nysg, nyng 281 254 e_p(nzb_s_inner(j,i),j,i) = e_p(nzb_s_inner(j,i)+1,j,i) … … 285 258 e_p(nzt+1,:,:) = e_p(nzt,:,:) 286 259 ENDIF 287 !$acc end kernels288 260 ENDIF 289 261 -
palm/trunk/SOURCE/buoyancy.f90
r2101 r2118 20 20 ! Current revisions: 21 21 ! ------------------ 22 ! 22 ! OpenACC version of subroutine removed 23 23 ! 24 24 ! Former revisions: … … 102 102 103 103 PRIVATE 104 PUBLIC buoyancy , buoyancy_acc104 PUBLIC buoyancy 105 105 106 106 INTERFACE buoyancy … … 108 108 MODULE PROCEDURE buoyancy_ij 109 109 END INTERFACE buoyancy 110 111 INTERFACE buoyancy_acc112 MODULE PROCEDURE buoyancy_acc113 END INTERFACE buoyancy_acc114 110 115 111 CONTAINS … … 212 208 213 209 END SUBROUTINE buoyancy 214 215 216 !------------------------------------------------------------------------------!217 ! Description:218 ! ------------219 !> Call for all grid points - accelerator version220 !------------------------------------------------------------------------------!221 SUBROUTINE buoyancy_acc( var, wind_component )222 223 USE arrays_3d, &224 ONLY: pt, pt_slope_ref, ref_state, tend225 226 USE control_parameters, &227 ONLY: atmos_ocean_sign, cos_alpha_surface, g, message_string, &228 pt_surface, sin_alpha_surface, sloping_surface229 230 USE indices, &231 ONLY: i_left, i_right, j_north, j_south, nxl, nxlg, nxlu, nxr, &232 nxrg, nyn, nyng, nys, nysg, nzb, nzb_s_inner, nzt233 234 USE kinds235 236 USE pegrid237 238 239 IMPLICIT NONE240 241 INTEGER(iwp) :: i !<242 INTEGER(iwp) :: j !<243 INTEGER(iwp) :: k !<244 INTEGER(iwp) :: wind_component !<245 246 #if defined( __nopointer )247 REAL(wp), DIMENSION(nzb:nzt+1,nysg:nyng,nxlg:nxrg) :: var !<248 #else249 REAL(wp), DIMENSION(:,:,:), POINTER :: var250 #endif251 252 253 IF ( .NOT. sloping_surface ) THEN254 !255 !-- Normal case: horizontal surface256 !$acc kernels present( nzb_s_inner, ref_state, tend, var )257 !$acc loop258 DO i = i_left, i_right259 DO j = j_south, j_north260 !$acc loop independent vector261 DO k = nzb_s_inner(j,i)+1, nzt-1262 tend(k,j,i) = tend(k,j,i) + atmos_ocean_sign * g * 0.5_wp * &263 ( &264 ( var(k,j,i) - ref_state(k) ) / ref_state(k) + &265 ( var(k+1,j,i) - ref_state(k+1) ) / ref_state(k+1) &266 )267 ENDDO268 ENDDO269 ENDDO270 !$acc end kernels271 272 ELSE273 !274 !-- Buoyancy term for a surface with a slope in x-direction. The equations275 !-- for both the u and w velocity-component contain proportionate terms.276 !-- Temperature field at time t=0 serves as environmental temperature.277 !-- Reference temperature (pt_surface) is the one at the lower left corner278 !-- of the total domain.279 IF ( wind_component == 1 ) THEN280 281 DO i = nxlu, nxr282 DO j = nys, nyn283 DO k = nzb_s_inner(j,i)+1, nzt-1284 tend(k,j,i) = tend(k,j,i) + g * sin_alpha_surface * &285 0.5_wp * ( ( pt(k,j,i-1) + pt(k,j,i) ) &286 - ( pt_slope_ref(k,i-1) + pt_slope_ref(k,i) ) &287 ) / pt_surface288 ENDDO289 ENDDO290 ENDDO291 292 ELSEIF ( wind_component == 3 ) THEN293 294 DO i = nxl, nxr295 DO j = nys, nyn296 DO k = nzb_s_inner(j,i)+1, nzt-1297 tend(k,j,i) = tend(k,j,i) + g * cos_alpha_surface * &298 0.5_wp * ( ( pt(k,j,i) + pt(k+1,j,i) ) &299 - ( pt_slope_ref(k,i) + pt_slope_ref(k+1,i) ) &300 ) / pt_surface301 ENDDO302 ENDDO303 ENDDO304 305 ELSE306 307 WRITE( message_string, * ) 'no term for component "', &308 wind_component,'"'309 CALL message( 'buoyancy', 'PA0159', 1, 2, 0, 6, 0 )310 311 ENDIF312 313 ENDIF314 315 END SUBROUTINE buoyancy_acc316 210 317 211 -
palm/trunk/SOURCE/check_parameters.f90
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC related parts of code removed 23 23 ! 24 24 ! Former revisions: … … 518 518 IF ( transpose_compute_overlap ) THEN 519 519 IF ( numprocs == 1 ) STOP '+++ transpose-compute-overlap not implemented for single PE runs' 520 #if defined( __openacc )521 STOP '+++ transpose-compute-overlap not implemented for GPU usage'522 #endif523 520 ENDIF 524 521 … … 774 771 SELECT CASE ( TRIM( loop_optimization ) ) 775 772 776 CASE ( ' acc', 'cache', 'vector' )773 CASE ( 'cache', 'vector' ) 777 774 CONTINUE 778 775 -
palm/trunk/SOURCE/coriolis.f90
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC version of subroutine removed 23 23 ! 24 24 ! Former revisions: … … 35 35 ! 1850 2016-04-08 13:29:27Z maronga 36 36 ! Module renamed 37 !38 37 ! 39 38 ! 1682 2015-10-07 23:56:08Z knoop … … 76 75 77 76 PRIVATE 78 PUBLIC coriolis , coriolis_acc77 PUBLIC coriolis 79 78 80 79 INTERFACE coriolis … … 82 81 MODULE PROCEDURE coriolis_ij 83 82 END INTERFACE coriolis 84 85 INTERFACE coriolis_acc86 MODULE PROCEDURE coriolis_acc87 END INTERFACE coriolis_acc88 83 89 84 CONTAINS … … 177 172 ! Description: 178 173 ! ------------ 179 !> Call for all grid points - accelerator version180 !------------------------------------------------------------------------------!181 SUBROUTINE coriolis_acc( component )182 183 USE arrays_3d, &184 ONLY: tend, u, ug, v, vg, w185 186 USE control_parameters, &187 ONLY: f, fs, message_string188 189 USE indices, &190 ONLY: i_left, i_right, j_north, j_south, nzb_u_inner, &191 nzb_v_inner, nzb_w_inner, nzt192 193 USE kinds194 195 IMPLICIT NONE196 197 INTEGER(iwp) :: component !<198 INTEGER(iwp) :: i !<199 INTEGER(iwp) :: j !<200 INTEGER(iwp) :: k !<201 202 203 !204 !-- Compute Coriolis terms for the three velocity components205 SELECT CASE ( component )206 207 !208 !-- u-component209 CASE ( 1 )210 !$acc kernels present( nzb_u_inner, tend, v, vg, w )211 DO i = i_left, i_right212 DO j = j_south, j_north213 DO k = 1, nzt214 IF ( k > nzb_u_inner(j,i) ) THEN215 tend(k,j,i) = tend(k,j,i) + f * ( 0.25_wp * &216 ( v(k,j,i-1) + v(k,j,i) + v(k,j+1,i-1) + &217 v(k,j+1,i) ) - vg(k) ) &218 - fs * ( 0.25_wp * &219 ( w(k-1,j,i-1) + w(k-1,j,i) + w(k,j,i-1) &220 + w(k,j,i) ) &221 )222 ENDIF223 ENDDO224 ENDDO225 ENDDO226 !$acc end kernels227 228 !229 !-- v-component230 CASE ( 2 )231 !$acc kernels present( nzb_v_inner, tend, u, ug )232 DO i = i_left, i_right233 DO j = j_south, j_north234 DO k = 1, nzt235 IF ( k > nzb_v_inner(j,i) ) THEN236 tend(k,j,i) = tend(k,j,i) - f * ( 0.25_wp * &237 ( u(k,j-1,i) + u(k,j,i) + u(k,j-1,i+1) + &238 u(k,j,i+1) ) - ug(k) )239 ENDIF240 ENDDO241 ENDDO242 ENDDO243 !$acc end kernels244 245 !246 !-- w-component247 CASE ( 3 )248 !$acc kernels present( nzb_w_inner, tend, u )249 DO i = i_left, i_right250 DO j = j_south, j_north251 DO k = 1, nzt252 IF ( k > nzb_w_inner(j,i) ) THEN253 tend(k,j,i) = tend(k,j,i) + fs * 0.25_wp * &254 ( u(k,j,i) + u(k+1,j,i) + u(k,j,i+1) + &255 u(k+1,j,i+1) )256 ENDIF257 ENDDO258 ENDDO259 ENDDO260 !$acc end kernels261 262 CASE DEFAULT263 264 WRITE( message_string, * ) ' wrong component: ', component265 CALL message( 'coriolis', 'PA0173', 1, 2, 0, 6, 0 )266 267 END SELECT268 269 END SUBROUTINE coriolis_acc270 271 272 !------------------------------------------------------------------------------!273 ! Description:274 ! ------------275 174 !> Call for grid point i,j 276 175 !------------------------------------------------------------------------------! -
palm/trunk/SOURCE/cpulog_mod.f90
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC relevant code removed 23 23 ! 24 24 ! Former revisions: … … 401 401 average_cputime 402 402 403 IF ( num_acc_per_node /= 0 ) WRITE ( 18, 108 ) num_acc_per_node404 403 WRITE ( 18, 110 ) 405 404 #else … … 409 408 average_cputime 410 409 411 IF ( num_acc_per_node /= 0 ) WRITE ( 18, 109 ) num_acc_per_node412 410 WRITE ( 18, 110 ) 413 411 #endif … … 565 563 106 FORMAT (/'Exchange of ghostpoints via MPI_ISEND/MPI_IRECV') 566 564 107 FORMAT (//) 567 108 FORMAT ('Accelerator boards per node: ',14X,I2)568 109 FORMAT ('Accelerator boards: ',23X,I2)569 565 110 FORMAT ('-------------------------------------------------------------', & 570 566 &'---------'//& -
palm/trunk/SOURCE/diffusion_e.f90
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC version of subroutine removed 23 23 ! 24 24 ! Former revisions: … … 34 34 ! 1873 2016-04-18 14:50:06Z maronga 35 35 ! Module renamed (removed _mod) 36 !37 36 ! 38 37 ! 1850 2016-04-08 13:29:27Z maronga … … 108 107 109 108 PRIVATE 110 PUBLIC diffusion_e , diffusion_e_acc109 PUBLIC diffusion_e 111 110 112 111 … … 116 115 END INTERFACE diffusion_e 117 116 118 INTERFACE diffusion_e_acc119 MODULE PROCEDURE diffusion_e_acc120 END INTERFACE diffusion_e_acc121 122 117 CONTAINS 123 118 … … 337 332 338 333 END SUBROUTINE diffusion_e 339 340 341 !------------------------------------------------------------------------------!342 ! Description:343 ! ------------344 !> Call for all grid points - accelerator version345 !------------------------------------------------------------------------------!346 SUBROUTINE diffusion_e_acc( var, var_reference )347 348 USE arrays_3d, &349 ONLY: dd2zu, ddzu, ddzw, diss, e, km, l_grid, tend, zu, zw, &350 drho_air, rho_air_zw351 352 USE control_parameters, &353 ONLY: atmos_ocean_sign, g, use_single_reference_value, &354 wall_adjustment, wall_adjustment_factor355 356 USE grid_variables, &357 ONLY: ddx2, ddy2358 359 USE indices, &360 ONLY: i_left, i_right, j_north, j_south, nxlg, nxrg, nyng, nysg, &361 nzb, nzb_s_inner, nzt362 363 USE kinds364 365 USE microphysics_mod, &366 ONLY: collision_turbulence367 368 USE particle_attributes, &369 ONLY: use_sgs_for_particles, wang_kernel370 371 IMPLICIT NONE372 373 INTEGER(iwp) :: i !<374 INTEGER(iwp) :: j !<375 INTEGER(iwp) :: k !<376 REAL(wp) :: dissipation !<377 REAL(wp) :: dvar_dz !<378 REAL(wp) :: l !<379 REAL(wp) :: ll !<380 REAL(wp) :: l_stable !<381 REAL(wp) :: var_reference !<382 383 #if defined( __nopointer )384 REAL(wp), DIMENSION(nzb:nzt+1,nysg:nyng,nxlg:nxrg) :: var !<385 #else386 REAL(wp), DIMENSION(:,:,:), POINTER :: var !<387 #endif388 389 390 !391 !-- This if clause must be outside the k-loop because otherwise392 !-- runtime errors occur with -C hopt on NEC393 IF ( use_single_reference_value ) THEN394 395 !$acc kernels present( ddzu, ddzw, dd2zu, diss, e, km, l_grid ) &396 !$acc present( nzb_s_inner, tend, var, zu, zw )397 DO i = i_left, i_right398 DO j = j_south, j_north399 DO k = 1, nzt400 401 IF ( k > nzb_s_inner(j,i) ) THEN402 !403 !-- Calculate the mixing length (for dissipation)404 dvar_dz = atmos_ocean_sign * &405 ( var(k+1,j,i) - var(k-1,j,i) ) * dd2zu(k)406 IF ( dvar_dz > 0.0_wp ) THEN407 l_stable = 0.76_wp * SQRT( e(k,j,i) ) / &408 SQRT( g / var_reference * dvar_dz ) + 1E-5_wp409 ELSE410 l_stable = l_grid(k)411 ENDIF412 !413 !-- Adjustment of the mixing length414 IF ( wall_adjustment ) THEN415 l = MIN( wall_adjustment_factor * &416 ( zu(k) - zw(nzb_s_inner(j,i)) ), &417 l_grid(k), l_stable )418 ll = MIN( wall_adjustment_factor * &419 ( zu(k) - zw(nzb_s_inner(j,i)) ), &420 l_grid(k) )421 ELSE422 l = MIN( l_grid(k), l_stable )423 ll = l_grid(k)424 ENDIF425 !426 !-- Calculate the tendency terms427 dissipation = ( 0.19_wp + 0.74_wp * l / ll ) * &428 e(k,j,i) * SQRT( e(k,j,i) ) / l429 430 tend(k,j,i) = tend(k,j,i) &431 + ( &432 ( km(k,j,i)+km(k,j,i+1) ) * ( e(k,j,i+1)-e(k,j,i) ) &433 - ( km(k,j,i)+km(k,j,i-1) ) * ( e(k,j,i)-e(k,j,i-1) ) &434 ) * ddx2 &435 + ( &436 ( km(k,j,i)+km(k,j+1,i) ) * ( e(k,j+1,i)-e(k,j,i) ) &437 - ( km(k,j,i)+km(k,j-1,i) ) * ( e(k,j,i)-e(k,j-1,i) ) &438 ) * ddy2 &439 + ( &440 ( km(k,j,i)+km(k+1,j,i) ) * ( e(k+1,j,i)-e(k,j,i) ) * ddzu(k+1) &441 * rho_air_zw(k) &442 - ( km(k,j,i)+km(k-1,j,i) ) * ( e(k,j,i)-e(k-1,j,i) ) * ddzu(k) &443 * rho_air_zw(k-1) &444 ) * ddzw(k) * drho_air(k) &445 - dissipation446 447 !448 !-- Store dissipation if needed for calculating the sgs particle449 !-- velocities450 IF ( use_sgs_for_particles .OR. wang_kernel .OR. &451 collision_turbulence ) THEN452 diss(k,j,i) = dissipation453 ENDIF454 455 ENDIF456 457 ENDDO458 ENDDO459 ENDDO460 !$acc end kernels461 462 ELSE463 464 !$acc kernels present( ddzu, ddzw, dd2zu, diss, e, km, l_grid ) &465 !$acc present( nzb_s_inner, tend, var, zu, zw )466 DO i = i_left, i_right467 DO j = j_south, j_north468 DO k = 1, nzt469 470 IF ( k > nzb_s_inner(j,i) ) THEN471 !472 !-- Calculate the mixing length (for dissipation)473 dvar_dz = atmos_ocean_sign * &474 ( var(k+1,j,i) - var(k-1,j,i) ) * dd2zu(k)475 IF ( dvar_dz > 0.0_wp ) THEN476 l_stable = 0.76_wp * SQRT( e(k,j,i) ) / &477 SQRT( g / var(k,j,i) * dvar_dz ) + 1E-5_wp478 ELSE479 l_stable = l_grid(k)480 ENDIF481 !482 !-- Adjustment of the mixing length483 IF ( wall_adjustment ) THEN484 l = MIN( wall_adjustment_factor * &485 ( zu(k) - zw(nzb_s_inner(j,i)) ), &486 l_grid(k), l_stable )487 ll = MIN( wall_adjustment_factor * &488 ( zu(k) - zw(nzb_s_inner(j,i)) ), &489 l_grid(k) )490 ELSE491 l = MIN( l_grid(k), l_stable )492 ll = l_grid(k)493 ENDIF494 !495 !-- Calculate the tendency terms496 dissipation = ( 0.19_wp + 0.74_wp * l / ll ) * &497 e(k,j,i) * SQRT( e(k,j,i) ) / l498 499 tend(k,j,i) = tend(k,j,i) &500 + ( &501 ( km(k,j,i)+km(k,j,i+1) ) * ( e(k,j,i+1)-e(k,j,i) ) &502 - ( km(k,j,i)+km(k,j,i-1) ) * ( e(k,j,i)-e(k,j,i-1) ) &503 ) * ddx2 &504 + ( &505 ( km(k,j,i)+km(k,j+1,i) ) * ( e(k,j+1,i)-e(k,j,i) ) &506 - ( km(k,j,i)+km(k,j-1,i) ) * ( e(k,j,i)-e(k,j-1,i) ) &507 ) * ddy2 &508 + ( &509 ( km(k,j,i)+km(k+1,j,i) ) * ( e(k+1,j,i)-e(k,j,i) ) * ddzu(k+1) &510 * rho_air_zw(k) &511 - ( km(k,j,i)+km(k-1,j,i) ) * ( e(k,j,i)-e(k-1,j,i) ) * ddzu(k) &512 * rho_air_zw(k-1) &513 ) * ddzw(k) * drho_air(k) &514 - dissipation515 516 !517 !-- Store dissipation if needed for calculating the sgs518 !-- particle velocities519 IF ( use_sgs_for_particles .OR. wang_kernel .OR. &520 collision_turbulence ) THEN521 diss(k,j,i) = dissipation522 ENDIF523 524 ENDIF525 526 ENDDO527 ENDDO528 ENDDO529 !$acc end kernels530 531 ENDIF532 533 !534 !-- Boundary condition for dissipation535 IF ( use_sgs_for_particles .OR. wang_kernel .OR. &536 collision_turbulence ) THEN537 !$acc kernels present( diss, nzb_s_inner )538 DO i = i_left, i_right539 DO j = j_south, j_north540 diss(nzb_s_inner(j,i),j,i) = diss(nzb_s_inner(j,i)+1,j,i)541 ENDDO542 ENDDO543 !$acc end kernels544 ENDIF545 546 END SUBROUTINE diffusion_e_acc547 334 548 335 -
palm/trunk/SOURCE/diffusion_s.f90
r2101 r2118 20 20 ! Current revisions: 21 21 ! ------------------ 22 ! 22 ! OpenACC version of subroutine removed 23 23 ! 24 24 ! Former revisions: … … 34 34 ! 1873 2016-04-18 14:50:06Z maronga 35 35 ! Module renamed (removed _mod) 36 ! 37 ! 36 ! 38 37 ! 1850 2016-04-08 13:29:27Z maronga 39 38 ! Module renamed 40 !41 39 ! 42 40 ! 1691 2015-10-26 16:17:44Z maronga … … 94 92 95 93 PRIVATE 96 PUBLIC diffusion_s , diffusion_s_acc94 PUBLIC diffusion_s 97 95 98 96 INTERFACE diffusion_s … … 100 98 MODULE PROCEDURE diffusion_s_ij 101 99 END INTERFACE diffusion_s 102 103 INTERFACE diffusion_s_acc104 MODULE PROCEDURE diffusion_s_acc105 END INTERFACE diffusion_s_acc106 100 107 101 CONTAINS … … 242 236 ! Description: 243 237 ! ------------ 244 !> Call for all grid points - accelerator version245 !------------------------------------------------------------------------------!246 SUBROUTINE diffusion_s_acc( s, s_flux_b, s_flux_t, wall_s_flux )247 248 USE arrays_3d, &249 ONLY: ddzu, ddzw, kh, tend, drho_air, rho_air_zw250 251 USE control_parameters, &252 ONLY: use_surface_fluxes, use_top_fluxes253 254 USE grid_variables, &255 ONLY: ddx2, ddy2, fwxm, fwxp, fwym, fwyp, wall_w_x, wall_w_y256 257 USE indices, &258 ONLY: i_left, i_right, j_north, j_south, nxlg, nxrg, nyng, nysg, &259 nzb, nzb_diff_s_inner, nzb_s_inner, nzb_s_outer, nzt, nzt_diff260 261 USE kinds262 263 IMPLICIT NONE264 265 INTEGER(iwp) :: i !<266 INTEGER(iwp) :: j !<267 INTEGER(iwp) :: k !<268 REAL(wp) :: wall_s_flux(0:4) !<269 REAL(wp), DIMENSION(nysg:nyng,nxlg:nxrg) :: s_flux_b !<270 REAL(wp), DIMENSION(nysg:nyng,nxlg:nxrg) :: s_flux_t !<271 #if defined( __nopointer )272 REAL(wp), DIMENSION(nzb:nzt+1,nysg:nyng,nxlg:nxrg) :: s !<273 #else274 REAL(wp), DIMENSION(:,:,:), POINTER :: s !<275 #endif276 277 !$acc kernels present( ddzu, ddzw, fwxm, fwxp, fwym, fwyp, kh ) &278 !$acc present( nzb_diff_s_inner, nzb_s_inner, nzb_s_outer, s ) &279 !$acc present( s_flux_b, s_flux_t, tend, wall_s_flux ) &280 !$acc present( wall_w_x, wall_w_y )281 DO i = i_left, i_right282 DO j = j_south, j_north283 !284 !-- Compute horizontal diffusion285 DO k = 1, nzt286 IF ( k > nzb_s_outer(j,i) ) THEN287 288 tend(k,j,i) = tend(k,j,i) &289 + 0.5_wp * ( &290 ( kh(k,j,i) + kh(k,j,i+1) ) * ( s(k,j,i+1)-s(k,j,i) ) &291 - ( kh(k,j,i) + kh(k,j,i-1) ) * ( s(k,j,i)-s(k,j,i-1) ) &292 ) * ddx2 &293 + 0.5_wp * ( &294 ( kh(k,j,i) + kh(k,j+1,i) ) * ( s(k,j+1,i)-s(k,j,i) ) &295 - ( kh(k,j,i) + kh(k,j-1,i) ) * ( s(k,j,i)-s(k,j-1,i) ) &296 ) * ddy2297 ENDIF298 ENDDO299 300 !301 !-- Apply prescribed horizontal wall heatflux where necessary302 DO k = 1, nzt303 IF ( k > nzb_s_inner(j,i) .AND. k <= nzb_s_outer(j,i) .AND. &304 ( wall_w_x(j,i) /= 0.0_wp .OR. wall_w_y(j,i) /= 0.0_wp ) ) &305 THEN306 tend(k,j,i) = tend(k,j,i) &307 + ( fwxp(j,i) * 0.5_wp * &308 ( kh(k,j,i) + kh(k,j,i+1) ) * ( s(k,j,i+1)-s(k,j,i) ) &309 + ( 1.0_wp - fwxp(j,i) ) * wall_s_flux(1) &310 -fwxm(j,i) * 0.5_wp * &311 ( kh(k,j,i) + kh(k,j,i-1) ) * ( s(k,j,i)-s(k,j,i-1) ) &312 + ( 1.0_wp - fwxm(j,i) ) * wall_s_flux(2) &313 ) * ddx2 &314 + ( fwyp(j,i) * 0.5_wp * &315 ( kh(k,j,i) + kh(k,j+1,i) ) * ( s(k,j+1,i)-s(k,j,i) ) &316 + ( 1.0_wp - fwyp(j,i) ) * wall_s_flux(3) &317 -fwym(j,i) * 0.5_wp * &318 ( kh(k,j,i) + kh(k,j-1,i) ) * ( s(k,j,i)-s(k,j-1,i) ) &319 + ( 1.0_wp - fwym(j,i) ) * wall_s_flux(4) &320 ) * ddy2321 ENDIF322 ENDDO323 324 !325 !-- Compute vertical diffusion. In case that surface fluxes have been326 !-- prescribed or computed at bottom and/or top, index k starts/ends at327 !-- nzb+2 or nzt-1, respectively.328 DO k = 1, nzt_diff329 IF ( k >= nzb_diff_s_inner(j,i) ) THEN330 tend(k,j,i) = tend(k,j,i) &331 + 0.5_wp * ( &332 ( kh(k,j,i) + kh(k+1,j,i) ) * ( s(k+1,j,i)-s(k,j,i) ) * ddzu(k+1) &333 * rho_air_zw(k) &334 - ( kh(k,j,i) + kh(k-1,j,i) ) * ( s(k,j,i)-s(k-1,j,i) ) * ddzu(k) &335 * rho_air_zw(k-1) &336 ) * ddzw(k) * drho_air(k)337 ENDIF338 ENDDO339 340 !341 !-- Vertical diffusion at the first computational gridpoint along342 !-- z-direction343 DO k = 1, nzt344 IF ( use_surface_fluxes .AND. k == nzb_s_inner(j,i)+1 ) THEN345 tend(k,j,i) = tend(k,j,i) &346 + ( 0.5_wp * ( kh(k,j,i)+kh(k+1,j,i) )&347 * ( s(k+1,j,i)-s(k,j,i) ) &348 * ddzu(k+1) &349 * rho_air_zw(k) &350 + s_flux_b(j,i) &351 ) * ddzw(k) * drho_air(k)352 ENDIF353 354 !355 !-- Vertical diffusion at the last computational gridpoint along356 !-- z-direction357 IF ( use_top_fluxes .AND. k == nzt ) THEN358 tend(k,j,i) = tend(k,j,i) &359 + ( - s_flux_t(j,i) &360 - 0.5_wp * ( kh(k-1,j,i)+kh(k,j,i) )&361 * ( s(k,j,i)-s(k-1,j,i) ) &362 * ddzu(k) &363 * rho_air_zw(k-1) &364 ) * ddzw(k) * drho_air(k)365 ENDIF366 ENDDO367 368 ENDDO369 ENDDO370 !$acc end kernels371 372 END SUBROUTINE diffusion_s_acc373 374 375 !------------------------------------------------------------------------------!376 ! Description:377 ! ------------378 238 !> Call for grid point i,j 379 239 !------------------------------------------------------------------------------! -
palm/trunk/SOURCE/diffusion_u.f90
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC version of subroutine removed 23 23 ! 24 24 ! Former revisions: … … 35 35 ! Module renamed (removed _mod) 36 36 ! 37 !38 37 ! 1850 2016-04-08 13:29:27Z maronga 39 38 ! Module renamed 40 !41 39 ! 42 40 ! 1740 2016-01-13 08:19:40Z raasch … … 97 95 98 96 PRIVATE 99 PUBLIC diffusion_u , diffusion_u_acc97 PUBLIC diffusion_u 100 98 101 99 INTERFACE diffusion_u … … 103 101 MODULE PROCEDURE diffusion_u_ij 104 102 END INTERFACE diffusion_u 105 106 INTERFACE diffusion_u_acc107 MODULE PROCEDURE diffusion_u_acc108 END INTERFACE diffusion_u_acc109 103 110 104 CONTAINS … … 280 274 ! Description: 281 275 ! ------------ 282 !> Call for all grid points - accelerator version283 !------------------------------------------------------------------------------!284 SUBROUTINE diffusion_u_acc285 286 USE arrays_3d, &287 ONLY: ddzu, ddzw, km, tend, u, usws, uswst, v, w, &288 drho_air, rho_air_zw289 290 USE control_parameters, &291 ONLY: constant_top_momentumflux, topography, use_surface_fluxes, &292 use_top_fluxes293 294 USE grid_variables, &295 ONLY: ddx, ddx2, ddy, fym, fyp, wall_u296 297 USE indices, &298 ONLY: i_left, i_right, j_north, j_south, nxl, nxr, nyn, nys, nzb, &299 nzb_diff_u, nzb_u_inner, nzb_u_outer, nzt, nzt_diff300 301 USE kinds302 303 IMPLICIT NONE304 305 INTEGER(iwp) :: i !<306 INTEGER(iwp) :: j !<307 INTEGER(iwp) :: k !<308 REAL(wp) :: kmym !<309 REAL(wp) :: kmyp !<310 REAL(wp) :: kmzm !<311 REAL(wp) :: kmzp !<312 313 REAL(wp), DIMENSION(nzb:nzt+1,nys:nyn,nxl:nxr) :: usvs !<314 !$acc declare create ( usvs )315 316 !317 !-- First calculate horizontal momentum flux u'v' at vertical walls,318 !-- if neccessary319 IF ( topography /= 'flat' ) THEN320 CALL wall_fluxes_acc( usvs, 1.0_wp, 0.0_wp, 0.0_wp, 0.0_wp, &321 nzb_u_inner, nzb_u_outer, wall_u )322 ENDIF323 324 !$acc kernels present ( u, v, w, km, tend, usws, uswst ) &325 !$acc present ( ddzu, ddzw, fym, fyp, wall_u ) &326 !$acc present ( nzb_u_inner, nzb_u_outer, nzb_diff_u )327 DO i = i_left, i_right328 DO j = j_south, j_north329 !330 !-- Compute horizontal diffusion331 DO k = 1, nzt332 IF ( k > nzb_u_outer(j,i) ) THEN333 !334 !-- Interpolate eddy diffusivities on staggered gridpoints335 kmyp = 0.25_wp * &336 ( km(k,j,i)+km(k,j+1,i)+km(k,j,i-1)+km(k,j+1,i-1) )337 kmym = 0.25_wp * &338 ( km(k,j,i)+km(k,j-1,i)+km(k,j,i-1)+km(k,j-1,i-1) )339 340 tend(k,j,i) = tend(k,j,i) &341 & + 2.0_wp * ( &342 & km(k,j,i) * ( u(k,j,i+1) - u(k,j,i) ) &343 & - km(k,j,i-1) * ( u(k,j,i) - u(k,j,i-1) ) &344 & ) * ddx2 &345 & + ( kmyp * ( u(k,j+1,i) - u(k,j,i) ) * ddy &346 & + kmyp * ( v(k,j+1,i) - v(k,j+1,i-1) ) * ddx &347 & - kmym * ( u(k,j,i) - u(k,j-1,i) ) * ddy &348 & - kmym * ( v(k,j,i) - v(k,j,i-1) ) * ddx &349 & ) * ddy350 ENDIF351 ENDDO352 353 !354 !-- Wall functions at the north and south walls, respectively355 DO k = 1, nzt356 IF( k > nzb_u_inner(j,i) .AND. k <= nzb_u_outer(j,i) .AND. &357 wall_u(j,i) /= 0.0_wp ) THEN358 359 kmyp = 0.25_wp * &360 ( km(k,j,i)+km(k,j+1,i)+km(k,j,i-1)+km(k,j+1,i-1) )361 kmym = 0.25_wp * &362 ( km(k,j,i)+km(k,j-1,i)+km(k,j,i-1)+km(k,j-1,i-1) )363 364 tend(k,j,i) = tend(k,j,i) &365 + 2.0_wp * ( &366 km(k,j,i) * ( u(k,j,i+1) - u(k,j,i) ) &367 - km(k,j,i-1) * ( u(k,j,i) - u(k,j,i-1) ) &368 ) * ddx2 &369 + ( fyp(j,i) * ( &370 kmyp * ( u(k,j+1,i) - u(k,j,i) ) * ddy &371 + kmyp * ( v(k,j+1,i) - v(k,j+1,i-1) ) * ddx &372 ) &373 - fym(j,i) * ( &374 kmym * ( u(k,j,i) - u(k,j-1,i) ) * ddy &375 + kmym * ( v(k,j,i) - v(k,j,i-1) ) * ddx &376 ) &377 + wall_u(j,i) * usvs(k,j,i) &378 ) * ddy379 ENDIF380 ENDDO381 382 !383 !-- Compute vertical diffusion. In case of simulating a Prandtl layer,384 !-- index k starts at nzb_u_inner+2.385 DO k = 1, nzt_diff386 IF ( k >= nzb_diff_u(j,i) ) THEN387 !388 !-- Interpolate eddy diffusivities on staggered gridpoints389 kmzp = 0.25_wp * &390 ( km(k,j,i)+km(k+1,j,i)+km(k,j,i-1)+km(k+1,j,i-1) )391 kmzm = 0.25_wp * &392 ( km(k,j,i)+km(k-1,j,i)+km(k,j,i-1)+km(k-1,j,i-1) )393 394 tend(k,j,i) = tend(k,j,i) &395 & + ( kmzp * ( ( u(k+1,j,i) - u(k,j,i) ) * ddzu(k+1)&396 & + ( w(k,j,i) - w(k,j,i-1) ) * ddx &397 & ) * rho_air_zw(k) &398 & - kmzm * ( ( u(k,j,i) - u(k-1,j,i) ) * ddzu(k)&399 & + ( w(k-1,j,i) - w(k-1,j,i-1) ) * ddx &400 & ) * rho_air_zw(k-1) &401 & ) * ddzw(k) * drho_air(k)402 ENDIF403 ENDDO404 405 ENDDO406 ENDDO407 408 !409 !-- Vertical diffusion at the first grid point above the surface,410 !-- if the momentum flux at the bottom is given by the Prandtl law or411 !-- if it is prescribed by the user.412 !-- Difference quotient of the momentum flux is not formed over half413 !-- of the grid spacing (2.0*ddzw(k)) any more, since the comparison414 !-- with other (LES) models showed that the values of the momentum415 !-- flux becomes too large in this case.416 !-- The term containing w(k-1,..) (see above equation) is removed here417 !-- because the vertical velocity is assumed to be zero at the surface.418 IF ( use_surface_fluxes ) THEN419 420 DO i = i_left, i_right421 DO j = j_south, j_north422 423 k = nzb_u_inner(j,i)+1424 !425 !-- Interpolate eddy diffusivities on staggered gridpoints426 kmzp = 0.25_wp * &427 ( km(k,j,i)+km(k+1,j,i)+km(k,j,i-1)+km(k+1,j,i-1) )428 429 tend(k,j,i) = tend(k,j,i) &430 & + ( kmzp * ( ( u(k+1,j,i) - u(k,j,i) ) * ddzu(k+1) &431 & + ( w(k,j,i) - w(k,j,i-1) ) * ddx &432 & ) * rho_air_zw(k) &433 & - ( -usws(j,i) ) &434 & ) * ddzw(k) * drho_air(k)435 ENDDO436 ENDDO437 438 ENDIF439 440 !441 !-- Vertical diffusion at the first gridpoint below the top boundary,442 !-- if the momentum flux at the top is prescribed by the user443 IF ( use_top_fluxes .AND. constant_top_momentumflux ) THEN444 445 k = nzt446 447 DO i = i_left, i_right448 DO j = j_south, j_north449 450 !451 !-- Interpolate eddy diffusivities on staggered gridpoints452 kmzm = 0.25_wp * &453 ( km(k,j,i)+km(k-1,j,i)+km(k,j,i-1)+km(k-1,j,i-1) )454 455 tend(k,j,i) = tend(k,j,i) &456 & + ( ( -uswst(j,i) ) &457 & - kmzm * ( ( u(k,j,i) - u(k-1,j,i) ) * ddzu(k) &458 & + ( w(k-1,j,i) - w(k-1,j,i-1) ) * ddx &459 & ) * rho_air_zw(k-1) &460 & ) * ddzw(k) * drho_air(k)461 ENDDO462 ENDDO463 464 ENDIF465 !$acc end kernels466 467 END SUBROUTINE diffusion_u_acc468 469 470 !------------------------------------------------------------------------------!471 ! Description:472 ! ------------473 276 !> Call for grid point i,j 474 277 !------------------------------------------------------------------------------! -
palm/trunk/SOURCE/diffusion_v.f90
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC version of subroutine removed 23 23 ! 24 24 ! Former revisions: … … 35 35 ! Module renamed (removed _mod) 36 36 ! 37 !38 37 ! 1850 2016-04-08 13:29:27Z maronga 39 38 ! Module renamed 40 !41 39 ! 42 40 ! 1740 2016-01-13 08:19:40Z raasch … … 92 90 93 91 PRIVATE 94 PUBLIC diffusion_v , diffusion_v_acc92 PUBLIC diffusion_v 95 93 96 94 INTERFACE diffusion_v … … 98 96 MODULE PROCEDURE diffusion_v_ij 99 97 END INTERFACE diffusion_v 100 101 INTERFACE diffusion_v_acc102 MODULE PROCEDURE diffusion_v_acc103 END INTERFACE diffusion_v_acc104 98 105 99 CONTAINS … … 275 269 ! Description: 276 270 ! ------------ 277 !> Call for all grid points - accelerator version278 !------------------------------------------------------------------------------!279 SUBROUTINE diffusion_v_acc280 281 USE arrays_3d, &282 ONLY: ddzu, ddzw, km, tend, u, v, vsws, vswst, w, &283 drho_air, rho_air_zw284 285 USE control_parameters, &286 ONLY: constant_top_momentumflux, topography, use_surface_fluxes, &287 use_top_fluxes288 289 USE grid_variables, &290 ONLY: ddx, ddy, ddy2, fxm, fxp, wall_v291 292 USE indices, &293 ONLY: i_left, i_right, j_north, j_south, nxl, nxr, nyn, nys, nzb, &294 nzb_diff_v, nzb_v_inner, nzb_v_outer, nzt, nzt_diff295 296 USE kinds297 298 IMPLICIT NONE299 300 INTEGER(iwp) :: i !<301 INTEGER(iwp) :: j !<302 INTEGER(iwp) :: k !<303 REAL(wp) :: kmxm !<304 REAL(wp) :: kmxp !<305 REAL(wp) :: kmzm !<306 REAL(wp) :: kmzp !<307 308 REAL(wp), DIMENSION(nzb:nzt+1,nys:nyn,nxl:nxr) :: vsus !<309 !$acc declare create ( vsus )310 311 !312 !-- First calculate horizontal momentum flux v'u' at vertical walls,313 !-- if neccessary314 IF ( topography /= 'flat' ) THEN315 CALL wall_fluxes_acc( vsus, 0.0_wp, 1.0_wp, 0.0_wp, 0.0_wp, &316 nzb_v_inner, nzb_v_outer, wall_v )317 ENDIF318 319 !$acc kernels present ( u, v, w, km, tend, vsws, vswst ) &320 !$acc present ( ddzu, ddzw, fxm, fxp, wall_v ) &321 !$acc present ( nzb_v_inner, nzb_v_outer, nzb_diff_v )322 DO i = i_left, i_right323 DO j = j_south, j_north324 !325 !-- Compute horizontal diffusion326 DO k = 1, nzt327 IF ( k > nzb_v_outer(j,i) ) THEN328 !329 !-- Interpolate eddy diffusivities on staggered gridpoints330 kmxp = 0.25_wp * &331 ( km(k,j,i)+km(k,j,i+1)+km(k,j-1,i)+km(k,j-1,i+1) )332 kmxm = 0.25_wp * &333 ( km(k,j,i)+km(k,j,i-1)+km(k,j-1,i)+km(k,j-1,i-1) )334 335 tend(k,j,i) = tend(k,j,i) &336 & + ( kmxp * ( v(k,j,i+1) - v(k,j,i) ) * ddx &337 & + kmxp * ( u(k,j,i+1) - u(k,j-1,i+1) ) * ddy &338 & - kmxm * ( v(k,j,i) - v(k,j,i-1) ) * ddx &339 & - kmxm * ( u(k,j,i) - u(k,j-1,i) ) * ddy &340 & ) * ddx &341 & + 2.0_wp * ( &342 & km(k,j,i) * ( v(k,j+1,i) - v(k,j,i) ) &343 & - km(k,j-1,i) * ( v(k,j,i) - v(k,j-1,i) ) &344 & ) * ddy2345 ENDIF346 ENDDO347 348 !349 !-- Wall functions at the left and right walls, respectively350 DO k = 1, nzt351 IF( k > nzb_v_inner(j,i) .AND. k <= nzb_v_outer(j,i) .AND. &352 wall_v(j,i) /= 0.0_wp ) THEN353 354 kmxp = 0.25_wp * &355 ( km(k,j,i)+km(k,j,i+1)+km(k,j-1,i)+km(k,j-1,i+1) )356 kmxm = 0.25_wp * &357 ( km(k,j,i)+km(k,j,i-1)+km(k,j-1,i)+km(k,j-1,i-1) )358 359 tend(k,j,i) = tend(k,j,i) &360 + 2.0_wp * ( &361 km(k,j,i) * ( v(k,j+1,i) - v(k,j,i) ) &362 - km(k,j-1,i) * ( v(k,j,i) - v(k,j-1,i) ) &363 ) * ddy2 &364 + ( fxp(j,i) * ( &365 kmxp * ( v(k,j,i+1) - v(k,j,i) ) * ddx &366 + kmxp * ( u(k,j,i+1) - u(k,j-1,i+1) ) * ddy &367 ) &368 - fxm(j,i) * ( &369 kmxm * ( v(k,j,i) - v(k,j,i-1) ) * ddx &370 + kmxm * ( u(k,j,i) - u(k,j-1,i) ) * ddy &371 ) &372 + wall_v(j,i) * vsus(k,j,i) &373 ) * ddx374 ENDIF375 ENDDO376 377 !378 !-- Compute vertical diffusion. In case of simulating a Prandtl379 !-- layer, index k starts at nzb_v_inner+2.380 DO k = 1, nzt_diff381 IF ( k >= nzb_diff_v(j,i) ) THEN382 !383 !-- Interpolate eddy diffusivities on staggered gridpoints384 kmzp = 0.25_wp * &385 ( km(k,j,i)+km(k+1,j,i)+km(k,j-1,i)+km(k+1,j-1,i) )386 kmzm = 0.25_wp * &387 ( km(k,j,i)+km(k-1,j,i)+km(k,j-1,i)+km(k-1,j-1,i) )388 389 tend(k,j,i) = tend(k,j,i) &390 & + ( kmzp * ( ( v(k+1,j,i) - v(k,j,i) ) * ddzu(k+1)&391 & + ( w(k,j,i) - w(k,j-1,i) ) * ddy &392 & ) * rho_air_zw(k) &393 & - kmzm * ( ( v(k,j,i) - v(k-1,j,i) ) * ddzu(k)&394 & + ( w(k-1,j,i) - w(k-1,j-1,i) ) * ddy &395 & ) * rho_air_zw(k-1) &396 & ) * ddzw(k) * drho_air(k)397 ENDIF398 ENDDO399 400 ENDDO401 ENDDO402 403 !404 !-- Vertical diffusion at the first grid point above the surface,405 !-- if the momentum flux at the bottom is given by the Prandtl law406 !-- or if it is prescribed by the user.407 !-- Difference quotient of the momentum flux is not formed over408 !-- half of the grid spacing (2.0*ddzw(k)) any more, since the409 !-- comparison with other (LES) models showed that the values of410 !-- the momentum flux becomes too large in this case.411 !-- The term containing w(k-1,..) (see above equation) is removed here412 !-- because the vertical velocity is assumed to be zero at the surface.413 IF ( use_surface_fluxes ) THEN414 415 DO i = i_left, i_right416 DO j = j_south, j_north417 418 k = nzb_v_inner(j,i)+1419 !420 !-- Interpolate eddy diffusivities on staggered gridpoints421 kmzp = 0.25_wp * &422 ( km(k,j,i)+km(k+1,j,i)+km(k,j-1,i)+km(k+1,j-1,i) )423 424 tend(k,j,i) = tend(k,j,i) &425 & + ( kmzp * ( ( v(k+1,j,i) - v(k,j,i) ) * ddzu(k+1) &426 & + ( w(k,j,i) - w(k,j-1,i) ) * ddy &427 & ) * rho_air_zw(k) &428 & - ( -vsws(j,i) ) &429 & ) * ddzw(k) * drho_air(k)430 ENDDO431 ENDDO432 433 ENDIF434 435 !436 !-- Vertical diffusion at the first gridpoint below the top boundary,437 !-- if the momentum flux at the top is prescribed by the user438 IF ( use_top_fluxes .AND. constant_top_momentumflux ) THEN439 440 k = nzt441 442 DO i = i_left, i_right443 DO j = j_south, j_north444 445 !446 !-- Interpolate eddy diffusivities on staggered gridpoints447 kmzm = 0.25_wp * &448 ( km(k,j,i)+km(k-1,j,i)+km(k,j-1,i)+km(k-1,j-1,i) )449 450 tend(k,j,i) = tend(k,j,i) &451 & + ( ( -vswst(j,i) ) &452 & - kmzm * ( ( v(k,j,i) - v(k-1,j,i) ) * ddzu(k) &453 & + ( w(k-1,j,i) - w(k-1,j-1,i) ) * ddy &454 & ) * rho_air_zw(k-1) &455 & ) * ddzw(k) * drho_air(k)456 ENDDO457 ENDDO458 459 ENDIF460 !$acc end kernels461 462 END SUBROUTINE diffusion_v_acc463 464 465 !------------------------------------------------------------------------------!466 ! Description:467 ! ------------468 271 !> Call for grid point i,j 469 272 !------------------------------------------------------------------------------! -
palm/trunk/SOURCE/diffusion_w.f90
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC version of subroutine removed 23 23 ! 24 24 ! Former revisions: … … 35 35 ! Module renamed (removed _mod) 36 36 ! 37 !38 37 ! 1850 2016-04-08 13:29:27Z maronga 39 38 ! Module renamed 40 !41 39 ! 42 40 ! 1682 2015-10-07 23:56:08Z knoop … … 97 95 98 96 USE wall_fluxes_mod, & 99 ONLY : wall_fluxes , wall_fluxes_acc97 ONLY : wall_fluxes 100 98 101 99 PRIVATE 102 PUBLIC diffusion_w , diffusion_w_acc100 PUBLIC diffusion_w 103 101 104 102 INTERFACE diffusion_w … … 106 104 MODULE PROCEDURE diffusion_w_ij 107 105 END INTERFACE diffusion_w 108 109 INTERFACE diffusion_w_acc110 MODULE PROCEDURE diffusion_w_acc111 END INTERFACE diffusion_w_acc112 106 113 107 CONTAINS … … 248 242 ! Description: 249 243 ! ------------ 250 !> Call for all grid points - accelerator version251 !------------------------------------------------------------------------------!252 SUBROUTINE diffusion_w_acc253 254 USE arrays_3d, &255 ONLY : ddzu, ddzw, km, tend, u, v, w, drho_air_zw, rho_air256 257 USE control_parameters, &258 ONLY : topography259 260 USE grid_variables, &261 ONLY : ddx, ddy, fwxm, fwxp, fwym, fwyp, wall_w_x, wall_w_y262 263 USE indices, &264 ONLY : i_left, i_right, j_north, j_south, nxl, nxr, nyn, nys, nzb, &265 nzb_w_inner, nzb_w_outer, nzt266 267 USE kinds268 269 IMPLICIT NONE270 271 INTEGER(iwp) :: i !<272 INTEGER(iwp) :: j !<273 INTEGER(iwp) :: k !<274 275 REAL(wp) :: kmxm !<276 REAL(wp) :: kmxp !<277 REAL(wp) :: kmym !<278 REAL(wp) :: kmyp !<279 280 REAL(wp), DIMENSION(nzb:nzt+1,nys:nyn,nxl:nxr) :: wsus !<281 REAL(wp), DIMENSION(nzb:nzt+1,nys:nyn,nxl:nxr) :: wsvs !<282 !$acc declare create ( wsus, wsvs )283 284 !285 !-- First calculate horizontal momentum flux w'u' and/or w'v' at vertical286 !-- walls, if neccessary287 IF ( topography /= 'flat' ) THEN288 CALL wall_fluxes_acc( wsus, 0.0_wp, 0.0_wp, 0.0_wp, 1.0_wp, &289 nzb_w_inner, nzb_w_outer, wall_w_x )290 CALL wall_fluxes_acc( wsvs, 0.0_wp, 0.0_wp, 0.0_wp, 1.0_wp, &291 nzb_w_inner, nzb_w_outer, wall_w_y )292 ENDIF293 294 !$acc kernels present ( u, v, w, km, tend ) &295 !$acc present ( ddzu, ddzw, fwxm, fwxp, fwym, fwyp, wall_w_x, wall_w_y ) &296 !$acc present ( nzb_w_inner, nzb_w_outer )297 DO i = i_left, i_right298 DO j = j_south, j_north299 DO k = 1, nzt300 IF ( k > nzb_w_outer(j,i) ) THEN301 !302 !-- Interpolate eddy diffusivities on staggered gridpoints303 kmxp = 0.25_wp * &304 ( km(k,j,i)+km(k,j,i+1)+km(k+1,j,i)+km(k+1,j,i+1) )305 kmxm = 0.25_wp * &306 ( km(k,j,i)+km(k,j,i-1)+km(k+1,j,i)+km(k+1,j,i-1) )307 kmyp = 0.25_wp * &308 ( km(k,j,i)+km(k+1,j,i)+km(k,j+1,i)+km(k+1,j+1,i) )309 kmym = 0.25_wp * &310 ( km(k,j,i)+km(k+1,j,i)+km(k,j-1,i)+km(k+1,j-1,i) )311 312 tend(k,j,i) = tend(k,j,i) &313 & + ( kmxp * ( w(k,j,i+1) - w(k,j,i) ) * ddx &314 & + kmxp * ( u(k+1,j,i+1) - u(k,j,i+1) ) * ddzu(k+1) &315 & - kmxm * ( w(k,j,i) - w(k,j,i-1) ) * ddx &316 & - kmxm * ( u(k+1,j,i) - u(k,j,i) ) * ddzu(k+1) &317 & ) * ddx &318 & + ( kmyp * ( w(k,j+1,i) - w(k,j,i) ) * ddy &319 & + kmyp * ( v(k+1,j+1,i) - v(k,j+1,i) ) * ddzu(k+1) &320 & - kmym * ( w(k,j,i) - w(k,j-1,i) ) * ddy &321 & - kmym * ( v(k+1,j,i) - v(k,j,i) ) * ddzu(k+1) &322 & ) * ddy &323 & + 2.0_wp * ( &324 & km(k+1,j,i) * ( w(k+1,j,i) - w(k,j,i) ) * ddzw(k+1) &325 & * rho_air(k+1) &326 & - km(k,j,i) * ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) &327 & * rho_air(k) &328 & ) * ddzu(k+1) * drho_air_zw(k)329 ENDIF330 ENDDO331 332 !333 !-- Wall functions at all vertical walls, where necessary334 DO k = 1,nzt335 336 IF ( k > nzb_w_inner(j,i) .AND. k <= nzb_w_outer(j,i) .AND. &337 wall_w_x(j,i) /= 0.0_wp .AND. wall_w_y(j,i) /= 0.0_wp ) THEN338 !339 !-- Interpolate eddy diffusivities on staggered gridpoints340 kmxp = 0.25_wp * &341 ( km(k,j,i)+km(k,j,i+1)+km(k+1,j,i)+km(k+1,j,i+1) )342 kmxm = 0.25_wp * &343 ( km(k,j,i)+km(k,j,i-1)+km(k+1,j,i)+km(k+1,j,i-1) )344 kmyp = 0.25_wp * &345 ( km(k,j,i)+km(k+1,j,i)+km(k,j+1,i)+km(k+1,j+1,i) )346 kmym = 0.25_wp * &347 ( km(k,j,i)+km(k+1,j,i)+km(k,j-1,i)+km(k+1,j-1,i) )348 349 tend(k,j,i) = tend(k,j,i) &350 + ( fwxp(j,i) * ( &351 kmxp * ( w(k,j,i+1) - w(k,j,i) ) * ddx &352 + kmxp * ( u(k+1,j,i+1) - u(k,j,i+1) ) * ddzu(k+1) &353 ) &354 - fwxm(j,i) * ( &355 kmxm * ( w(k,j,i) - w(k,j,i-1) ) * ddx &356 + kmxm * ( u(k+1,j,i) - u(k,j,i) ) * ddzu(k+1) &357 ) &358 + wall_w_x(j,i) * wsus(k,j,i) &359 ) * ddx &360 + ( fwyp(j,i) * ( &361 kmyp * ( w(k,j+1,i) - w(k,j,i) ) * ddy &362 + kmyp * ( v(k+1,j+1,i) - v(k,j+1,i) ) * ddzu(k+1) &363 ) &364 - fwym(j,i) * ( &365 kmym * ( w(k,j,i) - w(k,j-1,i) ) * ddy &366 + kmym * ( v(k+1,j,i) - v(k,j,i) ) * ddzu(k+1) &367 ) &368 + wall_w_y(j,i) * wsvs(k,j,i) &369 ) * ddy &370 + 2.0_wp * ( &371 km(k+1,j,i) * ( w(k+1,j,i) - w(k,j,i) ) * ddzw(k+1) &372 * rho_air(k+1) &373 - km(k,j,i) * ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) &374 * rho_air(k) &375 ) * ddzu(k+1) * drho_air_zw(k)376 ENDIF377 ENDDO378 379 ENDDO380 ENDDO381 !$acc end kernels382 383 END SUBROUTINE diffusion_w_acc384 385 386 !------------------------------------------------------------------------------!387 ! Description:388 ! ------------389 244 !> Call for grid point i,j 390 245 !------------------------------------------------------------------------------! -
palm/trunk/SOURCE/diffusivities.f90
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC directives removed 23 23 ! 24 24 ! Former revisions: … … 130 130 131 131 ! 132 !-- Data declerations for accelerators133 !$acc data present( dd2zu, e, km, kh, l_grid, l_wall, nzb_s_inner, var )134 !$acc kernels135 136 !137 132 !-- Introduce an optional minimum tke 138 133 IF ( e_min > 0.0_wp ) THEN 139 134 !$OMP DO 140 !$acc loop141 135 DO i = nxlg, nxrg 142 136 DO j = nysg, nyng 143 !$acc loop vector( 32 )144 137 DO k = 1, nzt 145 138 IF ( k > nzb_s_inner(j,i) ) THEN … … 152 145 153 146 !$OMP DO 154 !$acc loop155 147 DO i = nxlg, nxrg 156 148 DO j = nysg, nyng 157 !$acc loop vector( 32 )158 149 DO k = 1, nzt 159 150 … … 191 182 kh(k,j,i) = ( 1.0_wp + 2.0_wp * l / ll ) * km(k,j,i) 192 183 193 #if ! defined( __openacc ) 194 ! 195 !++ Statistics still have to be realized for accelerators 184 ! 196 185 !-- Summation for averaged profile (cf. flow_statistics) 197 186 DO sr = 0, statistic_regions 198 187 sums_l_l(k,sr,tn) = sums_l_l(k,sr,tn) + l * rmask(j,i,sr) 199 188 ENDDO 200 #endif 189 201 190 ENDIF 202 191 … … 205 194 ENDDO 206 195 207 #if ! defined( __openacc )208 !209 !++ Statistics still have to be realized for accelerators210 196 sums_l_l(nzt+1,:,tn) = sums_l_l(nzt,:,tn) ! quasi boundary-condition for 211 ! data output 212 #endif 213 !$OMP END PARALLEL 197 ! data output 198 !$OMP END PARALLEL 214 199 215 200 ! … … 219 204 !-- values of the diffusivities are not needed 220 205 !$OMP PARALLEL DO 221 !$acc loop222 206 DO i = nxlg, nxrg 223 207 DO j = nysg, nyng … … 249 233 ENDIF 250 234 251 !$acc end kernels252 !$acc end data253 254 235 END SUBROUTINE diffusivities -
palm/trunk/SOURCE/exchange_horiz.f90
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC directives and related code removed 23 23 ! 24 24 ! Former revisions: … … 86 86 USE control_parameters, & 87 87 ONLY: bc_lr, bc_lr_cyc, bc_ns, bc_ns_cyc, grid_level, & 88 mg_switch_to_pe0, on_device,synchronous_exchange88 mg_switch_to_pe0, synchronous_exchange 89 89 90 90 USE cpulog, & … … 254 254 !-- with array syntax, explicit loops are used. 255 255 IF ( bc_lr == 'cyclic' ) THEN 256 IF ( on_device ) THEN 257 !$acc kernels present( ar ) 258 !$acc loop independent 259 DO i = 0, nbgp_local-1 260 DO j = nys-nbgp_local, nyn+nbgp_local 261 DO k = nzb, nzt+1 262 ar(k,j,nxl-nbgp_local+i) = ar(k,j,nxr-nbgp_local+1+i) 263 ar(k,j,nxr+1+i) = ar(k,j,nxl+i) 264 ENDDO 265 ENDDO 266 ENDDO 267 !$acc end kernels 268 ELSE 269 ar(:,:,nxl-nbgp_local:nxl-1) = ar(:,:,nxr-nbgp_local+1:nxr) 270 ar(:,:,nxr+1:nxr+nbgp_local) = ar(:,:,nxl:nxl+nbgp_local-1) 271 ENDIF 256 ar(:,:,nxl-nbgp_local:nxl-1) = ar(:,:,nxr-nbgp_local+1:nxr) 257 ar(:,:,nxr+1:nxr+nbgp_local) = ar(:,:,nxl:nxl+nbgp_local-1) 272 258 ENDIF 273 259 274 260 IF ( bc_ns == 'cyclic' ) THEN 275 IF ( on_device ) THEN 276 !$acc kernels present( ar ) 277 DO i = nxl-nbgp_local, nxr+nbgp_local 278 !$acc loop independent 279 DO j = 0, nbgp_local-1 280 !$acc loop independent 281 DO k = nzb, nzt+1 282 ar(k,nys-nbgp_local+j,i) = ar(k,nyn-nbgp_local+1+j,i) 283 ar(k,nyn+1+j,i) = ar(k,nys+j,i) 284 ENDDO 285 ENDDO 286 ENDDO 287 !$acc end kernels 288 ELSE 289 ar(:,nys-nbgp_local:nys-1,:) = ar(:,nyn-nbgp_local+1:nyn,:) 290 ar(:,nyn+1:nyn+nbgp_local,:) = ar(:,nys:nys+nbgp_local-1,:) 291 ENDIF 261 ar(:,nys-nbgp_local:nys-1,:) = ar(:,nyn-nbgp_local+1:nyn,:) 262 ar(:,nyn+1:nyn+nbgp_local,:) = ar(:,nys:nys+nbgp_local-1,:) 292 263 ENDIF 293 264 -
palm/trunk/SOURCE/fft_xy_mod.f90
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC directives and CUDA-fft related code removed 23 23 ! 24 24 ! Former revisions: … … 31 31 ! 1850 2016-04-08 13:29:27Z maronga 32 32 ! Module renamed 33 !34 33 ! 35 34 ! 1815 2016-04-06 13:49:59Z raasch … … 139 138 ONLY: nx, ny, nz 140 139 141 #if defined( __cuda_fft ) 142 USE ISO_C_BINDING 143 #elif defined( __fftw ) 140 #if defined( __fftw ) 144 141 USE, INTRINSIC :: ISO_C_BINDING 145 142 #endif … … 192 189 REAL(wp), DIMENSION(:), ALLOCATABLE, SAVE :: trig_yf !< 193 190 194 #elif defined( __cuda_fft )195 INTEGER(C_INT), SAVE :: plan_xf !<196 INTEGER(C_INT), SAVE :: plan_xi !<197 INTEGER(C_INT), SAVE :: plan_yf !<198 INTEGER(C_INT), SAVE :: plan_yi !<199 200 INTEGER(iwp), SAVE :: total_points_x_transpo !<201 INTEGER(iwp), SAVE :: total_points_y_transpo !<202 191 #endif 203 192 … … 261 250 SUBROUTINE fft_init 262 251 263 USE cuda_fft_interfaces264 265 252 IMPLICIT NONE 266 253 … … 338 325 CALL ZDFFTM( 0, ny+1, nz1, sqr_dny, work_y, ny+4, work_y, ny+4, & 339 326 trig_yb, worky, 0 ) 340 #elif defined( __cuda_fft )341 total_points_x_transpo = (nx+1) * (nyn_x-nys_x+1) * (nzt_x-nzb_x+1)342 total_points_y_transpo = (ny+1) * (nxr_y-nxl_y+1) * (nzt_y-nzb_y+1)343 CALL CUFFTPLAN1D( plan_xf, nx+1, CUFFT_D2Z, (nyn_x-nys_x+1) * (nzt_x-nzb_x+1) )344 CALL CUFFTPLAN1D( plan_xi, nx+1, CUFFT_Z2D, (nyn_x-nys_x+1) * (nzt_x-nzb_x+1) )345 CALL CUFFTPLAN1D( plan_yf, ny+1, CUFFT_D2Z, (nxr_y-nxl_y+1) * (nzt_y-nzb_y+1) )346 CALL CUFFTPLAN1D( plan_yi, ny+1, CUFFT_Z2D, (nxr_y-nxl_y+1) * (nzt_y-nzb_y+1) )347 327 #else 348 328 message_string = 'no system-specific fft-call available' … … 403 383 404 384 405 USE cuda_fft_interfaces406 #if defined( __cuda_fft )407 USE ISO_C_BINDING408 #endif409 410 385 IMPLICIT NONE 411 386 … … 429 404 #elif defined( __nec ) 430 405 REAL(wp), DIMENSION(6*(nx+1)) :: work2 !< 431 #elif defined( __cuda_fft )432 COMPLEX(dp), DIMENSION(0:(nx+1)/2,nys_x:nyn_x,nzb_x:nzt_x) :: &433 ar_tmp !<434 ! following does not work for PGI 14.1 -> to be removed later435 ! !$acc declare create( ar_tmp )436 406 #endif 437 407 … … 737 707 738 708 ENDIF 739 740 #elif defined( __cuda_fft )741 742 !$acc data create( ar_tmp )743 IF ( forward_fft ) THEN744 745 !$acc data present( ar )746 CALL CUFFTEXECD2Z( plan_xf, ar, ar_tmp )747 748 !$acc kernels749 DO k = nzb_x, nzt_x750 DO j = nys_x, nyn_x751 752 DO i = 0, (nx+1)/2753 ar(i,j,k) = REAL( ar_tmp(i,j,k), KIND=wp ) * dnx754 ENDDO755 756 DO i = 1, (nx+1)/2 - 1757 ar(nx+1-i,j,k) = AIMAG( ar_tmp(i,j,k) ) * dnx758 ENDDO759 760 ENDDO761 ENDDO762 !$acc end kernels763 !$acc end data764 765 ELSE766 767 !$acc data present( ar )768 !$acc kernels769 DO k = nzb_x, nzt_x770 DO j = nys_x, nyn_x771 772 ar_tmp(0,j,k) = CMPLX( ar(0,j,k), 0.0_wp, KIND=wp )773 774 DO i = 1, (nx+1)/2 - 1775 ar_tmp(i,j,k) = CMPLX( ar(i,j,k), ar(nx+1-i,j,k), &776 KIND=wp )777 ENDDO778 ar_tmp((nx+1)/2,j,k) = CMPLX( ar((nx+1)/2,j,k), 0.0_wp, &779 KIND=wp )780 781 ENDDO782 ENDDO783 !$acc end kernels784 785 CALL CUFFTEXECZ2D( plan_xi, ar_tmp, ar )786 !$acc end data787 788 ENDIF789 !$acc end data790 709 791 710 #else … … 1052 971 1053 972 1054 USE cuda_fft_interfaces1055 #if defined( __cuda_fft )1056 USE ISO_C_BINDING1057 #endif1058 1059 973 IMPLICIT NONE 1060 974 … … 1082 996 #elif defined( __nec ) 1083 997 REAL(wp), DIMENSION(6*(ny+1)) :: work2 !< 1084 #elif defined( __cuda_fft )1085 COMPLEX(dp), DIMENSION(0:(ny+1)/2,nxl_y:nxr_y,nzb_y:nzt_y) :: &1086 ar_tmp !<1087 ! following does not work for PGI 14.1 -> to be removed later1088 ! !$acc declare create( ar_tmp )1089 998 #endif 1090 999 … … 1364 1273 1365 1274 ENDIF 1366 #elif defined( __cuda_fft )1367 1368 !$acc data create( ar_tmp )1369 IF ( forward_fft ) THEN1370 1371 !$acc data present( ar )1372 CALL CUFFTEXECD2Z( plan_yf, ar, ar_tmp )1373 1374 !$acc kernels1375 DO k = nzb_y, nzt_y1376 DO i = nxl_y, nxr_y1377 1378 DO j = 0, (ny+1)/21379 ar(j,i,k) = REAL( ar_tmp(j,i,k), KIND=wp ) * dny1380 ENDDO1381 1382 DO j = 1, (ny+1)/2 - 11383 ar(ny+1-j,i,k) = AIMAG( ar_tmp(j,i,k) ) * dny1384 ENDDO1385 1386 ENDDO1387 ENDDO1388 !$acc end kernels1389 !$acc end data1390 1391 ELSE1392 1393 !$acc data present( ar )1394 !$acc kernels1395 DO k = nzb_y, nzt_y1396 DO i = nxl_y, nxr_y1397 1398 ar_tmp(0,i,k) = CMPLX( ar(0,i,k), 0.0_wp, KIND=wp )1399 1400 DO j = 1, (ny+1)/2 - 11401 ar_tmp(j,i,k) = CMPLX( ar(j,i,k), ar(ny+1-j,i,k), &1402 KIND=wp )1403 ENDDO1404 ar_tmp((ny+1)/2,i,k) = CMPLX( ar((ny+1)/2,i,k), 0.0_wp, &1405 KIND=wp )1406 1407 ENDDO1408 ENDDO1409 !$acc end kernels1410 1411 CALL CUFFTEXECZ2D( plan_yi, ar_tmp, ar )1412 !$acc end data1413 1414 ENDIF1415 !$acc end data1416 1417 1275 #else 1418 1276 message_string = 'no system-specific fft-call available' -
palm/trunk/SOURCE/flow_statistics.f90
r2101 r2118 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! OpenACC version of subroutine removed 23 23 ! 24 24 ! Former revisions: … … 216 216 !> are zero at the walls and inside buildings. 217 217 !------------------------------------------------------------------------------! 218 #if ! defined( __openacc )219 218 SUBROUTINE flow_statistics 220 219 … … 309 308 CALL cpu_log( log_point(10), 'flow_statistics', 'start' ) 310 309 311 !$acc update host( km, kh, e, ol, pt, qs, qsws, shf, ts, u, usws, v, vsws, w )312 310 313 311 ! … … 1756 1754 1757 1755 END SUBROUTINE flow_statistics 1758 1759 1760 #else1761 1762 1763 !------------------------------------------------------------------------------!1764 ! Description:1765 ! ------------1766 !> flow statistics - accelerator version1767 !------------------------------------------------------------------------------!1768 SUBROUTINE flow_statistics1769 1770 USE arrays_3d, &1771 ONLY: ddzu, ddzw, e, heatflux_output_conversion, hyp, km, kh, &1772 momentumflux_output_conversion, nr, p, prho, pt, q, qc, ql, qr, &1773 qs, qsws, qswst, rho_air, rho_air_zw, rho_ocean, s, sa, saswsb, &1774 saswst, shf, ss, ssws, sswst, td_lsa_lpt, td_lsa_q, td_sub_lpt, &1775 td_sub_q, time_vert, ts, tswst, u, ug, us, usws, uswst, vsws, &1776 v, vg, vpt, vswst, w, w_subs, waterflux_output_conversion, zw1777 1778 1779 USE cloud_parameters, &1780 ONLY: l_d_cp, prr, pt_d_t1781 1782 USE control_parameters, &1783 ONLY : average_count_pr, cloud_droplets, cloud_physics, do_sum, &1784 dt_3d, g, humidity, kappa, large_scale_forcing, &1785 large_scale_subsidence, max_pr_user, message_string, &1786 microphysics_seifert, neutral, ocean, passive_scalar, &1787 simulated_time, use_subsidence_tendencies, use_surface_fluxes, &1788 use_top_fluxes, ws_scheme_mom, ws_scheme_sca1789 1790 USE cpulog, &1791 ONLY: cpu_log, log_point1792 1793 USE grid_variables, &1794 ONLY: ddx, ddy1795 1796 USE indices, &1797 ONLY: ngp_2dh, ngp_2dh_s_inner, ngp_3d, ngp_3d_inner, ngp_sums, &1798 ngp_sums_ls, nxl, nxr, nyn, nys, nzb, nzb_diff_s_inner, &1799 nzb_s_inner, nzt, nzt_diff, rflags_invers1800 1801 USE kinds1802 1803 USE land_surface_model_mod, &1804 ONLY: ghf_eb, land_surface, m_soil, nzb_soil, nzt_soil, &1805 qsws_eb, qsws_liq_eb, qsws_soil_eb, qsws_veg_eb, r_a, r_s, &1806 shf_eb, t_soil1807 1808 USE netcdf_interface, &1809 ONLY: dots_rad, dots_soil1810 1811 USE pegrid1812 1813 USE radiation_model_mod, &1814 ONLY: radiation, radiation_scheme, rad_net, &1815 rad_lw_in, rad_lw_out, rad_sw_in, rad_sw_out1816 1817 #if defined ( __rrtmg )1818 USE radiation_model_mod, &1819 ONLY: rrtm_aldif, rrtm_aldir, rrtm_asdif, rrtm_asdir, rad_lw_cs_hr, &1820 rad_lw_hr, rad_sw_cs_hr, rad_sw_hr1821 #endif1822 1823 USE statistics1824 1825 IMPLICIT NONE1826 1827 INTEGER(iwp) :: i !<1828 INTEGER(iwp) :: j !<1829 INTEGER(iwp) :: k !<1830 INTEGER(iwp) :: k_surface_level !<1831 INTEGER(iwp) :: nt !<1832 INTEGER(iwp) :: omp_get_thread_num !<1833 INTEGER(iwp) :: sr !<1834 INTEGER(iwp) :: tn !<1835 1836 LOGICAL :: first !<1837 1838 REAL(wp) :: dptdz_threshold !<1839 REAL(wp) :: fac !<1840 REAL(wp) :: height !<1841 REAL(wp) :: pts !<1842 REAL(wp) :: sums_l_eper !<1843 REAL(wp) :: sums_l_etot !<1844 REAL(wp) :: s1 !<1845 REAL(wp) :: s2 !<1846 REAL(wp) :: s3 !<1847 REAL(wp) :: s4 !<1848 REAL(wp) :: s5 !<1849 REAL(wp) :: s6 !<1850 REAL(wp) :: s7 !<1851 REAL(wp) :: ust !<1852 REAL(wp) :: ust2 !<1853 REAL(wp) :: u2 !<1854 REAL(wp) :: vst !<1855 REAL(wp) :: vst2 !<1856 REAL(wp) :: v2 !<1857 REAL(wp) :: w2 !<1858 REAL(wp) :: z_i(2) !<1859 1860 REAL(wp) :: dptdz(nzb+1:nzt+1) !<1861 REAL(wp) :: sums_ll(nzb:nzt+1,2) !<1862 1863 CALL cpu_log( log_point(10), 'flow_statistics', 'start' )1864 1865 !1866 !-- To be on the safe side, check whether flow_statistics has already been1867 !-- called once after the current time step1868 IF ( flow_statistics_called ) THEN1869 1870 message_string = 'flow_statistics is called two times within one ' // &1871 'timestep'1872 CALL message( 'flow_statistics', 'PA0190', 1, 2, 0, 6, 0 )1873 1874 ENDIF1875 1876 !$acc data create( sums, sums_l )1877 !$acc update device( hom )1878 1879 !1880 !-- Compute statistics for each (sub-)region1881 DO sr = 0, statistic_regions1882 1883 !1884 !-- Initialize (local) summation array1885 sums_l = 0.0_wp1886 1887 !1888 !-- Store sums that have been computed in other subroutines in summation1889 !-- array1890 sums_l(:,11,:) = sums_l_l(:,sr,:) ! mixing length from diffusivities1891 !-- WARNING: next line still has to be adjusted for OpenMP1892 sums_l(:,21,0) = sums_wsts_bc_l(:,sr) * &1893 heatflux_output_conversion ! heat flux from advec_s_bc1894 sums_l(nzb+9,pr_palm,0) = sums_divold_l(sr) ! old divergence from pres1895 sums_l(nzb+10,pr_palm,0) = sums_divnew_l(sr) ! new divergence from pres1896 1897 !1898 !-- When calcuating horizontally-averaged total (resolved- plus subgrid-1899 !-- scale) vertical fluxes and velocity variances by using commonly-1900 !-- applied Reynolds-based methods ( e.g. <w'pt'> = (w-<w>)*(pt-<pt>) )1901 !-- in combination with the 5th order advection scheme, pronounced1902 !-- artificial kinks could be observed in the vertical profiles near the1903 !-- surface. Please note: these kinks were not related to the model truth,1904 !-- i.e. these kinks are just related to an evaluation problem.1905 !-- In order avoid these kinks, vertical fluxes and horizontal as well1906 !-- vertical velocity variances are calculated directly within the advection1907 !-- routines, according to the numerical discretization, to evaluate the1908 !-- statistical quantities as they will appear within the prognostic1909 !-- equations.1910 !-- Copy the turbulent quantities, evaluated in the advection routines to1911 !-- the local array sums_l() for further computations.1912 IF ( ws_scheme_mom .AND. sr == 0 ) THEN1913 1914 !1915 !-- According to the Neumann bc for the horizontal velocity components,1916 !-- the corresponding fluxes has to satisfiy the same bc.1917 IF ( ocean ) THEN1918 sums_us2_ws_l(nzt+1,:) = sums_us2_ws_l(nzt,:)1919 sums_vs2_ws_l(nzt+1,:) = sums_vs2_ws_l(nzt,:)1920 ENDIF1921 1922 DO i = 0, threads_per_task-11923 !1924 !-- Swap the turbulent quantities evaluated in advec_ws.1925 sums_l(:,13,i) = sums_wsus_ws_l(:,i) &1926 * momentumflux_output_conversion ! w*u*1927 sums_l(:,15,i) = sums_wsvs_ws_l(:,i) &1928 * momentumflux_output_conversion ! w*v*1929 sums_l(:,30,i) = sums_us2_ws_l(:,i) ! u*21930 sums_l(:,31,i) = sums_vs2_ws_l(:,i) ! v*21931 sums_l(:,32,i) = sums_ws2_ws_l(:,i) ! w*21932 sums_l(:,34,i) = sums_l(:,34,i) + 0.5_wp * &1933 ( sums_us2_ws_l(:,i) + sums_vs2_ws_l(:,i) + &1934 sums_ws2_ws_l(:,i) ) ! e*1935 DO k = nzb, nzt1936 sums_l(nzb+5,pr_palm,i) = sums_l(nzb+5,pr_palm,i) + 0.5_wp * ( &1937 sums_us2_ws_l(k,i) + &1938 sums_vs2_ws_l(k,i) + &1939 sums_ws2_ws_l(k,i) )1940 ENDDO1941 ENDDO1942 1943 ENDIF1944 1945 IF ( ws_scheme_sca .AND. sr == 0 ) THEN1946 1947 DO i = 0, threads_per_task-11948 sums_l(:,17,i) = sums_wspts_ws_l(:,i) &1949 * heatflux_output_conversion ! w*pt* from advec_s_ws1950 IF ( ocean ) sums_l(:,66,i) = sums_wssas_ws_l(:,i) ! w*sa*1951 IF ( humidity ) sums_l(:,49,i) = sums_wsqs_ws_l(:,i) &1952 * waterflux_output_conversion !w*q*1953 IF ( passive_scalar ) sums_l(:,116,i) = sums_wsss_ws_l(:,i) !w*s*1954 ENDDO1955 1956 ENDIF1957 !1958 !-- Horizontally averaged profiles of horizontal velocities and temperature.1959 !-- They must have been computed before, because they are already required1960 !-- for other horizontal averages.1961 tn = 01962 1963 !$OMP PARALLEL PRIVATE( i, j, k, tn )1964 !$ tn = omp_get_thread_num()1965 1966 !$acc update device( sums_l )1967 1968 !$OMP DO1969 !$acc parallel loop gang present( pt, rflags_invers, rmask, sums_l, u, v ) create( s1, s2, s3 )1970 DO k = nzb, nzt+11971 s1 = 01972 s2 = 01973 s3 = 01974 !$acc loop vector collapse( 2 ) reduction( +: s1, s2, s3 )1975 DO i = nxl, nxr1976 DO j = nys, nyn1977 !1978 !-- k+1 is used in rflags since rflags is set 0 at surface points1979 s1 = s1 + u(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)1980 s2 = s2 + v(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)1981 s3 = s3 + pt(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)1982 ENDDO1983 ENDDO1984 sums_l(k,1,tn) = s11985 sums_l(k,2,tn) = s21986 sums_l(k,4,tn) = s31987 ENDDO1988 !$acc end parallel loop1989 1990 !1991 !-- Horizontally averaged profile of salinity1992 IF ( ocean ) THEN1993 !$OMP DO1994 !$acc parallel loop gang present( rflags_invers, rmask, sums_l, sa ) create( s1 )1995 DO k = nzb, nzt+11996 s1 = 01997 !$acc loop vector collapse( 2 ) reduction( +: s1 )1998 DO i = nxl, nxr1999 DO j = nys, nyn2000 s1 = s1 + sa(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2001 ENDDO2002 ENDDO2003 sums_l(k,23,tn) = s12004 ENDDO2005 !$acc end parallel loop2006 ENDIF2007 2008 !2009 !-- Horizontally averaged profiles of virtual potential temperature,2010 !-- total water content, specific humidity and liquid water potential2011 !-- temperature2012 IF ( humidity ) THEN2013 2014 !$OMP DO2015 !$acc parallel loop gang present( q, rflags_invers, rmask, sums_l, vpt ) create( s1, s2 )2016 DO k = nzb, nzt+12017 s1 = 02018 s2 = 02019 !$acc loop vector collapse( 2 ) reduction( +: s1, s2 )2020 DO i = nxl, nxr2021 DO j = nys, nyn2022 s1 = s1 + q(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2023 s2 = s2 + vpt(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2024 ENDDO2025 ENDDO2026 sums_l(k,41,tn) = s12027 sums_l(k,44,tn) = s22028 ENDDO2029 !$acc end parallel loop2030 2031 IF ( cloud_physics ) THEN2032 !$OMP DO2033 !$acc parallel loop gang present( pt, q, ql, rflags_invers, rmask, sums_l ) create( s1, s2 )2034 DO k = nzb, nzt+12035 s1 = 02036 s2 = 02037 !$acc loop vector collapse( 2 ) reduction( +: s1, s2 )2038 DO i = nxl, nxr2039 DO j = nys, nyn2040 s1 = s1 + ( q(k,j,i) - ql(k,j,i) ) * &2041 rmask(j,i,sr) * rflags_invers(j,i,k+1)2042 s2 = s2 + ( pt(k,j,i) + l_d_cp*pt_d_t(k) * ql(k,j,i) ) * &2043 rmask(j,i,sr) * rflags_invers(j,i,k+1)2044 ENDDO2045 ENDDO2046 sums_l(k,42,tn) = s12047 sums_l(k,43,tn) = s22048 ENDDO2049 !$acc end parallel loop2050 ENDIF2051 ENDIF2052 2053 !2054 !-- Horizontally averaged profiles of passive scalar2055 IF ( passive_scalar ) THEN2056 !$OMP DO2057 !$acc parallel loop gang present( s, rflags_invers, rmask, sums_l ) create( s1 )2058 DO k = nzb, nzt+12059 s1 = 02060 !$acc loop vector collapse( 2 ) reduction( +: s1 )2061 DO i = nxl, nxr2062 DO j = nys, nyn2063 s1 = s1 + s(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2064 ENDDO2065 ENDDO2066 sums_l(k,117,tn) = s12067 ENDDO2068 !$acc end parallel loop2069 ENDIF2070 !$OMP END PARALLEL2071 2072 !2073 !-- Summation of thread sums2074 IF ( threads_per_task > 1 ) THEN2075 DO i = 1, threads_per_task-12076 !$acc parallel present( sums_l )2077 sums_l(:,1,0) = sums_l(:,1,0) + sums_l(:,1,i)2078 sums_l(:,2,0) = sums_l(:,2,0) + sums_l(:,2,i)2079 sums_l(:,4,0) = sums_l(:,4,0) + sums_l(:,4,i)2080 !$acc end parallel2081 IF ( ocean ) THEN2082 !$acc parallel present( sums_l )2083 sums_l(:,23,0) = sums_l(:,23,0) + sums_l(:,23,i)2084 !$acc end parallel2085 ENDIF2086 IF ( humidity ) THEN2087 !$acc parallel present( sums_l )2088 sums_l(:,41,0) = sums_l(:,41,0) + sums_l(:,41,i)2089 sums_l(:,44,0) = sums_l(:,44,0) + sums_l(:,44,i)2090 !$acc end parallel2091 IF ( cloud_physics ) THEN2092 !$acc parallel present( sums_l )2093 sums_l(:,42,0) = sums_l(:,42,0) + sums_l(:,42,i)2094 sums_l(:,43,0) = sums_l(:,43,0) + sums_l(:,43,i)2095 !$acc end parallel2096 ENDIF2097 ENDIF2098 IF ( passive_scalar ) THEN2099 !$acc parallel present( sums_l )2100 sums_l(:,117,0) = sums_l(:,117,0) + sums_l(:,117,i)2101 !$acc end parallel2102 ENDIF2103 ENDDO2104 ENDIF2105 2106 #if defined( __parallel )2107 !2108 !-- Compute total sum from local sums2109 !$acc update host( sums_l )2110 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )2111 CALL MPI_ALLREDUCE( sums_l(nzb,1,0), sums(nzb,1), nzt+2-nzb, MPI_REAL, &2112 MPI_SUM, comm2d, ierr )2113 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )2114 CALL MPI_ALLREDUCE( sums_l(nzb,2,0), sums(nzb,2), nzt+2-nzb, MPI_REAL, &2115 MPI_SUM, comm2d, ierr )2116 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )2117 CALL MPI_ALLREDUCE( sums_l(nzb,4,0), sums(nzb,4), nzt+2-nzb, MPI_REAL, &2118 MPI_SUM, comm2d, ierr )2119 IF ( ocean ) THEN2120 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )2121 CALL MPI_ALLREDUCE( sums_l(nzb,23,0), sums(nzb,23), nzt+2-nzb, &2122 MPI_REAL, MPI_SUM, comm2d, ierr )2123 ENDIF2124 IF ( humidity ) THEN2125 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )2126 CALL MPI_ALLREDUCE( sums_l(nzb,44,0), sums(nzb,44), nzt+2-nzb, &2127 MPI_REAL, MPI_SUM, comm2d, ierr )2128 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )2129 CALL MPI_ALLREDUCE( sums_l(nzb,41,0), sums(nzb,41), nzt+2-nzb, &2130 MPI_REAL, MPI_SUM, comm2d, ierr )2131 IF ( cloud_physics ) THEN2132 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )2133 CALL MPI_ALLREDUCE( sums_l(nzb,42,0), sums(nzb,42), nzt+2-nzb, &2134 MPI_REAL, MPI_SUM, comm2d, ierr )2135 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )2136 CALL MPI_ALLREDUCE( sums_l(nzb,43,0), sums(nzb,43), nzt+2-nzb, &2137 MPI_REAL, MPI_SUM, comm2d, ierr )2138 ENDIF2139 ENDIF2140 2141 IF ( passive_scalar ) THEN2142 IF ( collective_wait ) CALL MPI_BARRIER( comm2d, ierr )2143 CALL MPI_ALLREDUCE( sums_l(nzb,117,0), sums(nzb,117), nzt+2-nzb, &2144 MPI_REAL, MPI_SUM, comm2d, ierr )2145 ENDIF2146 !$acc update device( sums )2147 #else2148 !$acc parallel present( sums, sums_l )2149 sums(:,1) = sums_l(:,1,0)2150 sums(:,2) = sums_l(:,2,0)2151 sums(:,4) = sums_l(:,4,0)2152 !$acc end parallel2153 IF ( ocean ) THEN2154 !$acc parallel present( sums, sums_l )2155 sums(:,23) = sums_l(:,23,0)2156 !$acc end parallel2157 ENDIF2158 IF ( humidity ) THEN2159 !$acc parallel present( sums, sums_l )2160 sums(:,44) = sums_l(:,44,0)2161 sums(:,41) = sums_l(:,41,0)2162 !$acc end parallel2163 IF ( cloud_physics ) THEN2164 !$acc parallel present( sums, sums_l )2165 sums(:,42) = sums_l(:,42,0)2166 sums(:,43) = sums_l(:,43,0)2167 !$acc end parallel2168 ENDIF2169 ENDIF2170 IF ( passive_scalar ) THEN2171 !$acc parallel present( sums, sums_l )2172 sums(:,117) = sums_l(:,117,0)2173 !$acc end parallel2174 ENDIF2175 #endif2176 2177 !2178 !-- Final values are obtained by division by the total number of grid points2179 !-- used for summation. After that store profiles.2180 !$acc parallel present( hom, ngp_2dh, ngp_2dh_s_inner, sums )2181 sums(:,1) = sums(:,1) / ngp_2dh(sr)2182 sums(:,2) = sums(:,2) / ngp_2dh(sr)2183 sums(:,4) = sums(:,4) / ngp_2dh_s_inner(:,sr)2184 hom(:,1,1,sr) = sums(:,1) ! u2185 hom(:,1,2,sr) = sums(:,2) ! v2186 hom(:,1,4,sr) = sums(:,4) ! pt2187 !$acc end parallel2188 2189 !2190 !-- Salinity2191 IF ( ocean ) THEN2192 !$acc parallel present( hom, ngp_2dh_s_inner, sums )2193 sums(:,23) = sums(:,23) / ngp_2dh_s_inner(:,sr)2194 hom(:,1,23,sr) = sums(:,23) ! sa2195 !$acc end parallel2196 ENDIF2197 2198 !2199 !-- Humidity and cloud parameters2200 IF ( humidity ) THEN2201 !$acc parallel present( hom, ngp_2dh_s_inner, sums )2202 sums(:,44) = sums(:,44) / ngp_2dh_s_inner(:,sr)2203 sums(:,41) = sums(:,41) / ngp_2dh_s_inner(:,sr)2204 hom(:,1,44,sr) = sums(:,44) ! vpt2205 hom(:,1,41,sr) = sums(:,41) ! qv (q)2206 !$acc end parallel2207 IF ( cloud_physics ) THEN2208 !$acc parallel present( hom, ngp_2dh_s_inner, sums )2209 sums(:,42) = sums(:,42) / ngp_2dh_s_inner(:,sr)2210 sums(:,43) = sums(:,43) / ngp_2dh_s_inner(:,sr)2211 hom(:,1,42,sr) = sums(:,42) ! qv2212 hom(:,1,43,sr) = sums(:,43) ! pt2213 !$acc end parallel2214 ENDIF2215 ENDIF2216 2217 !2218 !-- Passive scalar2219 IF ( passive_scalar ) THEN2220 !$acc parallel present( hom, ngp_2dh_s_inner, sums )2221 sums(:,117) = sums(:,117) / ngp_2dh_s_inner(:,sr)2222 hom(:,1,117,sr) = sums(:,117) ! s2223 !$acc end parallel2224 ENDIF2225 2226 !2227 !-- Horizontally averaged profiles of the remaining prognostic variables,2228 !-- variances, the total and the perturbation energy (single values in last2229 !-- column of sums_l) and some diagnostic quantities.2230 !-- NOTE: for simplicity, nzb_s_inner is used below, although strictly2231 !-- ---- speaking the following k-loop would have to be split up and2232 !-- rearranged according to the staggered grid.2233 !-- However, this implies no error since staggered velocity components2234 !-- are zero at the walls and inside buildings.2235 tn = 02236 !$OMP PARALLEL PRIVATE( i, j, k, pts, sums_ll, sums_l_eper, &2237 !$OMP sums_l_etot, tn, ust, ust2, u2, vst, vst2, v2, &2238 !$OMP w2 )2239 !$ tn = omp_get_thread_num()2240 2241 !$OMP DO2242 !$acc parallel loop gang present( e, hom, kh, km, p, pt, w, rflags_invers, rmask, sums_l ) create( s1, s2, s3, s4, s5, s6, s7 )2243 DO k = nzb, nzt+12244 s1 = 02245 s2 = 02246 s3 = 02247 s4 = 02248 s5 = 02249 s6 = 02250 s7 = 02251 !$acc loop vector collapse( 2 ) reduction( +: s1, s2, s3, s4, s5, s6, s7 )2252 DO i = nxl, nxr2253 DO j = nys, nyn2254 !2255 !-- Prognostic and diagnostic variables2256 s1 = s1 + w(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2257 s2 = s2 + e(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2258 s3 = s3 + km(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2259 s4 = s4 + kh(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2260 s5 = s5 + p(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2261 s6 = s6 + ( pt(k,j,i)-hom(k,1,4,sr) )**2 * rmask(j,i,sr) * &2262 rflags_invers(j,i,k+1)2263 !2264 !-- Higher moments2265 !-- (Computation of the skewness of w further below)2266 s7 = s7 + w(k,j,i)**3 * rmask(j,i,sr) * rflags_invers(j,i,k+1)2267 ENDDO2268 ENDDO2269 sums_l(k,3,tn) = s12270 sums_l(k,8,tn) = s22271 sums_l(k,9,tn) = s32272 sums_l(k,10,tn) = s42273 sums_l(k,40,tn) = s52274 sums_l(k,33,tn) = s62275 sums_l(k,38,tn) = s72276 ENDDO2277 !$acc end parallel loop2278 2279 IF ( humidity ) THEN2280 !$OMP DO2281 !$acc parallel loop gang present( hom, q, rflags_invers, rmask, sums_l ) create( s1 )2282 DO k = nzb, nzt+12283 s1 = 02284 !$acc loop vector collapse( 2 ) reduction( +: s1 )2285 DO i = nxl, nxr2286 DO j = nys, nyn2287 s1 = s1 + ( q(k,j,i)-hom(k,1,41,sr) )**2 * rmask(j,i,sr) * &2288 rflags_invers(j,i,k+1)2289 ENDDO2290 ENDDO2291 sums_l(k,70,tn) = s12292 ENDDO2293 !$acc end parallel loop2294 ENDIF2295 2296 !2297 !-- Total and perturbation energy for the total domain (being2298 !-- collected in the last column of sums_l).2299 s1 = 02300 !$OMP DO2301 !$acc parallel loop collapse(3) present( rflags_invers, rmask, u, v, w ) reduction(+:s1)2302 DO i = nxl, nxr2303 DO j = nys, nyn2304 DO k = nzb, nzt+12305 s1 = s1 + 0.5_wp * &2306 ( u(k,j,i)**2 + v(k,j,i)**2 + w(k,j,i)**2 ) * &2307 rmask(j,i,sr) * rflags_invers(j,i,k+1)2308 ENDDO2309 ENDDO2310 ENDDO2311 !$acc end parallel loop2312 !$acc parallel present( sums_l )2313 sums_l(nzb+4,pr_palm,tn) = s12314 !$acc end parallel2315 2316 !$OMP DO2317 !$acc parallel present( rmask, sums_l, us, usws, vsws, ts ) create( s1, s2, s3, s4 )2318 s1 = 02319 s2 = 02320 s3 = 02321 s4 = 02322 !$acc loop vector collapse( 2 ) reduction( +: s1, s2, s3, s4 )2323 DO i = nxl, nxr2324 DO j = nys, nyn2325 !2326 !-- 2D-arrays (being collected in the last column of sums_l)2327 s1 = s1 + us(j,i) * rmask(j,i,sr)2328 s2 = s2 + usws(j,i) * rmask(j,i,sr)2329 s3 = s3 + vsws(j,i) * rmask(j,i,sr)2330 s4 = s4 + ts(j,i) * rmask(j,i,sr)2331 ENDDO2332 ENDDO2333 sums_l(nzb,pr_palm,tn) = s12334 sums_l(nzb+1,pr_palm,tn) = s22335 sums_l(nzb+2,pr_palm,tn) = s32336 sums_l(nzb+3,pr_palm,tn) = s42337 !$acc end parallel2338 2339 IF ( humidity ) THEN2340 !$acc parallel present( qs, rmask, sums_l ) create( s1 )2341 s1 = 02342 !$acc loop vector collapse( 2 ) reduction( +: s1 )2343 DO i = nxl, nxr2344 DO j = nys, nyn2345 s1 = s1 + qs(j,i) * rmask(j,i,sr)2346 ENDDO2347 ENDDO2348 sums_l(nzb+12,pr_palm,tn) = s12349 !$acc end parallel2350 ENDIF2351 2352 IF ( passive_scalar ) THEN2353 !$acc parallel present( ss, rmask, sums_l ) create( s1 )2354 s1 = 02355 !$acc loop vector collapse( 2 ) reduction( +: s1 )2356 DO i = nxl, nxr2357 DO j = nys, nyn2358 s1 = s1 + ss(j,i) * rmask(j,i,sr)2359 ENDDO2360 ENDDO2361 sums_l(nzb+13,pr_palm,tn) = s12362 !$acc end parallel2363 ENDIF2364 2365 !2366 !-- Computation of statistics when ws-scheme is not used. Else these2367 !-- quantities are evaluated in the advection routines.2368 IF ( .NOT. ws_scheme_mom .OR. sr /= 0 .OR. simulated_time == 0.0_wp ) &2369 THEN2370 2371 !$OMP DO2372 !$acc parallel loop gang present( u, v, w, rflags_invers, rmask, sums_l ) create( s1, s2, s3, s4, ust2, vst2, w2 )2373 DO k = nzb, nzt+12374 s1 = 02375 s2 = 02376 s3 = 02377 s4 = 02378 !$acc loop vector collapse( 2 ) reduction( +: s1, s2, s3, s4 )2379 DO i = nxl, nxr2380 DO j = nys, nyn2381 ust2 = ( u(k,j,i) - hom(k,1,1,sr) )**22382 vst2 = ( v(k,j,i) - hom(k,1,2,sr) )**22383 w2 = w(k,j,i)**22384 2385 s1 = s1 + ust2 * rmask(j,i,sr) * rflags_invers(j,i,k+1)2386 s2 = s2 + vst2 * rmask(j,i,sr) * rflags_invers(j,i,k+1)2387 s3 = s3 + w2 * rmask(j,i,sr) * rflags_invers(j,i,k+1)2388 !2389 !-- Perturbation energy2390 s4 = s4 + 0.5_wp * ( ust2 + vst2 + w2 ) * rmask(j,i,sr) * &2391 rflags_invers(j,i,k+1)2392 ENDDO2393 ENDDO2394 sums_l(k,30,tn) = s12395 sums_l(k,31,tn) = s22396 sums_l(k,32,tn) = s32397 sums_l(k,34,tn) = s42398 ENDDO2399 !$acc end parallel loop2400 !2401 !-- Total perturbation TKE2402 !$OMP DO2403 !$acc parallel present( sums_l ) create( s1 )2404 s1 = 02405 !$acc loop reduction( +: s1 )2406 DO k = nzb, nzt+12407 s1 = s1 + sums_l(k,34,tn)2408 ENDDO2409 sums_l(nzb+5,pr_palm,tn) = s12410 !$acc end parallel2411 2412 ENDIF2413 2414 !2415 !-- Horizontally averaged profiles of the vertical fluxes2416 2417 !2418 !-- Subgridscale fluxes.2419 !-- WARNING: If a Prandtl-layer is used (k=nzb for flat terrain), the fluxes2420 !-- ------- should be calculated there in a different way. This is done2421 !-- in the next loop further below, where results from this loop are2422 !-- overwritten. However, THIS WORKS IN CASE OF FLAT TERRAIN ONLY!2423 !-- The non-flat case still has to be handled.2424 !-- NOTE: for simplicity, nzb_s_inner is used below, although2425 !-- ---- strictly speaking the following k-loop would have to be2426 !-- split up according to the staggered grid.2427 !-- However, this implies no error since staggered velocity2428 !-- components are zero at the walls and inside buildings.2429 !$OMP DO2430 !$acc parallel loop gang present( ddzu, kh, km, pt, u, v, w, rflags_invers, rmask, sums_l ) create( s1, s2, s3 )2431 DO k = nzb, nzt_diff2432 s1 = 02433 s2 = 02434 s3 = 02435 !$acc loop vector collapse( 2 ) reduction( +: s1, s2, s3 )2436 DO i = nxl, nxr2437 DO j = nys, nyn2438 2439 !2440 !-- Momentum flux w"u"2441 s1 = s1 - 0.25_wp * ( &2442 km(k,j,i)+km(k+1,j,i)+km(k,j,i-1)+km(k+1,j,i-1) &2443 ) * ( &2444 ( u(k+1,j,i) - u(k,j,i) ) * ddzu(k+1) &2445 + ( w(k,j,i) - w(k,j,i-1) ) * ddx &2446 ) &2447 * rmask(j,i,sr) * rflags_invers(j,i,k+1) &2448 * rho_air_zw(k) &2449 * momentumflux_output_conversion(k)2450 !2451 !-- Momentum flux w"v"2452 s2 = s2 - 0.25_wp * ( &2453 km(k,j,i)+km(k+1,j,i)+km(k,j-1,i)+km(k+1,j-1,i) &2454 ) * ( &2455 ( v(k+1,j,i) - v(k,j,i) ) * ddzu(k+1) &2456 + ( w(k,j,i) - w(k,j-1,i) ) * ddy &2457 ) &2458 * rmask(j,i,sr) * rflags_invers(j,i,k+1) &2459 * rho_air_zw(k) &2460 * momentumflux_output_conversion(k)2461 !2462 !-- Heat flux w"pt"2463 s3 = s3 - 0.5_wp * ( kh(k,j,i) + kh(k+1,j,i) ) &2464 * ( pt(k+1,j,i) - pt(k,j,i) ) &2465 * rho_air_zw(k) &2466 * heatflux_output_conversion(k) &2467 * ddzu(k+1) * rmask(j,i,sr) &2468 * rflags_invers(j,i,k+1)2469 ENDDO2470 ENDDO2471 sums_l(k,12,tn) = s12472 sums_l(k,14,tn) = s22473 sums_l(k,16,tn) = s32474 ENDDO2475 !$acc end parallel loop2476 2477 !2478 !-- Salinity flux w"sa"2479 IF ( ocean ) THEN2480 !$acc parallel loop gang present( ddzu, kh, sa, rflags_invers, rmask, sums_l ) create( s1 )2481 DO k = nzb, nzt_diff2482 s1 = 02483 !$acc loop vector collapse( 2 ) reduction( +: s1 )2484 DO i = nxl, nxr2485 DO j = nys, nyn2486 s1 = s1 - 0.5_wp * ( kh(k,j,i) + kh(k+1,j,i) ) &2487 * ( sa(k+1,j,i) - sa(k,j,i) ) &2488 * ddzu(k+1) * rmask(j,i,sr) &2489 * rflags_invers(j,i,k+1)2490 ENDDO2491 ENDDO2492 sums_l(k,65,tn) = s12493 ENDDO2494 !$acc end parallel loop2495 ENDIF2496 2497 !2498 !-- Buoyancy flux, water flux (humidity flux) w"q"2499 IF ( humidity ) THEN2500 2501 !$acc parallel loop gang present( ddzu, kh, q, vpt, rflags_invers, rmask, sums_l ) create( s1, s2 )2502 DO k = nzb, nzt_diff2503 s1 = 02504 s2 = 02505 !$acc loop vector collapse( 2 ) reduction( +: s1, s2 )2506 DO i = nxl, nxr2507 DO j = nys, nyn2508 s1 = s1 - 0.5_wp * ( kh(k,j,i) + kh(k+1,j,i) ) &2509 * ( vpt(k+1,j,i) - vpt(k,j,i) ) &2510 * rho_air_zw(k) &2511 * heatflux_output_conversion(k) &2512 * ddzu(k+1) * rmask(j,i,sr) &2513 * rflags_invers(j,i,k+1)2514 s2 = s2 - 0.5_wp * ( kh(k,j,i) + kh(k+1,j,i) ) &2515 * ( q(k+1,j,i) - q(k,j,i) ) &2516 * rho_air_zw(k) &2517 * waterflux_output_conversion(k) &2518 * ddzu(k+1) * rmask(j,i,sr) &2519 * rflags_invers(j,i,k+1)2520 ENDDO2521 ENDDO2522 sums_l(k,45,tn) = s12523 sums_l(k,48,tn) = s22524 ENDDO2525 !$acc end parallel loop2526 2527 IF ( cloud_physics ) THEN2528 2529 !$acc parallel loop gang present( ddzu, kh, q, ql, rflags_invers, rmask, sums_l ) create( s1 )2530 DO k = nzb, nzt_diff2531 s1 = 02532 !$acc loop vector collapse( 2 ) reduction( +: s1 )2533 DO i = nxl, nxr2534 DO j = nys, nyn2535 s1 = s1 - 0.5_wp * ( kh(k,j,i) + kh(k+1,j,i) ) &2536 * ( ( q(k+1,j,i) - ql(k+1,j,i) ) &2537 - ( q(k,j,i) - ql(k,j,i) ) ) &2538 * rho_air_zw(k) &2539 * waterflux_output_conversion(k) &2540 * ddzu(k+1) * rmask(j,i,sr) &2541 * rflags_invers(j,i,k+1)2542 ENDDO2543 ENDDO2544 sums_l(k,51,tn) = s12545 ENDDO2546 !$acc end parallel loop2547 2548 ENDIF2549 2550 ENDIF2551 !2552 !-- Passive scalar flux2553 IF ( passive_scalar ) THEN2554 2555 !$acc parallel loop gang present( ddzu, kh, s, rflags_invers, rmask, sums_l ) create( s1 )2556 DO k = nzb, nzt_diff2557 s1 = 02558 !$acc loop vector collapse( 2 ) reduction( +: s1 )2559 DO i = nxl, nxr2560 DO j = nys, nyn2561 s1 = s1 - 0.5_wp * ( kh(k,j,i) + kh(k+1,j,i) ) &2562 * ( s(k+1,j,i) - s(k,j,i) ) &2563 * ddzu(k+1) * rmask(j,i,sr) &2564 * rflags_invers(j,i,k+1)2565 ENDDO2566 ENDDO2567 sums_l(k,119,tn) = s12568 ENDDO2569 !$acc end parallel loop2570 2571 ENDIF2572 2573 IF ( use_surface_fluxes ) THEN2574 2575 !$OMP DO2576 !$acc parallel present( rmask, shf, sums_l, usws, vsws ) create( s1, s2, s3, s4, s5 )2577 s1 = 02578 s2 = 02579 s3 = 02580 s4 = 02581 s5 = 02582 !$acc loop vector collapse( 2 ) reduction( +: s1, s2, s3, s4, s5 )2583 DO i = nxl, nxr2584 DO j = nys, nyn2585 !2586 !-- Subgridscale fluxes in the Prandtl layer2587 s1 = s1 + usws(j,i) * momentumflux_output_conversion(nzb) &2588 * rmask(j,i,sr) ! w"u"2589 s2 = s2 + vsws(j,i) * momentumflux_output_conversion(nzb) &2590 * rmask(j,i,sr) ! w"v"2591 s3 = s3 + shf(j,i) * heatflux_output_conversion(nzb) &2592 * rmask(j,i,sr) ! w"pt"2593 s4 = s4 + 0.0_wp * rmask(j,i,sr) ! u"pt"2594 s5 = s5 + 0.0_wp * rmask(j,i,sr) ! v"pt"2595 ENDDO2596 ENDDO2597 sums_l(nzb,12,tn) = s12598 sums_l(nzb,14,tn) = s22599 sums_l(nzb,16,tn) = s32600 sums_l(nzb,58,tn) = s42601 sums_l(nzb,61,tn) = s52602 !$acc end parallel2603 2604 IF ( ocean ) THEN2605 2606 !$OMP DO2607 !$acc parallel present( rmask, saswsb, sums_l ) create( s1 )2608 s1 = 02609 !$acc loop vector collapse( 2 ) reduction( +: s1 )2610 DO i = nxl, nxr2611 DO j = nys, nyn2612 s1 = s1 + saswsb(j,i) * rmask(j,i,sr) ! w"sa"2613 ENDDO2614 ENDDO2615 sums_l(nzb,65,tn) = s12616 !$acc end parallel2617 2618 ENDIF2619 2620 IF ( humidity ) THEN2621 2622 !$OMP DO2623 !$acc parallel present( pt, q, qsws, rmask, shf, sums_l ) create( s1, s2 )2624 s1 = 02625 s2 = 02626 !$acc loop vector collapse( 2 ) reduction( +: s1, s2 )2627 DO i = nxl, nxr2628 DO j = nys, nyn2629 s1 = s1 + qsws(j,i) * waterflux_output_conversion(nzb) &2630 * rmask(j,i,sr) ! w"q" (w"qv")2631 s2 = s2 + ( ( 1.0_wp + 0.61_wp * q(nzb,j,i) ) * shf(j,i) &2632 + 0.61_wp * pt(nzb,j,i) * qsws(j,i) ) &2633 * heatflux_output_conversion(nzb)2634 ENDDO2635 ENDDO2636 sums_l(nzb,48,tn) = s12637 sums_l(nzb,45,tn) = s22638 !$acc end parallel2639 2640 IF ( cloud_droplets ) THEN2641 2642 !$OMP DO2643 !$acc parallel present( pt, q, ql, qsws, rmask, shf, sums_l ) create( s1 )2644 s1 = 02645 !$acc loop vector collapse( 2 ) reduction( +: s1 )2646 DO i = nxl, nxr2647 DO j = nys, nyn2648 s1 = s1 + ( ( 1.0_wp + &2649 0.61_wp * q(nzb,j,i) - ql(nzb,j,i) ) * &2650 shf(j,i) + 0.61_wp * pt(nzb,j,i) * qsws(j,i) )&2651 * heatflux_output_conversion(nzb)2652 ENDDO2653 ENDDO2654 sums_l(nzb,45,tn) = s12655 !$acc end parallel2656 2657 ENDIF2658 2659 IF ( cloud_physics ) THEN2660 2661 !$OMP DO2662 !$acc parallel present( qsws, rmask, sums_l ) create( s1 )2663 s1 = 02664 !$acc loop vector collapse( 2 ) reduction( +: s1 )2665 DO i = nxl, nxr2666 DO j = nys, nyn2667 !2668 !-- Formula does not work if ql(nzb) /= 0.02669 s1 = s1 + qsws(j,i) * waterflux_output_conversion(nzb) &2670 * rmask(j,i,sr) ! w"q" (w"qv")2671 ENDDO2672 ENDDO2673 sums_l(nzb,51,tn) = s12674 !$acc end parallel2675 2676 ENDIF2677 2678 ENDIF2679 2680 IF ( passive_scalar ) THEN2681 2682 !$OMP DO2683 !$acc parallel present( ssws, rmask, sums_l ) create( s1 )2684 s1 = 02685 !$acc loop vector collapse( 2 ) reduction( +: s1 )2686 DO i = nxl, nxr2687 DO j = nys, nyn2688 s1 = s1 + ssws(j,i) * rmask(j,i,sr) ! w"s"2689 ENDDO2690 ENDDO2691 sums_l(nzb,119,tn) = s12692 !$acc end parallel2693 2694 ENDIF2695 2696 ENDIF2697 2698 !2699 !-- Subgridscale fluxes at the top surface2700 IF ( use_top_fluxes ) THEN2701 2702 !$OMP DO2703 !$acc parallel present( rmask, sums_l, tswst, uswst, vswst ) create( s1, s2, s3, s4, s5 )2704 s1 = 02705 s2 = 02706 s3 = 02707 s4 = 02708 s5 = 02709 !$acc loop vector collapse( 2 ) reduction( +: s1, s2, s3, s4, s5 )2710 DO i = nxl, nxr2711 DO j = nys, nyn2712 s1 = s1 + uswst(j,i) * momentumflux_output_conversion(nzt:nzt+1) &2713 * rmask(j,i,sr) ! w"u"2714 s2 = s2 + vswst(j,i) * momentumflux_output_conversion(nzt:nzt+1) &2715 * rmask(j,i,sr) ! w"v"2716 s3 = s3 + tswst(j,i) * heatflux_output_conversion(nzt:nzt+1) &2717 * rmask(j,i,sr) ! w"pt"2718 s4 = s4 + 0.0_wp * rmask(j,i,sr) ! u"pt"2719 s5 = s5 + 0.0_wp * rmask(j,i,sr) ! v"pt"2720 ENDDO2721 ENDDO2722 sums_l(nzt:nzt+1,12,tn) = s12723 sums_l(nzt:nzt+1,14,tn) = s22724 sums_l(nzt:nzt+1,16,tn) = s32725 sums_l(nzt:nzt+1,58,tn) = s42726 sums_l(nzt:nzt+1,61,tn) = s52727 !$acc end parallel2728 2729 IF ( ocean ) THEN2730 2731 !$OMP DO2732 !$acc parallel present( rmask, saswst, sums_l ) create( s1 )2733 s1 = 02734 !$acc loop vector collapse( 2 ) reduction( +: s1 )2735 DO i = nxl, nxr2736 DO j = nys, nyn2737 s1 = s1 + saswst(j,i) * rmask(j,i,sr) ! w"sa"2738 ENDDO2739 ENDDO2740 sums_l(nzt,65,tn) = s12741 !$acc end parallel2742 2743 ENDIF2744 2745 IF ( humidity ) THEN2746 2747 !$OMP DO2748 !$acc parallel present( pt, q, qswst, rmask, tswst, sums_l ) create( s1, s2 )2749 s1 = 02750 s2 = 02751 !$acc loop vector collapse( 2 ) reduction( +: s1, s2 )2752 DO i = nxl, nxr2753 DO j = nys, nyn2754 s1 = s1 + qswst(j,i) * waterflux_output_conversion(nzt) &2755 * rmask(j,i,sr) ! w"q" (w"qv")2756 s2 = s2 + ( ( 1.0_wp + 0.61_wp * q(nzt,j,i) ) * tswst(j,i) +&2757 0.61_wp * pt(nzt,j,i) * qswst(j,i) ) &2758 * heatflux_output_conversion(nzt)2759 ENDDO2760 ENDDO2761 sums_l(nzt,48,tn) = s12762 sums_l(nzt,45,tn) = s22763 !$acc end parallel2764 2765 IF ( cloud_droplets ) THEN2766 2767 !$OMP DO2768 !$acc parallel present( pt, q, ql, qswst, rmask, tswst, sums_l ) create( s1 )2769 s1 = 02770 !$acc loop vector collapse( 2 ) reduction( +: s1 )2771 DO i = nxl, nxr2772 DO j = nys, nyn2773 s1 = s1 + ( ( 1.0_wp + &2774 0.61_wp * q(nzt,j,i) - ql(nzt,j,i) ) * &2775 tswst(j,i) + &2776 0.61_wp * pt(nzt,j,i) * qswst(j,i) ) &2777 * heatflux_output_conversion(nzt)2778 ENDDO2779 ENDDO2780 sums_l(nzt,45,tn) = s12781 !$acc end parallel2782 2783 ENDIF2784 2785 IF ( cloud_physics ) THEN2786 2787 !$OMP DO2788 !$acc parallel present( qswst, rmask, sums_l ) create( s1 )2789 s1 = 02790 !$acc loop vector collapse( 2 ) reduction( +: s1 )2791 DO i = nxl, nxr2792 DO j = nys, nyn2793 !2794 !-- Formula does not work if ql(nzb) /= 0.02795 s1 = s1 + qswst(j,i) * waterflux_output_conversion(nzt) &2796 * rmask(j,i,sr) ! w"q" (w"qv")2797 ENDDO2798 ENDDO2799 sums_l(nzt,51,tn) = s12800 !$acc end parallel2801 2802 ENDIF2803 2804 ENDIF2805 2806 IF ( passive_scalar ) THEN2807 2808 !$OMP DO2809 !$acc parallel present( sswst, rmask, sums_l ) create( s1 )2810 s1 = 02811 !$acc loop vector collapse( 2 ) reduction( +: s1 )2812 DO i = nxl, nxr2813 DO j = nys, nyn2814 s1 = s1 + sswst(j,i) * rmask(j,i,sr) ! w"s"2815 ENDDO2816 ENDDO2817 sums_l(nzt,119,tn) = s12818 !$acc end parallel2819 2820 ENDIF2821 2822 ENDIF2823 2824 !2825 !-- Resolved fluxes (can be computed for all horizontal points)2826 !-- NOTE: for simplicity, nzb_s_inner is used below, although strictly2827 !-- ---- speaking the following k-loop would have to be split up and2828 !-- rearranged according to the staggered grid.2829 !$acc parallel loop gang present( hom, pt, rflags_invers, rmask, sums_l, u, v, w ) create( s1, s2, s3 )2830 DO k = nzb, nzt_diff2831 s1 = 02832 s2 = 02833 s3 = 02834 !$acc loop vector collapse( 2 ) reduction( +: s1, s2, s3 )2835 DO i = nxl, nxr2836 DO j = nys, nyn2837 ust = 0.5_wp * ( u(k,j,i) - hom(k,1,1,sr) + &2838 u(k+1,j,i) - hom(k+1,1,1,sr) )2839 vst = 0.5_wp * ( v(k,j,i) - hom(k,1,2,sr) + &2840 v(k+1,j,i) - hom(k+1,1,2,sr) )2841 pts = 0.5_wp * ( pt(k,j,i) - hom(k,1,4,sr) + &2842 pt(k+1,j,i) - hom(k+1,1,4,sr) )2843 !2844 !-- Higher moments2845 s1 = s1 + pts * w(k,j,i)**2 * rmask(j,i,sr) * rflags_invers(j,i,k+1)2846 s2 = s2 + pts**2 * w(k,j,i) * rmask(j,i,sr) * rflags_invers(j,i,k+1)2847 !2848 !-- Energy flux w*e* (has to be adjusted?)2849 s3 = s3 + w(k,j,i) * 0.5_wp * ( ust**2 + vst**2 + w(k,j,i)**2 )&2850 * rmask(j,i,sr) * rflags_invers(j,i,k+1) &2851