- Timestamp:
- Mar 23, 2020 1:49:05 PM (5 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
palm/trunk/SOURCE/advec_ws.f90
r4466 r4468 20 20 ! Current revisions: 21 21 ! ------------------ 22 ! 23 ! 22 ! 23 ! 24 24 ! Former revisions: 25 25 ! ----------------- 26 26 ! $Id$ 27 ! - bugfix for last commit in openacc branch 28 ! - some loop bounds revised (only to be consistent with cache version) 29 ! - setting of nzb_max_l for advection of the w-component revised 30 ! 31 ! 4466 2020-03-20 16:14:41Z suehring 27 32 ! - vector branch further optimized (linear dependencies along z removed and 28 33 ! loops are splitted) … … 1822 1827 ) & 1823 1828 ) * ddy & 1824 + ( w(k,j,i) * rho_air_zw(k) *&1829 + ( w(k,j,i) * rho_air_zw(k) * & 1825 1830 ( ibit6 + ibit7 + ibit8 ) & 1826 1831 - w(k-1,j,i) * rho_air_zw(k-1) * & … … 1860 1865 div = ( u(k,j,i+1) - u(k,j,i) ) * ddx & 1861 1866 + ( v(k,j+1,i) - v(k,j,i) ) * ddy & 1862 + ( w(k,j,i) * rho_air_zw(k)&1867 + ( w(k,j,i) * rho_air_zw(k) & 1863 1868 - w(k-1,j,i) * rho_air_zw(k-1) & 1864 1869 ) * drho_air(k) * ddzw(k) … … 3188 3193 ( bc_dirichlet_s .OR. bc_radiation_s ) .AND. j <= nys + 2 .OR. & 3189 3194 ( bc_dirichlet_n .OR. bc_radiation_n ) .AND. j >= nyn - 2 ) THEN 3190 nzb_max_l = nzt 3195 nzb_max_l = nzt - 1 3191 3196 ELSE 3192 3197 nzb_max_l = nzb_max … … 3705 3710 INTEGER(iwp) :: nzb_max_l !< index indicating upper bound for order degradation of horizontal advection terms 3706 3711 INTEGER(iwp) :: sk_num !< integer identifier, used for assign fluxes to the correct dimension in the analysis array 3707 INTEGER(iwp) :: tn = 0 !< number of OpenMP thread 3712 INTEGER(iwp) :: tn = 0 !< number of OpenMP thread (is always zero here) 3708 3713 3709 3714 INTEGER(iwp), INTENT(IN), DIMENSION(nzb:nzt+1,nysg:nyng,nxlg:nxrg) :: & … … 3755 3760 REAL(wp), DIMENSION(nzb:nzt+1) :: flux_r !< discretized 6th-order flux at rightward-side of the grid box 3756 3761 REAL(wp), DIMENSION(nzb:nzt+1) :: flux_t !< discretized 6th-order flux at top of the grid box 3757 3758 REAL(wp), DIMENSION(nzb+1:nzt,0:threads_per_task-1) :: swap_diss_y_local !< discretized artificial dissipation at southward-side 3759 REAL(wp), DIMENSION(nzb+1:nzt,0:threads_per_task-1) :: swap_flux_y_local !< discretized 6th-order flux at northward-side 3760 3761 REAL(wp), DIMENSION(nzb+1:nzt,nys:nyn,0:threads_per_task-1) :: swap_diss_x_local !< discretized artificial dissipation at leftward-side 3762 REAL(wp), DIMENSION(nzb+1:nzt,nys:nyn,0:threads_per_task-1) :: swap_flux_x_local !< discretized 6th-order flux at leftward-side 3762 #ifndef _OPENACC 3763 REAL(wp), DIMENSION(nzb+1:nzt) :: swap_diss_y_local !< discretized artificial dissipation at southward-side 3764 REAL(wp), DIMENSION(nzb+1:nzt) :: swap_flux_y_local !< discretized 6th-order flux at northward-side 3765 #endif 3766 #ifdef _OPENACC 3767 REAL(wp), DIMENSION(nzb+1:nzt) :: diss_l !< discretized artificial dissipation at leftward-side 3768 REAL(wp), DIMENSION(nzb+1:nzt) :: diss_s !< discretized artificial dissipation at southward-side 3769 REAL(wp), DIMENSION(nzb+1:nzt) :: flux_l !< discretized 6th-order flux at leftward-side 3770 REAL(wp), DIMENSION(nzb+1:nzt) :: flux_s !< discretized 6th-order flux at southward-side 3771 #endif 3772 3773 REAL(wp), DIMENSION(nzb+1:nzt,nys:nyn) :: swap_diss_x_local !< discretized artificial dissipation at leftward-side 3774 REAL(wp), DIMENSION(nzb+1:nzt,nys:nyn) :: swap_flux_x_local !< discretized 6th-order flux at leftward-side 3763 3775 3764 3776 CALL cpu_log( log_point_s(49), 'advec_s_ws', 'start' ) … … 3795 3807 !$ACC PRIVATE(ibit3_s, ibit4_s, ibit5_s) & 3796 3808 !$ACC PRIVATE(ibit6, ibit7, ibit8) & 3797 !$ACC PRIVATE( flux_r, diss_r) &3798 !$ACC PRIVATE( flux_n, diss_n) &3799 !$ACC PRIVATE( swap_diss_y_local, swap_flux_y_local) &3809 !$ACC PRIVATE(nzb_max_l) & 3810 !$ACC PRIVATE(diss_l, diss_r, flux_l, flux_r) & 3811 !$ACC PRIVATE(diss_n, diss_s, flux_s, flux_n) & 3800 3812 !$ACC PRIVATE(flux_t, diss_t, flux_d, diss_d) & 3801 3813 !$ACC PRIVATE(div, u_comp, u_comp_l, v_comp, v_comp_s) & … … 3838 3850 3839 3851 u_comp = u(k,j,i) - u_gtrans + u_stokes_zu(k) 3840 swap_flux_x_local(k,j ,tn)= u_comp * ( &3852 swap_flux_x_local(k,j) = u_comp * ( & 3841 3853 ( 37.0_wp * ibit2 * adv_sca_5 & 3842 3854 + 7.0_wp * ibit1 * adv_sca_3 & … … 3853 3865 ) 3854 3866 3855 swap_diss_x_local(k,j ,tn)= -ABS( u_comp ) * ( &3867 swap_diss_x_local(k,j) = -ABS( u_comp ) * ( & 3856 3868 ( 10.0_wp * ibit2 * adv_sca_5 & 3857 3869 + 3.0_wp * ibit1 * adv_sca_3 & … … 3873 3885 3874 3886 u_comp = u(k,j,i) - u_gtrans + u_stokes_zu(k) 3875 swap_flux_x_local(k,j ,tn)= u_comp * ( &3887 swap_flux_x_local(k,j) = u_comp * ( & 3876 3888 37.0_wp * ( sk(k,j,i) + sk(k,j,i-1) )& 3877 3889 - 8.0_wp * ( sk(k,j,i+1) + sk(k,j,i-2) )& … … 3879 3891 ) * adv_sca_5 3880 3892 3881 swap_diss_x_local(k,j ,tn)= -ABS( u_comp ) * ( &3893 swap_diss_x_local(k,j) = -ABS( u_comp ) * ( & 3882 3894 10.0_wp * ( sk(k,j,i) - sk(k,j,i-1) )& 3883 3895 - 5.0_wp * ( sk(k,j,i+1) - sk(k,j,i-2) )& … … 3900 3912 3901 3913 v_comp = v(k,j,i) - v_gtrans + v_stokes_zu(k) 3902 swap_flux_y_local(k ,tn)= v_comp * ( &3914 swap_flux_y_local(k) = v_comp * ( & 3903 3915 ( 37.0_wp * ibit5 * adv_sca_5 & 3904 3916 + 7.0_wp * ibit4 * adv_sca_3 & … … 3915 3927 ) 3916 3928 3917 swap_diss_y_local(k ,tn)= -ABS( v_comp ) * ( &3929 swap_diss_y_local(k) = -ABS( v_comp ) * ( & 3918 3930 ( 10.0_wp * ibit5 * adv_sca_5 & 3919 3931 + 3.0_wp * ibit4 * adv_sca_3 & … … 3936 3948 3937 3949 v_comp = v(k,j,i) - v_gtrans + v_stokes_zu(k) 3938 swap_flux_y_local(k ,tn)= v_comp * ( &3950 swap_flux_y_local(k) = v_comp * ( & 3939 3951 37.0_wp * ( sk(k,j,i) + sk(k,j-1,i) ) & 3940 3952 - 8.0_wp * ( sk(k,j+1,i) + sk(k,j-2,i) ) & 3941 3953 + ( sk(k,j+2,i) + sk(k,j-3,i) ) & 3942 3954 ) * adv_sca_5 3943 swap_diss_y_local(k ,tn)= -ABS( v_comp ) * ( &3955 swap_diss_y_local(k) = -ABS( v_comp ) * ( & 3944 3956 10.0_wp * ( sk(k,j,i) - sk(k,j-1,i) ) & 3945 3957 - 5.0_wp * ( sk(k,j+1,i) - sk(k,j-2,i) ) & … … 4000 4012 ibit0_l = REAL( IBITS(advc_flag(k,j,i-1),0,1), KIND = wp ) 4001 4013 4002 u_comp_l = u(k,j,i) - u_gtrans + u_stokes_zu(k)4003 swap_flux_x_local(k,j,tn)= u_comp_l * ( &4014 u_comp_l = u(k,j,i) - u_gtrans + u_stokes_zu(k) 4015 flux_l(k) = u_comp_l * ( & 4004 4016 ( 37.0_wp * ibit2_l * adv_sca_5 & 4005 4017 + 7.0_wp * ibit1_l * adv_sca_3 & … … 4016 4028 ) 4017 4029 4018 swap_diss_x_local(k,j,tn)= -ABS( u_comp_l ) * ( &4030 diss_l(k) = -ABS( u_comp_l ) * ( & 4019 4031 ( 10.0_wp * ibit2_l * adv_sca_5 & 4020 4032 + 3.0_wp * ibit1_l * adv_sca_3 & … … 4025 4037 + ibit1_l * adv_sca_3 & 4026 4038 ) * & 4027 ( sk(k,j,i+1) - sk(k,j,i-2)) &4039 ( sk(k,j,i+1) - sk(k,j,i-2) ) & 4028 4040 + ( ibit2_l * adv_sca_5 & 4029 4041 ) * & 4030 4042 ( sk(k,j,i+2) - sk(k,j,i-3) ) & 4031 4043 ) 4044 #else 4045 flux_l(k) = swap_flux_x_local(k,j) 4046 diss_l(k) = swap_diss_x_local(k,j) 4032 4047 #endif 4033 4048 ibit5 = REAL( IBITS(advc_flag(k,j,i),5,1), KIND = wp ) … … 4073 4088 4074 4089 v_comp_s = v(k,j,i) - v_gtrans + v_stokes_zu(k) 4075 swap_flux_y_local(k,tn)= v_comp_s * ( &4090 flux_s(k) = v_comp_s * ( & 4076 4091 ( 37.0_wp * ibit5_s * adv_sca_5 & 4077 4092 + 7.0_wp * ibit4_s * adv_sca_3 & 4078 4093 + ibit3_s * adv_sca_1 & 4079 4094 ) * & 4080 ( sk(k,j,i) + sk(k,j-1,i) )&4095 ( sk(k,j,i) + sk(k,j-1,i) ) & 4081 4096 - ( 8.0_wp * ibit5_s * adv_sca_5 & 4082 4097 + ibit4_s * adv_sca_3 & … … 4085 4100 + ( ibit5_s * adv_sca_5 & 4086 4101 ) * & 4087 ( sk(k,j+2,i) + sk(k,j-3,i)) &4102 ( sk(k,j+2,i) + sk(k,j-3,i) ) & 4088 4103 ) 4089 4104 4090 swap_flux_y_local(k,tn)= -ABS( v_comp_s ) * ( &4105 diss_s(k) = -ABS( v_comp_s ) * ( & 4091 4106 ( 10.0_wp * ibit5_s * adv_sca_5 & 4092 4107 + 3.0_wp * ibit4_s * adv_sca_3 & … … 4102 4117 ( sk(k,j+2,i) - sk(k,j-3,i) ) & 4103 4118 ) 4119 #else 4120 flux_s(k) = swap_flux_y_local(k) 4121 diss_s(k) = swap_diss_y_local(k) 4104 4122 #endif 4105 4123 ENDDO … … 4123 4141 !-- Recompute the left fluxes. 4124 4142 u_comp_l = u(k,j,i) - u_gtrans + u_stokes_zu(k) 4125 swap_flux_x_local(k,j,tn)= u_comp_l * ( &4143 flux_l(k) = u_comp_l * ( & 4126 4144 37.0_wp * ( sk(k,j,i) + sk(k,j,i-1) ) & 4127 4145 - 8.0_wp * ( sk(k,j,i+1) + sk(k,j,i-2) ) & … … 4129 4147 ) * adv_sca_5 4130 4148 4131 swap_diss_x_local(k,j,tn)= -ABS( u_comp_l ) * ( &4149 diss_l(k) = -ABS( u_comp_l ) * ( & 4132 4150 10.0_wp * ( sk(k,j,i) - sk(k,j,i-1) ) & 4133 4151 - 5.0_wp * ( sk(k,j,i+1) - sk(k,j,i-2) ) & 4134 4152 + ( sk(k,j,i+2) - sk(k,j,i-3) ) & 4135 4153 ) * adv_sca_5 4154 #else 4155 flux_l(k) = swap_flux_x_local(k,j) 4156 diss_l(k) = swap_diss_x_local(k,j) 4157 4136 4158 #endif 4137 4159 … … 4149 4171 !-- Recompute the south fluxes. 4150 4172 v_comp_s = v(k,j,i) - v_gtrans + v_stokes_zu(k) 4151 swap_flux_y_local(k,tn)= v_comp_s * ( &4173 flux_s(k) = v_comp_s * ( & 4152 4174 37.0_wp * ( sk(k,j,i) + sk(k,j-1,i) ) & 4153 4175 - 8.0_wp * ( sk(k,j+1,i) + sk(k,j-2,i) ) & 4154 4176 + ( sk(k,j+2,i) + sk(k,j-3,i) ) & 4155 4177 ) * adv_sca_5 4156 swap_diss_y_local(k,tn)= -ABS( v_comp_s ) * ( &4178 diss_s(k) = -ABS( v_comp_s ) * ( & 4157 4179 10.0_wp * ( sk(k,j,i) - sk(k,j-1,i) ) & 4158 4180 - 5.0_wp * ( sk(k,j+1,i) - sk(k,j-2,i) ) & 4159 4181 + sk(k,j+2,i) - sk(k,j-3,i) & 4160 4182 ) * adv_sca_5 4183 #else 4184 flux_s(k) = swap_flux_y_local(k) 4185 diss_s(k) = swap_diss_y_local(k) 4161 4186 #endif 4162 4187 ENDDO … … 4304 4329 DO k = nzb+1, nzb_max_l 4305 4330 4306 flux_d 4307 diss_d 4331 flux_d = flux_t(k-1) 4332 diss_d = diss_t(k-1) 4308 4333 4309 4334 ibit2 = REAL( IBITS(advc_flag(k,j,i),2,1), KIND = wp ) … … 4336 4361 ) & 4337 4362 ) * ddy & 4338 + ( w(k,j,i) * rho_air_zw(k) *&4363 + ( w(k,j,i) * rho_air_zw(k) * & 4339 4364 ( ibit6 + ibit7 + ibit8 ) & 4340 4365 - w(k-1,j,i) * rho_air_zw(k-1) * & … … 4347 4372 4348 4373 tend(k,j,i) = tend(k,j,i) - ( & 4349 ( flux_r(k) + diss_r(k) - swap_flux_x_local(k,j,tn) - & 4350 swap_diss_x_local(k,j,tn) ) * ddx & 4351 + ( flux_n(k) + diss_n(k) - swap_flux_y_local(k,tn) - & 4352 swap_diss_y_local(k,tn) ) * ddy & 4353 + ( ( flux_t(k) + diss_t(k) ) - & 4354 ( flux_d + diss_d ) & 4355 ) & 4356 * drho_air(k) * ddzw(k) & 4374 ( flux_r(k) + diss_r(k) - & 4375 flux_l(k) - diss_l(k) ) * ddx & 4376 + ( flux_n(k) + diss_n(k) - & 4377 flux_s(k) - diss_s(k) ) * ddy & 4378 + ( flux_t(k) + diss_t(k) - & 4379 flux_d - diss_d ) * drho_air(k) * ddzw(k) & 4357 4380 ) + sk(k,j,i) * div 4358 4381 4359 4382 #ifndef _OPENACC 4360 swap_flux_y_local(k ,tn) = flux_n(k)4361 swap_diss_y_local(k ,tn) = diss_n(k)4362 swap_flux_x_local(k,j ,tn) = flux_r(k)4363 swap_diss_x_local(k,j ,tn) = diss_r(k)4383 swap_flux_y_local(k) = flux_n(k) 4384 swap_diss_y_local(k) = diss_n(k) 4385 swap_flux_x_local(k,j) = flux_r(k) 4386 swap_diss_x_local(k,j) = diss_r(k) 4364 4387 #endif 4365 4388 … … 4368 4391 DO k = nzb_max_l+1, nzt 4369 4392 4370 flux_d 4371 diss_d 4393 flux_d = flux_t(k-1) 4394 diss_d = diss_t(k-1) 4372 4395 ! 4373 4396 !-- Calculate the divergence of the velocity field. A respective … … 4376 4399 div = ( u(k,j,i+1) - u(k,j,i) ) * ddx & 4377 4400 + ( v(k,j+1,i) - v(k,j,i) ) * ddy & 4378 + ( w(k,j,i) * rho_air_zw(k)&4401 + ( w(k,j,i) * rho_air_zw(k) & 4379 4402 - w(k-1,j,i) * rho_air_zw(k-1) & 4380 4403 ) * drho_air(k) * ddzw(k) 4381 4404 4382 4405 tend(k,j,i) = tend(k,j,i) - ( & 4383 ( flux_r(k) + diss_r(k) - swap_flux_x_local(k,j,tn) - & 4384 swap_diss_x_local(k,j,tn) ) * ddx & 4385 + ( flux_n(k) + diss_n(k) - swap_flux_y_local(k,tn) - & 4386 swap_diss_y_local(k,tn) ) * ddy & 4387 + ( ( flux_t(k) + diss_t(k) ) - & 4388 ( flux_d + diss_d ) ) & 4389 * drho_air(k) * ddzw(k) & 4406 ( flux_r(k) + diss_r(k) - & 4407 flux_l(k) - diss_l(k) ) * ddx & 4408 + ( flux_n(k) + diss_n(k) - & 4409 flux_s(k) - diss_s(k) ) * ddy & 4410 + ( flux_t(k) + diss_t(k) - & 4411 flux_d - diss_d ) * drho_air(k) * ddzw(k) & 4390 4412 ) + sk(k,j,i) * div 4391 4413 4392 4414 #ifndef _OPENACC 4393 swap_flux_y_local(k ,tn) = flux_n(k)4394 swap_diss_y_local(k ,tn) = diss_n(k)4395 swap_flux_x_local(k,j ,tn) = flux_r(k)4396 swap_diss_x_local(k,j ,tn) = diss_r(k)4415 swap_flux_y_local(k) = flux_n(k) 4416 swap_diss_y_local(k) = diss_n(k) 4417 swap_flux_x_local(k,j) = flux_r(k) 4418 swap_diss_x_local(k,j) = diss_r(k) 4397 4419 #endif 4398 4420 ENDDO … … 4569 4591 !$ACC PRIVATE(ibit0_l, ibit1_l, ibit2_l) & 4570 4592 !$ACC PRIVATE(ibit3_s, ibit4_s, ibit5_s) & 4593 !$ACC PRIVATE(nzb_max_l) & 4571 4594 !$ACC PRIVATE(ibit6, ibit7, ibit8) & 4572 4595 !$ACC PRIVATE(flux_r, diss_r) & … … 4931 4954 diss_t(nzb) = 0.0_wp 4932 4955 w_comp(nzb) = 0.0_wp 4933 DO k = nzb+1, nzb+ 24956 DO k = nzb+1, nzb+1 4934 4957 ! 4935 4958 !-- k index has to be modified near bottom and top, else array … … 4975 4998 ENDDO 4976 4999 4977 DO k = nzb+ 3, nzt-25000 DO k = nzb+2, nzt-2 4978 5001 4979 5002 ibit8 = REAL( IBITS(advc_flags_m(k,j,i),8,1), KIND = wp ) … … 5282 5305 !$ACC PRIVATE(ibit9_l, ibit10_l, ibit11_l) & 5283 5306 !$ACC PRIVATE(ibit12_s, ibit13_s, ibit14_s) & 5307 !$ACC PRIVATE(nzb_max_l) & 5284 5308 !$ACC PRIVATE(flux_r, diss_r) & 5285 5309 !$ACC PRIVATE(flux_n, diss_n) & … … 5637 5661 diss_t(nzb) = 0.0_wp 5638 5662 w_comp(nzb) = 0.0_wp 5639 DO k = nzb+1, nzb+ 25663 DO k = nzb+1, nzb+1 5640 5664 ! 5641 5665 !-- k index has to be modified near bottom and top, else array … … 5681 5705 ENDDO 5682 5706 5683 DO k = nzb+ 3, nzt-25707 DO k = nzb+2, nzt-2 5684 5708 ibit17 = REAL( IBITS(advc_flags_m(k,j,i),17,1), KIND = wp ) 5685 5709 ibit16 = REAL( IBITS(advc_flags_m(k,j,i),16,1), KIND = wp ) … … 6004 6028 !$ACC PRIVATE(ibit18_l, ibit19_l, ibit20_l) & 6005 6029 !$ACC PRIVATE(ibit21_s, ibit22_s, ibit23_s) & 6030 !$ACC PRIVATE(nzb_max_l) & 6006 6031 !$ACC PRIVATE(flux_r, diss_r) & 6007 6032 !$ACC PRIVATE(flux_n, diss_n) & … … 6027 6052 ( bc_dirichlet_s .OR. bc_radiation_s ) .AND. j <= nys + 2 .OR. & 6028 6053 ( bc_dirichlet_n .OR. bc_radiation_n ) .AND. j >= nyn - 2 ) THEN 6029 nzb_max_l = nzt 6054 nzb_max_l = nzt - 1 6030 6055 ELSE 6031 6056 nzb_max_l = nzb_max
Note: See TracChangeset
for help on using the changeset viewer.