- Timestamp:
- Mar 10, 2013 2:48:14 AM (12 years ago)
- Location:
- palm/trunk/SOURCE
- Files:
-
- 7 edited
Legend:
- Unmodified
- Added
- Removed
-
palm/trunk/SOURCE/boundary_conds.f90
r1054 r1113 1 SUBROUTINE boundary_conds ( range )1 SUBROUTINE boundary_conds 2 2 3 3 !--------------------------------------------------------------------------------! … … 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! GPU-porting 23 ! dummy argument "range" removed 24 ! Bugfix: wrong index in loops of radiation boundary condition 22 25 ! 23 26 ! Former revisions: 24 27 ! ----------------- 25 28 ! $Id$ 29 ! 26 30 ! 27 31 ! 1053 2012-11-13 17:11:03Z hoffmann … … 104 108 IMPLICIT NONE 105 109 106 CHARACTER (LEN=*) :: range107 108 110 INTEGER :: i, j, k 109 111 … … 111 113 112 114 113 IF ( range == 'main') THEN 114 ! 115 !-- Bottom boundary 116 IF ( ibc_uv_b == 1 ) THEN 117 u_p(nzb,:,:) = u_p(nzb+1,:,:) 118 v_p(nzb,:,:) = v_p(nzb+1,:,:) 119 ENDIF 115 ! 116 !-- Bottom boundary 117 IF ( ibc_uv_b == 1 ) THEN 118 !$acc kernels present( u_p, v_p ) 119 u_p(nzb,:,:) = u_p(nzb+1,:,:) 120 v_p(nzb,:,:) = v_p(nzb+1,:,:) 121 !$acc end kernels 122 ENDIF 123 124 !$acc kernels present( nzb_w_inner, w_p ) 125 DO i = nxlg, nxrg 126 DO j = nysg, nyng 127 w_p(nzb_w_inner(j,i),j,i) = 0.0 128 ENDDO 129 ENDDO 130 !$acc end kernels 131 132 ! 133 !-- Top boundary 134 IF ( ibc_uv_t == 0 ) THEN 135 !$acc kernels present( u_init, u_p, v_init, v_p ) 136 u_p(nzt+1,:,:) = u_init(nzt+1) 137 v_p(nzt+1,:,:) = v_init(nzt+1) 138 !$acc end kernels 139 ELSE 140 !$acc kernels present( u_p, v_p ) 141 u_p(nzt+1,:,:) = u_p(nzt,:,:) 142 v_p(nzt+1,:,:) = v_p(nzt,:,:) 143 !$acc end kernels 144 ENDIF 145 !$acc kernels present( w_p ) 146 w_p(nzt:nzt+1,:,:) = 0.0 ! nzt is not a prognostic level (but cf. pres) 147 !$acc end kernels 148 149 ! 150 !-- Temperature at bottom boundary. 151 !-- In case of coupled runs (ibc_pt_b = 2) the temperature is given by 152 !-- the sea surface temperature of the coupled ocean model. 153 IF ( ibc_pt_b == 0 ) THEN 154 !$acc kernels present( nzb_s_inner, pt, pt_p ) 120 155 DO i = nxlg, nxrg 121 156 DO j = nysg, nyng 122 w_p(nzb_w_inner(j,i),j,i) = 0.0157 pt_p(nzb_s_inner(j,i),j,i) = pt(nzb_s_inner(j,i),j,i) 123 158 ENDDO 124 159 ENDDO 125 126 ! 127 !-- Top boundary 128 IF ( ibc_uv_t == 0 ) THEN 129 u_p(nzt+1,:,:) = u_init(nzt+1) 130 v_p(nzt+1,:,:) = v_init(nzt+1) 131 ELSE 132 u_p(nzt+1,:,:) = u_p(nzt,:,:) 133 v_p(nzt+1,:,:) = v_p(nzt,:,:) 160 !$acc end kernels 161 ELSEIF ( ibc_pt_b == 1 ) THEN 162 !$acc kernels present( nzb_s_inner, pt_p ) 163 DO i = nxlg, nxrg 164 DO j = nysg, nyng 165 pt_p(nzb_s_inner(j,i),j,i) = pt_p(nzb_s_inner(j,i)+1,j,i) 166 ENDDO 167 ENDDO 168 !$acc end kernels 169 ENDIF 170 171 ! 172 !-- Temperature at top boundary 173 IF ( ibc_pt_t == 0 ) THEN 174 !$acc kernels present( pt, pt_p ) 175 pt_p(nzt+1,:,:) = pt(nzt+1,:,:) 176 !$acc end kernels 177 ELSEIF ( ibc_pt_t == 1 ) THEN 178 !$acc kernels present( pt_p ) 179 pt_p(nzt+1,:,:) = pt_p(nzt,:,:) 180 !$acc end kernels 181 ELSEIF ( ibc_pt_t == 2 ) THEN 182 !$acc kernels present( dzu, pt_p ) 183 pt_p(nzt+1,:,:) = pt_p(nzt,:,:) + bc_pt_t_val * dzu(nzt+1) 184 !$acc end kernels 185 ENDIF 186 187 ! 188 !-- Boundary conditions for TKE 189 !-- Generally Neumann conditions with de/dz=0 are assumed 190 IF ( .NOT. constant_diffusion ) THEN 191 !$acc kernels present( e_p, nzb_s_inner ) 192 DO i = nxlg, nxrg 193 DO j = nysg, nyng 194 e_p(nzb_s_inner(j,i),j,i) = e_p(nzb_s_inner(j,i)+1,j,i) 195 ENDDO 196 ENDDO 197 e_p(nzt+1,:,:) = e_p(nzt,:,:) 198 !$acc end kernels 199 ENDIF 200 201 ! 202 !-- Boundary conditions for salinity 203 IF ( ocean ) THEN 204 ! 205 !-- Bottom boundary: Neumann condition because salinity flux is always 206 !-- given 207 DO i = nxlg, nxrg 208 DO j = nysg, nyng 209 sa_p(nzb_s_inner(j,i),j,i) = sa_p(nzb_s_inner(j,i)+1,j,i) 210 ENDDO 211 ENDDO 212 213 ! 214 !-- Top boundary: Dirichlet or Neumann 215 IF ( ibc_sa_t == 0 ) THEN 216 sa_p(nzt+1,:,:) = sa(nzt+1,:,:) 217 ELSEIF ( ibc_sa_t == 1 ) THEN 218 sa_p(nzt+1,:,:) = sa_p(nzt,:,:) 134 219 ENDIF 135 w_p(nzt:nzt+1,:,:) = 0.0 ! nzt is not a prognostic level (but cf. pres) 136 137 ! 138 !-- Temperature at bottom boundary. 139 !-- In case of coupled runs (ibc_pt_b = 2) the temperature is given by 140 !-- the sea surface temperature of the coupled ocean model. 141 IF ( ibc_pt_b == 0 ) THEN 220 221 ENDIF 222 223 ! 224 !-- Boundary conditions for total water content or scalar, 225 !-- bottom and top boundary (see also temperature) 226 IF ( humidity .OR. passive_scalar ) THEN 227 ! 228 !-- Surface conditions for constant_humidity_flux 229 IF ( ibc_q_b == 0 ) THEN 142 230 DO i = nxlg, nxrg 143 231 DO j = nysg, nyng 144 pt_p(nzb_s_inner(j,i),j,i) = pt(nzb_s_inner(j,i),j,i)145 ENDDO 146 ENDDO 147 ELSE IF ( ibc_pt_b == 1 ) THEN232 q_p(nzb_s_inner(j,i),j,i) = q(nzb_s_inner(j,i),j,i) 233 ENDDO 234 ENDDO 235 ELSE 148 236 DO i = nxlg, nxrg 149 237 DO j = nysg, nyng 150 pt_p(nzb_s_inner(j,i),j,i) = pt_p(nzb_s_inner(j,i)+1,j,i)238 q_p(nzb_s_inner(j,i),j,i) = q_p(nzb_s_inner(j,i)+1,j,i) 151 239 ENDDO 152 240 ENDDO 153 241 ENDIF 154 155 ! 156 !-- Temperature at top boundary 157 IF ( ibc_pt_t == 0 ) THEN 158 pt_p(nzt+1,:,:) = pt(nzt+1,:,:) 159 ELSEIF ( ibc_pt_t == 1 ) THEN 160 pt_p(nzt+1,:,:) = pt_p(nzt,:,:) 161 ELSEIF ( ibc_pt_t == 2 ) THEN 162 pt_p(nzt+1,:,:) = pt_p(nzt,:,:) + bc_pt_t_val * dzu(nzt+1) 163 ENDIF 164 165 ! 166 !-- Boundary conditions for TKE 167 !-- Generally Neumann conditions with de/dz=0 are assumed 168 IF ( .NOT. constant_diffusion ) THEN 169 DO i = nxlg, nxrg 170 DO j = nysg, nyng 171 e_p(nzb_s_inner(j,i),j,i) = e_p(nzb_s_inner(j,i)+1,j,i) 172 ENDDO 173 ENDDO 174 e_p(nzt+1,:,:) = e_p(nzt,:,:) 175 ENDIF 176 177 ! 178 !-- Boundary conditions for salinity 179 IF ( ocean ) THEN 180 ! 181 !-- Bottom boundary: Neumann condition because salinity flux is always 182 !-- given 183 DO i = nxlg, nxrg 184 DO j = nysg, nyng 185 sa_p(nzb_s_inner(j,i),j,i) = sa_p(nzb_s_inner(j,i)+1,j,i) 186 ENDDO 187 ENDDO 188 189 ! 190 !-- Top boundary: Dirichlet or Neumann 191 IF ( ibc_sa_t == 0 ) THEN 192 sa_p(nzt+1,:,:) = sa(nzt+1,:,:) 193 ELSEIF ( ibc_sa_t == 1 ) THEN 194 sa_p(nzt+1,:,:) = sa_p(nzt,:,:) 195 ENDIF 196 197 ENDIF 198 199 ! 200 !-- Boundary conditions for total water content or scalar, 201 !-- bottom and top boundary (see also temperature) 202 IF ( humidity .OR. passive_scalar ) THEN 203 ! 242 ! 243 !-- Top boundary 244 q_p(nzt+1,:,:) = q_p(nzt,:,:) + bc_q_t_val * dzu(nzt+1) 245 246 IF ( cloud_physics .AND. icloud_scheme == 0 ) THEN 247 ! 204 248 !-- Surface conditions for constant_humidity_flux 205 IF ( ibc_q _b == 0 ) THEN249 IF ( ibc_qr_b == 0 ) THEN 206 250 DO i = nxlg, nxrg 207 251 DO j = nysg, nyng 208 q _p(nzb_s_inner(j,i),j,i) = q(nzb_s_inner(j,i),j,i)252 qr_p(nzb_s_inner(j,i),j,i) = qr(nzb_s_inner(j,i),j,i) 209 253 ENDDO 210 254 ENDDO … … 212 256 DO i = nxlg, nxrg 213 257 DO j = nysg, nyng 214 q _p(nzb_s_inner(j,i),j,i) = q_p(nzb_s_inner(j,i)+1,j,i)258 qr_p(nzb_s_inner(j,i),j,i) = qr_p(nzb_s_inner(j,i)+1,j,i) 215 259 ENDDO 216 260 ENDDO … … 218 262 ! 219 263 !-- Top boundary 220 q_p(nzt+1,:,:) = q_p(nzt,:,:) + bc_q_t_val * dzu(nzt+1) 221 222 IF ( cloud_physics .AND. icloud_scheme == 0 ) THEN 264 qr_p(nzt+1,:,:) = qr_p(nzt,:,:) + bc_qr_t_val * dzu(nzt+1) 223 265 ! 224 !-- Surface conditions for constant_humidity_flux 225 IF ( ibc_qr_b == 0 ) THEN 226 DO i = nxlg, nxrg 227 DO j = nysg, nyng 228 qr_p(nzb_s_inner(j,i),j,i) = qr(nzb_s_inner(j,i),j,i) 229 ENDDO 266 !-- Surface conditions for constant_humidity_flux 267 IF ( ibc_nr_b == 0 ) THEN 268 DO i = nxlg, nxrg 269 DO j = nysg, nyng 270 nr_p(nzb_s_inner(j,i),j,i) = nr(nzb_s_inner(j,i),j,i) 230 271 ENDDO 231 E LSE232 DO i = nxlg, nxrg233 DO j = nysg, nyng234 qr_p(nzb_s_inner(j,i),j,i) = qr_p(nzb_s_inner(j,i)+1,j,i)235 ENDDO272 ENDDO 273 ELSE 274 DO i = nxlg, nxrg 275 DO j = nysg, nyng 276 nr_p(nzb_s_inner(j,i),j,i) = nr_p(nzb_s_inner(j,i)+1,j,i) 236 277 ENDDO 237 ENDIF 238 ! 239 !-- Top boundary 240 qr_p(nzt+1,:,:) = qr_p(nzt,:,:) + bc_qr_t_val * dzu(nzt+1) 241 ! 242 !-- Surface conditions for constant_humidity_flux 243 IF ( ibc_nr_b == 0 ) THEN 244 DO i = nxlg, nxrg 245 DO j = nysg, nyng 246 nr_p(nzb_s_inner(j,i),j,i) = nr(nzb_s_inner(j,i),j,i) 247 ENDDO 248 ENDDO 249 ELSE 250 DO i = nxlg, nxrg 251 DO j = nysg, nyng 252 nr_p(nzb_s_inner(j,i),j,i) = nr_p(nzb_s_inner(j,i)+1,j,i) 253 ENDDO 254 ENDDO 255 ENDIF 256 ! 257 !-- Top boundary 258 nr_p(nzt+1,:,:) = nr_p(nzt,:,:) + bc_nr_t_val * dzu(nzt+1) 259 ENDIF 260 278 ENDDO 279 ENDIF 280 ! 281 !-- Top boundary 282 nr_p(nzt+1,:,:) = nr_p(nzt,:,:) + bc_nr_t_val * dzu(nzt+1) 261 283 ENDIF 262 284 … … 707 729 !-- Calculate the new velocities 708 730 DO k = nzb+1, nzt+1 709 DO i = nxlg, nxrg731 DO j = nysg, nyng 710 732 u_p(k,j,0) = u(k,j,0) - dt_3d * tsc(2) * c_u_m(k) * & 711 733 ( u(k,j,0) - u(k,j,1) ) * ddx … … 846 868 !-- Calculate the new velocities 847 869 DO k = nzb+1, nzt+1 848 DO i = nxlg, nxrg870 DO j = nysg, nyng 849 871 u_p(k,j,nx+1) = u(k,j,nx+1) - dt_3d * tsc(2) * c_u_m(k) * & 850 872 ( u(k,j,nx+1) - u(k,j,nx) ) * ddx -
palm/trunk/SOURCE/exchange_horiz.f90
r1037 r1113 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! GPU-porting for single-core (1PE) mode 23 23 ! 24 24 ! Former revisions: … … 82 82 83 83 84 INTEGER :: nbgp_local84 INTEGER :: i, j, k, nbgp_local 85 85 REAL, DIMENSION(nzb:nzt+1,nys-nbgp_local:nyn+nbgp_local, & 86 86 nxl-nbgp_local:nxr+nbgp_local) :: ar … … 199 199 200 200 ! 201 !-- Lateral boundary conditions in the non-parallel case 201 !-- Lateral boundary conditions in the non-parallel case. 202 !-- Case dependent, because in GPU mode still not all arrays are on device. This 203 !-- workaround has to be removed later. Also, since PGI compiler 12.5 has problems 204 !-- with array syntax, explicit loops are used. 202 205 IF ( bc_lr == 'cyclic' ) THEN 203 ar(:,:,nxl-nbgp_local:nxl-1) = ar(:,:,nxr-nbgp_local+1:nxr) 204 ar(:,:,nxr+1:nxr+nbgp_local) = ar(:,:,nxl:nxl+nbgp_local-1) 206 IF ( on_device ) THEN 207 !$acc kernels present( ar ) 208 !$acc loop independent 209 DO i = 0, nbgp_local-1 210 DO j = nys-nbgp_local, nyn+nbgp_local 211 !$acc loop vector( 32 ) 212 DO k = nzb, nzt+1 213 ar(k,j,nxl-nbgp_local+i) = ar(k,j,nxr-nbgp_local+1+i) 214 ar(k,j,nxr+1+i) = ar(k,j,nxl+i) 215 ENDDO 216 ENDDO 217 ENDDO 218 !$acc end kernels 219 ELSE 220 ar(:,:,nxl-nbgp_local:nxl-1) = ar(:,:,nxr-nbgp_local+1:nxr) 221 ar(:,:,nxr+1:nxr+nbgp_local) = ar(:,:,nxl:nxl+nbgp_local-1) 222 ENDIF 205 223 ENDIF 206 224 207 225 IF ( bc_ns == 'cyclic' ) THEN 208 ar(:,nys-nbgp_local:nys-1,:) = ar(:,nyn-nbgp_local+1:nyn,:) 209 ar(:,nyn+1:nyn+nbgp_local,:) = ar(:,nys:nys+nbgp_local-1,:) 226 IF ( on_device ) THEN 227 !$acc kernels present( ar ) 228 !$acc loop 229 DO i = nxl-nbgp_local, nxr+nbgp_local 230 !$acc loop independent 231 DO j = 0, nbgp_local-1 232 !$acc loop vector( 32 ) 233 DO k = nzb, nzt+1 234 ar(k,nys-nbgp_local+j,i) = ar(k,nyn-nbgp_local+1+j,i) 235 ar(k,nyn+1+j,i) = ar(k,nys+j,i) 236 ENDDO 237 ENDDO 238 ENDDO 239 !$acc end kernels 240 ELSE 241 ar(:,nys-nbgp_local:nys-1,:) = ar(:,nyn-nbgp_local+1:nyn,:) 242 ar(:,nyn+1:nyn+nbgp_local,:) = ar(:,nys:nys+nbgp_local-1,:) 243 ENDIF 210 244 ENDIF 211 245 -
palm/trunk/SOURCE/init_3d_model.f90
r1112 r1113 23 23 ! Current revisions: 24 24 ! ------------------ 25 ! 25 ! openACC directive modified 26 26 ! 27 27 ! Former revisions: … … 1435 1435 CALL disturb_field( nzb_v_inner, tend, v ) 1436 1436 n_sor = nsor_ini 1437 !$acc data copy( d, ddz w, nzb_s_inner, tric, u, v, w, tend )1437 !$acc data copy( d, ddzu, ddzw, nzb_s_inner, nzb_u_inner, nzb_v_inner, nzb_w_inner, p, tric, u, v, w, weight_pres, weight_substep, tend ) 1438 1438 CALL pres 1439 1439 !$acc end data -
palm/trunk/SOURCE/modules.f90
r1112 r1113 20 20 ! Current revisions: 21 21 ! ------------------ 22 ! 22 ! +on_device 23 23 ! 24 24 ! Former revisions: … … 752 752 masking_method = .FALSE., mg_switch_to_pe0 = .FALSE., & 753 753 netcdf_output = .FALSE., neutral = .FALSE., ocean = .FALSE., & 754 on_device = .FALSE., & 754 755 outflow_l = .FALSE., outflow_n = .FALSE., outflow_r = .FALSE., & 755 756 outflow_s = .FALSE., passive_scalar = .FALSE., & -
palm/trunk/SOURCE/palm.f90
r1112 r1113 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! openACC statements modified 23 23 ! 24 24 ! Former revisions: … … 237 237 !-- Declare and initialize variables in the accelerator memory with their 238 238 !-- host values 239 !$acc data copyin( d, diss, e, e_p, kh, km, p t, pt_p, q, ql, tend, te_m, tpt_m, tu_m, tv_m, tw_m, u, u_p, v, vpt, v_p, w, w_p ) &240 !$acc copyin( tric, d dzu, ddzw, dd2zu, l_grid, l_wall, ptdf_x, ptdf_y, pt_init, rdf, rdf_sc, ug, vg, zu, zw ) &239 !$acc data copyin( d, diss, e, e_p, kh, km, p, pt, pt_p, q, ql, tend, te_m, tpt_m, tu_m, tv_m, tw_m, u, u_p, v, vpt, v_p, w, w_p ) & 240 !$acc copyin( tric, dzu, ddzu, ddzw, dd2zu, l_grid, l_wall, ptdf_x, ptdf_y, pt_init, rdf, rdf_sc, ug, u_init, vg, v_init, zu, zw ) & 241 241 !$acc copyin( hom, qs, qsws, qswst, rif, rif_wall, shf, ts, tswst, us, usws, uswst, vsws, vswst, z0, z0h ) & 242 242 !$acc copyin( fxm, fxp, fym, fyp, fwxm, fwxp, fwym, fwyp, nzb_diff_s_inner, nzb_diff_s_outer, nzb_diff_u ) & 243 243 !$acc copyin( nzb_diff_v, nzb_s_inner, nzb_s_outer, nzb_u_inner ) & 244 244 !$acc copyin( nzb_u_outer, nzb_v_inner, nzb_v_outer, nzb_w_inner ) & 245 !$acc copyin( nzb_w_outer, wall_heatflux, wall_e_x, wall_e_y, wall_u, wall_v, wall_w_x, wall_w_y, wall_flags_0 ) 245 !$acc copyin( nzb_w_outer, wall_heatflux, wall_e_x, wall_e_y, wall_u, wall_v, wall_w_x, wall_w_y, wall_flags_0 ) & 246 !$acc copyin( weight_pres, weight_substep ) 246 247 ! 247 248 !-- Integration of the model equations using timestep-scheme -
palm/trunk/SOURCE/pres.f90
r1112 r1113 20 20 ! Current revisions: 21 21 ! ----------------- 22 ! 22 ! GPU-porting of several loops, some loops rearranged 23 23 ! 24 24 ! Former revisions: … … 403 403 !-- Solver for 2d-decomposition 404 404 CALL poisfft( d, tend ) 405 !$acc update host( d ) 405 406 406 ELSEIF ( psolver == 'poisfft_hybrid' ) THEN 407 407 ! … … 410 410 !-- are some optimization problems in poisfft 411 411 CALL poisfft_hybrid( d ) 412 412 413 ENDIF 413 414 … … 416 417 !-- z-direction 417 418 !$OMP PARALLEL DO 419 !$acc kernels present( d, tend ) 420 !$acc loop 418 421 DO i = nxl, nxr 419 422 DO j = nys, nyn 423 !$acc loop vector( 32 ) 420 424 DO k = nzb+1, nzt 421 425 tend(k,j,i) = d(k,j,i) … … 423 427 ENDDO 424 428 ENDDO 429 !$acc end kernels 425 430 426 431 ! … … 432 437 !-- Neumann (dp/dz = 0) 433 438 !$OMP PARALLEL DO 439 !$acc kernels present( nzb_s_inner, tend ) 434 440 DO i = nxlg, nxrg 435 441 DO j = nysg, nyng … … 437 443 ENDDO 438 444 ENDDO 445 !$acc end kernels 439 446 440 447 ELSE … … 442 449 !-- Dirichlet 443 450 !$OMP PARALLEL DO 451 !$acc kernels present( tend ) 444 452 DO i = nxlg, nxrg 445 453 DO j = nysg, nyng … … 447 455 ENDDO 448 456 ENDDO 457 !$acc end kernels 449 458 450 459 ENDIF … … 456 465 !-- Neumann 457 466 !$OMP PARALLEL DO 467 !$acc kernels present( tend ) 458 468 DO i = nxlg, nxrg 459 469 DO j = nysg, nyng … … 461 471 ENDDO 462 472 ENDDO 473 !$acc end kernels 463 474 464 475 ELSE … … 466 477 !-- Dirichlet 467 478 !$OMP PARALLEL DO 479 !$acc kernels present( tend ) 468 480 DO i = nxlg, nxrg 469 481 DO j = nysg, nyng … … 471 483 ENDDO 472 484 ENDDO 485 !$acc end kernels 473 486 474 487 ENDIF … … 476 489 ! 477 490 !-- Exchange boundaries for p 491 IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs 492 on_device = .TRUE. ! to be removed after complete porting 493 ELSE ! of ghost point exchange 494 !$acc update host( tend ) 495 ENDIF 478 496 CALL exchange_horiz( tend, nbgp ) 497 IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs 498 on_device = .FALSE. ! to be removed after complete porting 499 ELSE ! of ghost point exchange 500 !$acc update device( tend ) 501 ENDIF 479 502 480 503 ELSEIF ( psolver == 'sor' ) THEN … … 531 554 !$OMP PARALLEL PRIVATE (i,j,k) 532 555 !$OMP DO 556 !$acc kernels present( p, tend, weight_substep ) 557 !$acc loop 533 558 DO i = nxl-1, nxr+1 534 559 DO j = nys-1, nyn+1 560 !$acc loop vector( 32 ) 535 561 DO k = nzb, nzt+1 536 562 p(k,j,i) = tend(k,j,i) * & … … 539 565 ENDDO 540 566 ENDDO 567 !$acc end kernels 541 568 !$OMP END PARALLEL 542 569 … … 544 571 !$OMP PARALLEL PRIVATE (i,j,k) 545 572 !$OMP DO 573 !$acc kernels present( p, tend, weight_substep ) 574 !$acc loop 546 575 DO i = nxl-1, nxr+1 547 576 DO j = nys-1, nyn+1 577 !$acc loop vector( 32 ) 548 578 DO k = nzb, nzt+1 549 579 p(k,j,i) = p(k,j,i) + tend(k,j,i) * & … … 552 582 ENDDO 553 583 ENDDO 584 !$acc end kernels 554 585 !$OMP END PARALLEL 555 586 … … 571 602 !$OMP PARALLEL PRIVATE (i,j,k) 572 603 !$OMP DO 604 !$acc kernels present( ddzu, nzb_u_inner, nzb_v_inner, nzb_w_inner, tend, u, v, w, weight_pres ) 605 !$acc loop 573 606 DO i = nxl, nxr 574 607 DO j = nys, nyn 575 DO k = nzb_w_inner(j,i)+1, nzt 576 w(k,j,i) = w(k,j,i) - dt_3d * & 577 ( tend(k+1,j,i) - tend(k,j,i) ) * ddzu(k+1) * & 578 weight_pres(intermediate_timestep_count) 579 ENDDO 580 DO k = nzb_u_inner(j,i)+1, nzt 581 u(k,j,i) = u(k,j,i) - dt_3d * & 582 ( tend(k,j,i) - tend(k,j,i-1) ) * ddx * & 583 weight_pres(intermediate_timestep_count) 584 ENDDO 585 DO k = nzb_v_inner(j,i)+1, nzt 586 v(k,j,i) = v(k,j,i) - dt_3d * & 587 ( tend(k,j,i) - tend(k,j-1,i) ) * ddy * & 588 weight_pres(intermediate_timestep_count) 608 !$acc loop vector( 32 ) 609 DO k = 1, nzt 610 IF ( k > nzb_w_inner(j,i) ) THEN 611 w(k,j,i) = w(k,j,i) - dt_3d * & 612 ( tend(k+1,j,i) - tend(k,j,i) ) * ddzu(k+1) * & 613 weight_pres(intermediate_timestep_count) 614 ENDIF 615 ENDDO 616 !$acc loop vector( 32 ) 617 DO k = 1, nzt 618 IF ( k > nzb_u_inner(j,i) ) THEN 619 u(k,j,i) = u(k,j,i) - dt_3d * & 620 ( tend(k,j,i) - tend(k,j,i-1) ) * ddx * & 621 weight_pres(intermediate_timestep_count) 622 ENDIF 623 ENDDO 624 !$acc loop vector( 32 ) 625 DO k = 1, nzt 626 IF ( k > nzb_v_inner(j,i) ) THEN 627 v(k,j,i) = v(k,j,i) - dt_3d * & 628 ( tend(k,j,i) - tend(k,j-1,i) ) * ddy * & 629 weight_pres(intermediate_timestep_count) 630 ENDIF 589 631 ENDDO 590 !591 !-- Sum up the volume flow through the right and north boundary592 IF ( conserve_volume_flow .AND. bc_lr_cyc .AND. bc_ns_cyc .AND. &593 i == nx ) THEN594 !$OMP CRITICAL595 DO k = nzb_2d(j,i) + 1, nzt596 volume_flow_l(1) = volume_flow_l(1) + u(k,j,i) * dzw(k)597 ENDDO598 !$OMP END CRITICAL599 ENDIF600 IF ( conserve_volume_flow .AND. bc_ns_cyc .AND. bc_lr_cyc .AND. &601 j == ny ) THEN602 !$OMP CRITICAL603 DO k = nzb_2d(j,i) + 1, nzt604 volume_flow_l(2) = volume_flow_l(2) + v(k,j,i) * dzw(k)605 ENDDO606 !$OMP END CRITICAL607 ENDIF608 632 609 633 ENDDO 610 634 ENDDO 635 !$acc end kernels 611 636 !$OMP END PARALLEL 637 638 ! 639 !-- Sum up the volume flow through the right and north boundary 640 IF ( conserve_volume_flow .AND. bc_lr_cyc .AND. bc_ns_cyc .AND. & 641 nxr == nx ) THEN 642 643 !$OMP PARALLEL PRIVATE (j,k) 644 !$OMP DO 645 DO j = nys, nyn 646 !$OMP CRITICAL 647 DO k = nzb_2d(j,nx) + 1, nzt 648 volume_flow_l(1) = volume_flow_l(1) + u(k,j,nx) * dzw(k) 649 ENDDO 650 !$OMP END CRITICAL 651 ENDDO 652 !$OMP END PARALLEL 653 654 ENDIF 655 656 IF ( conserve_volume_flow .AND. bc_ns_cyc .AND. bc_lr_cyc .AND. & 657 nyn == ny ) THEN 658 659 !$OMP PARALLEL PRIVATE (i,k) 660 !$OMP DO 661 DO i = nxl, nxr 662 !$OMP CRITICAL 663 DO k = nzb_2d(ny,i) + 1, nzt 664 volume_flow_l(2) = volume_flow_l(2) + v(k,ny,i) * dzw(k) 665 ENDDO 666 !$OMP END CRITICAL 667 ENDDO 668 !$OMP END PARALLEL 669 670 ENDIF 612 671 613 672 ! … … 645 704 ! 646 705 !-- Exchange of boundaries for the velocities 706 IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs 707 on_device = .TRUE. ! to be removed after complete porting 708 ELSE ! of ghost point exchange 709 !$acc update host( u, v, w ) 710 ENDIF 647 711 CALL exchange_horiz( u, nbgp ) 648 712 CALL exchange_horiz( v, nbgp ) 649 713 CALL exchange_horiz( w, nbgp ) 714 IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs 715 on_device = .FALSE. ! to be removed after complete porting 716 ELSE ! of ghost point exchange 717 !$acc update device( u, v, w ) 718 ENDIF 650 719 651 720 ! … … 679 748 ENDDO 680 749 #else 750 !$acc kernels present( d, ddzw, nzb_s_inner, u, v, w ) 751 !$acc loop 681 752 DO i = nxl, nxr 682 753 DO j = nys, nyn 754 !$acc loop vector( 32 ) 755 DO k = 1, nzt 756 IF ( k > nzb_s_inner(j,i) ) THEN 757 d(k,j,i) = ( u(k,j,i+1) - u(k,j,i) ) * ddx + & 758 ( v(k,j+1,i) - v(k,j,i) ) * ddy + & 759 ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) 760 ENDIF 761 ENDDO 762 ENDDO 763 ENDDO 764 !$acc end kernels 765 ! 766 !-- Compute possible PE-sum of divergences for flow_statistics 767 !$OMP PARALLEL PRIVATE (i,j,k) FIRSTPRIVATE(threadsum) REDUCTION(+:localsum) 768 !$OMP DO SCHEDULE( STATIC ) 769 DO i = nxl, nxr 770 DO j = nys, nyn 683 771 DO k = nzb_s_inner(j,i)+1, nzt 684 d(k,j,i) = ( u(k,j,i+1) - u(k,j,i) ) * ddx + &685 ( v(k,j+1,i) - v(k,j,i) ) * ddy + &686 ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k)687 772 threadsum = threadsum + ABS( d(k,j,i) ) 688 773 ENDDO … … 701 786 CALL cpu_log( log_point_s(1), 'divergence', 'stop' ) 702 787 703 !$acc update device( u, v, w )704 705 788 CALL cpu_log( log_point(8), 'pres', 'stop' ) 706 707 789 708 790 -
palm/trunk/SOURCE/time_integration.f90
r1112 r1113 20 20 ! Current revisions: 21 21 ! ------------------ 22 ! 22 ! GPU-porting of boundary conditions, 23 ! openACC directives updated 24 ! formal parameter removed from routine boundary_conds 23 25 ! 24 26 ! Former revisions: … … 242 244 !-- Exchange of ghost points (lateral boundary conditions) 243 245 CALL cpu_log( log_point(26), 'exchange-horiz-progn', 'start' ) 244 !$acc update host( e_p, pt_p, u_p, v_p, w_p ) 246 247 IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs 248 on_device = .TRUE. ! to be removed after complete porting 249 ELSE ! of ghost point exchange 250 !$acc update host( e_p, pt_p, u_p, v_p, w_p ) 251 ENDIF 252 245 253 CALL exchange_horiz( u_p, nbgp ) 246 254 CALL exchange_horiz( v_p, nbgp ) … … 268 276 IF ( wang_kernel .OR. turbulence ) CALL exchange_horiz( diss, nbgp ) 269 277 278 IF ( numprocs == 1 ) THEN ! workaround for single-core GPU runs 279 on_device = .FALSE. ! to be removed after complete porting 280 ELSE ! of ghost point exchange 281 !$acc update device( e_p, pt_p, u_p, v_p, w_p ) 282 ENDIF 283 270 284 CALL cpu_log( log_point(26), 'exchange-horiz-progn', 'stop' ) 271 285 … … 273 287 !-- Boundary conditions for the prognostic quantities (except of the 274 288 !-- velocities at the outflow in case of a non-cyclic lateral wall) 275 CALL boundary_conds ( 'main' )289 CALL boundary_conds 276 290 277 291 ! 278 292 !-- Swap the time levels in preparation for the next time step. 279 !$acc update device( e_p, pt_p, u_p, v_p, w_p )280 293 CALL swap_timelevel 281 294 … … 305 318 IF ( time_disturb >= dt_disturb ) THEN 306 319 !$acc update host( u, v ) 320 IF ( numprocs == 1 ) on_device = .FALSE. ! workaround, remove later 307 321 IF ( hom(nzb+5,1,pr_palm,0) < disturbance_energy_limit ) THEN 308 322 CALL disturb_field( nzb_u_inner, tend, u ) … … 317 331 dist_range = 0 318 332 ENDIF 333 IF ( numprocs == 1 ) on_device = .TRUE. ! workaround, remove later 319 334 !$acc update device( u, v ) 320 335 time_disturb = time_disturb - dt_disturb
Note: See TracChangeset
for help on using the changeset viewer.