Changeset 1015 for palm/trunk/SOURCE
- Timestamp:
- Sep 27, 2012 9:23:24 AM (12 years ago)
- Location:
- palm/trunk/SOURCE
- Files:
-
- 25 edited
Legend:
- Unmodified
- Added
- Removed
-
palm/trunk/SOURCE/advec_ws.f90
r1011 r1015 4 4 ! Current revisions: 5 5 ! ------------------ 6 ! 6 ! accelerator versions (*_acc) added 7 7 ! 8 8 ! Former revisions: … … 88 88 89 89 PRIVATE 90 PUBLIC advec_s_ws, advec_u_ws, advec_v_ws, advec_w_ws, & 90 PUBLIC advec_s_ws, advec_s_ws_acc, advec_u_ws, advec_u_ws_acc, & 91 advec_v_ws, advec_v_ws_acc, advec_w_ws, advec_w_ws_acc, & 91 92 ws_init, ws_statistics 92 93 … … 109 110 END INTERFACE advec_u_ws 110 111 112 INTERFACE advec_u_ws_acc 113 MODULE PROCEDURE advec_u_ws_acc 114 END INTERFACE advec_u_ws_acc 115 111 116 INTERFACE advec_v_ws 112 117 MODULE PROCEDURE advec_v_ws … … 114 119 END INTERFACE advec_v_ws 115 120 121 INTERFACE advec_v_ws_acc 122 MODULE PROCEDURE advec_v_ws_acc 123 END INTERFACE advec_v_ws_acc 124 116 125 INTERFACE advec_w_ws 117 126 MODULE PROCEDURE advec_w_ws 118 127 MODULE PROCEDURE advec_w_ws_ij 119 128 END INTERFACE advec_w_ws 129 130 INTERFACE advec_w_ws_acc 131 MODULE PROCEDURE advec_w_ws_acc 132 END INTERFACE advec_w_ws_acc 120 133 121 134 CONTAINS … … 2335 2348 2336 2349 !------------------------------------------------------------------------------! 2350 ! Scalar advection - Call for all grid points - accelerator version 2351 !------------------------------------------------------------------------------! 2352 SUBROUTINE advec_s_ws_acc ( sk, sk_char ) 2353 2354 USE arrays_3d 2355 USE constants 2356 USE control_parameters 2357 USE grid_variables 2358 USE indices 2359 USE statistics 2360 2361 IMPLICIT NONE 2362 2363 CHARACTER (LEN = *), INTENT(IN) :: sk_char 2364 2365 INTEGER :: i, ibit0, ibit1, ibit2, ibit3, ibit4, ibit5, ibit6, & 2366 ibit7, ibit8, j, k, k_mm, k_mmm, k_pp, k_ppp, tn = 0 2367 2368 REAL :: diss_d, diss_l, diss_n, diss_r, diss_s, diss_t, div, flux_d, & 2369 flux_l, flux_n, flux_r, flux_s, flux_t, u_comp, v_comp 2370 2371 REAL, INTENT(IN), DIMENSION(nzb:nzt+1,nysg:nyng,nxlg:nxrg) :: sk 2372 2373 ! 2374 !-- Computation of fluxes and tendency terms 2375 !$acc kernels present( ddzw, sk, tend, u, v, w, wall_flags_0 ) 2376 !$acc loop 2377 DO i = nxl, nxr 2378 DO j = nys, nyn 2379 !$acc loop vector( 32 ) 2380 DO k = nzb+1, nzt 2381 2382 ibit2 = IBITS(wall_flags_0(k,j,i),2,1) 2383 ibit1 = IBITS(wall_flags_0(k,j,i),1,1) 2384 ibit0 = IBITS(wall_flags_0(k,j,i),0,1) 2385 2386 u_comp = u(k,j,i) - u_gtrans 2387 flux_l = u_comp * ( & 2388 ( 37.0 * ibit2 * adv_sca_5 & 2389 + 7.0 * ibit1 * adv_sca_3 & 2390 + ibit0 * adv_sca_1 & 2391 ) * & 2392 ( sk(k,j,i) + sk(k,j,i-1) ) & 2393 - ( 8.0 * ibit2 * adv_sca_5 & 2394 + ibit1 * adv_sca_3 & 2395 ) * & 2396 ( sk(k,j,i+1) + sk(k,j,i-2) ) & 2397 + ( ibit2 * adv_sca_5 & 2398 ) * & 2399 ( sk(k,j,i+2) + sk(k,j,i-3) ) & 2400 ) 2401 2402 diss_l = -ABS( u_comp ) * ( & 2403 ( 10.0 * ibit2 * adv_sca_5 & 2404 + 3.0 * ibit1 * adv_sca_3 & 2405 + ibit0 * adv_sca_1 & 2406 ) * & 2407 ( sk(k,j,i) - sk(k,j,i-1) ) & 2408 - ( 5.0 * ibit2 * adv_sca_5 & 2409 + ibit1 * adv_sca_3 & 2410 ) * & 2411 ( sk(k,j,i+1) - sk(k,j,i-2) ) & 2412 + ( ibit2 * adv_sca_5 & 2413 ) * & 2414 ( sk(k,j,i+2) - sk(k,j,i-3) ) & 2415 ) 2416 2417 u_comp = u(k,j,i+1) - u_gtrans 2418 flux_r = u_comp * ( & 2419 ( 37.0 * ibit2 * adv_sca_5 & 2420 + 7.0 * ibit1 * adv_sca_3 & 2421 + ibit0 * adv_sca_1 & 2422 ) * & 2423 ( sk(k,j,i+1) + sk(k,j,i) ) & 2424 - ( 8.0 * ibit2 * adv_sca_5 & 2425 + ibit1 * adv_sca_3 & 2426 ) * & 2427 ( sk(k,j,i+2) + sk(k,j,i-1) ) & 2428 + ( ibit2 * adv_sca_5 & 2429 ) * & 2430 ( sk(k,j,i+3) + sk(k,j,i-2) ) & 2431 ) 2432 2433 diss_r = -ABS( u_comp ) * ( & 2434 ( 10.0 * ibit2 * adv_sca_5 & 2435 + 3.0 * ibit1 * adv_sca_3 & 2436 + ibit0 * adv_sca_1 & 2437 ) * & 2438 ( sk(k,j,i+1) - sk(k,j,i) ) & 2439 - ( 5.0 * ibit2 * adv_sca_5 & 2440 + ibit1 * adv_sca_3 & 2441 ) * & 2442 ( sk(k,j,i+2) - sk(k,j,i-1) ) & 2443 + ( ibit2 * adv_sca_5 & 2444 ) * & 2445 ( sk(k,j,i+3) - sk(k,j,i-2) ) & 2446 ) 2447 2448 ibit5 = IBITS(wall_flags_0(k,j,i),5,1) 2449 ibit4 = IBITS(wall_flags_0(k,j,i),4,1) 2450 ibit3 = IBITS(wall_flags_0(k,j,i),3,1) 2451 2452 v_comp = v(k,j,i) - v_gtrans 2453 flux_s = v_comp * ( & 2454 ( 37.0 * ibit5 * adv_sca_5 & 2455 + 7.0 * ibit4 * adv_sca_3 & 2456 + ibit3 * adv_sca_1 & 2457 ) * & 2458 ( sk(k,j,i) + sk(k,j-1,i) ) & 2459 - ( 8.0 * ibit5 * adv_sca_5 & 2460 + ibit4 * adv_sca_3 & 2461 ) * & 2462 ( sk(k,j+1,i) + sk(k,j-2,i) ) & 2463 + ( ibit5 * adv_sca_5 & 2464 ) * & 2465 ( sk(k,j+2,i) + sk(k,j-3,i) ) & 2466 ) 2467 2468 diss_s = -ABS( v_comp ) * ( & 2469 ( 10.0 * ibit5 * adv_sca_5 & 2470 + 3.0 * ibit4 * adv_sca_3 & 2471 + ibit3 * adv_sca_1 & 2472 ) * & 2473 ( sk(k,j,i) - sk(k,j-1,i) ) & 2474 - ( 5.0 * ibit5 * adv_sca_5 & 2475 + ibit4 * adv_sca_3 & 2476 ) * & 2477 ( sk(k,j+1,i) - sk(k,j-2,i) ) & 2478 + ( ibit5 * adv_sca_5 & 2479 ) * & 2480 ( sk(k,j+2,i) - sk(k,j-3,i) ) & 2481 ) 2482 2483 2484 v_comp = v(k,j+1,i) - v_gtrans 2485 flux_n = v_comp * ( & 2486 ( 37.0 * ibit5 * adv_sca_5 & 2487 + 7.0 * ibit4 * adv_sca_3 & 2488 + ibit3 * adv_sca_1 & 2489 ) * & 2490 ( sk(k,j+1,i) + sk(k,j,i) ) & 2491 - ( 8.0 * ibit5 * adv_sca_5 & 2492 + ibit4 * adv_sca_3 & 2493 ) * & 2494 ( sk(k,j+2,i) + sk(k,j-1,i) ) & 2495 + ( ibit5 * adv_sca_5 & 2496 ) * & 2497 ( sk(k,j+3,i) + sk(k,j-2,i) ) & 2498 ) 2499 2500 diss_n = -ABS( v_comp ) * ( & 2501 ( 10.0 * ibit5 * adv_sca_5 & 2502 + 3.0 * ibit4 * adv_sca_3 & 2503 + ibit3 * adv_sca_1 & 2504 ) * & 2505 ( sk(k,j+1,i) - sk(k,j,i) ) & 2506 - ( 5.0 * ibit5 * adv_sca_5 & 2507 + ibit4 * adv_sca_3 & 2508 ) * & 2509 ( sk(k,j+2,i) - sk(k,j-1,i) ) & 2510 + ( ibit5 * adv_sca_5 & 2511 ) * & 2512 ( sk(k,j+3,i) - sk(k,j-2,i) ) & 2513 ) 2514 2515 ! 2516 !-- indizes k_m, k_mm, ... should be known at these point 2517 ibit8 = IBITS(wall_flags_0(k-1,j,i),8,1) 2518 ibit7 = IBITS(wall_flags_0(k-1,j,i),7,1) 2519 ibit6 = IBITS(wall_flags_0(k-1,j,i),6,1) 2520 2521 k_pp = k + 2 * ( 1 - ibit6 ) 2522 k_mm = k - 2 * ( 1 - ibit6 ) 2523 k_mmm = k - 3 * ibit8 2524 2525 flux_d = w(k-1,j,i) * ( & 2526 ( 37.0 * ibit8 * adv_sca_5 & 2527 + 7.0 * ibit7 * adv_sca_3 & 2528 + ibit6 * adv_sca_1 & 2529 ) * & 2530 ( sk(k,j,i) + sk(k-1,j,i) ) & 2531 - ( 8.0 * ibit8 * adv_sca_5 & 2532 + ibit7 * adv_sca_3 & 2533 ) * & 2534 ( sk(k+1,j,i) + sk(k_mm,j,i) ) & 2535 + ( ibit8 * adv_sca_5 & 2536 ) * ( sk(k_pp,j,i)+ sk(k_mmm,j,i) ) & 2537 ) 2538 2539 diss_d = -ABS( w(k-1,j,i) ) * ( & 2540 ( 10.0 * ibit8 * adv_sca_5 & 2541 + 3.0 * ibit7 * adv_sca_3 & 2542 + ibit6 * adv_sca_1 & 2543 ) * & 2544 ( sk(k,j,i) - sk(k-1,j,i) ) & 2545 - ( 5.0 * ibit8 * adv_sca_5 & 2546 + ibit7 * adv_sca_3 & 2547 ) * & 2548 ( sk(k+1,j,i) - sk(k_mm,j,i) ) & 2549 + ( ibit8 * adv_sca_5 & 2550 ) * & 2551 ( sk(k_pp,j,i) - sk(k_mmm,j,i) ) & 2552 ) 2553 2554 ibit8 = IBITS(wall_flags_0(k,j,i),8,1) 2555 ibit7 = IBITS(wall_flags_0(k,j,i),7,1) 2556 ibit6 = IBITS(wall_flags_0(k,j,i),6,1) 2557 2558 k_ppp = k + 3 * ibit8 2559 k_pp = k + 2 * ( 1 - ibit6 ) 2560 k_mm = k - 2 * ibit8 2561 2562 flux_t = w(k,j,i) * ( & 2563 ( 37.0 * ibit8 * adv_sca_5 & 2564 + 7.0 * ibit7 * adv_sca_3 & 2565 + ibit6 * adv_sca_1 & 2566 ) * & 2567 ( sk(k+1,j,i) + sk(k,j,i) ) & 2568 - ( 8.0 * ibit8 * adv_sca_5 & 2569 + ibit7 * adv_sca_3 & 2570 ) * & 2571 ( sk(k_pp,j,i) + sk(k-1,j,i) ) & 2572 + ( ibit8 * adv_sca_5 & 2573 ) * ( sk(k_ppp,j,i)+ sk(k_mm,j,i) ) & 2574 ) 2575 2576 diss_t = -ABS( w(k,j,i) ) * ( & 2577 ( 10.0 * ibit8 * adv_sca_5 & 2578 + 3.0 * ibit7 * adv_sca_3 & 2579 + ibit6 * adv_sca_1 & 2580 ) * & 2581 ( sk(k+1,j,i) - sk(k,j,i) ) & 2582 - ( 5.0 * ibit8 * adv_sca_5 & 2583 + ibit7 * adv_sca_3 & 2584 ) * & 2585 ( sk(k_pp,j,i) - sk(k-1,j,i) ) & 2586 + ( ibit8 * adv_sca_5 & 2587 ) * & 2588 ( sk(k_ppp,j,i) - sk(k_mm,j,i) ) & 2589 ) 2590 ! 2591 !-- Calculate the divergence of the velocity field. A respective 2592 !-- correction is needed to overcome numerical instabilities caused 2593 !-- by a not sufficient reduction of divergences near topography. 2594 div = ( u(k,j,i+1) - u(k,j,i) ) * ddx & 2595 + ( v(k,j+1,i) - v(k,j,i) ) * ddy & 2596 + ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) 2597 2598 tend(k,j,i) = - ( & 2599 ( flux_r + diss_r - flux_l - diss_l ) * ddx & 2600 + ( flux_n + diss_n - flux_s - diss_s ) * ddy & 2601 + ( flux_t + diss_t - flux_d - diss_d ) * ddzw(k)& 2602 ) + div * sk(k,j,i) 2603 2604 !++ 2605 !-- Evaluation of statistics 2606 ! SELECT CASE ( sk_char ) 2607 ! 2608 ! CASE ( 'pt' ) 2609 ! sums_wspts_ws_l(k,tn) = sums_wspts_ws_l(k,tn) & 2610 ! + ( flux_t + diss_t ) & 2611 ! * weight_substep(intermediate_timestep_count) 2612 ! CASE ( 'sa' ) 2613 ! sums_wssas_ws_l(k,tn) = sums_wssas_ws_l(k,tn) & 2614 ! + ( flux_t + diss_t ) & 2615 ! * weight_substep(intermediate_timestep_count) 2616 ! CASE ( 'q' ) 2617 ! sums_wsqs_ws_l(k,tn) = sums_wsqs_ws_l(k,tn) & 2618 ! + ( flux_t + diss_t ) & 2619 ! * weight_substep(intermediate_timestep_count) 2620 ! 2621 ! END SELECT 2622 2623 ENDDO 2624 ENDDO 2625 ENDDO 2626 !$acc end kernels 2627 2628 END SUBROUTINE advec_s_ws_acc 2629 2630 2631 !------------------------------------------------------------------------------! 2337 2632 ! Advection of u - Call for all grid points 2338 2633 !------------------------------------------------------------------------------! … … 2752 3047 2753 3048 3049 !------------------------------------------------------------------------------! 3050 ! Advection of u - Call for all grid points - accelerator version 3051 !------------------------------------------------------------------------------! 3052 SUBROUTINE advec_u_ws_acc 3053 3054 USE arrays_3d 3055 USE constants 3056 USE control_parameters 3057 USE grid_variables 3058 USE indices 3059 USE statistics 3060 3061 IMPLICIT NONE 3062 3063 INTEGER :: i, ibit9, ibit10, ibit11, ibit12, ibit13, ibit14, ibit15, & 3064 ibit16, ibit17, j, k, k_mmm, k_mm, k_pp, k_ppp, tn = 0 3065 3066 REAL :: diss_d, diss_l, diss_n, diss_r, diss_s, diss_t, div, & 3067 flux_d, flux_l, flux_n, flux_r, flux_s, flux_t, gu, gv, & 3068 u_comp, u_comp_l, v_comp, v_comp_s, w_comp 3069 3070 3071 gu = 2.0 * u_gtrans 3072 gv = 2.0 * v_gtrans 3073 3074 ! 3075 !-- Computation of fluxes and tendency terms 3076 !$acc kernels present( ddzw, tend, u, v, w, wall_flags_0 ) 3077 !$acc loop 3078 DO i = nxlu, nxr 3079 DO j = nys, nyn 3080 !$acc loop vector( 32 ) 3081 DO k = nzb+1, nzt 3082 3083 ibit11 = IBITS(wall_flags_0(k,j,i),11,1) 3084 ibit10 = IBITS(wall_flags_0(k,j,i),10,1) 3085 ibit9 = IBITS(wall_flags_0(k,j,i),9,1) 3086 3087 u_comp_l = u(k,j,i) + u(k,j,i-1) - gu 3088 flux_l = u_comp_l * ( & 3089 ( 37.0 * ibit11 * adv_mom_5 & 3090 + 7.0 * ibit10 * adv_mom_3 & 3091 + ibit9 * adv_mom_1 & 3092 ) * & 3093 ( u(k,j,i) + u(k,j,i-1) ) & 3094 - ( 8.0 * ibit11 * adv_mom_5 & 3095 + ibit10 * adv_mom_3 & 3096 ) * & 3097 ( u(k,j,i+1) + u(k,j,i-2) ) & 3098 + ( ibit11 * adv_mom_5 & 3099 ) * & 3100 ( u(k,j,i+2) + u(k,j,i-3) ) & 3101 ) 3102 3103 diss_l = - ABS( u_comp_l ) * ( & 3104 ( 10.0 * ibit11 * adv_mom_5 & 3105 + 3.0 * ibit10 * adv_mom_3 & 3106 + ibit9 * adv_mom_1 & 3107 ) * & 3108 ( u(k,j,i) - u(k,j,i-1) ) & 3109 - ( 5.0 * ibit11 * adv_mom_5 & 3110 + ibit10 * adv_mom_3 & 3111 ) * & 3112 ( u(k,j,i+1) - u(k,j,i-2) ) & 3113 + ( ibit11 * adv_mom_5 & 3114 ) * & 3115 ( u(k,j,i+2) - u(k,j,i-3) ) & 3116 ) 3117 3118 u_comp = u(k,j,i+1) + u(k,j,i) 3119 flux_r = ( u_comp - gu ) * ( & 3120 ( 37.0 * ibit11 * adv_mom_5 & 3121 + 7.0 * ibit10 * adv_mom_3 & 3122 + ibit9 * adv_mom_1 & 3123 ) * & 3124 ( u(k,j,i+1) + u(k,j,i) ) & 3125 - ( 8.0 * ibit11 * adv_mom_5 & 3126 + ibit10 * adv_mom_3 & 3127 ) * & 3128 ( u(k,j,i+2) + u(k,j,i-1) ) & 3129 + ( ibit11 * adv_mom_5 & 3130 ) * & 3131 ( u(k,j,i+3) + u(k,j,i-2) ) & 3132 ) 3133 3134 diss_r = - ABS( u_comp - gu ) * ( & 3135 ( 10.0 * ibit11 * adv_mom_5 & 3136 + 3.0 * ibit10 * adv_mom_3 & 3137 + ibit9 * adv_mom_1 & 3138 ) * & 3139 ( u(k,j,i+1) - u(k,j,i) ) & 3140 - ( 5.0 * ibit11 * adv_mom_5 & 3141 + ibit10 * adv_mom_3 & 3142 ) * & 3143 ( u(k,j,i+2) - u(k,j,i-1) ) & 3144 + ( ibit11 * adv_mom_5 & 3145 ) * & 3146 ( u(k,j,i+3) - u(k,j,i-2) ) & 3147 ) 3148 3149 ibit14 = IBITS(wall_flags_0(k,j,i),14,1) 3150 ibit13 = IBITS(wall_flags_0(k,j,i),13,1) 3151 ibit12 = IBITS(wall_flags_0(k,j,i),12,1) 3152 3153 v_comp_s = v(k,j,i) + v(k,j,i-1) - gv 3154 flux_s = v_comp_s * ( & 3155 ( 37.0 * ibit14 * adv_mom_5 & 3156 + 7.0 * ibit13 * adv_mom_3 & 3157 + ibit12 * adv_mom_1 & 3158 ) * & 3159 ( u(k,j,i) + u(k,j-1,i) ) & 3160 - ( 8.0 * ibit14 * adv_mom_5 & 3161 + ibit13 * adv_mom_3 & 3162 ) * & 3163 ( u(k,j+1,i) + u(k,j-2,i) ) & 3164 + ( ibit14 * adv_mom_5 & 3165 ) * & 3166 ( u(k,j+2,i) + u(k,j-3,i) ) & 3167 ) 3168 3169 diss_s = - ABS ( v_comp_s ) * ( & 3170 ( 10.0 * ibit14 * adv_mom_5 & 3171 + 3.0 * ibit13 * adv_mom_3 & 3172 + ibit12 * adv_mom_1 & 3173 ) * & 3174 ( u(k,j,i) - u(k,j-1,i) ) & 3175 - ( 5.0 * ibit14 * adv_mom_5 & 3176 + ibit13 * adv_mom_3 & 3177 ) * & 3178 ( u(k,j+1,i) - u(k,j-2,i) ) & 3179 + ( ibit14 * adv_mom_5 & 3180 ) * & 3181 ( u(k,j+2,i) - u(k,j-3,i) ) & 3182 ) 3183 3184 3185 v_comp = v(k,j+1,i) + v(k,j+1,i-1) - gv 3186 flux_n = v_comp * ( & 3187 ( 37.0 * ibit14 * adv_mom_5 & 3188 + 7.0 * ibit13 * adv_mom_3 & 3189 + ibit12 * adv_mom_1 & 3190 ) * & 3191 ( u(k,j+1,i) + u(k,j,i) ) & 3192 - ( 8.0 * ibit14 * adv_mom_5 & 3193 + ibit13 * adv_mom_3 & 3194 ) * & 3195 ( u(k,j+2,i) + u(k,j-1,i) ) & 3196 + ( ibit14 * adv_mom_5 & 3197 ) * & 3198 ( u(k,j+3,i) + u(k,j-2,i) ) & 3199 ) 3200 3201 diss_n = - ABS ( v_comp ) * ( & 3202 ( 10.0 * ibit14 * adv_mom_5 & 3203 + 3.0 * ibit13 * adv_mom_3 & 3204 + ibit12 * adv_mom_1 & 3205 ) * & 3206 ( u(k,j+1,i) - u(k,j,i) ) & 3207 - ( 5.0 * ibit14 * adv_mom_5 & 3208 + ibit13 * adv_mom_3 & 3209 ) * & 3210 ( u(k,j+2,i) - u(k,j-1,i) ) & 3211 + ( ibit14 * adv_mom_5 & 3212 ) * & 3213 ( u(k,j+3,i) - u(k,j-2,i) ) & 3214 ) 3215 3216 ibit17 = IBITS(wall_flags_0(k-1,j,i),17,1) 3217 ibit16 = IBITS(wall_flags_0(k-1,j,i),16,1) 3218 ibit15 = IBITS(wall_flags_0(k-1,j,i),15,1) 3219 3220 k_pp = k + 2 * ( 1 - ibit15 ) 3221 k_mm = k - 2 * ( 1 - ibit15 ) 3222 k_mmm = k - 3 * ibit17 3223 3224 w_comp = w(k-1,j,i) + w(k-1,j,i-1) 3225 flux_d = w_comp * ( & 3226 ( 37.0 * ibit17 * adv_mom_5 & 3227 + 7.0 * ibit16 * adv_mom_3 & 3228 + ibit15 * adv_mom_1 & 3229 ) * & 3230 ( u(k,j,i) + u(k-1,j,i) ) & 3231 - ( 8.0 * ibit17 * adv_mom_5 & 3232 + ibit16 * adv_mom_3 & 3233 ) * & 3234 ( u(k+1,j,i) + u(k_mm,j,i) ) & 3235 + ( ibit17 * adv_mom_5 & 3236 ) * & 3237 ( u(k_pp,j,i) + u(k_mmm,j,i) ) & 3238 ) 3239 3240 diss_d = - ABS( w_comp ) * ( & 3241 ( 10.0 * ibit17 * adv_mom_5 & 3242 + 3.0 * ibit16 * adv_mom_3 & 3243 + ibit15 * adv_mom_1 & 3244 ) * & 3245 ( u(k,j,i) - u(k-1,j,i) ) & 3246 - ( 5.0 * ibit17 * adv_mom_5 & 3247 + ibit16 * adv_mom_3 & 3248 ) * & 3249 ( u(k+1,j,i) - u(k_mm,j,i) ) & 3250 + ( ibit17 * adv_mom_5 & 3251 ) * & 3252 ( u(k_pp,j,i) - u(k_mmm,j,i) ) & 3253 ) 3254 ! 3255 !-- k index has to be modified near bottom and top, else array 3256 !-- subscripts will be exceeded. 3257 ibit17 = IBITS(wall_flags_0(k,j,i),17,1) 3258 ibit16 = IBITS(wall_flags_0(k,j,i),16,1) 3259 ibit15 = IBITS(wall_flags_0(k,j,i),15,1) 3260 3261 k_ppp = k + 3 * ibit17 3262 k_pp = k + 2 * ( 1 - ibit15 ) 3263 k_mm = k - 2 * ibit17 3264 3265 w_comp = w(k,j,i) + w(k,j,i-1) 3266 flux_t = w_comp * ( & 3267 ( 37.0 * ibit17 * adv_mom_5 & 3268 + 7.0 * ibit16 * adv_mom_3 & 3269 + ibit15 * adv_mom_1 & 3270 ) * & 3271 ( u(k+1,j,i) + u(k,j,i) ) & 3272 - ( 8.0 * ibit17 * adv_mom_5 & 3273 + ibit16 * adv_mom_3 & 3274 ) * & 3275 ( u(k_pp,j,i) + u(k-1,j,i) ) & 3276 + ( ibit17 * adv_mom_5 & 3277 ) * & 3278 ( u(k_ppp,j,i) + u(k_mm,j,i) ) & 3279 ) 3280 3281 diss_t = - ABS( w_comp ) * ( & 3282 ( 10.0 * ibit17 * adv_mom_5 & 3283 + 3.0 * ibit16 * adv_mom_3 & 3284 + ibit15 * adv_mom_1 & 3285 ) * & 3286 ( u(k+1,j,i) - u(k,j,i) ) & 3287 - ( 5.0 * ibit17 * adv_mom_5 & 3288 + ibit16 * adv_mom_3 & 3289 ) * & 3290 ( u(k_pp,j,i) - u(k-1,j,i) ) & 3291 + ( ibit17 * adv_mom_5 & 3292 ) * & 3293 ( u(k_ppp,j,i) - u(k_mm,j,i) ) & 3294 ) 3295 ! 3296 !-- Calculate the divergence of the velocity field. A respective 3297 !-- correction is needed to overcome numerical instabilities caused 3298 !-- by a not sufficient reduction of divergences near topography. 3299 div = ( ( u_comp - ( u(k,j,i) + u(k,j,i-1) ) ) * ddx & 3300 + ( v_comp + gv - ( v(k,j,i) + v(k,j,i-1 ) ) ) * ddy & 3301 + ( w_comp - ( w(k-1,j,i) + w(k-1,j,i-1) ) ) & 3302 * ddzw(k) & 3303 ) * 0.5 3304 3305 tend(k,j,i) = - ( & 3306 ( flux_r + diss_r - flux_l - diss_l ) * ddx & 3307 + ( flux_n + diss_n - flux_s - diss_s ) * ddy & 3308 + ( flux_t + diss_t - flux_d - diss_d ) * ddzw(k) & 3309 ) + div * u(k,j,i) 3310 3311 !++ 3312 !-- Statistical Evaluation of u'u'. The factor has to be applied 3313 !-- for right evaluation when gallilei_trans = .T. . 3314 ! sums_us2_ws_l(k,tn) = sums_us2_ws_l(k,tn) & 3315 ! + ( flux_r * & 3316 ! ( u_comp - 2.0 * hom(k,1,1,0) ) & 3317 ! / ( u_comp - gu + 1.0E-20 ) & 3318 ! + diss_r * & 3319 ! ABS( u_comp - 2.0 * hom(k,1,1,0) ) & 3320 ! / ( ABS( u_comp - gu ) + 1.0E-20 ) ) & 3321 ! * weight_substep(intermediate_timestep_count) 3322 ! 3323 !-- Statistical Evaluation of w'u'. 3324 ! sums_wsus_ws_l(k,tn) = sums_wsus_ws_l(k,tn) & 3325 ! + ( flux_t + diss_t ) & 3326 ! * weight_substep(intermediate_timestep_count) 3327 ENDDO 3328 ENDDO 3329 ENDDO 3330 !$acc end kernels 3331 3332 !++ 3333 ! sums_us2_ws_l(nzb,tn) = sums_us2_ws_l(nzb+1,tn) 3334 3335 END SUBROUTINE advec_u_ws_acc 3336 3337 2754 3338 !------------------------------------------------------------------------------! 2755 3339 ! Advection of v - Call for all grid points … … 3181 3765 3182 3766 !------------------------------------------------------------------------------! 3767 ! Advection of v - Call for all grid points - accelerator version 3768 !------------------------------------------------------------------------------! 3769 SUBROUTINE advec_v_ws_acc 3770 3771 USE arrays_3d 3772 USE constants 3773 USE control_parameters 3774 USE grid_variables 3775 USE indices 3776 USE statistics 3777 3778 IMPLICIT NONE 3779 3780 3781 INTEGER :: i, ibit18, ibit19, ibit20, ibit21, ibit22, ibit23, ibit24, & 3782 ibit25, ibit26, j, k, k_mm, k_mmm, k_pp, k_ppp, tn = 0 3783 3784 REAL :: diss_d, diss_l, diss_n, diss_r, diss_s, diss_t, div, & 3785 flux_d, flux_l, flux_n, flux_r, flux_s, flux_t, gu, gv, & 3786 u_comp, u_comp_l, v_comp, v_comp_s, w_comp 3787 3788 gu = 2.0 * u_gtrans 3789 gv = 2.0 * v_gtrans 3790 3791 ! 3792 !-- Computation of fluxes and tendency terms 3793 !$acc kernels present( ddzw, tend, u, v, w, wall_flags_0 ) 3794 !$acc loop 3795 DO i = nxl, nxr 3796 DO j = nysv, nyn 3797 !$acc loop vector( 32 ) 3798 DO k = nzb+1, nzt 3799 3800 ibit20 = IBITS(wall_flags_0(k,j,i),20,1) 3801 ibit19 = IBITS(wall_flags_0(k,j,i),19,1) 3802 ibit18 = IBITS(wall_flags_0(k,j,i),18,1) 3803 3804 u_comp_l = u(k,j-1,i) + u(k,j,i) - gu 3805 flux_l = u_comp_l * ( & 3806 ( 37.0 * ibit20 * adv_mom_5 & 3807 + 7.0 * ibit19 * adv_mom_3 & 3808 + ibit18 * adv_mom_1 & 3809 ) * & 3810 ( v(k,j,i) + v(k,j,i-1) ) & 3811 - ( 8.0 * ibit20 * adv_mom_5 & 3812 + ibit19 * adv_mom_3 & 3813 ) * & 3814 ( v(k,j,i+1) + v(k,j,i-2) ) & 3815 + ( ibit20 * adv_mom_5 & 3816 ) * & 3817 ( v(k,j,i+2) + v(k,j,i-3) ) & 3818 ) 3819 3820 diss_l = - ABS( u_comp_l ) * ( & 3821 ( 10.0 * ibit20 * adv_mom_5 & 3822 + 3.0 * ibit19 * adv_mom_3 & 3823 + ibit18 * adv_mom_1 & 3824 ) * & 3825 ( v(k,j,i) - v(k,j,i-1) ) & 3826 - ( 5.0 * ibit20 * adv_mom_5 & 3827 + ibit19 * adv_mom_3 & 3828 ) * & 3829 ( v(k,j,i+1) - v(k,j,i-2) ) & 3830 + ( ibit20 * adv_mom_5 & 3831 ) * & 3832 ( v(k,j,i+2) - v(k,j,i-3) ) & 3833 ) 3834 3835 u_comp = u(k,j-1,i+1) + u(k,j,i+1) - gu 3836 flux_r = u_comp * ( & 3837 ( 37.0 * ibit20 * adv_mom_5 & 3838 + 7.0 * ibit19 * adv_mom_3 & 3839 + ibit18 * adv_mom_1 & 3840 ) * & 3841 ( v(k,j,i+1) + v(k,j,i) ) & 3842 - ( 8.0 * ibit20 * adv_mom_5 & 3843 + ibit19 * adv_mom_3 & 3844 ) * & 3845 ( v(k,j,i+2) + v(k,j,i-1) ) & 3846 + ( ibit20 * adv_mom_5 & 3847 ) * & 3848 ( v(k,j,i+3) + v(k,j,i-2) ) & 3849 ) 3850 3851 diss_r = - ABS( u_comp ) * ( & 3852 ( 10.0 * ibit20 * adv_mom_5 & 3853 + 3.0 * ibit19 * adv_mom_3 & 3854 + ibit18 * adv_mom_1 & 3855 ) * & 3856 ( v(k,j,i+1) - v(k,j,i) ) & 3857 - ( 5.0 * ibit20 * adv_mom_5 & 3858 + ibit19 * adv_mom_3 & 3859 ) * & 3860 ( v(k,j,i+2) - v(k,j,i-1) ) & 3861 + ( ibit20 * adv_mom_5 & 3862 ) * & 3863 ( v(k,j,i+3) - v(k,j,i-2) ) & 3864 ) 3865 3866 ibit23 = IBITS(wall_flags_0(k,j,i),23,1) 3867 ibit22 = IBITS(wall_flags_0(k,j,i),22,1) 3868 ibit21 = IBITS(wall_flags_0(k,j,i),21,1) 3869 3870 3871 v_comp_s = v(k,j,i) + v(k,j-1,i) - gv 3872 flux_s = v_comp_s * ( & 3873 ( 37.0 * ibit23 * adv_mom_5 & 3874 + 7.0 * ibit22 * adv_mom_3 & 3875 + ibit21 * adv_mom_1 & 3876 ) * & 3877 ( v(k,j,i) + v(k,j-1,i) ) & 3878 - ( 8.0 * ibit23 * adv_mom_5 & 3879 + ibit22 * adv_mom_3 & 3880 ) * & 3881 ( v(k,j+1,i) + v(k,j-2,i) ) & 3882 + ( ibit23 * adv_mom_5 & 3883 ) * & 3884 ( v(k,j+2,i) + v(k,j-3,i) ) & 3885 ) 3886 3887 diss_s = - ABS( v_comp_s ) * ( & 3888 ( 10.0 * ibit23 * adv_mom_5 & 3889 + 3.0 * ibit22 * adv_mom_3 & 3890 + ibit21 * adv_mom_1 & 3891 ) * & 3892 ( v(k,j,i) - v(k,j-1,i) ) & 3893 - ( 5.0 * ibit23 * adv_mom_5 & 3894 + ibit22 * adv_mom_3 & 3895 ) * & 3896 ( v(k,j+1,i) - v(k,j-2,i) ) & 3897 + ( ibit23 * adv_mom_5 & 3898 ) * & 3899 ( v(k,j+2,i) - v(k,j-3,i) ) & 3900 ) 3901 3902 v_comp = v(k,j+1,i) + v(k,j,i) 3903 flux_n = ( v_comp - gv ) * ( & 3904 ( 37.0 * ibit23 * adv_mom_5 & 3905 + 7.0 * ibit22 * adv_mom_3 & 3906 + ibit21 * adv_mom_1 & 3907 ) * & 3908 ( v(k,j+1,i) + v(k,j,i) ) & 3909 - ( 8.0 * ibit23 * adv_mom_5 & 3910 + ibit22 * adv_mom_3 & 3911 ) * & 3912 ( v(k,j+2,i) + v(k,j-1,i) ) & 3913 + ( ibit23 * adv_mom_5 & 3914 ) * & 3915 ( v(k,j+3,i) + v(k,j-2,i) ) & 3916 ) 3917 3918 diss_n = - ABS( v_comp - gv ) * ( & 3919 ( 10.0 * ibit23 * adv_mom_5 & 3920 + 3.0 * ibit22 * adv_mom_3 & 3921 + ibit21 * adv_mom_1 & 3922 ) * & 3923 ( v(k,j+1,i) - v(k,j,i) ) & 3924 - ( 5.0 * ibit23 * adv_mom_5 & 3925 + ibit22 * adv_mom_3 & 3926 ) * & 3927 ( v(k,j+2,i) - v(k,j-1,i) ) & 3928 + ( ibit23 * adv_mom_5 & 3929 ) * & 3930 ( v(k,j+3,i) - v(k,j-2,i) ) & 3931 ) 3932 3933 ibit26 = IBITS(wall_flags_0(k-1,j,i),26,1) 3934 ibit25 = IBITS(wall_flags_0(k-1,j,i),25,1) 3935 ibit24 = IBITS(wall_flags_0(k-1,j,i),24,1) 3936 3937 k_pp = k + 2 * ( 1 - ibit24 ) 3938 k_mm = k - 2 * ( 1 - ibit24 ) 3939 k_mmm = k - 3 * ibit26 3940 3941 w_comp = w(k-1,j-1,i) + w(k-1,j,i) 3942 flux_d = w_comp * ( & 3943 ( 37.0 * ibit26 * adv_mom_5 & 3944 + 7.0 * ibit25 * adv_mom_3 & 3945 + ibit24 * adv_mom_1 & 3946 ) * & 3947 ( v(k,j,i) + v(k-1,j,i) ) & 3948 - ( 8.0 * ibit26 * adv_mom_5 & 3949 + ibit25 * adv_mom_3 & 3950 ) * & 3951 ( v(k+1,j,i) + v(k_mm,j,i) ) & 3952 + ( ibit26 * adv_mom_5 & 3953 ) * & 3954 ( v(k_pp,j,i) + v(k_mmm,j,i) ) & 3955 ) 3956 3957 diss_d = - ABS( w_comp ) * ( & 3958 ( 10.0 * ibit26 * adv_mom_5 & 3959 + 3.0 * ibit25 * adv_mom_3 & 3960 + ibit24 * adv_mom_1 & 3961 ) * & 3962 ( v(k,j,i) - v(k-1,j,i) ) & 3963 - ( 5.0 * ibit26 * adv_mom_5 & 3964 + ibit25 * adv_mom_3 & 3965 ) * & 3966 ( v(k+1,j,i) - v(k_mm,j,i) ) & 3967 + ( ibit26 * adv_mom_5 & 3968 ) * & 3969 ( v(k_pp,j,i) - v(k_mmm,j,i) ) & 3970 ) 3971 ! 3972 !-- k index has to be modified near bottom and top, else array 3973 !-- subscripts will be exceeded. 3974 ibit26 = IBITS(wall_flags_0(k,j,i),26,1) 3975 ibit25 = IBITS(wall_flags_0(k,j,i),25,1) 3976 ibit24 = IBITS(wall_flags_0(k,j,i),24,1) 3977 3978 k_ppp = k + 3 * ibit26 3979 k_pp = k + 2 * ( 1 - ibit24 ) 3980 k_mm = k - 2 * ibit26 3981 3982 w_comp = w(k,j-1,i) + w(k,j,i) 3983 flux_t = w_comp * ( & 3984 ( 37.0 * ibit26 * adv_mom_5 & 3985 + 7.0 * ibit25 * adv_mom_3 & 3986 + ibit24 * adv_mom_1 & 3987 ) * & 3988 ( v(k+1,j,i) + v(k,j,i) ) & 3989 - ( 8.0 * ibit26 * adv_mom_5 & 3990 + ibit25 * adv_mom_3 & 3991 ) * & 3992 ( v(k_pp,j,i) + v(k-1,j,i) ) & 3993 + ( ibit26 * adv_mom_5 & 3994 ) * & 3995 ( v(k_ppp,j,i) + v(k_mm,j,i) ) & 3996 ) 3997 3998 diss_t = - ABS( w_comp ) * ( & 3999 ( 10.0 * ibit26 * adv_mom_5 & 4000 + 3.0 * ibit25 * adv_mom_3 & 4001 + ibit24 * adv_mom_1 & 4002 ) * & 4003 ( v(k+1,j,i) - v(k,j,i) ) & 4004 - ( 5.0 * ibit26 * adv_mom_5 & 4005 + ibit25 * adv_mom_3 & 4006 ) * & 4007 ( v(k_pp,j,i) - v(k-1,j,i) ) & 4008 + ( ibit26 * adv_mom_5 & 4009 ) * & 4010 ( v(k_ppp,j,i) - v(k_mm,j,i) ) & 4011 ) 4012 ! 4013 !-- Calculate the divergence of the velocity field. A respective 4014 !-- correction is needed to overcome numerical instabilities caused 4015 !-- by a not sufficient reduction of divergences near topography. 4016 div = ( ( u_comp + gu - ( u(k,j-1,i) + u(k,j,i) ) ) * ddx & 4017 + ( v_comp - ( v(k,j,i) + v(k,j-1,i) ) ) * ddy & 4018 + ( w_comp - ( w(k-1,j-1,i) + w(k-1,j,i) ) & 4019 ) * ddzw(k) & 4020 ) * 0.5 4021 4022 tend(k,j,i) = - ( & 4023 ( flux_r + diss_r - flux_l - diss_l ) * ddx & 4024 + ( flux_n + diss_n - flux_s - diss_s ) * ddy & 4025 + ( flux_t + diss_t - flux_d - diss_d ) * ddzw(k) & 4026 ) + div * v(k,j,i) 4027 4028 4029 !++ 4030 !-- Statistical Evaluation of v'v'. The factor has to be applied 4031 !-- for right evaluation when gallilei_trans = .T. . 4032 ! sums_vs2_ws_l(k,tn) = sums_vs2_ws_l(k,tn) & 4033 ! + ( flux_n & 4034 ! * ( v_comp - 2.0 * hom(k,1,2,0) ) & 4035 ! / ( v_comp - gv + 1.0E-20 ) & 4036 ! + diss_n & 4037 ! * ABS( v_comp - 2.0 * hom(k,1,2,0) ) & 4038 ! / ( ABS( v_comp - gv ) +1.0E-20 ) ) & 4039 ! * weight_substep(intermediate_timestep_count) 4040 ! 4041 !-- Statistical Evaluation of w'v'. 4042 ! sums_wsvs_ws_l(k,tn) = sums_wsvs_ws_l(k,tn) & 4043 ! + ( flux_t + diss_t ) & 4044 ! * weight_substep(intermediate_timestep_count) 4045 4046 ENDDO 4047 ENDDO 4048 ENDDO 4049 !$acc end kernels 4050 4051 !++ 4052 ! sums_vs2_ws_l(nzb,tn) = sums_vs2_ws_l(nzb+1,tn) 4053 4054 END SUBROUTINE advec_v_ws_acc 4055 4056 4057 !------------------------------------------------------------------------------! 3183 4058 ! Advection of w - Call for all grid points 3184 4059 !------------------------------------------------------------------------------! … … 3584 4459 END SUBROUTINE advec_w_ws 3585 4460 4461 4462 !------------------------------------------------------------------------------! 4463 ! Advection of w - Call for all grid points - accelerator version 4464 !------------------------------------------------------------------------------! 4465 SUBROUTINE advec_w_ws_acc 4466 4467 USE arrays_3d 4468 USE constants 4469 USE control_parameters 4470 USE grid_variables 4471 USE indices 4472 USE statistics 4473 4474 IMPLICIT NONE 4475 4476 INTEGER :: i, ibit27, ibit28, ibit29, ibit30, ibit31, ibit32, ibit33, & 4477 ibit34, ibit35, j, k, k_mmm, k_mm, k_pp, k_ppp, tn = 0 4478 4479 REAL :: diss_d, diss_l, diss_n, diss_r, diss_s, diss_t, div, & 4480 flux_d, flux_l, flux_n, flux_r, flux_s, flux_t, gu, gv, & 4481 u_comp, u_comp_l, v_comp, v_comp_s, w_comp 4482 4483 gu = 2.0 * u_gtrans 4484 gv = 2.0 * v_gtrans 4485 4486 ! 4487 !-- Computation of fluxes and tendency terms 4488 !$acc kernels present( ddzu, tend, u, v, w, wall_flags_0 ) 4489 !$acc loop 4490 DO i = nxl, nxr 4491 DO j = nys, nyn 4492 !$acc loop vector( 32 ) 4493 DO k = nzb+1, nzt 4494 4495 ibit29 = IBITS(wall_flags_0(k,j,i),29,1) 4496 ibit28 = IBITS(wall_flags_0(k,j,i),28,1) 4497 ibit27 = IBITS(wall_flags_0(k,j,i),27,1) 4498 4499 4500 u_comp_l = u(k+1,j,i) + u(k,j,i) - gu 4501 flux_l = u_comp_l * ( & 4502 ( 37.0 * ibit29 * adv_mom_5 & 4503 + 7.0 * ibit28 * adv_mom_3 & 4504 + ibit27 * adv_mom_1 & 4505 ) * & 4506 ( w(k,j,i) + w(k,j,i-1) ) & 4507 - ( 8.0 * ibit29 * adv_mom_5 & 4508 + ibit28 * adv_mom_3 & 4509 ) * & 4510 ( w(k,j,i+1) + w(k,j,i-2) ) & 4511 + ( ibit29 * adv_mom_5 & 4512 ) * & 4513 ( w(k,j,i+2) + w(k,j,i-3) ) & 4514 ) 4515 4516 diss_l = - ABS( u_comp_l ) * ( & 4517 ( 10.0 * ibit29 * adv_mom_5 & 4518 + 3.0 * ibit28 * adv_mom_3 & 4519 + ibit27 * adv_mom_1 & 4520 ) * & 4521 ( w(k,j,i) - w(k,j,i-1) ) & 4522 - ( 5.0 * ibit29 * adv_mom_5 & 4523 + ibit28 * adv_mom_3 & 4524 ) * & 4525 ( w(k,j,i+1) - w(k,j,i-2) ) & 4526 + ( ibit29 * adv_mom_5 & 4527 ) * & 4528 ( w(k,j,i+2) - w(k,j,i-3) ) & 4529 ) 4530 4531 u_comp = u(k+1,j,i+1) + u(k,j,i+1) - gu 4532 flux_r = u_comp * ( & 4533 ( 37.0 * ibit29 * adv_mom_5 & 4534 + 7.0 * ibit28 * adv_mom_3 & 4535 + ibit27 * adv_mom_1 & 4536 ) * & 4537 ( w(k,j,i+1) + w(k,j,i) ) & 4538 - ( 8.0 * ibit29 * adv_mom_5 & 4539 + ibit28 * adv_mom_3 & 4540 ) * & 4541 ( w(k,j,i+2) + w(k,j,i-1) ) & 4542 + ( ibit29 * adv_mom_5 & 4543 ) * & 4544 ( w(k,j,i+3) + w(k,j,i-2) ) & 4545 ) 4546 4547 diss_r = - ABS( u_comp ) * ( & 4548 ( 10.0 * ibit29 * adv_mom_5 & 4549 + 3.0 * ibit28 * adv_mom_3 & 4550 + ibit27 * adv_mom_1 & 4551 ) * & 4552 ( w(k,j,i+1) - w(k,j,i) ) & 4553 - ( 5.0 * ibit29 * adv_mom_5 & 4554 + ibit28 * adv_mom_3 & 4555 ) * & 4556 ( w(k,j,i+2) - w(k,j,i-1) ) & 4557 + ( ibit29 * adv_mom_5 & 4558 ) * & 4559 ( w(k,j,i+3) - w(k,j,i-2) ) & 4560 ) 4561 4562 ibit32 = IBITS(wall_flags_0(k,j,i),32,1) 4563 ibit31 = IBITS(wall_flags_0(k,j,i),31,1) 4564 ibit30 = IBITS(wall_flags_0(k,j,i),30,1) 4565 4566 v_comp_s = v(k+1,j,i) + v(k,j,i) - gv 4567 flux_s = v_comp_s * ( & 4568 ( 37.0 * ibit32 * adv_mom_5 & 4569 + 7.0 * ibit31 * adv_mom_3 & 4570 + ibit30 * adv_mom_1 & 4571 ) * & 4572 ( w(k,j,i) + w(k,j-1,i) ) & 4573 - ( 8.0 * ibit32 * adv_mom_5 & 4574 + ibit31 * adv_mom_3 & 4575 ) * & 4576 ( w(k,j+1,i) + w(k,j-2,i) ) & 4577 + ( ibit32 * adv_mom_5 & 4578 ) * & 4579 ( w(k,j+2,i) + w(k,j-3,i) ) & 4580 ) 4581 4582 diss_s = - ABS( v_comp_s ) * ( & 4583 ( 10.0 * ibit32 * adv_mom_5 & 4584 + 3.0 * ibit31 * adv_mom_3 & 4585 + ibit30 * adv_mom_1 & 4586 ) * & 4587 ( w(k,j,i) - w(k,j-1,i) ) & 4588 - ( 5.0 * ibit32 * adv_mom_5 & 4589 + ibit31 * adv_mom_3 & 4590 ) * & 4591 ( w(k,j+1,i) - w(k,j-2,i) ) & 4592 + ( ibit32 * adv_mom_5 & 4593 ) * & 4594 ( w(k,j+2,i) - w(k,j-3,i) ) & 4595 ) 4596 4597 v_comp = v(k+1,j+1,i) + v(k,j+1,i) - gv 4598 flux_n = v_comp * ( & 4599 ( 37.0 * ibit32 * adv_mom_5 & 4600 + 7.0 * ibit31 * adv_mom_3 & 4601 + ibit30 * adv_mom_1 & 4602 ) * & 4603 ( w(k,j+1,i) + w(k,j,i) ) & 4604 - ( 8.0 * ibit32 * adv_mom_5 & 4605 + ibit31 * adv_mom_3 & 4606 ) * & 4607 ( w(k,j+2,i) + w(k,j-1,i) ) & 4608 + ( ibit32 * adv_mom_5 & 4609 ) * & 4610 ( w(k,j+3,i) + w(k,j-2,i) ) & 4611 ) 4612 4613 diss_n = - ABS( v_comp ) * ( & 4614 ( 10.0 * ibit32 * adv_mom_5 & 4615 + 3.0 * ibit31 * adv_mom_3 & 4616 + ibit30 * adv_mom_1 & 4617 ) * & 4618 ( w(k,j+1,i) - w(k,j,i) ) & 4619 - ( 5.0 * ibit32 * adv_mom_5 & 4620 + ibit31 * adv_mom_3 & 4621 ) * & 4622 ( w(k,j+2,i) - w(k,j-1,i) ) & 4623 + ( ibit32 * adv_mom_5 & 4624 ) * & 4625 ( w(k,j+3,i) - w(k,j-2,i) ) & 4626 ) 4627 4628 4629 ibit35 = IBITS(wall_flags_0(k-1,j,i),35,1) 4630 ibit34 = IBITS(wall_flags_0(k-1,j,i),34,1) 4631 ibit33 = IBITS(wall_flags_0(k-1,j,i),33,1) 4632 4633 k_pp = k + 2 * ( 1 - ibit33 ) 4634 k_mm = k - 2 * ( 1 - ibit33 ) 4635 k_mmm = k - 3 * ibit35 4636 4637 w_comp = w(k,j,i) + w(k-1,j,i) 4638 flux_d = w_comp * ( & 4639 ( 37.0 * ibit35 * adv_mom_5 & 4640 + 7.0 * ibit34 * adv_mom_3 & 4641 + ibit33 * adv_mom_1 & 4642 ) * & 4643 ( w(k,j,i) + w(k-1,j,i) ) & 4644 - ( 8.0 * ibit35 * adv_mom_5 & 4645 + ibit34 * adv_mom_3 & 4646 ) * & 4647 ( w(k+1,j,i) + w(k_mm,j,i) ) & 4648 + ( ibit35 * adv_mom_5 & 4649 ) * & 4650 ( w(k_pp,j,i) + w(k_mmm,j,i) ) & 4651 ) 4652 4653 diss_d = - ABS( w_comp ) * ( & 4654 ( 10.0 * ibit35 * adv_mom_5 & 4655 + 3.0 * ibit34 * adv_mom_3 & 4656 + ibit33 * adv_mom_1 & 4657 ) * & 4658 ( w(k,j,i) - w(k-1,j,i) ) & 4659 - ( 5.0 * ibit35 * adv_mom_5 & 4660 + ibit34 * adv_mom_3 & 4661 ) * & 4662 ( w(k+1,j,i) - w(k_mm,j,i) ) & 4663 + ( ibit35 * adv_mom_5 & 4664 ) * & 4665 ( w(k_pp,j,i) - w(k_mmm,j,i) ) & 4666 ) 4667 4668 ! 4669 !-- k index has to be modified near bottom and top, else array 4670 !-- subscripts will be exceeded. 4671 ibit35 = IBITS(wall_flags_0(k,j,i),35,1) 4672 ibit34 = IBITS(wall_flags_0(k,j,i),34,1) 4673 ibit33 = IBITS(wall_flags_0(k,j,i),33,1) 4674 4675 k_ppp = k + 3 * ibit35 4676 k_pp = k + 2 * ( 1 - ibit33 ) 4677 k_mm = k - 2 * ibit35 4678 4679 w_comp = w(k+1,j,i) + w(k,j,i) 4680 flux_t = w_comp * ( & 4681 ( 37.0 * ibit35 * adv_mom_5 & 4682 + 7.0 * ibit34 * adv_mom_3 & 4683 + ibit33 * adv_mom_1 & 4684 ) * & 4685 ( w(k+1,j,i) + w(k,j,i) ) & 4686 - ( 8.0 * ibit35 * adv_mom_5 & 4687 + ibit34 * adv_mom_3 & 4688 ) * & 4689 ( w(k_pp,j,i) + w(k-1,j,i) ) & 4690 + ( ibit35 * adv_mom_5 & 4691 ) * & 4692 ( w(k_ppp,j,i) + w(k_mm,j,i) ) & 4693 ) 4694 4695 diss_t = - ABS( w_comp ) * ( & 4696 ( 10.0 * ibit35 * adv_mom_5 & 4697 + 3.0 * ibit34 * adv_mom_3 & 4698 + ibit33 * adv_mom_1 & 4699 ) * & 4700 ( w(k+1,j,i) - w(k,j,i) ) & 4701 - ( 5.0 * ibit35 * adv_mom_5 & 4702 + ibit34 * adv_mom_3 & 4703 ) * & 4704 ( w(k_pp,j,i) - w(k-1,j,i) ) & 4705 + ( ibit35 * adv_mom_5 & 4706 ) * & 4707 ( w(k_ppp,j,i) - w(k_mm,j,i) ) & 4708 ) 4709 ! 4710 !-- Calculate the divergence of the velocity field. A respective 4711 !-- correction is needed to overcome numerical instabilities caused 4712 !-- by a not sufficient reduction of divergences near topography. 4713 div = ( ( u_comp + gu - ( u(k+1,j,i) + u(k,j,i) ) ) * ddx & 4714 + ( v_comp + gv - ( v(k+1,j,i) + v(k,j,i) ) ) * ddy & 4715 + ( w_comp - ( w(k,j,i) + w(k-1,j,i) ) ) & 4716 * ddzu(k+1) & 4717 ) * 0.5 4718 4719 tend(k,j,i) = - ( & 4720 ( flux_r + diss_r - flux_l - diss_l ) * ddx & 4721 + ( flux_n + diss_n - flux_s - diss_s ) * ddy & 4722 + ( flux_t + diss_t - flux_d - diss_d ) * ddzu(k+1) & 4723 ) + div * w(k,j,i) 4724 4725 4726 !++ 4727 !-- Statistical Evaluation of w'w'. 4728 ! sums_ws2_ws_l(k,tn) = sums_ws2_ws_l(k,tn) & 4729 ! + ( flux_t + diss_t ) & 4730 ! * weight_substep(intermediate_timestep_count) 4731 4732 ENDDO 4733 ENDDO 4734 ENDDO 4735 !$acc end kernels 4736 4737 END SUBROUTINE advec_w_ws_acc 4738 3586 4739 END MODULE advec_ws -
palm/trunk/SOURCE/buoyancy.f90
r1011 r1015 4 4 ! Currrent revisions: 5 5 ! ----------------- 6 ! 6 ! accelerator version (*_acc) added 7 7 ! 8 8 ! Former revisions: … … 55 55 56 56 PRIVATE 57 PUBLIC buoyancy, calc_mean_profile57 PUBLIC buoyancy, buoyancy_acc, calc_mean_profile 58 58 59 59 INTERFACE buoyancy … … 61 61 MODULE PROCEDURE buoyancy_ij 62 62 END INTERFACE buoyancy 63 64 INTERFACE buoyancy_acc 65 MODULE PROCEDURE buoyancy_acc 66 END INTERFACE buoyancy_acc 63 67 64 68 INTERFACE calc_mean_profile … … 164 168 165 169 END SUBROUTINE buoyancy 170 171 172 !------------------------------------------------------------------------------! 173 ! Call for all grid points - accelerator version 174 !------------------------------------------------------------------------------! 175 SUBROUTINE buoyancy_acc( var, var_reference, wind_component, pr ) 176 177 USE arrays_3d 178 USE control_parameters 179 USE indices 180 USE pegrid 181 USE statistics 182 183 IMPLICIT NONE 184 185 INTEGER :: i, j, k, pr, wind_component 186 REAL :: var_reference 187 #if defined( __nopointer ) 188 REAL, DIMENSION(nzb:nzt+1,nysg:nyng,nxlg:nxrg) :: var 189 #else 190 REAL, DIMENSION(:,:,:), POINTER :: var 191 #endif 192 193 194 IF ( .NOT. sloping_surface ) THEN 195 ! 196 !-- Normal case: horizontal surface 197 IF ( use_reference ) THEN 198 DO i = nxl, nxr 199 DO j = nys, nyn 200 DO k = nzb_s_inner(j,i)+1, nzt-1 201 tend(k,j,i) = tend(k,j,i) + atmos_ocean_sign * g * 0.5 * & 202 ( & 203 ( var(k,j,i) - hom(k,1,pr,0) ) / var_reference + & 204 ( var(k+1,j,i) - hom(k+1,1,pr,0) ) / var_reference & 205 ) 206 ENDDO 207 ENDDO 208 ENDDO 209 ELSE 210 !$acc kernels present( nzb_s_inner, hom, tend, var ) 211 !$acc loop 212 DO i = nxl, nxr 213 DO j = nys, nyn 214 !$acc loop vector(32) 215 DO k = 1, nzt-1 216 IF ( k > nzb_s_inner(j,i) ) THEN 217 tend(k,j,i) = tend(k,j,i) + atmos_ocean_sign * g * 0.5 * & 218 ( & 219 ( var(k,j,i) - hom(k,1,pr,0) ) / hom(k,1,pr,0) + & 220 ( var(k+1,j,i) - hom(k+1,1,pr,0) ) / hom(k+1,1,pr,0) & 221 ) 222 ENDIF 223 ENDDO 224 ENDDO 225 ENDDO 226 !$acc end kernels 227 ENDIF 228 229 ELSE 230 ! 231 !-- Buoyancy term for a surface with a slope in x-direction. The equations 232 !-- for both the u and w velocity-component contain proportionate terms. 233 !-- Temperature field at time t=0 serves as environmental temperature. 234 !-- Reference temperature (pt_surface) is the one at the lower left corner 235 !-- of the total domain. 236 IF ( wind_component == 1 ) THEN 237 238 DO i = nxlu, nxr 239 DO j = nys, nyn 240 DO k = nzb_s_inner(j,i)+1, nzt-1 241 tend(k,j,i) = tend(k,j,i) + g * sin_alpha_surface * & 242 0.5 * ( ( pt(k,j,i-1) + pt(k,j,i) ) & 243 - ( pt_slope_ref(k,i-1) + pt_slope_ref(k,i) ) & 244 ) / pt_surface 245 ENDDO 246 ENDDO 247 ENDDO 248 249 ELSEIF ( wind_component == 3 ) THEN 250 251 DO i = nxl, nxr 252 DO j = nys, nyn 253 DO k = nzb_s_inner(j,i)+1, nzt-1 254 tend(k,j,i) = tend(k,j,i) + g * cos_alpha_surface * & 255 0.5 * ( ( pt(k,j,i) + pt(k+1,j,i) ) & 256 - ( pt_slope_ref(k,i) + pt_slope_ref(k+1,i) ) & 257 ) / pt_surface 258 ENDDO 259 ENDDO 260 ENDDO 261 262 ELSE 263 264 WRITE( message_string, * ) 'no term for component "',& 265 wind_component,'"' 266 CALL message( 'buoyancy', 'PA0159', 1, 2, 0, 6, 0 ) 267 268 ENDIF 269 270 ENDIF 271 272 END SUBROUTINE buoyancy_acc 166 273 167 274 -
palm/trunk/SOURCE/check_parameters.f90
r1004 r1015 4 4 ! Current revisions: 5 5 ! ----------------- 6 ! 6 ! acc allowed for loop optimization, 7 ! checks for adjustment of mixing length to the Prandtl mixing length removed 7 8 ! 8 9 ! Former revisions: … … 481 482 ENDIF 482 483 ENDIF 483 IF ( loop_optimization /= 'noopt' .AND. loop_optimization /= 'cache' & 484 .AND. loop_optimization /= 'vector' ) THEN 485 message_string = 'illegal value given for loop_optimization: "' // & 486 TRIM( loop_optimization ) // '"' 487 CALL message( 'check_parameters', 'PA0013', 1, 2, 0, 6, 0 ) 488 ENDIF 484 485 SELECT CASE ( TRIM( loop_optimization ) ) 486 487 CASE ( 'acc', 'cache', 'noopt', 'vector' ) 488 CONTINUE 489 490 CASE DEFAULT 491 message_string = 'illegal value given for loop_optimization: "' // & 492 TRIM( loop_optimization ) // '"' 493 CALL message( 'check_parameters', 'PA0013', 1, 2, 0, 6, 0 ) 494 495 END SELECT 489 496 490 497 ! … … 1394 1401 IF ( bc_e_b == 'neumann' ) THEN 1395 1402 ibc_e_b = 1 1396 IF ( adjust_mixing_length .AND. prandtl_layer ) THEN1397 message_string = 'adjust_mixing_length = TRUE and bc_e_b = "neumann"'1398 CALL message( 'check_parameters', 'PA0055', 0, 1, 0, 6, 0 )1399 ENDIF1400 1403 ELSEIF ( bc_e_b == '(u*)**2+neumann' ) THEN 1401 1404 ibc_e_b = 2 1402 IF ( .NOT. adjust_mixing_length .AND.prandtl_layer ) THEN1403 message_string = 'adjust _mixing_length = FALSE and bc_e_b = "' // &1405 IF ( prandtl_layer ) THEN 1406 message_string = 'adjust mixing length = FALSE and bc_e_b = "' // & 1404 1407 TRIM( bc_e_b ) // '"' 1405 1408 CALL message( 'check_parameters', 'PA0056', 0, 1, 0, 6, 0 ) -
palm/trunk/SOURCE/coriolis.f90
r392 r1015 4 4 ! Current revisions: 5 5 ! ----------------- 6 ! 7 ! 6 ! accelerator version (*_acc) added 8 7 ! 9 8 ! Former revisions: … … 36 35 37 36 PRIVATE 38 PUBLIC coriolis 37 PUBLIC coriolis, coriolis_acc 39 38 40 39 INTERFACE coriolis … … 42 41 MODULE PROCEDURE coriolis_ij 43 42 END INTERFACE coriolis 43 44 INTERFACE coriolis_acc 45 MODULE PROCEDURE coriolis_acc 46 END INTERFACE coriolis_acc 44 47 45 48 CONTAINS … … 116 119 117 120 END SUBROUTINE coriolis 121 122 123 !------------------------------------------------------------------------------! 124 ! Call for all grid points - accelerator version 125 !------------------------------------------------------------------------------! 126 SUBROUTINE coriolis_acc( component ) 127 128 USE arrays_3d 129 USE control_parameters 130 USE indices 131 USE pegrid 132 133 IMPLICIT NONE 134 135 INTEGER :: component, i, j, k 136 137 138 ! 139 !-- Compute Coriolis terms for the three velocity components 140 SELECT CASE ( component ) 141 142 ! 143 !-- u-component 144 CASE ( 1 ) 145 !$acc kernels present( nzb_u_inner, tend, v, vg, w ) 146 !$acc loop 147 DO i = nxlu, nxr 148 DO j = nys, nyn 149 !$acc loop vector( 32 ) 150 DO k = 1, nzt 151 IF ( k > nzb_u_inner(j,i) ) THEN 152 tend(k,j,i) = tend(k,j,i) + f * ( 0.25 * & 153 ( v(k,j,i-1) + v(k,j,i) + v(k,j+1,i-1) + & 154 v(k,j+1,i) ) - vg(k) ) & 155 - fs * ( 0.25 * & 156 ( w(k-1,j,i-1) + w(k-1,j,i) + w(k,j,i-1) & 157 + w(k,j,i) ) & 158 ) 159 ENDIF 160 ENDDO 161 ENDDO 162 ENDDO 163 !$acc end kernels 164 165 ! 166 !-- v-component 167 CASE ( 2 ) 168 !$acc kernels present( nzb_v_inner, tend, u, ug ) 169 !$acc loop 170 DO i = nxl, nxr 171 DO j = nysv, nyn 172 !$acc loop vector( 32 ) 173 DO k = 1, nzt 174 IF ( k > nzb_v_inner(j,i) ) THEN 175 tend(k,j,i) = tend(k,j,i) - f * ( 0.25 * & 176 ( u(k,j-1,i) + u(k,j,i) + u(k,j-1,i+1) + & 177 u(k,j,i+1) ) - ug(k) ) 178 ENDIF 179 ENDDO 180 ENDDO 181 ENDDO 182 !$acc end kernels 183 184 ! 185 !-- w-component 186 CASE ( 3 ) 187 !$acc kernels present( nzb_w_inner, tend, u ) 188 !$acc loop 189 DO i = nxl, nxr 190 DO j = nys, nyn 191 !$acc loop vector( 32 ) 192 DO k = 1, nzt 193 IF ( k > nzb_w_inner(j,i) ) THEN 194 tend(k,j,i) = tend(k,j,i) + fs * 0.25 * & 195 ( u(k,j,i) + u(k+1,j,i) + u(k,j,i+1) + & 196 u(k+1,j,i+1) ) 197 ENDIF 198 ENDDO 199 ENDDO 200 ENDDO 201 !$acc end kernels 202 203 CASE DEFAULT 204 205 WRITE( message_string, * ) ' wrong component: ', component 206 CALL message( 'coriolis', 'PA0173', 1, 2, 0, 6, 0 ) 207 208 END SELECT 209 210 END SUBROUTINE coriolis_acc 118 211 119 212 -
palm/trunk/SOURCE/cpu_statistics.f90
r683 r1015 4 4 ! Current revisions: 5 5 ! ----------------- 6 ! output of handling of ghostpoint exchange6 ! output of accelerator board information 7 7 ! 8 8 ! Former revisions: 9 9 ! ----------------- 10 10 ! $Id$ 11 ! 12 ! 683 2011-02-09 14:25:15Z raasch 13 ! output of handling of ghostpoint exchange 11 14 ! 12 15 ! 622 2010-12-10 08:08:13Z raasch … … 135 138 numprocs * threads_per_task, pdims(1), pdims(2), & 136 139 threads_per_task 140 IF ( num_acc_per_node /= 0 ) WRITE ( 18, 108 ) num_acc_per_node 141 WRITE ( 18, 110 ) 137 142 #else 138 143 WRITE ( 18, 100 ) TRIM( run_description_header ), & 139 144 numprocs * threads_per_task, 1, 1, & 140 145 threads_per_task 146 IF ( num_acc_per_node /= 0 ) WRITE ( 18, 109 ) num_acc_per_node 147 WRITE ( 18, 110 ) 141 148 #endif 142 149 DO … … 276 283 277 284 100 FORMAT (A/11('-')//'CPU measures for ',I5,' PEs (',I5,'(x) * ',I5,'(y', & 278 &') tasks *',I5,' threads):'/ & 279 &'----------------------------------------------------------', & 280 &'------------'//& 281 &'place: mean counts min ', & 282 &' max rms'/ & 283 &' sec. % sec. ', & 284 &' sec. sec.'/ & 285 &'-----------------------------------------------------------', & 286 &'-------------------') 285 &') tasks *',I5,' threads):') 287 286 288 287 101 FORMAT (/'special measures:'/ & … … 296 295 106 FORMAT (/'Exchange of ghostpoints via MPI_ISEND/MPI_IRECV') 297 296 107 FORMAT (//) 297 108 FORMAT ('Accelerator boards per node: ',I2) 298 109 FORMAT ('Accelerator boards: ',I2) 299 110 FORMAT ('----------------------------------------------------------', & 300 &'------------'//& 301 &'place: mean counts min ', & 302 &' max rms'/ & 303 &' sec. % sec. ', & 304 &' sec. sec.'/ & 305 &'-----------------------------------------------------------', & 306 &'-------------------') 298 307 299 308 END SUBROUTINE cpu_statistics -
palm/trunk/SOURCE/diffusion_e.f90
r1011 r1015 4 4 ! Current revisions: 5 5 ! ----------------- 6 ! 6 ! accelerator version (*_acc) added, 7 ! adjustment of mixing length to the Prandtl mixing length at first grid point 8 ! above ground removed 7 9 ! 8 10 ! Former revisions: … … 56 58 57 59 PRIVATE 58 PUBLIC diffusion_e 60 PUBLIC diffusion_e, diffusion_e_acc 59 61 60 62 … … 64 66 END INTERFACE diffusion_e 65 67 68 INTERFACE diffusion_e_acc 69 MODULE PROCEDURE diffusion_e_acc 70 END INTERFACE diffusion_e_acc 71 66 72 CONTAINS 67 73 … … 81 87 82 88 INTEGER :: i, j, k 83 REAL :: dvar_dz, l_stable, phi_m,var_reference89 REAL :: dvar_dz, l_stable, var_reference 84 90 85 91 #if defined( __nopointer ) … … 98 104 DO i = nxl, nxr 99 105 DO j = nys, nyn 100 !101 !-- First, calculate phi-function for eventually adjusting the &102 !-- mixing length to the prandtl mixing length103 IF ( adjust_mixing_length .AND. prandtl_layer ) THEN104 IF ( rif(j,i) >= 0.0 ) THEN105 phi_m = 1.0 + 5.0 * rif(j,i)106 ELSE107 phi_m = 1.0 / SQRT( SQRT( 1.0 - 16.0 * rif(j,i) ) )108 ENDIF109 ENDIF110 111 106 DO k = nzb_s_inner(j,i)+1, nzt 112 107 ! … … 133 128 ll(k,j) = l_grid(k) 134 129 ENDIF 135 IF ( adjust_mixing_length .AND. prandtl_layer ) THEN136 l(k,j) = MIN( l(k,j), kappa * &137 ( zu(k) - zw(nzb_s_inner(j,i)) ) &138 / phi_m )139 ll(k,j) = MIN( ll(k,j), kappa * &140 ( zu(k) - zw(nzb_s_inner(j,i)) ) &141 / phi_m )142 ENDIF143 130 144 131 ENDDO … … 188 175 DO i = nxl, nxr 189 176 DO j = nys, nyn 190 !191 !-- First, calculate phi-function for eventually adjusting the &192 !-- mixing length to the prandtl mixing length193 IF ( adjust_mixing_length .AND. prandtl_layer ) THEN194 IF ( rif(j,i) >= 0.0 ) THEN195 phi_m = 1.0 + 5.0 * rif(j,i)196 ELSE197 phi_m = 1.0 / SQRT( SQRT( 1.0 - 16.0 * rif(j,i) ) )198 ENDIF199 ENDIF200 201 177 DO k = nzb_s_inner(j,i)+1, nzt 202 178 ! … … 223 199 ll(k,j) = l_grid(k) 224 200 ENDIF 225 IF ( adjust_mixing_length .AND. prandtl_layer ) THEN226 l(k,j) = MIN( l(k,j), kappa * &227 ( zu(k) - zw(nzb_s_inner(j,i)) ) &228 / phi_m )229 ll(k,j) = MIN( ll(k,j), kappa * &230 ( zu(k) - zw(nzb_s_inner(j,i)) ) &231 / phi_m )232 ENDIF233 201 234 202 ENDDO … … 290 258 291 259 !------------------------------------------------------------------------------! 292 ! Call for grid point i,j293 !------------------------------------------------------------------------------! 294 SUBROUTINE diffusion_e_ ij( i, j,var, var_reference )260 ! Call for all grid points - accelerator version 261 !------------------------------------------------------------------------------! 262 SUBROUTINE diffusion_e_acc( var, var_reference ) 295 263 296 264 USE arrays_3d … … 303 271 304 272 INTEGER :: i, j, k 305 REAL :: dvar_dz, l_stable, phi_m, var_reference 273 REAL :: dissipation, dvar_dz, l, ll, l_stable, var_reference 274 275 #if defined( __nopointer ) 276 REAL, DIMENSION(nzb:nzt+1,nysg:nyng,nxlg:nxrg) :: var 277 #else 278 REAL, DIMENSION(:,:,:), POINTER :: var 279 #endif 280 281 282 ! 283 !-- This if clause must be outside the k-loop because otherwise 284 !-- runtime errors occur with -C hopt on NEC 285 IF ( use_reference ) THEN 286 STOP '+++ use_reference in diffusion_e not implemented' 287 ! DO i = nxl, nxr 288 ! DO j = nys, nyn 289 ! DO k = nzb_s_inner(j,i)+1, nzt 290 ! 291 !-- Calculate the mixing length (for dissipation) 292 ! dvar_dz = atmos_ocean_sign * & 293 ! ( var(k+1,j,i) - var(k-1,j,i) ) * dd2zu(k) 294 ! IF ( dvar_dz > 0.0 ) THEN 295 ! l_stable = 0.76 * SQRT( e(k,j,i) ) / & 296 ! SQRT( g / var_reference * dvar_dz ) + 1E-5 297 ! ELSE 298 ! l_stable = l_grid(k) 299 ! ENDIF 300 ! 301 !-- Adjustment of the mixing length 302 ! IF ( wall_adjustment ) THEN 303 ! l(k,j) = MIN( wall_adjustment_factor * & 304 ! ( zu(k) - zw(nzb_s_inner(j,i)) ), & 305 ! l_grid(k), l_stable ) 306 ! ll(k,j) = MIN( wall_adjustment_factor * & 307 ! ( zu(k) - zw(nzb_s_inner(j,i)) ), & 308 ! l_grid(k) ) 309 ! ELSE 310 ! l(k,j) = MIN( l_grid(k), l_stable ) 311 ! ll(k,j) = l_grid(k) 312 ! ENDIF 313 ! 314 ! ENDDO 315 ! ENDDO 316 ! 317 ! 318 !-- Calculate the tendency terms 319 ! DO j = nys, nyn 320 ! DO k = nzb_s_inner(j,i)+1, nzt 321 ! 322 ! dissipation(k,j) = ( 0.19 + 0.74 * l(k,j) / ll(k,j) ) * & 323 ! e(k,j,i) * SQRT( e(k,j,i) ) / l(k,j) 324 ! 325 ! tend(k,j,i) = tend(k,j,i) & 326 ! + ( & 327 ! ( km(k,j,i)+km(k,j,i+1) ) * ( e(k,j,i+1)-e(k,j,i) ) & 328 ! - ( km(k,j,i)+km(k,j,i-1) ) * ( e(k,j,i)-e(k,j,i-1) ) & 329 ! ) * ddx2 & 330 ! + ( & 331 ! ( km(k,j,i)+km(k,j+1,i) ) * ( e(k,j+1,i)-e(k,j,i) ) & 332 ! - ( km(k,j,i)+km(k,j-1,i) ) * ( e(k,j,i)-e(k,j-1,i) ) & 333 ! ) * ddy2 & 334 ! + ( & 335 ! ( km(k,j,i)+km(k+1,j,i) ) * ( e(k+1,j,i)-e(k,j,i) ) * ddzu(k+1) & 336 ! - ( km(k,j,i)+km(k-1,j,i) ) * ( e(k,j,i)-e(k-1,j,i) ) * ddzu(k) & 337 ! ) * ddzw(k) & 338 ! - dissipation(k,j) 339 ! 340 ! ENDDO 341 ! ENDDO 342 ! 343 ! 344 !-- Store dissipation if needed for calculating the sgs particle 345 !-- velocities 346 ! IF ( use_sgs_for_particles .OR. wang_kernel ) THEN 347 ! DO j = nys, nyn 348 ! DO k = nzb_s_inner(j,i)+1, nzt 349 ! diss(k,j,i) = dissipation(k,j) 350 ! ENDDO 351 ! ENDDO 352 ! ENDIF 353 ! 354 ! ENDDO 355 ! 356 ELSE 357 358 !$acc kernels present( ddzu, ddzw, dd2zu, diss, e, km, l_grid ) & 359 !$acc present( nzb_s_inner, rif, tend, var, zu, zw ) 360 !$acc loop 361 DO i = nxl, nxr 362 DO j = nys, nyn 363 !$acc loop vector( 32 ) 364 DO k = 1, nzt 365 366 IF ( k > nzb_s_inner(j,i) ) THEN 367 ! 368 !-- Calculate the mixing length (for dissipation) 369 dvar_dz = atmos_ocean_sign * & 370 ( var(k+1,j,i) - var(k-1,j,i) ) * dd2zu(k) 371 IF ( dvar_dz > 0.0 ) THEN 372 l_stable = 0.76 * SQRT( e(k,j,i) ) / & 373 SQRT( g / var(k,j,i) * dvar_dz ) + 1E-5 374 ELSE 375 l_stable = l_grid(k) 376 ENDIF 377 ! 378 !-- Adjustment of the mixing length 379 IF ( wall_adjustment ) THEN 380 l = MIN( wall_adjustment_factor * & 381 ( zu(k) - zw(nzb_s_inner(j,i)) ), & 382 l_grid(k), l_stable ) 383 ll = MIN( wall_adjustment_factor * & 384 ( zu(k) - zw(nzb_s_inner(j,i)) ), & 385 l_grid(k) ) 386 ELSE 387 l = MIN( l_grid(k), l_stable ) 388 ll = l_grid(k) 389 ENDIF 390 ! 391 !-- Calculate the tendency terms 392 dissipation = ( 0.19 + 0.74 * l / ll ) * & 393 e(k,j,i) * SQRT( e(k,j,i) ) / l 394 395 tend(k,j,i) = tend(k,j,i) & 396 + ( & 397 ( km(k,j,i)+km(k,j,i+1) ) * ( e(k,j,i+1)-e(k,j,i) ) & 398 - ( km(k,j,i)+km(k,j,i-1) ) * ( e(k,j,i)-e(k,j,i-1) ) & 399 ) * ddx2 & 400 + ( & 401 ( km(k,j,i)+km(k,j+1,i) ) * ( e(k,j+1,i)-e(k,j,i) ) & 402 - ( km(k,j,i)+km(k,j-1,i) ) * ( e(k,j,i)-e(k,j-1,i) ) & 403 ) * ddy2 & 404 + ( & 405 ( km(k,j,i)+km(k+1,j,i) ) * ( e(k+1,j,i)-e(k,j,i) ) * ddzu(k+1) & 406 - ( km(k,j,i)+km(k-1,j,i) ) * ( e(k,j,i)-e(k-1,j,i) ) * ddzu(k) & 407 ) * ddzw(k) & 408 - dissipation 409 410 ! 411 !-- Store dissipation if needed for calculating the sgs 412 !-- particle velocities 413 IF ( use_sgs_for_particles .OR. wang_kernel ) THEN 414 diss(k,j,i) = dissipation 415 ENDIF 416 417 ENDIF 418 419 ENDDO 420 ENDDO 421 ENDDO 422 !$acc end kernels 423 424 ENDIF 425 426 ! 427 !-- Boundary condition for dissipation 428 IF ( use_sgs_for_particles .OR. wang_kernel ) THEN 429 !$acc kernels present( diss, nzb_s_inner ) 430 !$acc loop 431 DO i = nxl, nxr 432 !$acc loop vector( 32 ) 433 DO j = nys, nyn 434 diss(nzb_s_inner(j,i),j,i) = diss(nzb_s_inner(j,i)+1,j,i) 435 ENDDO 436 ENDDO 437 !$acc end kernels 438 ENDIF 439 440 END SUBROUTINE diffusion_e_acc 441 442 443 !------------------------------------------------------------------------------! 444 ! Call for grid point i,j 445 !------------------------------------------------------------------------------! 446 SUBROUTINE diffusion_e_ij( i, j, var, var_reference ) 447 448 USE arrays_3d 449 USE control_parameters 450 USE grid_variables 451 USE indices 452 USE particle_attributes 453 454 IMPLICIT NONE 455 456 INTEGER :: i, j, k 457 REAL :: dvar_dz, l_stable, var_reference 306 458 307 459 #if defined( __nopointer ) … … 312 464 REAL, DIMENSION(nzb+1:nzt) :: dissipation, l, ll 313 465 314 315 !316 !-- First, calculate phi-function for eventually adjusting the mixing length317 !-- to the prandtl mixing length318 IF ( adjust_mixing_length .AND. prandtl_layer ) THEN319 IF ( rif(j,i) >= 0.0 ) THEN320 phi_m = 1.0 + 5.0 * rif(j,i)321 ELSE322 phi_m = 1.0 / SQRT( SQRT( 1.0 - 16.0 * rif(j,i) ) )323 ENDIF324 ENDIF325 466 326 467 ! … … 352 493 ll(k) = l_grid(k) 353 494 ENDIF 354 IF ( adjust_mixing_length .AND. prandtl_layer ) THEN355 l(k) = MIN( l(k), kappa * &356 ( zu(k) - zw(nzb_s_inner(j,i)) ) / phi_m )357 ll(k) = MIN( ll(k), kappa * &358 ( zu(k) - zw(nzb_s_inner(j,i)) ) / phi_m )359 ENDIF360 361 495 ! 362 496 !-- Calculate the tendency term -
palm/trunk/SOURCE/diffusion_s.f90
r1011 r1015 4 4 ! Current revisions: 5 5 ! ------------------ 6 ! 6 ! accelerator version (*_acc) added 7 7 ! 8 8 ! Former revisions: … … 48 48 49 49 PRIVATE 50 PUBLIC diffusion_s 50 PUBLIC diffusion_s, diffusion_s_acc 51 51 52 52 INTERFACE diffusion_s … … 54 54 MODULE PROCEDURE diffusion_s_ij 55 55 END INTERFACE diffusion_s 56 57 INTERFACE diffusion_s_acc 58 MODULE PROCEDURE diffusion_s_acc 59 END INTERFACE diffusion_s_acc 56 60 57 61 CONTAINS … … 173 177 174 178 !------------------------------------------------------------------------------! 175 ! Call for grid point i,j176 !------------------------------------------------------------------------------! 177 SUBROUTINE diffusion_s_ ij( i, j,s, s_flux_b, s_flux_t, wall_s_flux )179 ! Call for all grid points - accelerator version 180 !------------------------------------------------------------------------------! 181 SUBROUTINE diffusion_s_acc( s, s_flux_b, s_flux_t, wall_s_flux ) 178 182 179 183 USE arrays_3d … … 194 198 #endif 195 199 200 !$acc kernels present( ddzu, ddzw, fwxm, fwxp, fwym, fwyp, kh ) & 201 !$acc present( nzb_diff_s_inner, nzb_s_inner, nzb_s_outer, s ) & 202 !$acc present( s_flux_b, s_flux_t, tend, wall_s_flux ) & 203 !$acc present( wall_w_x, wall_w_y ) 204 !$acc loop 205 DO i = nxl, nxr 206 DO j = nys,nyn 207 ! 208 !-- Compute horizontal diffusion 209 !$acc loop vector( 32 ) 210 DO k = 1, nzt 211 IF ( k > nzb_s_outer(j,i) ) THEN 212 213 tend(k,j,i) = tend(k,j,i) & 214 + 0.5 * ( & 215 ( kh(k,j,i) + kh(k,j,i+1) ) * ( s(k,j,i+1)-s(k,j,i) ) & 216 - ( kh(k,j,i) + kh(k,j,i-1) ) * ( s(k,j,i)-s(k,j,i-1) ) & 217 ) * ddx2 & 218 + 0.5 * ( & 219 ( kh(k,j,i) + kh(k,j+1,i) ) * ( s(k,j+1,i)-s(k,j,i) ) & 220 - ( kh(k,j,i) + kh(k,j-1,i) ) * ( s(k,j,i)-s(k,j-1,i) ) & 221 ) * ddy2 222 ENDIF 223 ENDDO 224 225 ! 226 !-- Apply prescribed horizontal wall heatflux where necessary 227 !$acc loop vector(32) 228 DO k = 1, nzt 229 IF ( k > nzb_s_inner(j,i) .AND. k <= nzb_s_outer(j,i) .AND. & 230 ( wall_w_x(j,i) /= 0.0 .OR. wall_w_y(j,i) /= 0.0 ) ) & 231 THEN 232 tend(k,j,i) = tend(k,j,i) & 233 + ( fwxp(j,i) * 0.5 * & 234 ( kh(k,j,i) + kh(k,j,i+1) ) * ( s(k,j,i+1)-s(k,j,i) ) & 235 + ( 1.0 - fwxp(j,i) ) * wall_s_flux(1) & 236 -fwxm(j,i) * 0.5 * & 237 ( kh(k,j,i) + kh(k,j,i-1) ) * ( s(k,j,i)-s(k,j,i-1) ) & 238 + ( 1.0 - fwxm(j,i) ) * wall_s_flux(2) & 239 ) * ddx2 & 240 + ( fwyp(j,i) * 0.5 * & 241 ( kh(k,j,i) + kh(k,j+1,i) ) * ( s(k,j+1,i)-s(k,j,i) ) & 242 + ( 1.0 - fwyp(j,i) ) * wall_s_flux(3) & 243 -fwym(j,i) * 0.5 * & 244 ( kh(k,j,i) + kh(k,j-1,i) ) * ( s(k,j,i)-s(k,j-1,i) ) & 245 + ( 1.0 - fwym(j,i) ) * wall_s_flux(4) & 246 ) * ddy2 247 ENDIF 248 ENDDO 249 250 ! 251 !-- Compute vertical diffusion. In case that surface fluxes have been 252 !-- prescribed or computed at bottom and/or top, index k starts/ends at 253 !-- nzb+2 or nzt-1, respectively. 254 !$acc loop vector( 32 ) 255 DO k = 1, nzt_diff 256 IF ( k >= nzb_diff_s_inner(j,i) ) THEN 257 tend(k,j,i) = tend(k,j,i) & 258 + 0.5 * ( & 259 ( kh(k,j,i) + kh(k+1,j,i) ) * ( s(k+1,j,i)-s(k,j,i) ) * ddzu(k+1) & 260 - ( kh(k,j,i) + kh(k-1,j,i) ) * ( s(k,j,i)-s(k-1,j,i) ) * ddzu(k) & 261 ) * ddzw(k) 262 ENDIF 263 ENDDO 264 265 ! 266 !-- Vertical diffusion at the first computational gridpoint along 267 !-- z-direction 268 !$acc loop vector( 32 ) 269 DO k = 1, nzt 270 IF ( use_surface_fluxes .AND. k == nzb_s_inner(j,i)+1 ) THEN 271 tend(k,j,i) = tend(k,j,i) & 272 + ( 0.5 * ( kh(k,j,i)+kh(k+1,j,i) ) & 273 * ( s(k+1,j,i)-s(k,j,i) ) & 274 * ddzu(k+1) & 275 + s_flux_b(j,i) & 276 ) * ddzw(k) 277 ENDIF 278 279 ! 280 !-- Vertical diffusion at the last computational gridpoint along 281 !-- z-direction 282 IF ( use_top_fluxes .AND. k == nzt ) THEN 283 tend(k,j,i) = tend(k,j,i) & 284 + ( - s_flux_t(j,i) & 285 - 0.5 * ( kh(k-1,j,i)+kh(k,j,i) )& 286 * ( s(k,j,i)-s(k-1,j,i) ) & 287 * ddzu(k) & 288 ) * ddzw(k) 289 ENDIF 290 ENDDO 291 292 ENDDO 293 ENDDO 294 !$acc end kernels 295 296 END SUBROUTINE diffusion_s_acc 297 298 299 !------------------------------------------------------------------------------! 300 ! Call for grid point i,j 301 !------------------------------------------------------------------------------! 302 SUBROUTINE diffusion_s_ij( i, j, s, s_flux_b, s_flux_t, wall_s_flux ) 303 304 USE arrays_3d 305 USE control_parameters 306 USE grid_variables 307 USE indices 308 309 IMPLICIT NONE 310 311 INTEGER :: i, j, k 312 REAL :: vertical_gridspace 313 REAL :: wall_s_flux(0:4) 314 REAL, DIMENSION(nysg:nyng,nxlg:nxrg) :: s_flux_b, s_flux_t 315 #if defined( __nopointer ) 316 REAL, DIMENSION(nzb:nzt+1,nysg:nyng,nxlg:nxrg) :: s 317 #else 318 REAL, DIMENSION(:,:,:), POINTER :: s 319 #endif 320 196 321 ! 197 322 !-- Compute horizontal diffusion -
palm/trunk/SOURCE/diffusion_u.f90
r1002 r1015 4 4 ! Current revisions: 5 5 ! ----------------- 6 ! 6 ! accelerator version (*_acc) added 7 7 ! 8 8 ! Former revisions: … … 58 58 59 59 PRIVATE 60 PUBLIC diffusion_u 60 PUBLIC diffusion_u, diffusion_u_acc 61 61 62 62 INTERFACE diffusion_u … … 64 64 MODULE PROCEDURE diffusion_u_ij 65 65 END INTERFACE diffusion_u 66 67 INTERFACE diffusion_u_acc 68 MODULE PROCEDURE diffusion_u_acc 69 END INTERFACE diffusion_u_acc 66 70 67 71 CONTAINS … … 220 224 221 225 !------------------------------------------------------------------------------! 226 ! Call for all grid points - accelerator version 227 !------------------------------------------------------------------------------! 228 SUBROUTINE diffusion_u_acc 229 230 USE arrays_3d 231 USE control_parameters 232 USE grid_variables 233 USE indices 234 235 IMPLICIT NONE 236 237 INTEGER :: i, j, k 238 REAL :: kmym, kmyp, kmzm, kmzp 239 240 !$acc declare create ( usvs ) 241 REAL, DIMENSION(nzb:nzt+1,nys:nyn,nxl:nxr) :: usvs 242 243 ! 244 !-- First calculate horizontal momentum flux u'v' at vertical walls, 245 !-- if neccessary 246 IF ( topography /= 'flat' ) THEN 247 CALL wall_fluxes_acc( usvs, 1.0, 0.0, 0.0, 0.0, nzb_u_inner, & 248 nzb_u_outer, wall_u ) 249 ENDIF 250 251 !$acc kernels present ( u, v, w, km, tend, usws, uswst ) & 252 !$acc present ( ddzu, ddzw, fym, fyp, wall_u ) & 253 !$acc present ( nzb_u_inner, nzb_u_outer, nzb_diff_u ) 254 !$acc loop 255 DO i = nxlu, nxr 256 DO j = nys, nyn 257 ! 258 !-- Compute horizontal diffusion 259 !$acc loop vector(32) 260 DO k = 1, nzt 261 IF ( k > nzb_u_outer(j,i) ) THEN 262 ! 263 !-- Interpolate eddy diffusivities on staggered gridpoints 264 kmyp = 0.25 * & 265 ( km(k,j,i)+km(k,j+1,i)+km(k,j,i-1)+km(k,j+1,i-1) ) 266 kmym = 0.25 * & 267 ( km(k,j,i)+km(k,j-1,i)+km(k,j,i-1)+km(k,j-1,i-1) ) 268 269 tend(k,j,i) = tend(k,j,i) & 270 & + 2.0 * ( & 271 & km(k,j,i) * ( u(k,j,i+1) - u(k,j,i) ) & 272 & - km(k,j,i-1) * ( u(k,j,i) - u(k,j,i-1) ) & 273 & ) * ddx2 & 274 & + ( kmyp * ( u(k,j+1,i) - u(k,j,i) ) * ddy & 275 & + kmyp * ( v(k,j+1,i) - v(k,j+1,i-1) ) * ddx & 276 & - kmym * ( u(k,j,i) - u(k,j-1,i) ) * ddy & 277 & - kmym * ( v(k,j,i) - v(k,j,i-1) ) * ddx & 278 & ) * ddy 279 ENDIF 280 ENDDO 281 282 ! 283 !-- Wall functions at the north and south walls, respectively 284 !$acc loop vector(32) 285 DO k = 1, nzt 286 IF( k > nzb_u_inner(j,i) .AND. k <= nzb_u_outer(j,i) .AND. & 287 wall_u(j,i) /= 0.0 ) THEN 288 289 kmyp = 0.25 * & 290 ( km(k,j,i)+km(k,j+1,i)+km(k,j,i-1)+km(k,j+1,i-1) ) 291 kmym = 0.25 * & 292 ( km(k,j,i)+km(k,j-1,i)+km(k,j,i-1)+km(k,j-1,i-1) ) 293 294 tend(k,j,i) = tend(k,j,i) & 295 + 2.0 * ( & 296 km(k,j,i) * ( u(k,j,i+1) - u(k,j,i) ) & 297 - km(k,j,i-1) * ( u(k,j,i) - u(k,j,i-1) ) & 298 ) * ddx2 & 299 + ( fyp(j,i) * ( & 300 kmyp * ( u(k,j+1,i) - u(k,j,i) ) * ddy & 301 + kmyp * ( v(k,j+1,i) - v(k,j+1,i-1) ) * ddx & 302 ) & 303 - fym(j,i) * ( & 304 kmym * ( u(k,j,i) - u(k,j-1,i) ) * ddy & 305 + kmym * ( v(k,j,i) - v(k,j,i-1) ) * ddx & 306 ) & 307 + wall_u(j,i) * usvs(k,j,i) & 308 ) * ddy 309 ENDIF 310 ENDDO 311 312 ! 313 !-- Compute vertical diffusion. In case of simulating a Prandtl layer, 314 !-- index k starts at nzb_u_inner+2. 315 !$acc loop vector(32) 316 DO k = 1, nzt_diff 317 IF ( k >= nzb_diff_u(j,i) ) THEN 318 ! 319 !-- Interpolate eddy diffusivities on staggered gridpoints 320 kmzp = 0.25 * & 321 ( km(k,j,i)+km(k+1,j,i)+km(k,j,i-1)+km(k+1,j,i-1) ) 322 kmzm = 0.25 * & 323 ( km(k,j,i)+km(k-1,j,i)+km(k,j,i-1)+km(k-1,j,i-1) ) 324 325 tend(k,j,i) = tend(k,j,i) & 326 & + ( kmzp * ( ( u(k+1,j,i) - u(k,j,i) ) * ddzu(k+1)& 327 & + ( w(k,j,i) - w(k,j,i-1) ) * ddx & 328 & ) & 329 & - kmzm * ( ( u(k,j,i) - u(k-1,j,i) ) * ddzu(k)& 330 & + ( w(k-1,j,i) - w(k-1,j,i-1) ) * ddx & 331 & ) & 332 & ) * ddzw(k) 333 ENDIF 334 ENDDO 335 336 ENDDO 337 ENDDO 338 339 ! 340 !-- Vertical diffusion at the first grid point above the surface, 341 !-- if the momentum flux at the bottom is given by the Prandtl law or 342 !-- if it is prescribed by the user. 343 !-- Difference quotient of the momentum flux is not formed over half 344 !-- of the grid spacing (2.0*ddzw(k)) any more, since the comparison 345 !-- with other (LES) modell showed that the values of the momentum 346 !-- flux becomes too large in this case. 347 !-- The term containing w(k-1,..) (see above equation) is removed here 348 !-- because the vertical velocity is assumed to be zero at the surface. 349 IF ( use_surface_fluxes ) THEN 350 351 !$acc loop 352 DO i = nxlu, nxr 353 !$acc loop vector(32) 354 DO j = nys, nyn 355 356 k = nzb_u_inner(j,i)+1 357 ! 358 !-- Interpolate eddy diffusivities on staggered gridpoints 359 kmzp = 0.25 * & 360 ( km(k,j,i)+km(k+1,j,i)+km(k,j,i-1)+km(k+1,j,i-1) ) 361 kmzm = 0.25 * & 362 ( km(k,j,i)+km(k-1,j,i)+km(k,j,i-1)+km(k-1,j,i-1) ) 363 364 tend(k,j,i) = tend(k,j,i) & 365 & + ( kmzp * ( w(k,j,i) - w(k,j,i-1) ) * ddx & 366 & ) * ddzw(k) & 367 & + ( kmzp * ( u(k+1,j,i) - u(k,j,i) ) * ddzu(k+1) & 368 & + usws(j,i) & 369 & ) * ddzw(k) 370 ENDDO 371 ENDDO 372 373 ENDIF 374 375 ! 376 !-- Vertical diffusion at the first gridpoint below the top boundary, 377 !-- if the momentum flux at the top is prescribed by the user 378 IF ( use_top_fluxes .AND. constant_top_momentumflux ) THEN 379 380 k = nzt 381 382 !$acc loop 383 DO i = nxlu, nxr 384 !$acc loop vector(32) 385 DO j = nys, nyn 386 387 ! 388 !-- Interpolate eddy diffusivities on staggered gridpoints 389 kmzp = 0.25 * & 390 ( km(k,j,i)+km(k+1,j,i)+km(k,j,i-1)+km(k+1,j,i-1) ) 391 kmzm = 0.25 * & 392 ( km(k,j,i)+km(k-1,j,i)+km(k,j,i-1)+km(k-1,j,i-1) ) 393 394 tend(k,j,i) = tend(k,j,i) & 395 & - ( kmzm * ( w(k-1,j,i) - w(k-1,j,i-1) ) * ddx & 396 & ) * ddzw(k) & 397 & + ( -uswst(j,i) & 398 & - kmzm * ( u(k,j,i) - u(k-1,j,i) ) * ddzu(k) & 399 & ) * ddzw(k) 400 ENDDO 401 ENDDO 402 403 ENDIF 404 !$acc end kernels 405 406 END SUBROUTINE diffusion_u_acc 407 408 409 !------------------------------------------------------------------------------! 222 410 ! Call for grid point i,j 223 411 !------------------------------------------------------------------------------! -
palm/trunk/SOURCE/diffusion_v.f90
r1002 r1015 4 4 ! Current revisions: 5 5 ! ----------------- 6 ! 6 ! accelerator version (*_acc) added 7 7 ! 8 8 ! Former revisions: … … 56 56 57 57 PRIVATE 58 PUBLIC diffusion_v 58 PUBLIC diffusion_v, diffusion_v_acc 59 59 60 60 INTERFACE diffusion_v … … 62 62 MODULE PROCEDURE diffusion_v_ij 63 63 END INTERFACE diffusion_v 64 65 INTERFACE diffusion_v_acc 66 MODULE PROCEDURE diffusion_v_acc 67 END INTERFACE diffusion_v_acc 64 68 65 69 CONTAINS … … 218 222 219 223 !------------------------------------------------------------------------------! 224 ! Call for all grid points - accelerator version 225 !------------------------------------------------------------------------------! 226 SUBROUTINE diffusion_v_acc 227 228 USE arrays_3d 229 USE control_parameters 230 USE grid_variables 231 USE indices 232 233 IMPLICIT NONE 234 235 INTEGER :: i, j, k 236 REAL :: kmxm, kmxp, kmzm, kmzp 237 238 !$acc declare create ( vsus ) 239 REAL, DIMENSION(nzb:nzt+1,nys:nyn,nxl:nxr) :: vsus 240 241 ! 242 !-- First calculate horizontal momentum flux v'u' at vertical walls, 243 !-- if neccessary 244 IF ( topography /= 'flat' ) THEN 245 CALL wall_fluxes_acc( vsus, 0.0, 1.0, 0.0, 0.0, nzb_v_inner, & 246 nzb_v_outer, wall_v ) 247 ENDIF 248 249 !$acc kernels present ( u, v, w, km, tend, vsws, vswst ) & 250 !$acc present ( ddzu, ddzw, fxm, fxp, wall_v ) & 251 !$acc present ( nzb_v_inner, nzb_v_outer, nzb_diff_v ) 252 !$acc loop 253 DO i = nxl, nxr 254 DO j = nysv, nyn 255 ! 256 !-- Compute horizontal diffusion 257 !$acc loop vector(32) 258 DO k = 1, nzt 259 IF ( k > nzb_v_outer(j,i) ) THEN 260 ! 261 !-- Interpolate eddy diffusivities on staggered gridpoints 262 kmxp = 0.25 * & 263 ( km(k,j,i)+km(k,j,i+1)+km(k,j-1,i)+km(k,j-1,i+1) ) 264 kmxm = 0.25 * & 265 ( km(k,j,i)+km(k,j,i-1)+km(k,j-1,i)+km(k,j-1,i-1) ) 266 267 tend(k,j,i) = tend(k,j,i) & 268 & + ( kmxp * ( v(k,j,i+1) - v(k,j,i) ) * ddx & 269 & + kmxp * ( u(k,j,i+1) - u(k,j-1,i+1) ) * ddy & 270 & - kmxm * ( v(k,j,i) - v(k,j,i-1) ) * ddx & 271 & - kmxm * ( u(k,j,i) - u(k,j-1,i) ) * ddy & 272 & ) * ddx & 273 & + 2.0 * ( & 274 & km(k,j,i) * ( v(k,j+1,i) - v(k,j,i) ) & 275 & - km(k,j-1,i) * ( v(k,j,i) - v(k,j-1,i) ) & 276 & ) * ddy2 277 ENDIF 278 ENDDO 279 280 ! 281 !-- Wall functions at the left and right walls, respectively 282 !$acc loop vector(32) 283 DO k = 1, nzt 284 IF( k > nzb_v_inner(j,i) .AND. k <= nzb_v_outer(j,i) .AND. & 285 wall_v(j,i) /= 0.0 ) THEN 286 287 kmxp = 0.25 * & 288 ( km(k,j,i)+km(k,j,i+1)+km(k,j-1,i)+km(k,j-1,i+1) ) 289 kmxm = 0.25 * & 290 ( km(k,j,i)+km(k,j,i-1)+km(k,j-1,i)+km(k,j-1,i-1) ) 291 292 tend(k,j,i) = tend(k,j,i) & 293 + 2.0 * ( & 294 km(k,j,i) * ( v(k,j+1,i) - v(k,j,i) ) & 295 - km(k,j-1,i) * ( v(k,j,i) - v(k,j-1,i) ) & 296 ) * ddy2 & 297 + ( fxp(j,i) * ( & 298 kmxp * ( v(k,j,i+1) - v(k,j,i) ) * ddx & 299 + kmxp * ( u(k,j,i+1) - u(k,j-1,i+1) ) * ddy & 300 ) & 301 - fxm(j,i) * ( & 302 kmxm * ( v(k,j,i) - v(k,j,i-1) ) * ddx & 303 + kmxm * ( u(k,j,i) - u(k,j-1,i) ) * ddy & 304 ) & 305 + wall_v(j,i) * vsus(k,j,i) & 306 ) * ddx 307 ENDIF 308 ENDDO 309 310 ! 311 !-- Compute vertical diffusion. In case of simulating a Prandtl 312 !-- layer, index k starts at nzb_v_inner+2. 313 !$acc loop vector(32) 314 DO k = 1, nzt_diff 315 IF ( k >= nzb_diff_v(j,i) ) THEN 316 ! 317 !-- Interpolate eddy diffusivities on staggered gridpoints 318 kmzp = 0.25 * & 319 ( km(k,j,i)+km(k+1,j,i)+km(k,j-1,i)+km(k+1,j-1,i) ) 320 kmzm = 0.25 * & 321 ( km(k,j,i)+km(k-1,j,i)+km(k,j-1,i)+km(k-1,j-1,i) ) 322 323 tend(k,j,i) = tend(k,j,i) & 324 & + ( kmzp * ( ( v(k+1,j,i) - v(k,j,i) ) * ddzu(k+1)& 325 & + ( w(k,j,i) - w(k,j-1,i) ) * ddy & 326 & ) & 327 & - kmzm * ( ( v(k,j,i) - v(k-1,j,i) ) * ddzu(k)& 328 & + ( w(k-1,j,i) - w(k-1,j-1,i) ) * ddy & 329 & ) & 330 & ) * ddzw(k) 331 ENDIF 332 ENDDO 333 334 ENDDO 335 ENDDO 336 337 ! 338 !-- Vertical diffusion at the first grid point above the surface, 339 !-- if the momentum flux at the bottom is given by the Prandtl law 340 !-- or if it is prescribed by the user. 341 !-- Difference quotient of the momentum flux is not formed over 342 !-- half of the grid spacing (2.0*ddzw(k)) any more, since the 343 !-- comparison with other (LES) modell showed that the values of 344 !-- the momentum flux becomes too large in this case. 345 !-- The term containing w(k-1,..) (see above equation) is removed here 346 !-- because the vertical velocity is assumed to be zero at the surface. 347 IF ( use_surface_fluxes ) THEN 348 349 !$acc loop 350 DO i = nxl, nxr 351 !$acc loop vector(32) 352 DO j = nysv, nyn 353 354 k = nzb_v_inner(j,i)+1 355 ! 356 !-- Interpolate eddy diffusivities on staggered gridpoints 357 kmzp = 0.25 * & 358 ( km(k,j,i)+km(k+1,j,i)+km(k,j-1,i)+km(k+1,j-1,i) ) 359 kmzm = 0.25 * & 360 ( km(k,j,i)+km(k-1,j,i)+km(k,j-1,i)+km(k-1,j-1,i) ) 361 362 tend(k,j,i) = tend(k,j,i) & 363 & + ( kmzp * ( w(k,j,i) - w(k,j-1,i) ) * ddy & 364 & ) * ddzw(k) & 365 & + ( kmzp * ( v(k+1,j,i) - v(k,j,i) ) * ddzu(k+1) & 366 & + vsws(j,i) & 367 & ) * ddzw(k) 368 ENDDO 369 ENDDO 370 371 ENDIF 372 373 ! 374 !-- Vertical diffusion at the first gridpoint below the top boundary, 375 !-- if the momentum flux at the top is prescribed by the user 376 IF ( use_top_fluxes .AND. constant_top_momentumflux ) THEN 377 378 k = nzt 379 380 !$acc loop 381 DO i = nxl, nxr 382 !$acc loop vector(32) 383 DO j = nysv, nyn 384 385 ! 386 !-- Interpolate eddy diffusivities on staggered gridpoints 387 kmzp = 0.25 * & 388 ( km(k,j,i)+km(k+1,j,i)+km(k,j-1,i)+km(k+1,j-1,i) ) 389 kmzm = 0.25 * & 390 ( km(k,j,i)+km(k-1,j,i)+km(k,j-1,i)+km(k-1,j-1,i) ) 391 392 tend(k,j,i) = tend(k,j,i) & 393 & - ( kmzm * ( w(k-1,j,i) - w(k-1,j-1,i) ) * ddy & 394 & ) * ddzw(k) & 395 & + ( -vswst(j,i) & 396 & - kmzm * ( v(k,j,i) - v(k-1,j,i) ) * ddzu(k) & 397 & ) * ddzw(k) 398 ENDDO 399 ENDDO 400 401 ENDIF 402 !$acc end kernels 403 404 END SUBROUTINE diffusion_v_acc 405 406 407 !------------------------------------------------------------------------------! 220 408 ! Call for grid point i,j 221 409 !------------------------------------------------------------------------------! -
palm/trunk/SOURCE/diffusion_w.f90
r1002 r1015 4 4 ! Current revisions: 5 5 ! ----------------- 6 ! 6 ! accelerator version (*_acc) added 7 7 ! 8 8 ! Former revisions: … … 50 50 51 51 PRIVATE 52 PUBLIC diffusion_w 52 PUBLIC diffusion_w, diffusion_w_acc 53 53 54 54 INTERFACE diffusion_w … … 56 56 MODULE PROCEDURE diffusion_w_ij 57 57 END INTERFACE diffusion_w 58 59 INTERFACE diffusion_w_acc 60 MODULE PROCEDURE diffusion_w_acc 61 END INTERFACE diffusion_w_acc 58 62 59 63 CONTAINS … … 170 174 171 175 !------------------------------------------------------------------------------! 176 ! Call for all grid points - accelerator version 177 !------------------------------------------------------------------------------! 178 SUBROUTINE diffusion_w_acc 179 180 USE arrays_3d 181 USE control_parameters 182 USE grid_variables 183 USE indices 184 185 IMPLICIT NONE 186 187 INTEGER :: i, j, k 188 REAL :: kmxm, kmxp, kmym, kmyp 189 190 !$acc declare create ( wsus, wsvs ) 191 REAL, DIMENSION(nzb:nzt+1,nys:nyn,nxl:nxr) :: wsus, wsvs 192 193 194 ! 195 !-- First calculate horizontal momentum flux w'u' and/or w'v' at vertical 196 !-- walls, if neccessary 197 IF ( topography /= 'flat' ) THEN 198 CALL wall_fluxes_acc( wsus, 0.0, 0.0, 0.0, 1.0, nzb_w_inner, & 199 nzb_w_outer, wall_w_x ) 200 CALL wall_fluxes_acc( wsvs, 0.0, 0.0, 1.0, 0.0, nzb_w_inner, & 201 nzb_w_outer, wall_w_y ) 202 ENDIF 203 204 !$acc kernels present ( u, v, w, km, tend, vsws, vswst ) & 205 !$acc present ( ddzu, ddzw, fwxm, fwxp, fwym, fwyp, wall_w_x, wall_w_y ) & 206 !$acc present ( nzb_w_inner, nzb_w_outer ) 207 !$acc loop 208 DO i = nxl, nxr 209 DO j = nys, nyn 210 !$acc loop vector( 32 ) 211 DO k = 1, nzt 212 IF ( k > nzb_w_outer(j,i) ) THEN 213 ! 214 !-- Interpolate eddy diffusivities on staggered gridpoints 215 kmxp = 0.25 * & 216 ( km(k,j,i)+km(k,j,i+1)+km(k+1,j,i)+km(k+1,j,i+1) ) 217 kmxm = 0.25 * & 218 ( km(k,j,i)+km(k,j,i-1)+km(k+1,j,i)+km(k+1,j,i-1) ) 219 kmyp = 0.25 * & 220 ( km(k,j,i)+km(k+1,j,i)+km(k,j+1,i)+km(k+1,j+1,i) ) 221 kmym = 0.25 * & 222 ( km(k,j,i)+km(k+1,j,i)+km(k,j-1,i)+km(k+1,j-1,i) ) 223 224 tend(k,j,i) = tend(k,j,i) & 225 & + ( kmxp * ( w(k,j,i+1) - w(k,j,i) ) * ddx & 226 & + kmxp * ( u(k+1,j,i+1) - u(k,j,i+1) ) * ddzu(k+1) & 227 & - kmxm * ( w(k,j,i) - w(k,j,i-1) ) * ddx & 228 & - kmxm * ( u(k+1,j,i) - u(k,j,i) ) * ddzu(k+1) & 229 & ) * ddx & 230 & + ( kmyp * ( w(k,j+1,i) - w(k,j,i) ) * ddy & 231 & + kmyp * ( v(k+1,j+1,i) - v(k,j+1,i) ) * ddzu(k+1) & 232 & - kmym * ( w(k,j,i) - w(k,j-1,i) ) * ddy & 233 & - kmym * ( v(k+1,j,i) - v(k,j,i) ) * ddzu(k+1) & 234 & ) * ddy & 235 & + 2.0 * ( & 236 & km(k+1,j,i) * ( w(k+1,j,i) - w(k,j,i) ) * ddzw(k+1) & 237 & - km(k,j,i) * ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) & 238 & ) * ddzu(k+1) 239 ENDIF 240 ENDDO 241 242 ! 243 !-- Wall functions at all vertical walls, where necessary 244 !$acc loop vector( 32 ) 245 DO k = 1,nzt 246 247 IF ( k > nzb_w_inner(j,i) .AND. k <= nzb_w_outer(j,i) .AND. & 248 wall_w_x(j,i) /= 0.0 .AND. wall_w_y(j,i) /= 0.0 ) THEN 249 ! 250 !-- Interpolate eddy diffusivities on staggered gridpoints 251 kmxp = 0.25 * & 252 ( km(k,j,i)+km(k,j,i+1)+km(k+1,j,i)+km(k+1,j,i+1) ) 253 kmxm = 0.25 * & 254 ( km(k,j,i)+km(k,j,i-1)+km(k+1,j,i)+km(k+1,j,i-1) ) 255 kmyp = 0.25 * & 256 ( km(k,j,i)+km(k+1,j,i)+km(k,j+1,i)+km(k+1,j+1,i) ) 257 kmym = 0.25 * & 258 ( km(k,j,i)+km(k+1,j,i)+km(k,j-1,i)+km(k+1,j-1,i) ) 259 260 tend(k,j,i) = tend(k,j,i) & 261 + ( fwxp(j,i) * ( & 262 kmxp * ( w(k,j,i+1) - w(k,j,i) ) * ddx & 263 + kmxp * ( u(k+1,j,i+1) - u(k,j,i+1) ) * ddzu(k+1) & 264 ) & 265 - fwxm(j,i) * ( & 266 kmxm * ( w(k,j,i) - w(k,j,i-1) ) * ddx & 267 + kmxm * ( u(k+1,j,i) - u(k,j,i) ) * ddzu(k+1) & 268 ) & 269 + wall_w_x(j,i) * wsus(k,j,i) & 270 ) * ddx & 271 + ( fwyp(j,i) * ( & 272 kmyp * ( w(k,j+1,i) - w(k,j,i) ) * ddy & 273 + kmyp * ( v(k+1,j+1,i) - v(k,j+1,i) ) * ddzu(k+1) & 274 ) & 275 - fwym(j,i) * ( & 276 kmym * ( w(k,j,i) - w(k,j-1,i) ) * ddy & 277 + kmym * ( v(k+1,j,i) - v(k,j,i) ) * ddzu(k+1) & 278 ) & 279 + wall_w_y(j,i) * wsvs(k,j,i) & 280 ) * ddy & 281 + 2.0 * ( & 282 km(k+1,j,i) * ( w(k+1,j,i) - w(k,j,i) ) * ddzw(k+1) & 283 - km(k,j,i) * ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) & 284 ) * ddzu(k+1) 285 ENDIF 286 ENDDO 287 288 ENDDO 289 ENDDO 290 !$acc end kernels 291 292 END SUBROUTINE diffusion_w_acc 293 294 295 !------------------------------------------------------------------------------! 172 296 ! Call for grid point i,j 173 297 !------------------------------------------------------------------------------! -
palm/trunk/SOURCE/diffusivities.f90
r668 r1015 4 4 ! Current revisions: 5 5 ! ----------------- 6 ! OpenACC statements added + code changes required for GPU optimization, 7 ! adjustment of mixing length to the Prandtl mixing length at first grid point 8 ! above ground removed 6 9 ! 7 10 ! Former revisions: … … 52 55 INTEGER :: i, j, k, omp_get_thread_num, sr, tn 53 56 54 REAL :: dvar_dz, l_stable, var_reference 55 56 REAL, SAVE :: phi_m = 1.0 57 REAL :: dvar_dz, l, ll, l_stable, sqrt_e, var_reference 57 58 58 59 REAL :: var(nzb:nzt+1,nysg:nyng,nxlg:nxrg) 59 60 REAL, DIMENSION(1:nzt) :: l, ll, sqrt_e61 60 62 61 … … 71 70 ! 72 71 !-- Compute the turbulent diffusion coefficient for momentum 73 !$OMP PARALLEL PRIVATE (dvar_dz,i,j,k,l,ll,l_stable, phi_m,sqrt_e,sr,tn)72 !$OMP PARALLEL PRIVATE (dvar_dz,i,j,k,l,ll,l_stable,sqrt_e,sr,tn) 74 73 !$ tn = omp_get_thread_num() 75 74 75 ! 76 !-- Data declerations for accelerators 77 !$acc data present( dd2zu, e, km, kh, l_grid, l_wall, nzb_s_inner, rif, var ) 78 !$acc kernels 79 80 ! 81 !-- Introduce an optional minimum tke 82 IF ( e_min > 0.0 ) THEN 83 !$OMP DO 84 !$acc loop 85 DO i = nxlg, nxrg 86 DO j = nysg, nyng 87 !$acc loop vector( 32 ) 88 DO k = 1, nzt 89 IF ( k > nzb_s_inner(j,i) ) THEN 90 e(k,j,i) = MAX( e(k,j,i), e_min ) 91 ENDIF 92 ENDDO 93 ENDDO 94 ENDDO 95 ENDIF 96 76 97 !$OMP DO 98 !$acc loop 77 99 DO i = nxlg, nxrg 78 100 DO j = nysg, nyng 101 !$acc loop vector( 32 ) 102 DO k = 1, nzt 79 103 104 IF ( k > nzb_s_inner(j,i) ) THEN 105 106 sqrt_e = SQRT( e(k,j,i) ) 80 107 ! 81 !-- Compute the Phi-function for a possible adaption of the mixing length 82 !-- to the Prandtl mixing length 83 IF ( adjust_mixing_length .AND. prandtl_layer ) THEN 84 IF ( rif(j,i) >= 0.0 ) THEN 85 phi_m = 1.0 + 5.0 * rif(j,i) 86 ELSE 87 phi_m = 1.0 / SQRT( SQRT( 1.0 - 16.0 * rif(j,i) ) ) 88 ENDIF 89 ENDIF 90 108 !-- Determine the mixing length 109 dvar_dz = atmos_ocean_sign * & ! inverse effect of pt/rho gradient 110 ( var(k+1,j,i) - var(k-1,j,i) ) * dd2zu(k) 111 IF ( dvar_dz > 0.0 ) THEN 112 IF ( use_reference ) THEN 113 l_stable = 0.76 * sqrt_e / & 114 SQRT( g / var_reference * dvar_dz ) + 1E-5 115 ELSE 116 l_stable = 0.76 * sqrt_e / & 117 SQRT( g / var(k,j,i) * dvar_dz ) + 1E-5 118 ENDIF 119 ELSE 120 l_stable = l_grid(k) 121 ENDIF 91 122 ! 92 !-- Introduce an optional minimum tke 93 IF ( e_min > 0.0 ) THEN 94 DO k = nzb_s_inner(j,i)+1, nzt 95 e(k,j,i) = MAX( e(k,j,i), e_min ) 96 ENDDO 97 ENDIF 123 !-- Adjustment of the mixing length 124 IF ( wall_adjustment ) THEN 125 l = MIN( l_wall(k,j,i), l_grid(k), l_stable ) 126 ll = MIN( l_wall(k,j,i), l_grid(k) ) 127 ELSE 128 l = MIN( l_grid(k), l_stable ) 129 ll = l_grid(k) 130 ENDIF 98 131 99 ! 100 !-- Calculate square root of e in a seperate loop, because it is used 101 !-- twice in the next loop (better vectorization) 102 DO k = nzb_s_inner(j,i)+1, nzt 103 sqrt_e(k) = SQRT( e(k,j,i) ) 104 ENDDO 132 ! 133 !-- Compute diffusion coefficients for momentum and heat 134 km(k,j,i) = 0.1 * l * sqrt_e 135 kh(k,j,i) = ( 1.0 + 2.0 * l / ll ) * km(k,j,i) 105 136 106 !107 !-- Determine the mixing length108 DO k = nzb_s_inner(j,i)+1, nzt109 dvar_dz = atmos_ocean_sign * & ! inverse effect of pt/rho gradient110 ( var(k+1,j,i) - var(k-1,j,i) ) * dd2zu(k)111 IF ( dvar_dz > 0.0 ) THEN112 IF ( use_reference ) THEN113 l_stable = 0.76 * sqrt_e(k) / &114 SQRT( g / var_reference * dvar_dz ) + 1E-5115 ELSE116 l_stable = 0.76 * sqrt_e(k) / &117 SQRT( g / var(k,j,i) * dvar_dz ) + 1E-5118 ENDIF119 ELSE120 l_stable = l_grid(k)121 ENDIF122 !123 !-- Adjustment of the mixing length124 IF ( wall_adjustment ) THEN125 l(k) = MIN( l_wall(k,j,i), l_grid(k), l_stable )126 ll(k) = MIN( l_wall(k,j,i), l_grid(k) )127 ELSE128 l(k) = MIN( l_grid(k), l_stable )129 ll(k) = l_grid(k)130 ENDIF131 IF ( adjust_mixing_length .AND. prandtl_layer ) THEN132 l(k) = MIN( l(k), kappa * &133 ( zu(k) - zw(nzb_s_inner(j,i)) ) / phi_m )134 ll(k) = MIN( ll(k), kappa * &135 ( zu(k) - zw(nzb_s_inner(j,i)) ) / phi_m )136 137 ENDIF 137 138 139 #if ! defined( __openacc ) 138 140 ! 139 !-- Compute diffusion coefficients for momentum and heat 140 km(k,j,i) = 0.1 * l(k) * sqrt_e(k) 141 kh(k,j,i) = ( 1.0 + 2.0 * l(k) / ll(k) ) * km(k,j,i) 141 !++ Statistics still have to be realized for accelerators 142 !-- Summation for averaged profile (cf. flow_statistics) 143 DO sr = 0, statistic_regions 144 sums_l_l(k,sr,tn) = sums_l_l(k,sr,tn) + l * rmask(j,i,sr) 145 ENDDO 146 #endif 142 147 143 148 ENDDO 144 145 !146 !-- Summation for averaged profile (cf. flow_statistics)147 !-- (the IF statement still requires a performance check on NEC machines)148 DO sr = 0, statistic_regions149 IF ( rmask(j,i,sr) /= 0.0 .AND. &150 i >= nxl .AND. i <= nxr .AND. j >= nys .AND. j <= nyn ) THEN151 DO k = nzb_s_inner(j,i)+1, nzt152 sums_l_l(k,sr,tn) = sums_l_l(k,sr,tn) + l(k)153 ENDDO154 ENDIF155 ENDDO156 157 149 ENDDO 158 150 ENDDO 159 151 152 #if ! defined( __openacc ) 153 ! 154 !++ Statistics still have to be realized for accelerators 160 155 sums_l_l(nzt+1,:,tn) = sums_l_l(nzt,:,tn) ! quasi boundary-condition for 161 156 ! data output 162 157 #endif 163 158 !$OMP END PARALLEL 164 159 … … 169 164 !-- values of the diffusivities are not needed 170 165 !$OMP PARALLEL DO 166 !$acc loop 171 167 DO i = nxlg, nxrg 172 168 DO j = nysg, nyng … … 198 194 ENDIF 199 195 196 !$acc end kernels 197 !$acc end data 200 198 201 199 END SUBROUTINE diffusivities -
palm/trunk/SOURCE/header.f90
r1004 r1015 4 4 ! Current revisions: 5 5 ! ----------------- 6 ! 6 ! output of Aajustment of mixing length to the Prandtl mixing length at first 7 ! grid point above ground removed 7 8 ! 8 9 ! Former revisions: … … 1482 1483 IF ( e_min > 0.0 ) WRITE ( io, 454 ) e_min 1483 1484 IF ( wall_adjustment ) WRITE ( io, 453 ) wall_adjustment_factor 1484 IF ( adjust_mixing_length .AND. prandtl_layer ) WRITE ( io, 452 )1485 1485 ENDIF 1486 1486 … … 1936 1936 451 FORMAT (' Diffusion coefficients are constant:'/ & 1937 1937 ' Km = ',F6.2,' m**2/s Kh = ',F6.2,' m**2/s Pr = ',F5.2) 1938 452 FORMAT (' Mixing length is limited to the Prandtl mixing lenth.')1939 1938 453 FORMAT (' Mixing length is limited to ',F4.2,' * z') 1940 1939 454 FORMAT (' TKE is not allowed to fall below ',E9.2,' (m/s)**2') -
palm/trunk/SOURCE/init_1d_model.f90
r1002 r1015 4 4 ! Current revisions: 5 5 ! ----------------- 6 ! 6 ! adjustment of mixing length to the Prandtl mixing length at first grid point 7 ! above ground removed 7 8 ! 8 9 ! Former revisions: … … 113 114 l_black(nzt+1) = l_black(nzt) 114 115 115 ENDIF116 117 !118 !-- Adjust mixing length to the prandtl mixing length (within the prandtl119 !-- layer)120 IF ( adjust_mixing_length .AND. prandtl_layer ) THEN121 k = nzb+1122 l_black(k) = MIN( l_black(k), kappa * zu(k) )123 116 ENDIF 124 117 ENDIF … … 603 596 604 597 ! 605 !-- Adjust mixing length to the prandtl mixing length606 IF ( adjust_mixing_length .AND. prandtl_layer ) THEN607 k = nzb+1608 IF ( rif1d(k) >= 0.0 ) THEN609 l1d(k) = MIN( l1d(k), kappa * zu(k) / ( 1.0 + 5.0 * &610 rif1d(k) ) )611 ELSE612 l1d(k) = MIN( l1d(k), kappa * zu(k) * &613 SQRT( SQRT( 1.0 - 16.0 * rif1d(k) ) ) )614 ENDIF615 ENDIF616 617 !618 598 !-- Compute the diffusion coefficients for momentum via the 619 599 !-- corresponding Prandtl-layer relationship and according to -
palm/trunk/SOURCE/init_3d_model.f90
r1011 r1015 7 7 ! Current revisions: 8 8 ! ------------------ 9 ! 9 ! mask is set to zero for ghost boundaries 10 10 ! 11 11 ! Former revisions: … … 1530 1530 ! 1531 1531 !-- Pre-set masks for regional statistics. Default is the total model domain. 1532 !-- Ghost points are excluded because counting values at the ghost boundaries 1533 !-- would bias the statistics 1532 1534 rmask = 1.0 1535 rmask(nxlg:nxl-1,:,:) = 0.0; rmask(nxr+1:nxrg,:,:) = 0.0 1536 rmask(:,nysg:nys-1,:) = 0.0; rmask(:,nyn+1:nyng,:) = 0.0 1533 1537 1534 1538 ! -
palm/trunk/SOURCE/init_grid.f90
r997 r1015 4 4 ! Current revisions: 5 5 ! ----------------- 6 ! 6 ! lower index for calculating wall_flags_0 set to nzb_w_inner instead of 7 ! nzb_w_inner+1 7 8 ! 8 9 ! Former revisions: … … 1253 1254 DO i = nxl, nxr 1254 1255 DO j = nys, nyn 1255 DO k = nzb_w_inner(j,i) +1, nzt1256 DO k = nzb_w_inner(j,i), nzt 1256 1257 ! 1257 1258 !-- w component - x-direction -
palm/trunk/SOURCE/modules.f90
r1011 r1015 4 4 ! Current revisions: 5 5 ! ----------------- 6 ! 6 ! +acc_rank, num_acc_per_node, 7 ! -adjust_mixing_length 7 8 ! 8 9 ! Former revisions: … … 592 593 mask_i_global, mask_j_global, mask_k_global 593 594 594 LOGICAL :: a djust_mixing_length = .FALSE., avs_output = .FALSE., &595 LOGICAL :: avs_output = .FALSE., & 595 596 bc_lr_cyc =.TRUE., bc_lr_dirneu = .FALSE., & 596 597 bc_lr_dirrad = .FALSE., bc_lr_neudir = .FALSE., & … … 1335 1336 #endif 1336 1337 CHARACTER(LEN=5) :: myid_char = '' 1337 INTEGER :: id_inflow = 0, id_recycling = 0, myid = 0, & 1338 INTEGER :: acc_rank, id_inflow = 0, id_recycling = 0, & 1339 myid = 0, num_acc_per_node = 0, & 1338 1340 target_id, npex = -1, npey = -1, numprocs = 1, & 1339 1341 numprocs_previous_run = -1, & -
palm/trunk/SOURCE/palm.f90
r863 r1015 4 4 ! Current revisions: 5 5 ! ----------------- 6 ! 6 ! OpenACC statements added + code changes required for GPU optimization 7 7 ! 8 8 ! Former revisions: … … 77 77 USE statistics 78 78 79 #if defined( __openacc ) 80 USE OPENACC 81 #endif 82 79 83 IMPLICIT NONE 80 84 … … 84 88 CHARACTER (LEN=1) :: cdum 85 89 INTEGER :: i, run_description_header_i(80) 90 #if defined( __openacc ) 91 REAL, DIMENSION(100) :: acc_dum 92 #endif 86 93 87 94 version = 'PALM 3.8a' … … 102 109 #endif 103 110 111 #if defined( __openacc ) 112 ! 113 !-- Get the number of accelerator boards per node and assign the MPI processes 114 !-- to these boards 115 num_acc_per_node = ACC_GET_NUM_DEVICES( ACC_DEVICE_NVIDIA ) 116 acc_rank = MOD( myid, num_acc_per_node ) 117 CALL ACC_SET_DEVICE_NUM ( acc_rank, ACC_DEVICE_NVIDIA ) 118 ! 119 !-- Test output (to be removed later) 120 WRITE (*,'(A,I4,A,I3,A,I3,A,I3)') '*** Connect MPI-Task ', myid,' to CPU ',& 121 acc_rank, ' Devices: ', num_acc_per_node,& 122 ' connected to:', & 123 ACC_GET_DEVICE_NUM( ACC_DEVICE_NVIDIA ) 124 #endif 125 ! 126 !-- Ensure that OpenACC first attaches the GPU devices by copying a dummy data 127 !-- region 128 !$acc data copyin( acc_dum ) 129 104 130 ! 105 131 !-- Initialize measuring of the CPU-time remaining to the run … … 177 203 ENDIF 178 204 205 ! 206 !-- Declare and initialize variables in the accelerator memory with their 207 !-- host values 208 !$acc data copyin( diss, e, e_p, kh, km, pt, pt_p, q, ql, tend, te_m, tpt_m, tu_m, tv_m, tw_m, u, u_p, v, vpt, v_p, w, w_p ) & 209 !$acc copyin( ddzu, ddzw, dd2zu, l_grid, l_wall, ptdf_x, ptdf_y, pt_init, rdf, rdf_sc, ug, vg, zu, zw ) & 210 !$acc copyin( hom, qs, qsws, qswst, rif, rif_wall, shf, ts, tswst, us, usws, uswst, vsws, vswst, z0, z0h ) & 211 !$acc copyin( fxm, fxp, fym, fyp, fwxm, fwxp, fwym, fwyp, nzb_diff_s_inner, nzb_diff_s_outer, nzb_diff_u ) & 212 !$acc copyin( nzb_diff_v, nzb_s_inner, nzb_s_outer, nzb_u_inner ) & 213 !$acc copyin( nzb_u_outer, nzb_v_inner, nzb_v_outer, nzb_w_inner ) & 214 !$acc copyin( nzb_w_outer, wall_heatflux, wall_e_x, wall_e_y, wall_u, wall_v, wall_w_x, wall_w_y, wall_flags_0 ) 179 215 ! 180 216 !-- Integration of the model equations using timestep-scheme … … 242 278 243 279 ! 280 !-- Close the OpenACC dummy data region 281 !$acc end data 282 !$acc end data 283 284 ! 244 285 !-- Take final CPU-time for CPU-time analysis 245 286 CALL cpu_log( log_point(1), 'total', 'stop' ) -
palm/trunk/SOURCE/parin.f90
r1004 r1015 4 4 ! Current revisions: 5 5 ! ----------------- 6 ! 6 ! -adjust_mixing_length 7 7 ! 8 8 ! Former revisions: … … 164 164 165 165 166 NAMELIST /inipar/ a djust_mixing_length, alpha_surface, bc_e_b, bc_lr, &166 NAMELIST /inipar/ alpha_surface, bc_e_b, bc_lr, & 167 167 bc_ns, bc_p_b, bc_p_t, bc_pt_b, bc_pt_t, bc_q_b, & 168 168 bc_q_t,bc_s_b, bc_s_t, bc_sa_t, bc_uv_b, bc_uv_t, & -
palm/trunk/SOURCE/prandtl_fluxes.f90
r979 r1015 4 4 ! Current revisions: 5 5 ! ----------------- 6 ! 6 ! OpenACC statements added 7 7 ! 8 8 ! Former revisions: … … 65 65 66 66 INTEGER :: i, j, k 67 LOGICAL :: coupled_run 67 68 REAL :: a, b, e_q, rifm, uv_total, z_p 68 69 70 ! 71 !-- Data information for accelerators 72 !$acc data present( e, nzb_u_inner, nzb_v_inner, nzb_s_inner, pt, q, qs ) & 73 !$acc present( qsws, rif, shf, ts, u, us, usws, v, vpt, vsws, zu, zw, z0, z0h ) 69 74 ! 70 75 !-- Compute theta* … … 74 79 !-- for u* use the value from the previous time step 75 80 !$OMP PARALLEL DO 81 !$acc kernels do 76 82 DO i = nxlg, nxrg 77 83 DO j = nysg, nyng … … 90 96 !-- (the Richardson number is still the one from the previous time step) 91 97 !$OMP PARALLEL DO PRIVATE( a, b, k, z_p ) 98 !$acc kernels do 92 99 DO i = nxlg, nxrg 93 100 DO j = nysg, nyng … … 122 129 IF ( .NOT. humidity ) THEN 123 130 !$OMP PARALLEL DO PRIVATE( k, z_p ) 131 !$acc kernels do 124 132 DO i = nxlg, nxrg 125 133 DO j = nysg, nyng … … 140 148 ELSE 141 149 !$OMP PARALLEL DO PRIVATE( k, z_p ) 150 !$acc kernels do 142 151 DO i = nxlg, nxrg 143 152 DO j = nysg, nyng … … 162 171 !-- Compute u* at the scalars' grid points 163 172 !$OMP PARALLEL DO PRIVATE( a, b, k, uv_total, z_p ) 173 !$acc kernels do 164 174 DO i = nxl, nxr 165 175 DO j = nys, nyn … … 203 213 !-- Values of us at ghost point locations are needed for the evaluation of usws 204 214 !-- and vsws. 215 !$acc update host( us ) 205 216 CALL exchange_horiz_2d( us ) 217 !$acc update device( us ) 218 206 219 ! 207 220 !-- Compute u'w' for the total model domain. 208 221 !-- First compute the corresponding component of u* and square it. 209 222 !$OMP PARALLEL DO PRIVATE( a, b, k, rifm, z_p ) 223 !$acc kernels do 210 224 DO i = nxl, nxr 211 225 DO j = nys, nyn … … 245 259 !-- First compute the corresponding component of u* and square it. 246 260 !$OMP PARALLEL DO PRIVATE( a, b, k, rifm, z_p ) 261 !$acc kernels do 247 262 DO i = nxl, nxr 248 263 DO j = nys, nyn … … 285 300 !-- For a given water flux in the Prandtl layer: 286 301 !$OMP PARALLEL DO 302 !$acc kernels do 287 303 DO i = nxlg, nxrg 288 304 DO j = nysg, nyng … … 291 307 ENDDO 292 308 293 ELSE 309 ELSE 310 coupled_run = ( coupling_mode == 'atmosphere_to_ocean' .AND. run_coupled ) 294 311 !$OMP PARALLEL DO PRIVATE( a, b, k, z_p ) 312 !$acc kernels do 295 313 DO i = nxlg, nxrg 296 314 DO j = nysg, nyng … … 302 320 !-- Assume saturation for atmosphere coupled to ocean (but not 303 321 !-- in case of precursor runs) 304 IF ( coupling_mode == 'atmosphere_to_ocean' .AND. run_coupled )& 305 THEN 322 IF ( coupled_run ) THEN 306 323 e_q = 6.1 * & 307 324 EXP( 0.07 * ( MIN(pt(0,j,i),pt(1,j,i)) - 273.15 ) ) … … 334 351 !-- Exchange the boundaries for the momentum fluxes (only for sake of 335 352 !-- completeness) 353 !$acc update host( usws, vsws ) 336 354 CALL exchange_horiz_2d( usws ) 337 355 CALL exchange_horiz_2d( vsws ) 338 IF ( humidity .OR. passive_scalar ) CALL exchange_horiz_2d( qsws ) 356 !$acc update device( usws, vsws ) 357 IF ( humidity .OR. passive_scalar ) THEN 358 !$acc update host( qsws ) 359 CALL exchange_horiz_2d( qsws ) 360 !$acc update device( qsws ) 361 ENDIF 339 362 340 363 ! … … 342 365 IF ( .NOT. constant_heatflux ) THEN 343 366 !$OMP PARALLEL DO 367 !$acc kernels do 344 368 DO i = nxlg, nxrg 345 369 DO j = nysg, nyng … … 353 377 IF ( .NOT. constant_waterflux .AND. ( humidity .OR. passive_scalar ) ) THEN 354 378 !$OMP PARALLEL DO 379 !$acc kernels do 355 380 DO i = nxlg, nxrg 356 381 DO j = nysg, nyng … … 364 389 IF ( ibc_e_b == 2 ) THEN 365 390 !$OMP PARALLEL DO 391 !$acc kernels do 366 392 DO i = nxlg, nxrg 367 393 DO j = nysg, nyng … … 375 401 ENDIF 376 402 403 !$acc end data 377 404 378 405 END SUBROUTINE prandtl_fluxes -
palm/trunk/SOURCE/production_e.f90
r1008 r1015 4 4 ! Current revisions: 5 5 ! ----------------- 6 ! 6 ! accelerator version (*_acc) added 7 7 ! 8 8 ! Former revisions: … … 83 83 84 84 PRIVATE 85 PUBLIC production_e, production_e_ init85 PUBLIC production_e, production_e_acc, production_e_init 86 86 87 87 LOGICAL, SAVE :: first_call = .TRUE. … … 94 94 END INTERFACE production_e 95 95 96 INTERFACE production_e_acc 97 MODULE PROCEDURE production_e_acc 98 END INTERFACE production_e_acc 99 96 100 INTERFACE production_e_init 97 101 MODULE PROCEDURE production_e_init … … 675 679 676 680 !------------------------------------------------------------------------------! 681 ! Call for all grid points - accelerator version 682 !------------------------------------------------------------------------------! 683 SUBROUTINE production_e_acc 684 685 USE arrays_3d 686 USE cloud_parameters 687 USE control_parameters 688 USE grid_variables 689 USE indices 690 USE statistics 691 692 IMPLICIT NONE 693 694 INTEGER :: i, j, k 695 696 REAL :: def, dudx, dudy, dudz, dvdx, dvdy, dvdz, dwdx, dwdy, dwdz, & 697 k1, k2, km_neutral, theta, temp 698 699 !$acc declare create ( usvs, vsus, wsus, wsvs ) 700 REAL, DIMENSION(nzb:nzt+1,nys:nyn,nxl:nxr) :: usvs, vsus, wsus, wsvs 701 ! REAL, DIMENSION(nzb:nzt+1) :: usvs, vsus, wsus, wsvs 702 703 ! 704 !-- First calculate horizontal momentum flux u'v', w'v', v'u', w'u' at 705 !-- vertical walls, if neccessary 706 !-- CAUTION: results are slightly different from the ij-version!! 707 !-- ij-version should be called further below within the ij-loops!! 708 IF ( topography /= 'flat' ) THEN 709 CALL wall_fluxes_e_acc( usvs, 1.0, 0.0, 0.0, 0.0, wall_e_y ) 710 CALL wall_fluxes_e_acc( wsvs, 0.0, 0.0, 1.0, 0.0, wall_e_y ) 711 CALL wall_fluxes_e_acc( vsus, 0.0, 1.0, 0.0, 0.0, wall_e_x ) 712 CALL wall_fluxes_e_acc( wsus, 0.0, 0.0, 0.0, 1.0, wall_e_x ) 713 ENDIF 714 715 716 ! 717 !-- Calculate TKE production by shear 718 !$acc kernels present( ddzw, dd2zu, kh, km, nzb_diff_s_inner, nzb_diff_s_outer ) & 719 !$acc present( nzb_s_inner, nzb_s_outer, pt, q, ql, qsws, qswst, rho ) & 720 !$acc present( shf, tend, tswst, u, v, vpt, w, wall_e_x, wall_e_y ) & 721 !$acc copyin( u_0, v_0 ) 722 !$acc loop 723 DO i = nxl, nxr 724 DO j = nys, nyn 725 !$acc loop vector( 32 ) 726 DO k = 1, nzt 727 728 IF ( k >= nzb_diff_s_outer(j,i) ) THEN 729 730 dudx = ( u(k,j,i+1) - u(k,j,i) ) * ddx 731 dudy = 0.25 * ( u(k,j+1,i) + u(k,j+1,i+1) - & 732 u(k,j-1,i) - u(k,j-1,i+1) ) * ddy 733 dudz = 0.5 * ( u(k+1,j,i) + u(k+1,j,i+1) - & 734 u(k-1,j,i) - u(k-1,j,i+1) ) * dd2zu(k) 735 736 dvdx = 0.25 * ( v(k,j,i+1) + v(k,j+1,i+1) - & 737 v(k,j,i-1) - v(k,j+1,i-1) ) * ddx 738 dvdy = ( v(k,j+1,i) - v(k,j,i) ) * ddy 739 dvdz = 0.5 * ( v(k+1,j,i) + v(k+1,j+1,i) - & 740 v(k-1,j,i) - v(k-1,j+1,i) ) * dd2zu(k) 741 742 dwdx = 0.25 * ( w(k,j,i+1) + w(k-1,j,i+1) - & 743 w(k,j,i-1) - w(k-1,j,i-1) ) * ddx 744 dwdy = 0.25 * ( w(k,j+1,i) + w(k-1,j+1,i) - & 745 w(k,j-1,i) - w(k-1,j-1,i) ) * ddy 746 dwdz = ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) 747 748 def = 2.0 * ( dudx**2 + dvdy**2 + dwdz**2 ) + & 749 dudy**2 + dvdx**2 + dwdx**2 + dwdy**2 + dudz**2 + & 750 dvdz**2 + 2.0 * ( dvdx*dudy + dwdx*dudz + dwdy*dvdz ) 751 752 IF ( def < 0.0 ) def = 0.0 753 754 tend(k,j,i) = tend(k,j,i) + km(k,j,i) * def 755 756 ENDIF 757 758 ENDDO 759 ENDDO 760 ENDDO 761 762 IF ( prandtl_layer ) THEN 763 764 ! 765 !-- Position beneath wall 766 !-- (2) - Will allways be executed. 767 !-- 'bottom and wall: use u_0,v_0 and wall functions' 768 !$acc loop 769 DO i = nxl, nxr 770 DO j = nys, nyn 771 !$acc loop vector( 32 ) 772 DO k = 1, nzt 773 774 IF ( ( wall_e_x(j,i) /= 0.0 ).OR.( wall_e_y(j,i) /= 0.0 ) ) & 775 THEN 776 777 IF ( k == nzb_diff_s_inner(j,i) - 1 ) THEN 778 dudx = ( u(k,j,i+1) - u(k,j,i) ) * ddx 779 dudz = 0.5 * ( u(k+1,j,i) + u(k+1,j,i+1) - & 780 u_0(j,i) - u_0(j,i+1) ) * dd2zu(k) 781 dvdy = ( v(k,j+1,i) - v(k,j,i) ) * ddy 782 dvdz = 0.5 * ( v(k+1,j,i) + v(k+1,j+1,i) - & 783 v_0(j,i) - v_0(j+1,i) ) * dd2zu(k) 784 dwdz = ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) 785 786 IF ( wall_e_y(j,i) /= 0.0 ) THEN 787 ! 788 !-- Inconsistency removed: as the thermal stratification is 789 !-- not taken into account for the evaluation of the wall 790 !-- fluxes at vertical walls, the eddy viscosity km must not 791 !-- be used for the evaluation of the velocity gradients dudy 792 !-- and dwdy 793 !-- Note: The validity of the new method has not yet been 794 !-- shown, as so far no suitable data for a validation 795 !-- has been available 796 ! CALL wall_fluxes_e( i, j, k, nzb_diff_s_outer(j,i)-2, & 797 ! usvs, 1.0, 0.0, 0.0, 0.0 ) 798 ! CALL wall_fluxes_e( i, j, k, nzb_diff_s_outer(j,i)-2, & 799 ! wsvs, 0.0, 0.0, 1.0, 0.0 ) 800 km_neutral = kappa * & 801 ( usvs(k,j,i)**2 + wsvs(k,j,i)**2 )**0.25 * & 802 0.5 * dy 803 IF ( km_neutral > 0.0 ) THEN 804 dudy = - wall_e_y(j,i) * usvs(k,j,i) / km_neutral 805 dwdy = - wall_e_y(j,i) * wsvs(k,j,i) / km_neutral 806 ELSE 807 dudy = 0.0 808 dwdy = 0.0 809 ENDIF 810 ELSE 811 dudy = 0.25 * ( u(k,j+1,i) + u(k,j+1,i+1) - & 812 u(k,j-1,i) - u(k,j-1,i+1) ) * ddy 813 dwdy = 0.25 * ( w(k,j+1,i) + w(k-1,j+1,i) - & 814 w(k,j-1,i) - w(k-1,j-1,i) ) * ddy 815 ENDIF 816 817 IF ( wall_e_x(j,i) /= 0.0 ) THEN 818 ! 819 !-- Inconsistency removed: as the thermal stratification is 820 !-- not taken into account for the evaluation of the wall 821 !-- fluxes at vertical walls, the eddy viscosity km must not 822 !-- be used for the evaluation of the velocity gradients dvdx 823 !-- and dwdx 824 !-- Note: The validity of the new method has not yet been 825 !-- shown, as so far no suitable data for a validation 826 !-- has been available 827 ! CALL wall_fluxes_e( i, j, k, nzb_diff_s_outer(j,i)-2, & 828 ! vsus, 0.0, 1.0, 0.0, 0.0 ) 829 ! CALL wall_fluxes_e( i, j, k, nzb_diff_s_outer(j,i)-2, & 830 ! wsus, 0.0, 0.0, 0.0, 1.0 ) 831 km_neutral = kappa * & 832 ( vsus(k,j,i)**2 + wsus(k,j,i)**2 )**0.25 * & 833 0.5 * dx 834 IF ( km_neutral > 0.0 ) THEN 835 dvdx = - wall_e_x(j,i) * vsus(k,j,i) / km_neutral 836 dwdx = - wall_e_x(j,i) * wsus(k,j,i) / km_neutral 837 ELSE 838 dvdx = 0.0 839 dwdx = 0.0 840 ENDIF 841 ELSE 842 dvdx = 0.25 * ( v(k,j,i+1) + v(k,j+1,i+1) - & 843 v(k,j,i-1) - v(k,j+1,i-1) ) * ddx 844 dwdx = 0.25 * ( w(k,j,i+1) + w(k-1,j,i+1) - & 845 w(k,j,i-1) - w(k-1,j,i-1) ) * ddx 846 ENDIF 847 848 def = 2.0 * ( dudx**2 + dvdy**2 + dwdz**2 ) + & 849 dudy**2 + dvdx**2 + dwdx**2 + dwdy**2 + dudz**2 + & 850 dvdz**2 + 2.0 * ( dvdx*dudy + dwdx*dudz + dwdy*dvdz ) 851 852 IF ( def < 0.0 ) def = 0.0 853 854 tend(k,j,i) = tend(k,j,i) + km(k,j,i) * def 855 856 ENDIF 857 ! 858 !-- (3) - will be executed only, if there is at least one level 859 !-- between (2) and (4), i.e. the topography must have a 860 !-- minimum height of 2 dz. Wall fluxes for this case have 861 !-- already been calculated for (2). 862 !-- 'wall only: use wall functions' 863 864 IF ( k >= nzb_diff_s_inner(j,i) .AND. & 865 k <= nzb_diff_s_outer(j,i)-2 ) THEN 866 867 dudx = ( u(k,j,i+1) - u(k,j,i) ) * ddx 868 dudz = 0.5 * ( u(k+1,j,i) + u(k+1,j,i+1) - & 869 u(k-1,j,i) - u(k-1,j,i+1) ) * dd2zu(k) 870 dvdy = ( v(k,j+1,i) - v(k,j,i) ) * ddy 871 dvdz = 0.5 * ( v(k+1,j,i) + v(k+1,j+1,i) - & 872 v(k-1,j,i) - v(k-1,j+1,i) ) * dd2zu(k) 873 dwdz = ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) 874 875 IF ( wall_e_y(j,i) /= 0.0 ) THEN 876 ! 877 !-- Inconsistency removed: as the thermal stratification 878 !-- is not taken into account for the evaluation of the 879 !-- wall fluxes at vertical walls, the eddy viscosity km 880 !-- must not be used for the evaluation of the velocity 881 !-- gradients dudy and dwdy 882 !-- Note: The validity of the new method has not yet 883 !-- been shown, as so far no suitable data for a 884 !-- validation has been available 885 km_neutral = kappa * ( usvs(k,j,i)**2 + & 886 wsvs(k,j,i)**2 )**0.25 * 0.5 * dy 887 IF ( km_neutral > 0.0 ) THEN 888 dudy = - wall_e_y(j,i) * usvs(k,j,i) / km_neutral 889 dwdy = - wall_e_y(j,i) * wsvs(k,j,i) / km_neutral 890 ELSE 891 dudy = 0.0 892 dwdy = 0.0 893 ENDIF 894 ELSE 895 dudy = 0.25 * ( u(k,j+1,i) + u(k,j+1,i+1) - & 896 u(k,j-1,i) - u(k,j-1,i+1) ) * ddy 897 dwdy = 0.25 * ( w(k,j+1,i) + w(k-1,j+1,i) - & 898 w(k,j-1,i) - w(k-1,j-1,i) ) * ddy 899 ENDIF 900 901 IF ( wall_e_x(j,i) /= 0.0 ) THEN 902 ! 903 !-- Inconsistency removed: as the thermal stratification 904 !-- is not taken into account for the evaluation of the 905 !-- wall fluxes at vertical walls, the eddy viscosity km 906 !-- must not be used for the evaluation of the velocity 907 !-- gradients dvdx and dwdx 908 !-- Note: The validity of the new method has not yet 909 !-- been shown, as so far no suitable data for a 910 !-- validation has been available 911 km_neutral = kappa * ( vsus(k,j,i)**2 + & 912 wsus(k,j,i)**2 )**0.25 * 0.5 * dx 913 IF ( km_neutral > 0.0 ) THEN 914 dvdx = - wall_e_x(j,i) * vsus(k,j,i) / km_neutral 915 dwdx = - wall_e_x(j,i) * wsus(k,j,i) / km_neutral 916 ELSE 917 dvdx = 0.0 918 dwdx = 0.0 919 ENDIF 920 ELSE 921 dvdx = 0.25 * ( v(k,j,i+1) + v(k,j+1,i+1) - & 922 v(k,j,i-1) - v(k,j+1,i-1) ) * ddx 923 dwdx = 0.25 * ( w(k,j,i+1) + w(k-1,j,i+1) - & 924 w(k,j,i-1) - w(k-1,j,i-1) ) * ddx 925 ENDIF 926 927 def = 2.0 * ( dudx**2 + dvdy**2 + dwdz**2 ) + & 928 dudy**2 + dvdx**2 + dwdx**2 + dwdy**2 + dudz**2 + & 929 dvdz**2 + 2.0 * ( dvdx*dudy + dwdx*dudz + dwdy*dvdz ) 930 931 IF ( def < 0.0 ) def = 0.0 932 933 tend(k,j,i) = tend(k,j,i) + km(k,j,i) * def 934 935 ENDIF 936 937 ! 938 !-- (4) - will allways be executed. 939 !-- 'special case: free atmosphere' (as for case (0)) 940 IF ( k == nzb_diff_s_outer(j,i)-1 ) THEN 941 942 dudx = ( u(k,j,i+1) - u(k,j,i) ) * ddx 943 dudy = 0.25 * ( u(k,j+1,i) + u(k,j+1,i+1) - & 944 u(k,j-1,i) - u(k,j-1,i+1) ) * ddy 945 dudz = 0.5 * ( u(k+1,j,i) + u(k+1,j,i+1) - & 946 u(k-1,j,i) - u(k-1,j,i+1) ) * dd2zu(k) 947 948 dvdx = 0.25 * ( v(k,j,i+1) + v(k,j+1,i+1) - & 949 v(k,j,i-1) - v(k,j+1,i-1) ) * ddx 950 dvdy = ( v(k,j+1,i) - v(k,j,i) ) * ddy 951 dvdz = 0.5 * ( v(k+1,j,i) + v(k+1,j+1,i) - & 952 v(k-1,j,i) - v(k-1,j+1,i) ) * dd2zu(k) 953 954 dwdx = 0.25 * ( w(k,j,i+1) + w(k-1,j,i+1) - & 955 w(k,j,i-1) - w(k-1,j,i-1) ) * ddx 956 dwdy = 0.25 * ( w(k,j+1,i) + w(k-1,j+1,i) - & 957 w(k,j-1,i) - w(k-1,j-1,i) ) * ddy 958 dwdz = ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) 959 960 def = 2.0 * ( dudx**2 + dvdy**2 + dwdz**2 ) + & 961 dudy**2 + dvdx**2 + dwdx**2 + dwdy**2 + dudz**2 + & 962 dvdz**2 + 2.0 * ( dvdx*dudy + dwdx*dudz + dwdy*dvdz ) 963 964 IF ( def < 0.0 ) def = 0.0 965 966 tend(k,j,i) = tend(k,j,i) + km(k,j,i) * def 967 968 ENDIF 969 970 ENDIF 971 972 ENDDO 973 ENDDO 974 ENDDO 975 976 ! 977 !-- Position without adjacent wall 978 !-- (1) - will allways be executed. 979 !-- 'bottom only: use u_0,v_0' 980 !$acc loop 981 DO i = nxl, nxr 982 DO j = nys, nyn 983 !$acc loop vector( 32 ) 984 DO k = 1, nzt 985 986 IF ( ( wall_e_x(j,i) == 0.0 ) .AND. ( wall_e_y(j,i) == 0.0 ) ) & 987 THEN 988 989 IF ( k == nzb_diff_s_inner(j,i)-1 ) THEN 990 991 dudx = ( u(k,j,i+1) - u(k,j,i) ) * ddx 992 dudy = 0.25 * ( u(k,j+1,i) + u(k,j+1,i+1) - & 993 u(k,j-1,i) - u(k,j-1,i+1) ) * ddy 994 dudz = 0.5 * ( u(k+1,j,i) + u(k+1,j,i+1) - & 995 u_0(j,i) - u_0(j,i+1) ) * dd2zu(k) 996 997 dvdx = 0.25 * ( v(k,j,i+1) + v(k,j+1,i+1) - & 998 v(k,j,i-1) - v(k,j+1,i-1) ) * ddx 999 dvdy = ( v(k,j+1,i) - v(k,j,i) ) * ddy 1000 dvdz = 0.5 * ( v(k+1,j,i) + v(k+1,j+1,i) - & 1001 v_0(j,i) - v_0(j+1,i) ) * dd2zu(k) 1002 1003 dwdx = 0.25 * ( w(k,j,i+1) + w(k-1,j,i+1) - & 1004 w(k,j,i-1) - w(k-1,j,i-1) ) * ddx 1005 dwdy = 0.25 * ( w(k,j+1,i) + w(k-1,j+1,i) - & 1006 w(k,j-1,i) - w(k-1,j-1,i) ) * ddy 1007 dwdz = ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) 1008 1009 def = 2.0 * ( dudx**2 + dvdy**2 + dwdz**2 ) + & 1010 dudy**2 + dvdx**2 + dwdx**2 + dwdy**2 + dudz**2 + & 1011 dvdz**2 + 2.0 * ( dvdx*dudy + dwdx*dudz + dwdy*dvdz ) 1012 1013 IF ( def < 0.0 ) def = 0.0 1014 1015 tend(k,j,i) = tend(k,j,i) + km(k,j,i) * def 1016 1017 ENDIF 1018 1019 ENDIF 1020 1021 ENDDO 1022 ENDDO 1023 ENDDO 1024 1025 ELSEIF ( use_surface_fluxes ) THEN 1026 1027 !$acc loop 1028 DO i = nxl, nxr 1029 DO j = nys, nyn 1030 !$acc loop vector(32) 1031 DO k = 1, nzt 1032 1033 IF ( k == nzb_diff_s_outer(j,i)-1 ) THEN 1034 1035 dudx = ( u(k,j,i+1) - u(k,j,i) ) * ddx 1036 dudy = 0.25 * ( u(k,j+1,i) + u(k,j+1,i+1) - & 1037 u(k,j-1,i) - u(k,j-1,i+1) ) * ddy 1038 dudz = 0.5 * ( u(k+1,j,i) + u(k+1,j,i+1) - & 1039 u(k-1,j,i) - u(k-1,j,i+1) ) * dd2zu(k) 1040 1041 dvdx = 0.25 * ( v(k,j,i+1) + v(k,j+1,i+1) - & 1042 v(k,j,i-1) - v(k,j+1,i-1) ) * ddx 1043 dvdy = ( v(k,j+1,i) - v(k,j,i) ) * ddy 1044 dvdz = 0.5 * ( v(k+1,j,i) + v(k+1,j+1,i) - & 1045 v(k-1,j,i) - v(k-1,j+1,i) ) * dd2zu(k) 1046 1047 dwdx = 0.25 * ( w(k,j,i+1) + w(k-1,j,i+1) - & 1048 w(k,j,i-1) - w(k-1,j,i-1) ) * ddx 1049 dwdy = 0.25 * ( w(k,j+1,i) + w(k-1,j+1,i) - & 1050 w(k,j-1,i) - w(k-1,j-1,i) ) * ddy 1051 dwdz = ( w(k,j,i) - w(k-1,j,i) ) * ddzw(k) 1052 1053 def = 2.0 * ( dudx**2 + dvdy**2 + dwdz**2 ) + & 1054 dudy**2 + dvdx**2 + dwdx**2 + dwdy**2 + dudz**2 + & 1055 dvdz**2 + 2.0 * ( dvdx*dudy + dwdx*dudz + dwdy*dvdz ) 1056 1057 IF ( def < 0.0 ) def = 0.0 1058 1059 tend(k,j,i) = tend(k,j,i) + km(k,j,i) * def 1060 1061 ENDIF 1062 1063 ENDDO 1064 ENDDO 1065 ENDDO 1066 1067 ENDIF 1068 1069 ! 1070 !-- If required, calculate TKE production by buoyancy 1071 IF ( .NOT. neutral ) THEN 1072 1073 IF ( .NOT. humidity ) THEN 1074 1075 IF ( use_reference ) THEN 1076 1077 IF ( ocean ) THEN 1078 ! 1079 !-- So far in the ocean no special treatment of density flux 1080 !-- in the bottom and top surface layer 1081 !$acc loop 1082 DO i = nxl, nxr 1083 DO j = nys, nyn 1084 !$acc loop vector( 32 ) 1085 DO k = 1, nzt 1086 IF ( k > nzb_s_inner(j,i) ) THEN 1087 tend(k,j,i) = tend(k,j,i) + & 1088 kh(k,j,i) * g / rho_reference * & 1089 ( rho(k+1,j,i) - rho(k-1,j,i) ) * & 1090 dd2zu(k) 1091 ENDIF 1092 ENDDO 1093 ENDDO 1094 ENDDO 1095 1096 ELSE 1097 1098 !$acc loop 1099 DO i = nxl, nxr 1100 DO j = nys, nyn 1101 !$acc loop vector( 32 ) 1102 DO k = 1, nzt_diff 1103 IF ( k >= nzb_diff_s_inner(j,i) ) THEN 1104 tend(k,j,i) = tend(k,j,i) - & 1105 kh(k,j,i) * g / pt_reference * & 1106 ( pt(k+1,j,i) - pt(k-1,j,i) ) * & 1107 dd2zu(k) 1108 ENDIF 1109 1110 IF ( k == nzb_diff_s_inner(j,i)-1 .AND. & 1111 use_surface_fluxes ) THEN 1112 tend(k,j,i) = tend(k,j,i) + g / pt_reference * & 1113 shf(j,i) 1114 ENDIF 1115 1116 IF ( k == nzt .AND. use_top_fluxes ) THEN 1117 tend(k,j,i) = tend(k,j,i) + g / pt_reference * & 1118 tswst(j,i) 1119 ENDIF 1120 ENDDO 1121 ENDDO 1122 ENDDO 1123 1124 ENDIF 1125 1126 ELSE 1127 1128 IF ( ocean ) THEN 1129 ! 1130 !-- So far in the ocean no special treatment of density flux 1131 !-- in the bottom and top surface layer 1132 !$acc loop 1133 DO i = nxl, nxr 1134 DO j = nys, nyn 1135 !$acc loop vector( 32 ) 1136 DO k = 1, nzt 1137 IF ( k > nzb_s_inner(j,i) ) THEN 1138 tend(k,j,i) = tend(k,j,i) + & 1139 kh(k,j,i) * g / rho(k,j,i) * & 1140 ( rho(k+1,j,i) - rho(k-1,j,i) ) * & 1141 dd2zu(k) 1142 ENDIF 1143 ENDDO 1144 ENDDO 1145 ENDDO 1146 1147 ELSE 1148 1149 !$acc loop 1150 DO i = nxl, nxr 1151 DO j = nys, nyn 1152 !$acc loop vector( 32 ) 1153 DO k = 1, nzt_diff 1154 IF( k >= nzb_diff_s_inner(j,i) ) THEN 1155 tend(k,j,i) = tend(k,j,i) - & 1156 kh(k,j,i) * g / pt(k,j,i) * & 1157 ( pt(k+1,j,i) - pt(k-1,j,i) ) * & 1158 dd2zu(k) 1159 ENDIF 1160 1161 IF ( k == nzb_diff_s_inner(j,i)-1 .AND. & 1162 use_surface_fluxes ) THEN 1163 tend(k,j,i) = tend(k,j,i) + g / pt(k,j,i) * & 1164 shf(j,i) 1165 ENDIF 1166 1167 IF ( k == nzt .AND. use_top_fluxes ) THEN 1168 tend(k,j,i) = tend(k,j,i) + g / pt(k,j,i) * & 1169 tswst(j,i) 1170 ENDIF 1171 ENDDO 1172 ENDDO 1173 ENDDO 1174 1175 ENDIF 1176 1177 ENDIF 1178 1179 ELSE 1180 ! 1181 !++ This part gives the PGI compiler problems in the previous loop 1182 !++ even without any acc statements???? 1183 ! STOP '+++ production_e problems with acc-directives' 1184 ! !acc loop 1185 ! DO i = nxl, nxr 1186 ! DO j = nys, nyn 1187 ! !acc loop vector( 32 ) 1188 ! DO k = 1, nzt_diff 1189 ! 1190 ! IF ( k >= nzb_diff_s_inner(j,i) ) THEN 1191 ! 1192 ! IF ( .NOT. cloud_physics .AND. .NOT. cloud_droplets ) THEN 1193 ! k1 = 1.0 + 0.61 * q(k,j,i) 1194 ! k2 = 0.61 * pt(k,j,i) 1195 ! tend(k,j,i) = tend(k,j,i) - kh(k,j,i) * & 1196 ! g / vpt(k,j,i) * & 1197 ! ( k1 * ( pt(k+1,j,i)-pt(k-1,j,i) ) + & 1198 ! k2 * ( q(k+1,j,i) - q(k-1,j,i) ) & 1199 ! ) * dd2zu(k) 1200 ! ELSE IF ( cloud_physics ) THEN 1201 ! IF ( ql(k,j,i) == 0.0 ) THEN 1202 ! k1 = 1.0 + 0.61 * q(k,j,i) 1203 ! k2 = 0.61 * pt(k,j,i) 1204 ! ELSE 1205 ! theta = pt(k,j,i) + pt_d_t(k) * l_d_cp * ql(k,j,i) 1206 ! temp = theta * t_d_pt(k) 1207 ! k1 = ( 1.0 - q(k,j,i) + 1.61 * & 1208 ! ( q(k,j,i) - ql(k,j,i) ) * & 1209 ! ( 1.0 + 0.622 * l_d_r / temp ) ) / & 1210 ! ( 1.0 + 0.622 * l_d_r * l_d_cp * & 1211 ! ( q(k,j,i) - ql(k,j,i) ) / ( temp * temp ) ) 1212 ! k2 = theta * ( l_d_cp / temp * k1 - 1.0 ) 1213 ! ENDIF 1214 ! tend(k,j,i) = tend(k,j,i) - kh(k,j,i) * & 1215 ! g / vpt(k,j,i) * & 1216 ! ( k1 * ( pt(k+1,j,i)-pt(k-1,j,i) ) + & 1217 ! k2 * ( q(k+1,j,i) - q(k-1,j,i) ) & 1218 ! ) * dd2zu(k) 1219 ! ELSE IF ( cloud_droplets ) THEN 1220 ! k1 = 1.0 + 0.61 * q(k,j,i) - ql(k,j,i) 1221 ! k2 = 0.61 * pt(k,j,i) 1222 ! tend(k,j,i) = tend(k,j,i) - & 1223 ! kh(k,j,i) * g / vpt(k,j,i) * & 1224 ! ( k1 * ( pt(k+1,j,i)- pt(k-1,j,i) ) + & 1225 ! k2 * ( q(k+1,j,i) - q(k-1,j,i) ) - & 1226 ! pt(k,j,i) * ( ql(k+1,j,i) - & 1227 ! ql(k-1,j,i) ) ) * dd2zu(k) 1228 ! ENDIF 1229 ! 1230 ! ENDIF 1231 ! 1232 ! ENDDO 1233 ! ENDDO 1234 ! ENDDO 1235 ! 1236 1237 !!++ Next two loops are probably very inefficiently parallellized 1238 !!++ and will require better optimization 1239 ! IF ( use_surface_fluxes ) THEN 1240 ! 1241 ! !acc loop 1242 ! DO i = nxl, nxr 1243 ! DO j = nys, nyn 1244 ! !acc loop vector( 32 ) 1245 ! DO k = 1, nzt_diff 1246 ! 1247 ! IF ( k == nzb_diff_s_inner(j,i)-1 ) THEN 1248 ! 1249 ! IF ( .NOT. cloud_physics .AND. .NOT. cloud_droplets ) THEN 1250 ! k1 = 1.0 + 0.61 * q(k,j,i) 1251 ! k2 = 0.61 * pt(k,j,i) 1252 ! ELSE IF ( cloud_physics ) THEN 1253 ! IF ( ql(k,j,i) == 0.0 ) THEN 1254 ! k1 = 1.0 + 0.61 * q(k,j,i) 1255 ! k2 = 0.61 * pt(k,j,i) 1256 ! ELSE 1257 ! theta = pt(k,j,i) + pt_d_t(k) * l_d_cp * ql(k,j,i) 1258 ! temp = theta * t_d_pt(k) 1259 ! k1 = ( 1.0 - q(k,j,i) + 1.61 * & 1260 ! ( q(k,j,i) - ql(k,j,i) ) * & 1261 ! ( 1.0 + 0.622 * l_d_r / temp ) ) / & 1262 ! ( 1.0 + 0.622 * l_d_r * l_d_cp * & 1263 ! ( q(k,j,i) - ql(k,j,i) ) / ( temp * temp ) ) 1264 ! k2 = theta * ( l_d_cp / temp * k1 - 1.0 ) 1265 ! ENDIF 1266 ! ELSE IF ( cloud_droplets ) THEN 1267 ! k1 = 1.0 + 0.61 * q(k,j,i) - ql(k,j,i) 1268 ! k2 = 0.61 * pt(k,j,i) 1269 ! ENDIF 1270 ! 1271 ! tend(k,j,i) = tend(k,j,i) + g / vpt(k,j,i) * & 1272 ! ( k1* shf(j,i) + k2 * qsws(j,i) ) 1273 ! ENDIF 1274 ! 1275 ! ENDDO 1276 ! ENDDO 1277 ! ENDDO 1278 ! 1279 ! ENDIF 1280 ! 1281 ! IF ( use_top_fluxes ) THEN 1282 ! 1283 ! !acc loop 1284 ! DO i = nxl, nxr 1285 ! DO j = nys, nyn 1286 ! !acc loop vector( 32 ) 1287 ! DO k = 1, nzt 1288 ! IF ( k == nzt ) THEN 1289 ! 1290 ! IF ( .NOT. cloud_physics .AND. .NOT. cloud_droplets ) THEN 1291 ! k1 = 1.0 + 0.61 * q(k,j,i) 1292 ! k2 = 0.61 * pt(k,j,i) 1293 ! ELSE IF ( cloud_physics ) THEN 1294 ! IF ( ql(k,j,i) == 0.0 ) THEN 1295 ! k1 = 1.0 + 0.61 * q(k,j,i) 1296 ! k2 = 0.61 * pt(k,j,i) 1297 ! ELSE 1298 ! theta = pt(k,j,i) + pt_d_t(k) * l_d_cp * ql(k,j,i) 1299 ! temp = theta * t_d_pt(k) 1300 ! k1 = ( 1.0 - q(k,j,i) + 1.61 * & 1301 ! ( q(k,j,i) - ql(k,j,i) ) * & 1302 ! ( 1.0 + 0.622 * l_d_r / temp ) ) / & 1303 ! ( 1.0 + 0.622 * l_d_r * l_d_cp * & 1304 ! ( q(k,j,i) - ql(k,j,i) ) / ( temp * temp ) ) 1305 ! k2 = theta * ( l_d_cp / temp * k1 - 1.0 ) 1306 ! ENDIF 1307 ! ELSE IF ( cloud_droplets ) THEN 1308 ! k1 = 1.0 + 0.61 * q(k,j,i) - ql(k,j,i) 1309 ! k2 = 0.61 * pt(k,j,i) 1310 ! ENDIF 1311 ! 1312 ! tend(k,j,i) = tend(k,j,i) + g / vpt(k,j,i) * & 1313 ! ( k1* tswst(j,i) + k2 * qswst(j,i) ) 1314 ! 1315 ! ENDIF 1316 ! 1317 ! ENDDO 1318 ! ENDDO 1319 ! ENDDO 1320 ! 1321 ! ENDIF 1322 1323 ENDIF 1324 1325 ENDIF 1326 !$acc end kernels 1327 1328 END SUBROUTINE production_e_acc 1329 1330 1331 !------------------------------------------------------------------------------! 677 1332 ! Call for grid point i,j 678 1333 !------------------------------------------------------------------------------! -
palm/trunk/SOURCE/prognostic_equations.f90
r1002 r1015 4 4 ! Current revisions: 5 5 ! ----------------- 6 ! 6 ! new branch prognostic_equations_acc 7 ! OpenACC statements added + code changes required for GPU optimization 7 8 ! 8 9 ! Former revisions: … … 136 137 PRIVATE 137 138 PUBLIC prognostic_equations_noopt, prognostic_equations_cache, & 138 prognostic_equations_vector 139 prognostic_equations_vector, prognostic_equations_acc 139 140 140 141 INTERFACE prognostic_equations_noopt … … 149 150 MODULE PROCEDURE prognostic_equations_vector 150 151 END INTERFACE prognostic_equations_vector 152 153 INTERFACE prognostic_equations_acc 154 MODULE PROCEDURE prognostic_equations_acc 155 END INTERFACE prognostic_equations_acc 151 156 152 157 … … 1900 1905 1901 1906 1907 SUBROUTINE prognostic_equations_acc 1908 1909 !------------------------------------------------------------------------------! 1910 ! Version for accelerator boards 1911 !------------------------------------------------------------------------------! 1912 1913 IMPLICIT NONE 1914 1915 CHARACTER (LEN=9) :: time_to_string 1916 INTEGER :: i, j, k, runge_step 1917 REAL :: sbt 1918 1919 ! 1920 !-- Set switch for intermediate Runge-Kutta step 1921 runge_step = 0 1922 IF ( timestep_scheme(1:5) == 'runge' ) THEN 1923 IF ( intermediate_timestep_count == 1 ) THEN 1924 runge_step = 1 1925 ELSEIF ( intermediate_timestep_count < & 1926 intermediate_timestep_count_max ) THEN 1927 runge_step = 2 1928 ENDIF 1929 ENDIF 1930 1931 ! 1932 !-- Calculate those variables needed in the tendency terms which need 1933 !-- global communication 1934 IF ( .NOT. neutral ) CALL calc_mean_profile( pt, 4 ) 1935 IF ( ocean ) CALL calc_mean_profile( rho, 64 ) 1936 IF ( humidity ) CALL calc_mean_profile( vpt, 44 ) 1937 IF ( ( ws_scheme_mom .OR. ws_scheme_sca ) .AND. & 1938 intermediate_timestep_count == 1 ) CALL ws_statistics 1939 1940 ! 1941 !-- u-velocity component 1942 !++ Statistics still not ported to accelerators 1943 !$acc update device( hom ) 1944 CALL cpu_log( log_point(5), 'u-equation', 'start' ) 1945 1946 IF ( timestep_scheme(1:5) == 'runge' ) THEN 1947 IF ( ws_scheme_mom ) THEN 1948 CALL advec_u_ws_acc 1949 ELSE 1950 tend = 0.0 ! to be removed later?? 1951 CALL advec_u_pw 1952 ENDIF 1953 ELSE 1954 CALL advec_u_up 1955 ENDIF 1956 CALL diffusion_u_acc 1957 CALL coriolis_acc( 1 ) 1958 IF ( sloping_surface .AND. .NOT. neutral ) THEN 1959 CALL buoyancy( pt, pt_reference, 1, 4 ) 1960 ENDIF 1961 1962 ! 1963 !-- Drag by plant canopy 1964 IF ( plant_canopy ) CALL plant_canopy_model( 1 ) 1965 1966 ! 1967 !-- External pressure gradient 1968 IF ( dp_external ) THEN 1969 DO i = nxlu, nxr 1970 DO j = nys, nyn 1971 DO k = dp_level_ind_b+1, nzt 1972 tend(k,j,i) = tend(k,j,i) - dpdxy(1) * dp_smooth_factor(k) 1973 ENDDO 1974 ENDDO 1975 ENDDO 1976 ENDIF 1977 1978 CALL user_actions( 'u-tendency' ) 1979 1980 ! 1981 !-- Prognostic equation for u-velocity component 1982 !$acc kernels present( nzb_u_inner, rdf, tend, tu_m, u, ug, u_p ) 1983 !$acc loop 1984 DO i = nxlu, nxr 1985 DO j = nys, nyn 1986 !$acc loop vector( 32 ) 1987 DO k = 1, nzt 1988 IF ( k > nzb_u_inner(j,i) ) THEN 1989 u_p(k,j,i) = u(k,j,i) + dt_3d * ( tsc(2) * tend(k,j,i) + & 1990 tsc(3) * tu_m(k,j,i) ) & 1991 - tsc(5) * rdf(k) * ( u(k,j,i) - ug(k) ) 1992 ! 1993 !-- Tendencies for the next Runge-Kutta step 1994 IF ( runge_step == 1 ) THEN 1995 tu_m(k,j,i) = tend(k,j,i) 1996 ELSEIF ( runge_step == 2 ) THEN 1997 tu_m(k,j,i) = -9.5625 * tend(k,j,i) + 5.3125 * tu_m(k,j,i) 1998 ENDIF 1999 ENDIF 2000 ENDDO 2001 ENDDO 2002 ENDDO 2003 !$acc end kernels 2004 2005 CALL cpu_log( log_point(5), 'u-equation', 'stop' ) 2006 !$acc update host( u_p ) 2007 2008 ! 2009 !-- v-velocity component 2010 CALL cpu_log( log_point(6), 'v-equation', 'start' ) 2011 2012 IF ( timestep_scheme(1:5) == 'runge' ) THEN 2013 IF ( ws_scheme_mom ) THEN 2014 CALL advec_v_ws_acc 2015 ELSE 2016 tend = 0.0 ! to be removed later?? 2017 CALL advec_v_pw 2018 END IF 2019 ELSE 2020 CALL advec_v_up 2021 ENDIF 2022 CALL diffusion_v_acc 2023 CALL coriolis_acc( 2 ) 2024 2025 ! 2026 !-- Drag by plant canopy 2027 IF ( plant_canopy ) CALL plant_canopy_model( 2 ) 2028 2029 ! 2030 !-- External pressure gradient 2031 IF ( dp_external ) THEN 2032 DO i = nxl, nxr 2033 DO j = nysv, nyn 2034 DO k = dp_level_ind_b+1, nzt 2035 tend(k,j,i) = tend(k,j,i) - dpdxy(2) * dp_smooth_factor(k) 2036 ENDDO 2037 ENDDO 2038 ENDDO 2039 ENDIF 2040 2041 CALL user_actions( 'v-tendency' ) 2042 2043 ! 2044 !-- Prognostic equation for v-velocity component 2045 !$acc kernels present( nzb_v_inner, rdf, tend, tv_m, v, vg, v_p ) 2046 !$acc loop 2047 DO i = nxl, nxr 2048 DO j = nysv, nyn 2049 !$acc loop vector( 32 ) 2050 DO k = 1, nzt 2051 IF ( k > nzb_v_inner(j,i) ) THEN 2052 v_p(k,j,i) = v(k,j,i) + dt_3d * ( tsc(2) * tend(k,j,i) + & 2053 tsc(3) * tv_m(k,j,i) ) & 2054 - tsc(5) * rdf(k) * ( v(k,j,i) - vg(k) ) 2055 ! 2056 !-- Tendencies for the next Runge-Kutta step 2057 IF ( runge_step == 1 ) THEN 2058 tv_m(k,j,i) = tend(k,j,i) 2059 ELSEIF ( runge_step == 2 ) THEN 2060 tv_m(k,j,i) = -9.5625 * tend(k,j,i) + 5.3125 * tv_m(k,j,i) 2061 ENDIF 2062 ENDIF 2063 ENDDO 2064 ENDDO 2065 ENDDO 2066 !$acc end kernels 2067 2068 CALL cpu_log( log_point(6), 'v-equation', 'stop' ) 2069 !$acc update host( v_p ) 2070 2071 ! 2072 !-- w-velocity component 2073 CALL cpu_log( log_point(7), 'w-equation', 'start' ) 2074 2075 IF ( timestep_scheme(1:5) == 'runge' ) THEN 2076 IF ( ws_scheme_mom ) THEN 2077 CALL advec_w_ws_acc 2078 ELSE 2079 tend = 0.0 ! to be removed later?? 2080 CALL advec_w_pw 2081 ENDIF 2082 ELSE 2083 CALL advec_w_up 2084 ENDIF 2085 CALL diffusion_w_acc 2086 CALL coriolis_acc( 3 ) 2087 2088 IF ( .NOT. neutral ) THEN 2089 IF ( ocean ) THEN 2090 CALL buoyancy( rho, rho_reference, 3, 64 ) 2091 ELSE 2092 IF ( .NOT. humidity ) THEN 2093 CALL buoyancy_acc( pt, pt_reference, 3, 4 ) 2094 ELSE 2095 CALL buoyancy( vpt, pt_reference, 3, 44 ) 2096 ENDIF 2097 ENDIF 2098 ENDIF 2099 2100 ! 2101 !-- Drag by plant canopy 2102 IF ( plant_canopy ) CALL plant_canopy_model( 3 ) 2103 2104 CALL user_actions( 'w-tendency' ) 2105 2106 ! 2107 !-- Prognostic equation for w-velocity component 2108 !$acc kernels present( nzb_w_inner, rdf, tend, tw_m, w, w_p ) 2109 !$acc loop 2110 DO i = nxl, nxr 2111 DO j = nys, nyn 2112 !$acc loop vector( 32 ) 2113 DO k = 1, nzt-1 2114 IF ( k > nzb_w_inner(j,i) ) THEN 2115 w_p(k,j,i) = w(k,j,i) + dt_3d * ( tsc(2) * tend(k,j,i) + & 2116 tsc(3) * tw_m(k,j,i) ) & 2117 - tsc(5) * rdf(k) * w(k,j,i) 2118 ! 2119 !-- Tendencies for the next Runge-Kutta step 2120 IF ( runge_step == 1 ) THEN 2121 tw_m(k,j,i) = tend(k,j,i) 2122 ELSEIF ( runge_step == 2 ) THEN 2123 tw_m(k,j,i) = -9.5625 * tend(k,j,i) + 5.3125 * tw_m(k,j,i) 2124 ENDIF 2125 ENDIF 2126 ENDDO 2127 ENDDO 2128 ENDDO 2129 !$acc end kernels 2130 2131 CALL cpu_log( log_point(7), 'w-equation', 'stop' ) 2132 !$acc update host( w_p ) 2133 2134 2135 ! 2136 !-- If required, compute prognostic equation for potential temperature 2137 IF ( .NOT. neutral ) THEN 2138 2139 CALL cpu_log( log_point(13), 'pt-equation', 'start' ) 2140 2141 ! 2142 !-- pt-tendency terms with communication 2143 sbt = tsc(2) 2144 IF ( scalar_advec == 'bc-scheme' ) THEN 2145 2146 IF ( timestep_scheme(1:5) /= 'runge' ) THEN 2147 ! 2148 !-- Bott-Chlond scheme always uses Euler time step. Thus: 2149 sbt = 1.0 2150 ENDIF 2151 tend = 0.0 2152 CALL advec_s_bc( pt, 'pt' ) 2153 2154 ENDIF 2155 2156 ! 2157 !-- pt-tendency terms with no communication 2158 IF ( scalar_advec /= 'bc-scheme' ) THEN 2159 tend = 0.0 2160 IF ( timestep_scheme(1:5) == 'runge' ) THEN 2161 IF ( ws_scheme_sca ) THEN 2162 CALL advec_s_ws_acc( pt, 'pt' ) 2163 ELSE 2164 tend = 0.0 ! to be removed later?? 2165 CALL advec_s_pw( pt ) 2166 ENDIF 2167 ELSE 2168 CALL advec_s_up( pt ) 2169 ENDIF 2170 ENDIF 2171 2172 CALL diffusion_s_acc( pt, shf, tswst, wall_heatflux ) 2173 2174 ! 2175 !-- If required compute heating/cooling due to long wave radiation processes 2176 IF ( radiation ) THEN 2177 CALL calc_radiation 2178 ENDIF 2179 2180 ! 2181 !-- If required compute impact of latent heat due to precipitation 2182 IF ( precipitation ) THEN 2183 CALL impact_of_latent_heat 2184 ENDIF 2185 2186 ! 2187 !-- Consideration of heat sources within the plant canopy 2188 IF ( plant_canopy .AND. ( cthf /= 0.0 ) ) THEN 2189 CALL plant_canopy_model( 4 ) 2190 ENDIF 2191 2192 ! 2193 !-- If required compute influence of large-scale subsidence/ascent 2194 IF ( large_scale_subsidence ) THEN 2195 CALL subsidence( tend, pt, pt_init ) 2196 ENDIF 2197 2198 CALL user_actions( 'pt-tendency' ) 2199 2200 ! 2201 !-- Prognostic equation for potential temperature 2202 !$acc kernels present( nzb_s_inner, rdf_sc, ptdf_x, ptdf_y, pt_init ) & 2203 !$acc present( tend, tpt_m, pt, pt_p ) 2204 !$acc loop 2205 DO i = nxl, nxr 2206 DO j = nys, nyn 2207 !$acc loop vector( 32 ) 2208 DO k = 1, nzt 2209 IF ( k > nzb_s_inner(j,i) ) THEN 2210 pt_p(k,j,i) = pt(k,j,i) + dt_3d * ( sbt * tend(k,j,i) + & 2211 tsc(3) * tpt_m(k,j,i) ) & 2212 - tsc(5) * ( pt(k,j,i) - pt_init(k) ) *& 2213 ( rdf_sc(k) + ptdf_x(i) + ptdf_y(j) ) 2214 ! 2215 !-- Tendencies for the next Runge-Kutta step 2216 IF ( runge_step == 1 ) THEN 2217 tpt_m(k,j,i) = tend(k,j,i) 2218 ELSEIF ( runge_step == 2 ) THEN 2219 tpt_m(k,j,i) = -9.5625 * tend(k,j,i) + 5.3125 * tpt_m(k,j,i) 2220 ENDIF 2221 ENDIF 2222 ENDDO 2223 ENDDO 2224 ENDDO 2225 !$acc end kernels 2226 2227 CALL cpu_log( log_point(13), 'pt-equation', 'stop' ) 2228 !$acc update host( pt_p ) 2229 2230 ENDIF 2231 2232 ! 2233 !-- If required, compute prognostic equation for salinity 2234 IF ( ocean ) THEN 2235 2236 CALL cpu_log( log_point(37), 'sa-equation', 'start' ) 2237 2238 ! 2239 !-- sa-tendency terms with communication 2240 sbt = tsc(2) 2241 IF ( scalar_advec == 'bc-scheme' ) THEN 2242 2243 IF ( timestep_scheme(1:5) /= 'runge' ) THEN 2244 ! 2245 !-- Bott-Chlond scheme always uses Euler time step. Thus: 2246 sbt = 1.0 2247 ENDIF 2248 tend = 0.0 2249 CALL advec_s_bc( sa, 'sa' ) 2250 2251 ENDIF 2252 2253 ! 2254 !-- sa-tendency terms with no communication 2255 IF ( scalar_advec /= 'bc-scheme' ) THEN 2256 tend = 0.0 2257 IF ( timestep_scheme(1:5) == 'runge' ) THEN 2258 IF ( ws_scheme_sca ) THEN 2259 CALL advec_s_ws( sa, 'sa' ) 2260 ELSE 2261 CALL advec_s_pw( sa ) 2262 ENDIF 2263 ELSE 2264 CALL advec_s_up( sa ) 2265 ENDIF 2266 ENDIF 2267 2268 CALL diffusion_s( sa, saswsb, saswst, wall_salinityflux ) 2269 2270 CALL user_actions( 'sa-tendency' ) 2271 2272 ! 2273 !-- Prognostic equation for salinity 2274 DO i = nxl, nxr 2275 DO j = nys, nyn 2276 DO k = nzb_s_inner(j,i)+1, nzt 2277 sa_p(k,j,i) = sa(k,j,i) + dt_3d * ( sbt * tend(k,j,i) + & 2278 tsc(3) * tsa_m(k,j,i) ) & 2279 - tsc(5) * rdf_sc(k) * & 2280 ( sa(k,j,i) - sa_init(k) ) 2281 IF ( sa_p(k,j,i) < 0.0 ) sa_p(k,j,i) = 0.1 * sa(k,j,i) 2282 ! 2283 !-- Tendencies for the next Runge-Kutta step 2284 IF ( runge_step == 1 ) THEN 2285 tsa_m(k,j,i) = tend(k,j,i) 2286 ELSEIF ( runge_step == 2 ) THEN 2287 tsa_m(k,j,i) = -9.5625 * tend(k,j,i) + 5.3125 * tsa_m(k,j,i) 2288 ENDIF 2289 ENDDO 2290 ENDDO 2291 ENDDO 2292 2293 CALL cpu_log( log_point(37), 'sa-equation', 'stop' ) 2294 2295 ! 2296 !-- Calculate density by the equation of state for seawater 2297 CALL cpu_log( log_point(38), 'eqns-seawater', 'start' ) 2298 CALL eqn_state_seawater 2299 CALL cpu_log( log_point(38), 'eqns-seawater', 'stop' ) 2300 2301 ENDIF 2302 2303 ! 2304 !-- If required, compute prognostic equation for total water content / scalar 2305 IF ( humidity .OR. passive_scalar ) THEN 2306 2307 CALL cpu_log( log_point(29), 'q/s-equation', 'start' ) 2308 2309 ! 2310 !-- Scalar/q-tendency terms with communication 2311 sbt = tsc(2) 2312 IF ( scalar_advec == 'bc-scheme' ) THEN 2313 2314 IF ( timestep_scheme(1:5) /= 'runge' ) THEN 2315 ! 2316 !-- Bott-Chlond scheme always uses Euler time step. Thus: 2317 sbt = 1.0 2318 ENDIF 2319 tend = 0.0 2320 CALL advec_s_bc( q, 'q' ) 2321 2322 ENDIF 2323 2324 ! 2325 !-- Scalar/q-tendency terms with no communication 2326 IF ( scalar_advec /= 'bc-scheme' ) THEN 2327 tend = 0.0 2328 IF ( timestep_scheme(1:5) == 'runge' ) THEN 2329 IF ( ws_scheme_sca ) THEN 2330 CALL advec_s_ws( q, 'q' ) 2331 ELSE 2332 CALL advec_s_pw( q ) 2333 ENDIF 2334 ELSE 2335 CALL advec_s_up( q ) 2336 ENDIF 2337 ENDIF 2338 2339 CALL diffusion_s( q, qsws, qswst, wall_qflux ) 2340 2341 ! 2342 !-- If required compute decrease of total water content due to 2343 !-- precipitation 2344 IF ( precipitation ) THEN 2345 CALL calc_precipitation 2346 ENDIF 2347 2348 ! 2349 !-- Sink or source of scalar concentration due to canopy elements 2350 IF ( plant_canopy ) CALL plant_canopy_model( 5 ) 2351 2352 ! 2353 !-- If required compute influence of large-scale subsidence/ascent 2354 IF ( large_scale_subsidence ) THEN 2355 CALL subsidence( tend, q, q_init ) 2356 ENDIF 2357 2358 CALL user_actions( 'q-tendency' ) 2359 2360 ! 2361 !-- Prognostic equation for total water content / scalar 2362 DO i = nxl, nxr 2363 DO j = nys, nyn 2364 DO k = nzb_s_inner(j,i)+1, nzt 2365 q_p(k,j,i) = q(k,j,i) + dt_3d * ( sbt * tend(k,j,i) + & 2366 tsc(3) * tq_m(k,j,i) ) & 2367 - tsc(5) * rdf_sc(k) * & 2368 ( q(k,j,i) - q_init(k) ) 2369 IF ( q_p(k,j,i) < 0.0 ) q_p(k,j,i) = 0.1 * q(k,j,i) 2370 ! 2371 !-- Tendencies for the next Runge-Kutta step 2372 IF ( runge_step == 1 ) THEN 2373 tq_m(k,j,i) = tend(k,j,i) 2374 ELSEIF ( runge_step == 2 ) THEN 2375 tq_m(k,j,i) = -9.5625 * tend(k,j,i) + 5.3125 * tq_m(k,j,i) 2376 ENDIF 2377 ENDDO 2378 ENDDO 2379 ENDDO 2380 2381 CALL cpu_log( log_point(29), 'q/s-equation', 'stop' ) 2382 2383 ENDIF 2384 2385 ! 2386 !-- If required, compute prognostic equation for turbulent kinetic 2387 !-- energy (TKE) 2388 IF ( .NOT. constant_diffusion ) THEN 2389 2390 CALL cpu_log( log_point(16), 'tke-equation', 'start' ) 2391 2392 ! 2393 !-- TKE-tendency terms with communication 2394 CALL production_e_init 2395 2396 sbt = tsc(2) 2397 IF ( .NOT. use_upstream_for_tke ) THEN 2398 IF ( scalar_advec == 'bc-scheme' ) THEN 2399 2400 IF ( timestep_scheme(1:5) /= 'runge' ) THEN 2401 ! 2402 !-- Bott-Chlond scheme always uses Euler time step. Thus: 2403 sbt = 1.0 2404 ENDIF 2405 tend = 0.0 2406 CALL advec_s_bc( e, 'e' ) 2407 2408 ENDIF 2409 ENDIF 2410 2411 ! 2412 !-- TKE-tendency terms with no communication 2413 IF ( scalar_advec /= 'bc-scheme' .OR. use_upstream_for_tke ) THEN 2414 IF ( use_upstream_for_tke ) THEN 2415 tend = 0.0 2416 CALL advec_s_up( e ) 2417 ELSE 2418 IF ( timestep_scheme(1:5) == 'runge' ) THEN 2419 IF ( ws_scheme_sca ) THEN 2420 CALL advec_s_ws_acc( e, 'e' ) 2421 ELSE 2422 tend = 0.0 ! to be removed later?? 2423 CALL advec_s_pw( e ) 2424 ENDIF 2425 ELSE 2426 tend = 0.0 ! to be removed later?? 2427 CALL advec_s_up( e ) 2428 ENDIF 2429 ENDIF 2430 ENDIF 2431 2432 IF ( .NOT. humidity ) THEN 2433 IF ( ocean ) THEN 2434 CALL diffusion_e( prho, prho_reference ) 2435 ELSE 2436 CALL diffusion_e_acc( pt, pt_reference ) 2437 ENDIF 2438 ELSE 2439 CALL diffusion_e( vpt, pt_reference ) 2440 ENDIF 2441 2442 CALL production_e_acc 2443 2444 ! 2445 !-- Additional sink term for flows through plant canopies 2446 IF ( plant_canopy ) CALL plant_canopy_model( 6 ) 2447 CALL user_actions( 'e-tendency' ) 2448 2449 ! 2450 !-- Prognostic equation for TKE. 2451 !-- Eliminate negative TKE values, which can occur due to numerical 2452 !-- reasons in the course of the integration. In such cases the old TKE 2453 !-- value is reduced by 90%. 2454 !$acc kernels present( e, e_p, nzb_s_inner, tend, te_m ) 2455 !$acc loop 2456 DO i = nxl, nxr 2457 DO j = nys, nyn 2458 !$acc loop vector( 32 ) 2459 DO k = 1, nzt 2460 IF ( k > nzb_s_inner(j,i) ) THEN 2461 e_p(k,j,i) = e(k,j,i) + dt_3d * ( sbt * tend(k,j,i) + & 2462 tsc(3) * te_m(k,j,i) ) 2463 IF ( e_p(k,j,i) < 0.0 ) e_p(k,j,i) = 0.1 * e(k,j,i) 2464 ! 2465 !-- Tendencies for the next Runge-Kutta step 2466 IF ( runge_step == 1 ) THEN 2467 te_m(k,j,i) = tend(k,j,i) 2468 ELSEIF ( runge_step == 2 ) THEN 2469 te_m(k,j,i) = -9.5625 * tend(k,j,i) + 5.3125 * te_m(k,j,i) 2470 ENDIF 2471 ENDIF 2472 ENDDO 2473 ENDDO 2474 ENDDO 2475 !$acc end kernels 2476 2477 CALL cpu_log( log_point(16), 'tke-equation', 'stop' ) 2478 !$acc update host( e_p ) 2479 2480 ENDIF 2481 2482 2483 END SUBROUTINE prognostic_equations_acc 2484 2485 1902 2486 END MODULE prognostic_equations_mod -
palm/trunk/SOURCE/read_var_list.f90
r1004 r1015 4 4 ! Current revisions: 5 5 ! ------------------ 6 ! 6 ! -adjust_mixing_length 7 7 ! 8 8 ! Former revisions: … … 228 228 SELECT CASE ( TRIM( variable_chr ) ) 229 229 230 CASE ( 'adjust_mixing_length' )231 READ ( 13 ) adjust_mixing_length232 230 CASE ( 'advected_distance_x' ) 233 231 READ ( 13 ) advected_distance_x -
palm/trunk/SOURCE/time_integration.f90
r1002 r1015 4 4 ! Current revisions: 5 5 ! ----------------- 6 ! 6 ! +call of prognostic_equations_acc 7 7 ! 8 8 ! Former revisions: … … 176 176 IF ( loop_optimization == 'vector' ) THEN 177 177 CALL prognostic_equations_vector 178 ELSEIF ( loop_optimization == 'acc' ) THEN 179 CALL prognostic_equations_acc 178 180 ELSE 179 181 IF ( scalar_advec == 'bc-scheme' ) THEN … … 284 286 CALL pres 285 287 ENDIF 288 ! 289 !-- Update device memory for calculating diffusion quantities and for next 290 !-- timestep 291 !$acc update device( e, pt, u, v, w ) 292 !$acc update device( q ) if ( allocated( q ) ) 286 293 287 294 ! 288 295 !-- If required, compute virtuell potential temperature 289 IF ( humidity ) CALL compute_vpt 296 IF ( humidity ) THEN 297 CALL compute_vpt 298 !$acc update device( vpt ) 299 ENDIF 290 300 291 301 ! 292 302 !-- If required, compute liquid water content 293 IF ( cloud_physics ) CALL calc_liquid_water_content 303 IF ( cloud_physics ) THEN 304 CALL calc_liquid_water_content 305 !$acc update device( ql ) 306 ENDIF 294 307 295 308 ! … … 303 316 CALL prandtl_fluxes 304 317 CALL cpu_log( log_point(19), 'prandtl_fluxes', 'stop' ) 318 ! 319 !++ Statistics still require updates on host 320 !$acc update host( qs, qsws, rif, shf, ts ) 305 321 ENDIF 306 322 … … 318 334 ENDIF 319 335 CALL cpu_log( log_point(17), 'diffusivities', 'stop' ) 336 ! 337 !++ Statistics still require update of diffusivities on host 338 !$acc update host( kh, km ) 320 339 321 340 ENDIF -
palm/trunk/SOURCE/wall_fluxes.f90
r667 r1015 3 3 ! Current revisions: 4 4 ! ----------------- 5 ! 5 ! accelerator version (*_acc) added 6 6 ! 7 7 ! Former revisions: … … 32 32 !------------------------------------------------------------------------------! 33 33 PRIVATE 34 PUBLIC wall_fluxes, wall_fluxes_ e34 PUBLIC wall_fluxes, wall_fluxes_acc, wall_fluxes_e, wall_fluxes_e_acc 35 35 36 36 INTERFACE wall_fluxes … … 39 39 END INTERFACE wall_fluxes 40 40 41 INTERFACE wall_fluxes_acc 42 MODULE PROCEDURE wall_fluxes_acc 43 END INTERFACE wall_fluxes_acc 44 41 45 INTERFACE wall_fluxes_e 42 46 MODULE PROCEDURE wall_fluxes_e … … 44 48 END INTERFACE wall_fluxes_e 45 49 50 INTERFACE wall_fluxes_e_acc 51 MODULE PROCEDURE wall_fluxes_e_acc 52 END INTERFACE wall_fluxes_e_acc 53 46 54 CONTAINS 47 55 … … 189 197 END SUBROUTINE wall_fluxes 190 198 199 200 !------------------------------------------------------------------------------! 201 ! Call for all grid points - accelerator version 202 !------------------------------------------------------------------------------! 203 SUBROUTINE wall_fluxes_acc( wall_flux, a, b, c1, c2, nzb_uvw_inner, & 204 nzb_uvw_outer, wall ) 205 206 USE arrays_3d 207 USE control_parameters 208 USE grid_variables 209 USE indices 210 USE statistics 211 212 IMPLICIT NONE 213 214 INTEGER :: i, j, k, max_outer, min_inner, wall_index 215 216 INTEGER, DIMENSION(nysg:nyng,nxlg:nxrg) :: nzb_uvw_inner, & 217 nzb_uvw_outer 218 REAL :: a, b, c1, c2, h1, h2, zp 219 REAL :: pts, pt_i, rifs, u_i, v_i, us_wall, vel_total, ws, wspts 220 221 REAL, DIMENSION(nysg:nyng,nxlg:nxrg) :: wall 222 REAL, DIMENSION(nzb:nzt+1,nys:nyn,nxl:nxr) :: wall_flux 223 224 225 zp = 0.5 * ( (a+c1) * dy + (b+c2) * dx ) 226 wall_flux = 0.0 227 wall_index = NINT( a+ 2*b + 3*c1 + 4*c2 ) 228 229 min_inner = MINVAL( nzb_uvw_inner(nys:nyn,nxl:nxr) ) + 1 230 max_outer = MINVAL( nzb_uvw_outer(nys:nyn,nxl:nxr) ) 231 232 !$acc kernels present( hom, nzb_uvw_inner, nzb_uvw_outer, pt, rif_wall ) & 233 !$acc present( u, v, w, wall, wall_flux, z0 ) 234 !$acc loop 235 DO i = nxl, nxr 236 DO j = nys, nyn 237 !$acc loop vector( 32 ) 238 DO k = min_inner, max_outer 239 ! 240 !-- All subsequent variables are computed for the respective 241 !-- location where the respective flux is defined. 242 IF ( k >= nzb_uvw_inner(j,i)+1 .AND. & 243 k <= nzb_uvw_outer(j,i) .AND. wall(j,i) /= 0.0 ) THEN 244 ! 245 !-- (1) Compute rifs, u_i, v_i, ws, pt' and w'pt' 246 rifs = rif_wall(k,j,i,wall_index) 247 248 u_i = a * u(k,j,i) + c1 * 0.25 * & 249 ( u(k+1,j,i+1) + u(k+1,j,i) + u(k,j,i+1) + u(k,j,i) ) 250 251 v_i = b * v(k,j,i) + c2 * 0.25 * & 252 ( v(k+1,j+1,i) + v(k+1,j,i) + v(k,j+1,i) + v(k,j,i) ) 253 254 ws = ( c1 + c2 ) * w(k,j,i) + 0.25 * ( & 255 a * ( w(k-1,j,i-1) + w(k-1,j,i) + w(k,j,i-1) + w(k,j,i) ) & 256 + b * ( w(k-1,j-1,i) + w(k-1,j,i) + w(k,j-1,i) + w(k,j,i) ) & 257 ) 258 pt_i = 0.5 * ( pt(k,j,i) + a * pt(k,j,i-1) + & 259 b * pt(k,j-1,i) + ( c1 + c2 ) * pt(k+1,j,i) ) 260 261 pts = pt_i - hom(k,1,4,0) 262 wspts = ws * pts 263 264 ! 265 !-- (2) Compute wall-parallel absolute velocity vel_total 266 vel_total = SQRT( ws**2 + (a+c1) * u_i**2 + (b+c2) * v_i**2 ) 267 268 ! 269 !-- (3) Compute wall friction velocity us_wall 270 IF ( rifs >= 0.0 ) THEN 271 272 ! 273 !-- Stable stratification (and neutral) 274 us_wall = kappa * vel_total / ( LOG( zp / z0(j,i) ) + & 275 5.0 * rifs * ( zp - z0(j,i) ) / zp & 276 ) 277 ELSE 278 279 ! 280 !-- Unstable stratification 281 h1 = SQRT( SQRT( 1.0 - 16.0 * rifs ) ) 282 h2 = SQRT( SQRT( 1.0 - 16.0 * rifs * z0(j,i) / zp ) ) 283 284 us_wall = kappa * vel_total / ( & 285 LOG( zp / z0(j,i) ) - & 286 LOG( ( 1.0 + h1 )**2 * ( 1.0 + h1**2 ) / ( & 287 ( 1.0 + h2 )**2 * ( 1.0 + h2**2 ) ) ) + & 288 2.0 * ( ATAN( h1 ) - ATAN( h2 ) ) & 289 ) 290 ENDIF 291 292 ! 293 !-- (4) Compute zp/L (corresponds to neutral Richardson flux 294 !-- number rifs) 295 rifs = -1.0 * zp * kappa * g * wspts / ( pt_i * & 296 ( us_wall**3 + 1E-30 ) ) 297 298 ! 299 !-- Limit the value range of the Richardson numbers. 300 !-- This is necessary for very small velocities (u,w --> 0), 301 !-- because the absolute value of rif can then become very 302 !-- large, which in consequence would result in very large 303 !-- shear stresses and very small momentum fluxes (both are 304 !-- generally unrealistic). 305 IF ( rifs < rif_min ) rifs = rif_min 306 IF ( rifs > rif_max ) rifs = rif_max 307 308 ! 309 !-- (5) Compute wall_flux (u'v', v'u', w'v', or w'u') 310 IF ( rifs >= 0.0 ) THEN 311 312 ! 313 !-- Stable stratification (and neutral) 314 wall_flux(k,j,i) = kappa * & 315 ( a*u(k,j,i) + b*v(k,j,i) + (c1+c2)*w(k,j,i) ) / & 316 ( LOG( zp / z0(j,i) ) + & 317 5.0 * rifs * ( zp - z0(j,i) ) / zp & 318 ) 319 ELSE 320 321 ! 322 !-- Unstable stratification 323 h1 = SQRT( SQRT( 1.0 - 16.0 * rifs ) ) 324 h2 = SQRT( SQRT( 1.0 - 16.0 * rifs * z0(j,i) / zp ) ) 325 326 wall_flux(k,j,i) = kappa * & 327 ( a*u(k,j,i) + b*v(k,j,i) + (c1+c2)*w(k,j,i) ) / ( & 328 LOG( zp / z0(j,i) ) - & 329 LOG( ( 1.0 + h1 )**2 * ( 1.0 + h1**2 ) / ( & 330 ( 1.0 + h2 )**2 * ( 1.0 + h2**2 ) ) ) + & 331 2.0 * ( ATAN( h1 ) - ATAN( h2 ) ) & 332 ) 333 ENDIF 334 wall_flux(k,j,i) = -wall_flux(k,j,i) * us_wall 335 336 ! 337 !-- store rifs for next time step 338 rif_wall(k,j,i,wall_index) = rifs 339 340 ENDIF 341 342 ENDDO 343 ENDDO 344 ENDDO 345 !$acc end kernels 346 347 END SUBROUTINE wall_fluxes_acc 191 348 192 349 … … 444 601 END SUBROUTINE wall_fluxes_e 445 602 603 604 !------------------------------------------------------------------------------! 605 ! Call for all grid points - accelerator version 606 !------------------------------------------------------------------------------! 607 SUBROUTINE wall_fluxes_e_acc( wall_flux, a, b, c1, c2, wall ) 608 609 !------------------------------------------------------------------------------! 610 ! Description: 611 ! ------------ 612 ! Calculates momentum fluxes at vertical walls for routine production_e 613 ! assuming Monin-Obukhov similarity. 614 ! Indices: usvs a=1, vsus b=1, wsvs c1=1, wsus c2=1 (other=0). 615 !------------------------------------------------------------------------------! 616 617 USE arrays_3d 618 USE control_parameters 619 USE grid_variables 620 USE indices 621 USE statistics 622 623 IMPLICIT NONE 624 625 INTEGER :: i, j, k, kk, max_outer, min_inner, wall_index 626 REAL :: a, b, c1, c2, h1, h2, u_i, v_i, us_wall, vel_total, vel_zp, & 627 ws, zp 628 629 REAL :: rifs 630 631 REAL, DIMENSION(nysg:nyng,nxlg:nxrg) :: wall 632 REAL, DIMENSION(nzb:nzt+1,nys:nyn,nxl:nxr) :: wall_flux 633 634 635 zp = 0.5 * ( (a+c1) * dy + (b+c2) * dx ) 636 wall_flux = 0.0 637 wall_index = NINT( a+ 2*b + 3*c1 + 4*c2 ) 638 639 min_inner = MINVAL( nzb_diff_s_inner(nys:nyn,nxl:nxr) ) - 1 640 max_outer = MAXVAL( nzb_diff_s_outer(nys:nyn,nxl:nxr) ) - 2 641 642 !$acc kernels present( nzb_diff_s_inner, nzb_diff_s_outer, pt, rif_wall ) & 643 !$acc present( u, v, w, wall, wall_flux, z0 ) 644 !$acc loop 645 DO i = nxl, nxr 646 DO j = nys, nyn 647 !$acc loop vector(32) 648 DO k = min_inner, max_outer 649 ! 650 !-- All subsequent variables are computed for scalar locations 651 IF ( k >= nzb_diff_s_inner(j,i)-1 .AND. & 652 k <= nzb_diff_s_outer(j,i)-2 .AND. wall(j,i) /= 0.0 ) THEN 653 ! 654 !-- (1) Compute rifs, u_i, v_i, and ws 655 IF ( k == nzb_diff_s_inner(j,i)-1 ) THEN 656 kk = nzb_diff_s_inner(j,i)-1 657 ELSE 658 kk = k-1 659 ENDIF 660 rifs = 0.5 * ( rif_wall(k,j,i,wall_index) + & 661 a * rif_wall(k,j,i+1,1) + b * rif_wall(k,j+1,i,2) + & 662 c1 * rif_wall(kk,j,i,3) + c2 * rif_wall(kk,j,i,4) & 663 ) 664 665 u_i = 0.5 * ( u(k,j,i) + u(k,j,i+1) ) 666 v_i = 0.5 * ( v(k,j,i) + v(k,j+1,i) ) 667 ws = 0.5 * ( w(k,j,i) + w(k-1,j,i) ) 668 ! 669 !-- (2) Compute wall-parallel absolute velocity vel_total and 670 !-- interpolate appropriate velocity component vel_zp. 671 vel_total = SQRT( ws**2 + (a+c1) * u_i**2 + (b+c2) * v_i**2 ) 672 vel_zp = 0.5 * ( a * u_i + b * v_i + (c1+c2) * ws ) 673 ! 674 !-- (3) Compute wall friction velocity us_wall 675 IF ( rifs >= 0.0 ) THEN 676 677 ! 678 !-- Stable stratification (and neutral) 679 us_wall = kappa * vel_total / ( LOG( zp / z0(j,i) ) + & 680 5.0 * rifs * ( zp - z0(j,i) ) / zp & 681 ) 682 ELSE 683 684 ! 685 !-- Unstable stratification 686 h1 = SQRT( SQRT( 1.0 - 16.0 * rifs ) ) 687 h2 = SQRT( SQRT( 1.0 - 16.0 * rifs * z0(j,i) / zp ) ) 688 689 us_wall = kappa * vel_total / ( & 690 LOG( zp / z0(j,i) ) - & 691 LOG( ( 1.0 + h1 )**2 * ( 1.0 + h1**2 ) / ( & 692 ( 1.0 + h2 )**2 * ( 1.0 + h2**2 ) ) ) + & 693 2.0 * ( ATAN( h1 ) - ATAN( h2 ) ) & 694 ) 695 ENDIF 696 697 ! 698 !-- Skip step (4) of wall_fluxes, because here rifs is already 699 !-- available from (1) 700 ! 701 !-- (5) Compute wall_flux (u'v', v'u', w'v', or w'u') 702 703 IF ( rifs >= 0.0 ) THEN 704 705 ! 706 !-- Stable stratification (and neutral) 707 wall_flux(k,j,i) = kappa * vel_zp / & 708 ( LOG( zp/z0(j,i) ) + 5.0*rifs * ( zp-z0(j,i) ) / zp ) 709 ELSE 710 711 ! 712 !-- Unstable stratification 713 h1 = SQRT( SQRT( 1.0 - 16.0 * rifs ) ) 714 h2 = SQRT( SQRT( 1.0 - 16.0 * rifs * z0(j,i) / zp ) ) 715 716 wall_flux(k,j,i) = kappa * vel_zp / ( & 717 LOG( zp / z0(j,i) ) - & 718 LOG( ( 1.0 + h1 )**2 * ( 1.0 + h1**2 ) / ( & 719 ( 1.0 + h2 )**2 * ( 1.0 + h2**2 ) ) ) + & 720 2.0 * ( ATAN( h1 ) - ATAN( h2 ) ) & 721 ) 722 ENDIF 723 wall_flux(k,j,i) = - wall_flux(k,j,i) * us_wall 724 725 ENDIF 726 727 ENDDO 728 ENDDO 729 ENDDO 730 !$acc end kernels 731 732 END SUBROUTINE wall_fluxes_e_acc 446 733 447 734 -
palm/trunk/SOURCE/write_var_list.f90
r1004 r1015 4 4 ! Current revisions: 5 5 ! ----------------- 6 ! 6 ! -adjust_mixing_length 7 7 ! 8 8 ! Former revisions: … … 150 150 !-- list in read_var_list. 151 151 152 WRITE ( 14 ) 'adjust_mixing_length '153 WRITE ( 14 ) adjust_mixing_length154 152 WRITE ( 14 ) 'advected_distance_x ' 155 153 WRITE ( 14 ) advected_distance_x
Note: See TracChangeset
for help on using the changeset viewer.