Ignore:
Timestamp:
Sep 27, 2012 9:23:24 AM (9 years ago)
Author:
raasch
Message:

Starting with changes required for GPU optimization. OpenACC statements for using NVIDIA GPUs added.
Adjustment of mixing length to the Prandtl mixing length at first grid point above ground removed.
Mask array is set to zero for ghost boundaries.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • palm/trunk/SOURCE/palm.f90

    r863 r1015  
    44! Current revisions:
    55! -----------------
    6 !
     6! OpenACC statements added + code changes required for GPU optimization
    77!
    88! Former revisions:
     
    7777    USE statistics
    7878
     79#if defined( __openacc )
     80    USE OPENACC
     81#endif
     82
    7983    IMPLICIT NONE
    8084
     
    8488    CHARACTER (LEN=1) ::  cdum
    8589    INTEGER           ::  i, run_description_header_i(80)
     90#if defined( __openacc )
     91    REAL, DIMENSION(100) ::  acc_dum
     92#endif
    8693
    8794    version = 'PALM 3.8a'
     
    102109#endif
    103110
     111#if defined( __openacc )
     112!
     113!-- Get the number of accelerator boards per node and assign the MPI processes
     114!-- to these boards
     115    num_acc_per_node  = ACC_GET_NUM_DEVICES( ACC_DEVICE_NVIDIA )
     116    acc_rank = MOD( myid, num_acc_per_node )
     117    CALL ACC_SET_DEVICE_NUM ( acc_rank, ACC_DEVICE_NVIDIA )
     118!
     119!-- Test output (to be removed later)
     120    WRITE (*,'(A,I4,A,I3,A,I3,A,I3)') '*** Connect MPI-Task ', myid,' to CPU ',&
     121                                      acc_rank, ' Devices: ', num_acc_per_node,&
     122                                      ' connected to:',                        &
     123                                      ACC_GET_DEVICE_NUM( ACC_DEVICE_NVIDIA )
     124#endif
     125!
     126!-- Ensure that OpenACC first attaches the GPU devices by copying a dummy data
     127!-- region
     128    !$acc data copyin( acc_dum )
     129
    104130!
    105131!-- Initialize measuring of the CPU-time remaining to the run
     
    177203    ENDIF
    178204
     205!
     206!-- Declare and initialize variables in the accelerator memory with their
     207!-- host values
     208    !$acc  data copyin( diss, e, e_p, kh, km, pt, pt_p, q, ql, tend, te_m, tpt_m, tu_m, tv_m, tw_m, u, u_p, v, vpt, v_p, w, w_p )          &
     209    !$acc       copyin( ddzu, ddzw, dd2zu, l_grid, l_wall, ptdf_x, ptdf_y, pt_init, rdf, rdf_sc, ug, vg, zu, zw )   &
     210    !$acc       copyin( hom, qs, qsws, qswst, rif, rif_wall, shf, ts, tswst, us, usws, uswst, vsws, vswst, z0, z0h )      &
     211    !$acc       copyin( fxm, fxp, fym, fyp, fwxm, fwxp, fwym, fwyp, nzb_diff_s_inner, nzb_diff_s_outer, nzb_diff_u )       &
     212    !$acc       copyin( nzb_diff_v, nzb_s_inner, nzb_s_outer, nzb_u_inner )    &
     213    !$acc       copyin( nzb_u_outer, nzb_v_inner, nzb_v_outer, nzb_w_inner )   &
     214    !$acc       copyin( nzb_w_outer, wall_heatflux, wall_e_x, wall_e_y, wall_u, wall_v, wall_w_x, wall_w_y, wall_flags_0 )
    179215!
    180216!-- Integration of the model equations using timestep-scheme
     
    242278
    243279!
     280!-- Close the OpenACC data regions (model variable region and dummy region)
     281    !$acc end data
     282    !$acc end data
     283
     284!
    244285!-- Take final CPU-time for CPU-time analysis
    245286    CALL cpu_log( log_point(1), 'total', 'stop' )
Note: See TracChangeset for help on using the changeset viewer.