Ignore:
Timestamp:
Oct 18, 2014 12:34:45 PM (10 years ago)
Author:
raasch
Message:

adjustments for using CUDA-aware MPI

File:
1 edited

Legend:

Unmodified
Added
Removed
  • palm/trunk/SOURCE/palm.f90

    r1469 r1482  
    2020! Current revisions:
    2121! -----------------
    22 !
     22! adjustments for using CUDA-aware OpenMPI
    2323!
    2424! Former revisions:
     
    136136!
    137137!-- Local variables
    138     CHARACTER(LEN=9) ::  time_to_string  !:
    139     INTEGER(iwp)     ::  i               !:
     138    CHARACTER(LEN=9)  ::  time_to_string  !:
     139    CHARACTER(LEN=10) ::  env_string      !: to store string of environment var
     140    INTEGER(iwp)      ::  env_stat        !: to hold status of GET_ENV
     141    INTEGER(iwp)      ::  i               !:
     142    INTEGER(iwp)      ::  myid_openmpi    !: OpenMPI local rank for CUDA aware MPI
    140143#if defined( __openacc )
    141144    REAL(wp), DIMENSION(100) ::  acc_dum     !:
     
    161164#if defined( __openacc )
    162165!
     166!-- Get the local MPI rank in case of CUDA aware OpenMPI. Important, if there
     167!-- is more than one accelerator board on the node
     168    CALL GET_ENVIRONMENT_VARIABLE('OMPI_COMM_WORLD_LOCAL_RANK',                &
     169         VALUE=env_string, STATUS=env_stat )
     170    READ( env_string, '(I1)' )  myid_openmpi
     171    PRINT*, '### local_rank = ', myid_openmpi, '  status=',env_stat
     172!
    163173!-- Get the number of accelerator boards per node and assign the MPI processes
    164174!-- to these boards
     
    166176    num_acc_per_node  = ACC_GET_NUM_DEVICES( ACC_DEVICE_NVIDIA )
    167177    IF ( numprocs == 1  .AND.  num_acc_per_node > 0 )  num_acc_per_node = 1
    168     PRINT*, '*** myid = ', myid, ' num_acc_per_node = ', num_acc_per_node
    169     acc_rank = MOD( myid, num_acc_per_node )
    170 !    STOP '****'
     178    PRINT*, '*** myid = ', myid_openmpi, ' num_acc_per_node = ', num_acc_per_node
     179    acc_rank = MOD( myid_openmpi, num_acc_per_node )
    171180    CALL ACC_SET_DEVICE_NUM ( acc_rank, ACC_DEVICE_NVIDIA )
    172181!
    173182!-- Test output (to be removed later)
    174     WRITE (*,'(A,I6,A,I3,A,I3,A,I3)') '*** Connect MPI-Task ', myid,' to CPU ',&
    175                                       acc_rank, ' Devices: ', num_acc_per_node,&
    176                                       ' connected to:',                        &
     183    WRITE (*,'(A,I6,A,I3,A,I3,A,I3)') '*** Connect MPI-Task ', myid_openmpi,   &
     184                                      ' to CPU ', acc_rank, ' Devices: ',      &
     185                                      num_acc_per_node, ' connected to:',      &
    177186                                      ACC_GET_DEVICE_NUM( ACC_DEVICE_NVIDIA )
    178187#endif
Note: See TracChangeset for help on using the changeset viewer.