Changeset 3689 for palm/trunk/SOURCE


Ignore:
Timestamp:
Jan 22, 2019 7:34:35 PM (6 years ago)
Author:
knoop
Message:

Added multi-GPU capability when using OpenACC.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • palm/trunk/SOURCE/palm.f90

    r3687 r3689  
    324324        ONLY:  wrd_global, wrd_local
    325325
     326#if defined( __parallel) && defined( _OPENACC )
     327    USE openacc
     328#endif
     329
    326330
    327331    IMPLICIT NONE
     
    331335    CHARACTER(LEN=9) ::  time_to_string  !<
    332336    INTEGER(iwp)     ::  i               !< loop counter for blocked I/O
     337#if defined( __parallel) && defined( _OPENACC )
     338    INTEGER(iwp)     :: local_comm       !< local communicator (shared memory)
     339    INTEGER(iwp)     :: local_num_procs  !< local number of processes
     340    INTEGER(iwp)     :: local_id         !< local id
     341    INTEGER(acc_device_kind) :: device_type !< device type for OpenACC
     342    INTEGER(iwp)     ::  num_devices     !< number of devices visible to OpenACC
     343    INTEGER(iwp)     ::  my_device       !< device used by this process
     344#endif
    333345
    334346    version = 'PALM 6.0'
     
    368380       CALL init_coupling
    369381    ENDIF
     382
     383#ifdef _OPENACC
     384!
     385!-- Select the OpenACC device to use in this process. To do so, determine how
     386!-- many processes run on the same node and which local id this process has.
     387    IF ( nested_run )  THEN
     388       CALL MPI_COMM_SPLIT_TYPE( comm_palm, MPI_COMM_TYPE_SHARED, 0,           &
     389                                 MPI_INFO_NULL, local_comm, ierr )
     390    ELSE
     391       CALL MPI_COMM_SPLIT_TYPE( MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0,      &
     392                                 MPI_INFO_NULL, local_comm, ierr )
     393    ENDIF
     394    CALL MPI_COMM_SIZE( local_comm, local_num_procs, ierr )
     395    CALL MPI_COMM_RANK( local_comm, local_id, ierr )
     396
     397!
     398!-- This loop, including the barrier, is a workaround for PGI compiler versions
     399!-- up to and including 18.4. Later releases are able to select their GPUs in
     400!-- parallel, without running into spurious errors.
     401    DO i = 0, local_num_procs-1
     402       CALL MPI_BARRIER( local_comm, ierr )
     403
     404       IF ( i == local_id )  THEN
     405          device_type = acc_get_device_type()
     406          num_devices = acc_get_num_devices( device_type )
     407          my_device = MOD( local_id, num_devices )
     408          CALL acc_set_device_num( my_device, device_type )
     409       ENDIF
     410    ENDDO
     411
     412    CALL MPI_COMM_FREE( local_comm, ierr )
     413#endif
    370414#endif
    371415
Note: See TracChangeset for help on using the changeset viewer.