Home

Context Navigation

← Previous Change
Next Change →

radiation_model_mod.f90

Timestamp:

Sep 18, 2020 9:23:09 AM (4 years ago)

Author:

pavelkrc

Message:

Add option to limit the size of MPI_Alltoall calls in radiation_model

File:

: 1 edited

palm/trunk/SOURCE/radiation_model_mod.f90 (modified) (13 diffs)

Legend:

: Unmodified
: Added
: Removed

palm/trunk/SOURCE/radiation_model_mod.f90

-                      r4679
+                      r4683
 ! -----------------
 ! $Id$
+! Add option to limit the size of MPI_Alltoall calls
+!
+! 4679 2020-09-14 13:53:52Z pavelkrc
 ! Enable warning about unrealistically large radiative fluxes by default
+!
 …
 !-- configuration parameters (they can be setup in PALM config)
+    INTEGER(iwp)                                   ::  bufsize_alltoall = 0               !< max no. of items to send in mpi_alltoall at once (0=infinite)
     LOGICAL                                        ::  raytrace_mpi_rma = .TRUE.          !< use MPI RMA to access LAD and gridsurf from remote processes during raytracing
     LOGICAL                                        ::  rad_angular_discretization = .TRUE.!< whether to use fixed resolution discretization of view factors for
 …
        NAMELIST /radiation_par/   albedo, albedo_lw_dif, albedo_lw_dir,         &
                                   albedo_sw_dif, albedo_sw_dir, albedo_type,    &
+                                  bufsize_alltoall,                             &
                                   constant_albedo, dt_radiation, emissivity,    &
                                   lw_radiation, max_raytracing_dist,            &
 …
        NAMELIST /radiation_parameters/ albedo, albedo_lw_dif, albedo_lw_dir,    &
                                   albedo_sw_dif, albedo_sw_dir, albedo_type,    &
+                                  bufsize_alltoall,                             &
                                   constant_albedo, dt_radiation, emissivity,    &
                                   lw_radiation, max_raytracing_dist,            &
 …
         IMPLICIT NONE
-        INTEGER(iwp)                                  :: i, j, k, d, ip, jp
+        INTEGER(iwp)                                  :: i, j, k, ip, jp
         INTEGER(iwp)                                  :: isvf, ksvf, icsf, kcsf, npcsfl, isvf_surflt, imrt, imrtf, ipcgb
         INTEGER(iwp)                                  :: sd, td
 …
         INTEGER(iwp), DIMENSION(:), ALLOCATABLE,TARGET:: kcsflt_l,kpcsflt_l
         INTEGER(iwp), DIMENSION(:,:), POINTER         :: kcsflt,kpcsflt
-        INTEGER(iwp), DIMENSION(:), ALLOCATABLE       :: icsflt,dcsflt,ipcsflt,dpcsflt
+        INTEGER(iwp), DIMENSION(:), ALLOCATABLE       :: icsflt
         REAL(wp), DIMENSION(3)                        :: uv
         LOGICAL                                       :: visible
 …
             kcsflt(1:kdcsf,1:udim) => kcsflt_l(1:kdcsf*udim)
             ALLOCATE( icsflt(0:numprocs-1) )
-            ALLOCATE( dcsflt(0:numprocs-1) )
-            ALLOCATE( ipcsflt(0:numprocs-1) )
-            ALLOCATE( dpcsflt(0:numprocs-1) )
 !--         fill out arrays of csf values and
 …
 !--         for particular processors
             icsflt = 0
-            dcsflt = 0
             ip = -1
             j = -1
-            d = 0
             DO kcsf = 1, ncsfl
                 j = j+1
 …
 !--                 number of elements of previous block
                     IF ( ip>=0) icsflt(ip) = j
-                    d = d+j
 !--                 blank blocks
                     DO jp = ip+1, acsf(kcsf)%ip-1
 !--                     number of elements is zero; displacements are derived later from the counts
                         icsflt(jp) = 0
-                        dcsflt(jp) = d
                     ENDDO
 !--                 the actual block
                     ip = acsf(kcsf)%ip
-                    dcsflt(ip) = d
                     j = 0
                 ENDIF
 …
             j = j+1
             IF ( ip>=0 ) icsflt(ip) = j
-            d = d+j
             DO jp = ip+1, numprocs-1
 !--             number of elements is zero; displacements are derived later from the counts
                 icsflt(jp) = 0
-                dcsflt(jp) = d
             ENDDO
 …
 #if defined( __parallel )
-!--         scatter and gather the number of elements to and from all processor
-!--         and calculate displacements
-            IF ( debug_output )  CALL debug_message( 'Scatter and gather the number of elements to and from all processor', 'info' )
-            CALL MPI_AlltoAll(icsflt,1,MPI_INTEGER,ipcsflt,1,MPI_INTEGER,comm2d, ierr)
-            IF ( ierr /= 0 ) THEN
-                WRITE(9,*) 'Error MPI_AlltoAll1:', ierr, SIZE(icsflt), SIZE(ipcsflt)
-                FLUSH(9)
-            ENDIF
-            npcsfl = SUM(ipcsflt)
-            d = 0
-            DO i = 0, numprocs-1
-                dpcsflt(i) = d
-                d = d + ipcsflt(i)
-            ENDDO
-!--         exchange csf fields between processors
+!
+!--         Exchange csf fields between processors
             IF ( debug_output )  CALL debug_message( 'Exchange CSF fields between processors', 'start' )
-            udim = max(npcsfl,1)
-            ALLOCATE( pcsflt_l(ndcsf*udim) )
-            pcsflt(1:ndcsf,1:udim) => pcsflt_l(1:ndcsf*udim)
-            ALLOCATE( kpcsflt_l(kdcsf*udim) )
-            kpcsflt(1:kdcsf,1:udim) => kpcsflt_l(1:kdcsf*udim)
-            CALL MPI_AlltoAllv(csflt_l, ndcsf*icsflt, ndcsf*dcsflt, MPI_REAL, &
-                pcsflt_l, ndcsf*ipcsflt, ndcsf*dpcsflt, MPI_REAL, comm2d, ierr)
-            IF ( ierr /= 0 ) THEN
-                WRITE(9,*) 'Error MPI_AlltoAllv1:', ierr, SIZE(ipcsflt), ndcsf*icsflt, &
-                            ndcsf*dcsflt, SIZE(pcsflt_l),ndcsf*ipcsflt, ndcsf*dpcsflt
-                FLUSH(9)
-            ENDIF
-            IF ( debug_output )  CALL debug_message( 'Exchange CSF fields: finished first part', 'info' )
-            CALL MPI_AlltoAllv(kcsflt_l, kdcsf*icsflt, kdcsf*dcsflt, MPI_INTEGER, &
-                kpcsflt_l, kdcsf*ipcsflt, kdcsf*dpcsflt, MPI_INTEGER, comm2d, ierr)
-            IF ( ierr /= 0 ) THEN
-                WRITE(9,*) 'Error MPI_AlltoAllv2:', ierr, SIZE(kcsflt_l),kdcsf*icsflt, &
-                           kdcsf*dcsflt, SIZE(kpcsflt_l), kdcsf*ipcsflt, kdcsf*dpcsflt
-                FLUSH(9)
-            ENDIF
+            CALL radiation_exchange_alltoall(icsflt, kdcsf, ndcsf, kcsflt_l, csflt_l,              &
+                                             npcsfl, kpcsflt_l, pcsflt_l)
+            pcsflt(1:ndcsf,1:npcsfl) => pcsflt_l(0:ndcsf*npcsfl-1)
+            kpcsflt(1:kdcsf,1:npcsfl) => kpcsflt_l(0:kdcsf*npcsfl-1)
             IF ( debug_output )  CALL debug_message( 'Exchange CSF fields between processors', 'end' )
 …
             DEALLOCATE( kcsflt_l )
             DEALLOCATE( icsflt )
-            DEALLOCATE( dcsflt )
-            DEALLOCATE( ipcsflt )
-            DEALLOCATE( dpcsflt )
 !--         sort csf ( a version of quicksort )
 …
+!--------------------------------------------------------------------------------------------------!
+!
+! Description:
+! ------------
+!> Performs MPI alltoall exchange for integer and floating-point data, optionally splitting the
+!> exchange to multiple iterations with maximum number of items per iteration.
+!--------------------------------------------------------------------------------------------------!
+ SUBROUTINE radiation_exchange_alltoall(ntosend, npint, npfloat, isendbuf, fsendbuf,               &
+                                        nrecv, irecvbuf, frecvbuf)
+!
+!-- Contract (as implemented below):
+!--  * A "record" consists of npint integers (in isendbuf) and npfloat floats (in fsendbuf).
+!--  * Records in the send buffers are expected to be stored contiguously, grouped by destination
+!--    process in rank order: the send displacement of process p is SUM(ntosend(0:p-1)).
+!--  * The receive buffers are allocated here with lower bound 0 and sizes nrecv*npint and
+!--    nrecv*npfloat; received records are grouped by source process in rank order.
+!--  * If bufsize_alltoall > 0, the exchange is split into several MPI_Alltoallv rounds, each of
+!--    which transfers at most bufsize_alltoall records per pair of processes. Otherwise everything
+!--    is transferred in a single round.
+!--  * MPI errors are reported to unit 9; the routine does not abort on them.
+    IMPLICIT NONE
+    INTEGER(iwp), DIMENSION(0:), INTENT(IN)              ::  ntosend  !< number of records to send
+                                                                      !< to each process
+    INTEGER(iwp), INTENT(IN)                             ::  npint    !< no. of integers in a record
+    INTEGER(iwp), INTENT(IN)                             ::  npfloat  !< no. of floats in a record
+    INTEGER(iwp), DIMENSION(0:), INTENT(IN)              ::  isendbuf !< send buffer with integers
+    REAL(wp), DIMENSION(0:), INTENT(IN)                  ::  fsendbuf !< send buffer with floats
+    INTEGER(iwp), INTENT(OUT)                            ::  nrecv    !< total no. of records received
+    INTEGER(iwp), DIMENSION(:), ALLOCATABLE, INTENT(OUT) ::  irecvbuf !< int receive buffer (will be
+                                                                      !< allocated to proper size)
+    REAL(wp), DIMENSION(:), ALLOCATABLE, INTENT(OUT)     ::  frecvbuf !< float receive buffer (will be
+                                                                      !< allocated to proper size)
+    INTEGER(iwp)                            ::  i, j     !< running sums of records to send/receive
+    INTEGER(iwp)                            ::  iproc    !< process iterator
+    INTEGER(iwp)                            ::  iter     !< current iteration
+    INTEGER(iwp)                            ::  niters   !< local number of iterations needed
+    INTEGER(iwp)                            ::  nitersg  !< global no. of iterations needed
+    INTEGER(iwp)                            ::  nmaxsend !< max no. of records sent to each process
+                                                         !< in each iteration
+    INTEGER(iwp), DIMENSION(:), ALLOCATABLE ::  drecv    !< received data displacements per proc
+    INTEGER(iwp), DIMENSION(:), ALLOCATABLE ::  drecvnow !< current receive displacements
+    INTEGER(iwp), DIMENSION(:), ALLOCATABLE ::  dsend    !< sent data displacements per process
+    INTEGER(iwp), DIMENSION(:), ALLOCATABLE ::  dsendnow !< current send displacements
+    INTEGER(iwp), DIMENSION(:), ALLOCATABLE ::  nrecvnow !< no. of items to receive in current iteration
+    INTEGER(iwp), DIMENSION(:), ALLOCATABLE ::  nsendnow !< no. of items to send in current iteration
+    INTEGER(iwp), DIMENSION(:), ALLOCATABLE ::  ntorecv  !< no. of records to receive from each process
+!
+!-- dsend/drecv carry one extra element (index numprocs) holding the end position of the last
+!-- block, so that dsend(p+1) is always the end of the block belonging to process p
+    ALLOCATE( ntorecv(0:numprocs-1) )
+    ALLOCATE( dsend(0:numprocs) )
+    ALLOCATE( drecv(0:numprocs) )
+    ALLOCATE( dsendnow(0:numprocs-1) )
+    ALLOCATE( drecvnow(0:numprocs-1) )
+    ALLOCATE( nsendnow(0:numprocs-1) )
+    ALLOCATE( nrecvnow(0:numprocs-1) )
+!
+!-- Exchange send and receive sizes
+    CALL MPI_Alltoall(ntosend, 1, MPI_INTEGER, ntorecv, 1, MPI_INTEGER, comm2d, ierr)
+    IF ( ierr /= 0 )  THEN
+       WRITE (9,*) 'Error at MPI_Alltoall 1:', ierr, ntosend, ntorecv
+       FLUSH (9)
+    ENDIF
+!
+!-- Calculate initial displacements (in units of records) as running sums of the counts
+    i = 0
+    j = 0
+    DO  iproc = 0, numprocs-1
+       dsend(iproc) = i
+       dsendnow(iproc) = i
+       drecv(iproc) = j
+       drecvnow(iproc) = j
+       i = i + ntosend(iproc)
+       j = j + ntorecv(iproc)
+    ENDDO
+    dsend(numprocs) = i ! behind last pos = sum of all to send
+    drecv(numprocs) = j ! behind last pos = sum of all to receive
+    nrecv = j
+!
+!-- Allocate receive buffers
+    ALLOCATE( irecvbuf(0:nrecv*npint-1) )
+    ALLOCATE( frecvbuf(0:nrecv*npfloat-1) )
+!
+!-- Determine number of iterations among all processes
+!-- (e.g. this process may have nothing to send and receive, yet some other still might)
+    IF ( bufsize_alltoall <= 0 )  THEN
+!
+!--    No limit requested: one round with an effectively unlimited per-process count
+       nitersg = 1
+       nmaxsend = HUGE(nmaxsend)
+    ELSE
+       nmaxsend = bufsize_alltoall
+!
+!--    Ceiling division: rounds needed for the largest block sent by this process
+       niters = (MAXVAL(ntosend(:)) + nmaxsend - 1) / nmaxsend
+!
+!--    All processes must perform the same number of collective calls, hence the global maximum
+       CALL MPI_Allreduce(niters, nitersg, 1, MPI_INTEGER, MPI_MAX, comm2d, ierr)
+       IF ( ierr /= 0 )  THEN
+          WRITE (9,*) 'Error at MPI_Allreduce:', ierr, niters, nmaxsend
+          FLUSH (9)
+       ENDIF
+       IF ( nitersg > 1 )  THEN
+          WRITE( debug_string, '("The MPI AllToAll call has been split to ' //   &
+                               '",I8," iterations of max. ",I12," records ' //   &
+                               'each.")' )                                       &
+                 nitersg, bufsize_alltoall
+          CALL debug_message( debug_string, 'info' )
+       ENDIF
+    ENDIF
+!
+!-- Iterate alltoall using max-sized buffers
+    DO  iter = 1, nitersg
+!
+!--    Remaining records per process (block end minus current position), capped at nmaxsend
+       nsendnow(:) = MIN(dsend(1:) - dsendnow(:), nmaxsend)
+       nrecvnow(:) = MIN(drecv(1:) - drecvnow(:), nmaxsend)
+!
+!--    Send integer data (counts and displacements are scaled from records to items)
+       CALL MPI_Alltoallv(isendbuf, nsendnow(:)*npint, dsendnow(:)*npint, MPI_INTEGER,             &
+                          irecvbuf, nrecvnow(:)*npint, drecvnow(:)*npint, MPI_INTEGER,             &
+                          comm2d, ierr)
+       IF ( ierr /= 0 )  THEN
+          WRITE (9,*) 'Error at MPI_Alltoallv 1:', ierr, iter, nmaxsend, dsend, dsendnow, nsendnow,&
+                                                                         drecv, drecvnow, nrecvnow
+          FLUSH (9)
+       ENDIF
+!
+!--    Send floating point data
+       CALL MPI_Alltoallv(fsendbuf, nsendnow(:)*npfloat, dsendnow(:)*npfloat, MPI_REAL,            &
+                          frecvbuf, nrecvnow(:)*npfloat, drecvnow(:)*npfloat, MPI_REAL,            &
+                          comm2d, ierr)
+       IF ( ierr /= 0 )  THEN
+          WRITE (9,*) 'Error at MPI_Alltoallv 2:', ierr, iter, nmaxsend, dsend, dsendnow, nsendnow,&
+                                                                         drecv, drecvnow, nrecvnow
+          FLUSH (9)
+       ENDIF
+!
+!--    Shift displacements for next iteration
+       dsendnow(:) = dsendnow(:) + nsendnow(:)
+       drecvnow(:) = drecvnow(:) + nrecvnow(:)
+    ENDDO
+    DEALLOCATE( ntorecv, dsend, drecv, dsendnow, drecvnow, nsendnow, nrecvnow )
+ END SUBROUTINE radiation_exchange_alltoall
 !------------------------------------------------------------------------------!
+!

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 4683 for palm/trunk/SOURCE/radiation_model_mod.f90

Legend:

palm/trunk/SOURCE/radiation_model_mod.f90

Download in other formats: