Changeset 683 for palm/trunk


Timestamp: Feb 9, 2011 2:25:15 PM
Author: raasch
Message:

New:
---

optional exchange of ghost points in synchronous mode via MPI_SENDRECV,
steered by the d3par parameter synchronous_exchange
(cpu_statistics, exchange_horiz, modules, parin)
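The new switch is read from the d3par NAMELIST group of the runtime parameter
file. A minimal sketch of a parameter set that enables it (end_time is only an
illustrative companion parameter; the default is synchronous_exchange =
.FALSE., i.e. MPI_ISEND/MPI_IRECV, as the modules.f90 diff below shows):

 &d3par  end_time             = 3600.0,
         synchronous_exchange = .TRUE.,  /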

OpenMP parallelization of the pressure solver (FFT method) for 2d domain decomposition
(poisfft, transpose)
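All loops touched in poisfft.f90 and transpose.f90 below follow one directive
pattern: a PARALLEL region with explicitly PRIVATE loop indices, an OMP DO on
the outer loop, and a closing END PARALLEL. A self-contained sketch of that
pattern (array name and bounds are illustrative, not PALM variables):

 PROGRAM omp_pattern

    IMPLICIT NONE

    INTEGER, PARAMETER ::  nx = 64, nz = 32
    INTEGER ::  i, k
    REAL, DIMENSION(0:nz,0:nx) ::  f

 !$OMP  PARALLEL PRIVATE ( i, k )
 !$OMP  DO
    DO  k = 0, nz
       DO  i = 0, nx
 !
 !--      Arbitrary work standing in for the FFT / matrix-setup loop bodies
          f(k,i) = REAL( k * i )
       ENDDO
    ENDDO
 !$OMP  END PARALLEL

    PRINT*, 'checksum: ', SUM( f )

 END PROGRAM omp_pattern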

Changed:

Errors:

mpt bugfix for netCDF4 usage (mrun)

Location: palm/trunk
Files: 7 edited

  • palm/trunk/SCRIPTS/mrun (r678 → r683)

@@ -219,4 +219,5 @@
      # 02/02/10 - Siggi  - further adjustments on Tsubame and concerning openMP
      #                     usage
+     # 09/02/10 - Siggi  - mpt bugfix for netCDF4 usage
 
 
@@ -2113,5 +2114,5 @@
 fi
 
-# bugfix for wrong netcdf module
+# bugfix for wrong netcdf module and for netCDF4 usage in case of mpt
 if [[ $host = lcsgib  ||  $host = lcsgih ]]
 then
@@ -2119,4 +2120,9 @@
    then
       export module_calls="$module_calls export LD_LIBRARY_PATH=/sw/dataformats/netcdf/3.6.3-intel/lib:\$LD_LIBRARY_PATH;"
+   fi
+   if [[ $(echo $module_calls | grep -c mpt) != 0 ]]
+   then
+      export module_calls="$module_calls export LD_LIBRARY_PATH=/sw/sgi/mpt/2011-02-07/lib:\$LD_LIBRARY_PATH;"
+      echo "*** module_calls = $module_calls"
    fi
 fi
@@ -3305,4 +3311,6 @@
                      export MPI_TYPE_DEPTH=20
                      echo "*** MPI_TYPE_DEPTH=$MPI_TYPE_DEPTH"
+                     export MPI_GROUP_MAX=64
+                     echo "*** MPI_GROUP_MAX=$MPI_GROUP_MAX"
                      mpiexec_mpt -np $ii   ./a.out  $ROPTS  < runfile_atmos
 
  • palm/trunk/SOURCE/cpu_statistics.f90 (r623 → r683)

@@ -4,5 +4,5 @@
 ! Current revisions:
 ! -----------------
-!
+! output of handling of ghostpoint exchange
 !
 ! Former revisions:
@@ -251,5 +251,5 @@
 
 !
-!--    Output handling of collective operations
+!--    Output of handling of MPI operations
       IF ( collective_wait )  THEN
          WRITE ( 18, 103 )
@@ -257,9 +257,14 @@
          WRITE ( 18, 104 )
       ENDIF
+      IF ( synchronous_exchange )  THEN
+         WRITE ( 18, 105 )
+      ELSE
+         WRITE ( 18, 106 )
+      ENDIF
 
 !
 !--    Empty lines in order to create a gap to the results of the model
 !--    continuation runs
-       WRITE ( 18, 105 )
+       WRITE ( 18, 107 )
 
 !
@@ -288,5 +293,7 @@
 103 FORMAT (/'Barriers are set in front of collective operations')
 104 FORMAT (/'No barriers are set in front of collective operations')
-105 FORMAT (//)
+105 FORMAT (/'Exchange of ghostpoints via MPI_SENDRCV')
+106 FORMAT (/'Exchange of ghostpoints via MPI_ISEND/MPI_IRECV')
+107 FORMAT (//)
 
 END SUBROUTINE cpu_statistics
  • palm/trunk/SOURCE/exchange_horiz.f90 (r668 → r683)

@@ -4,4 +4,5 @@
 ! Current revisions:
 ! -----------------
+! optional synchronous exchange (sendrecv) implemented, code partly reformatted
 !
 ! Former revisions:
@@ -48,5 +49,5 @@
     INTEGER, DIMENSION(MPI_STATUS_SIZE,4) ::  wait_stat
 #endif
-    INTEGER :: i,nbgp_local
+    INTEGER ::  i, nbgp_local
     REAL, DIMENSION(nzb:nzt+1,nys-nbgp_local:nyn+nbgp_local, &
                     nxl-nbgp_local:nxr+nbgp_local) ::  ar
@@ -54,9 +55,14 @@
     CALL cpu_log( log_point_s(2), 'exchange_horiz', 'start' )
 
-    IF ( exchange_mg == .TRUE. ) THEN
-      i = grid_level
+!
+!-- In the Poisson multigrid solver arrays with coarser grids are used.
+!-- Set i appropriately, because the coarser grids have different
+!-- MPI datatypes type_xz, type_yz.
+    IF ( exchange_mg == .TRUE. )  THEN
+       i = grid_level
     ELSE
-      i = 0
+       i = 0
     END IF
+
 #if defined( __parallel )
 
@@ -74,21 +80,37 @@
     ELSE
 
-       req = 0
+       IF ( synchronous_exchange )  THEN
 !
-!--    Send left boundary, receive right one
-       CALL MPI_ISEND(ar(nzb,nys-nbgp_local,nxl),1,type_yz(i),pleft,0,comm2d,&
-                      req(1),ierr)
-       CALL MPI_IRECV(ar(nzb,nys-nbgp_local,nxr+1),1,type_yz(i),pright,0,&
-                     comm2d,req(2),ierr)
+!--       Send left boundary, receive right one (synchronous)
+          CALL MPI_SENDRECV(                                                   &
+                       ar(nzb,nys-nbgp_local,nxl),   1, type_yz(i), pleft,  0, &
+                       ar(nzb,nys-nbgp_local,nxr+1), 1, type_yz(i), pright, 0, &
+                       comm2d, status, ierr )
 !
-!--    Send right boundary, receive left one
+!--       Send right boundary, receive left one (synchronous)
+          CALL MPI_SENDRECV(                                                   &
+            ar(nzb,nys-nbgp_local,nxr+1-nbgp_local), 1, type_yz(i), pright, 1, &
+            ar(nzb,nys-nbgp_local,nxl-nbgp_local),   1, type_yz(i), pleft,  1, &
+                       comm2d, status, ierr )
 
+       ELSE
 
-       CALL MPI_ISEND(ar(nzb,nys-nbgp_local,nxr+1-nbgp_local),1,type_yz(i),pright, 1,  &
-                      comm2d, req(3), ierr )
-       CALL MPI_IRECV(ar(nzb,nys-nbgp_local,nxl-nbgp_local),1,type_yz(i),pleft,1,&
-                      comm2d,req(4), ierr)
+          req = 0
+!
+!--       Send left boundary, receive right one (asynchronous)
+          CALL MPI_ISEND( ar(nzb,nys-nbgp_local,nxl),   1, type_yz(i), pleft,  &
+                          0, comm2d, req(1), ierr )
+          CALL MPI_IRECV( ar(nzb,nys-nbgp_local,nxr+1), 1, type_yz(i), pright, &
+                          0, comm2d, req(2), ierr )
+!
+!--       Send right boundary, receive left one (asynchronous)
+          CALL MPI_ISEND( ar(nzb,nys-nbgp_local,nxr+1-nbgp_local), 1,          &
+                          type_yz(i), pright, 1, comm2d, req(3), ierr )
+          CALL MPI_IRECV( ar(nzb,nys-nbgp_local,nxl-nbgp_local),   1,          &
+                          type_yz(i), pleft,  1, comm2d, req(4), ierr )
 
-       CALL MPI_WAITALL( 4, req, wait_stat, ierr )
+          CALL MPI_WAITALL( 4, req, wait_stat, ierr )
+
+       ENDIF
 
     ENDIF
@@ -106,20 +128,37 @@
     ELSE
 
-       req = 0
+       IF ( synchronous_exchange )  THEN
 !
-!--    Send front boundary, receive rear one
-!--    MPI_ISEND initial send adress changed, type_xz() is sendet nbgp times
+!--       Send front boundary, receive rear one (synchronous)
+          CALL MPI_SENDRECV(                                                   &
+                       ar(nzb,nys,nxl-nbgp_local),   1, type_xz(i), psouth, 0, &
+                       ar(nzb,nyn+1,nxl-nbgp_local), 1, type_xz(i), pnorth, 0, &
+                       comm2d, status, ierr )
+!
+!--       Send rear boundary, receive front one (synchronous)
+          CALL MPI_SENDRECV(                                                   &
+            ar(nzb,nyn-nbgp_local+1,nxl-nbgp_local), 1, type_xz(i), pnorth, 1, &
+            ar(nzb,nys-nbgp_local,nxl-nbgp_local),   1, type_xz(i), psouth, 1, &
+            comm2d, status, ierr )
 
-       CALL MPI_ISEND( ar(nzb,nys,nxl-nbgp_local),1, type_xz(i), psouth, 0, &
-                       comm2d, req(1), ierr )
-       CALL MPI_IRECV( ar(nzb,nyn+1,nxl-nbgp_local),1, type_xz(i), pnorth, 0, &
-                       comm2d, req(2), ierr )
+       ELSE
+
+          req = 0
 !
-!--    Send rear boundary, receive front one
-       CALL MPI_ISEND( ar(nzb,nyn-nbgp_local+1,nxl-nbgp_local),1, type_xz(i), pnorth, 1, &
-                       comm2d, req(3), ierr )
-       CALL MPI_IRECV( ar(nzb,nys-nbgp_local,nxl-nbgp_local),1, type_xz(i), psouth, 1, &
-                       comm2d, req(4), ierr )
-       call MPI_WAITALL( 4, req, wait_stat, ierr )
+!--       Send front boundary, receive rear one (asynchronous)
+          CALL MPI_ISEND( ar(nzb,nys,nxl-nbgp_local),   1, type_xz(i), psouth, &
+                          0, comm2d, req(1), ierr )
+          CALL MPI_IRECV( ar(nzb,nyn+1,nxl-nbgp_local), 1, type_xz(i), pnorth, &
+                          0, comm2d, req(2), ierr )
+!
+!--       Send rear boundary, receive front one (asynchronous)
+          CALL MPI_ISEND( ar(nzb,nyn-nbgp_local+1,nxl-nbgp_local), 1,          &
+                          type_xz(i), pnorth, 1, comm2d, req(3), ierr )
+          CALL MPI_IRECV( ar(nzb,nys-nbgp_local,nxl-nbgp_local),   1,          &
+                          type_xz(i), psouth, 1, comm2d, req(4), ierr )
+
+          CALL MPI_WAITALL( 4, req, wait_stat, ierr )
+
+       ENDIF
 
     ENDIF
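For comparison outside the PALM context, here is a stripped-down, compilable
sketch of the two exchange modes for a 1-d decomposition with a single ghost
cell per side (all names illustrative; PALM itself exchanges full yz/xz slabs
via the derived datatypes type_yz/type_xz):

 PROGRAM halo_demo

    USE mpi

    IMPLICIT NONE

    INTEGER, PARAMETER ::  n = 8              ! interior points per rank
    REAL, DIMENSION(0:n+1) ::  a              ! ghost cells at 0 and n+1
    INTEGER ::  comm1d, ierr, left, myid, npes, right
    INTEGER ::  req(4), stat(MPI_STATUS_SIZE), wait_stat(MPI_STATUS_SIZE,4)
    LOGICAL ::  synchronous_exchange = .TRUE.

    CALL MPI_INIT( ierr )
    CALL MPI_COMM_SIZE( MPI_COMM_WORLD, npes, ierr )
    CALL MPI_CART_CREATE( MPI_COMM_WORLD, 1, (/ npes /), (/ .TRUE. /),        &
                          .TRUE., comm1d, ierr )
    CALL MPI_COMM_RANK( comm1d, myid, ierr )
    CALL MPI_CART_SHIFT( comm1d, 0, 1, left, right, ierr )

    a = REAL( myid )

    IF ( synchronous_exchange )  THEN
 !
 !--   One blocking call per direction; send and receive are paired,
 !--   so the exchange cannot deadlock
       CALL MPI_SENDRECV( a(n),   1, MPI_REAL, right, 0,                      &
                          a(0),   1, MPI_REAL, left,  0, comm1d, stat, ierr )
       CALL MPI_SENDRECV( a(1),   1, MPI_REAL, left,  1,                      &
                          a(n+1), 1, MPI_REAL, right, 1, comm1d, stat, ierr )
    ELSE
 !
 !--   Non-blocking send/receive pairs, completed by one MPI_WAITALL
       req = 0
       CALL MPI_ISEND( a(n),   1, MPI_REAL, right, 0, comm1d, req(1), ierr )
       CALL MPI_IRECV( a(0),   1, MPI_REAL, left,  0, comm1d, req(2), ierr )
       CALL MPI_ISEND( a(1),   1, MPI_REAL, left,  1, comm1d, req(3), ierr )
       CALL MPI_IRECV( a(n+1), 1, MPI_REAL, right, 1, comm1d, req(4), ierr )
       CALL MPI_WAITALL( 4, req, wait_stat, ierr )
    ENDIF

    IF ( myid == 0 )  PRINT*, 'ghost cells on rank 0: ', a(0), a(n+1)

    CALL MPI_FINALIZE( ierr )

 END PROGRAM halo_demo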
  • palm/trunk/SOURCE/modules.f90 (r674 → r683)

@@ -5,4 +5,5 @@
 ! Current revisions:
 ! -----------------
+! +synchronous_exchange
 !
 ! Former revisions:
@@ -1227,5 +1228,6 @@
     INTEGER, DIMENSION(:), ALLOCATABLE ::  ngp_yz, type_xz, type_yz
 
-    LOGICAL ::  collective_wait = .FALSE., reorder = .TRUE.
+    LOGICAL ::  collective_wait = .FALSE., reorder = .TRUE., &
+                synchronous_exchange = .FALSE.
     LOGICAL, DIMENSION(2) ::  cyclic = (/ .TRUE. , .TRUE. /), &
                               remain_dims
  • palm/trunk/SOURCE/parin.f90 (r668 → r683)

@@ -4,4 +4,5 @@
 ! Current revisions:
 ! -----------------
+! +synchronous_exchange in d3par
 !
 ! Former revisions:
@@ -185,7 +186,7 @@
              skip_time_data_output, skip_time_data_output_av, skip_time_dopr, &
              skip_time_do2d_xy, skip_time_do2d_xz, skip_time_do2d_yz, &
-             skip_time_do3d, skip_time_domask, termination_time_needed, &
-             use_prior_plot1d_parameters, z_max_do1d, z_max_do1d_normalized, &
-             z_max_do2d
+             skip_time_do3d, skip_time_domask, synchronous_exchange, &
+             termination_time_needed, use_prior_plot1d_parameters, z_max_do1d, &
+             z_max_do1d_normalized, z_max_do2d
 
 
  • palm/trunk/SOURCE/poisfft.f90 (r668 → r683)

@@ -4,4 +4,5 @@
 ! Current revisions:
 ! -----------------
+! openMP parallelization for 2d-domain-decomposition
 !
 ! Former revisions:
@@ -287,4 +288,6 @@
 !
 !--    Define constant elements of the tridiagonal matrix.
+!$OMP  PARALLEL PRIVATE ( k, i )
+!$OMP  DO
        DO  k = 0, nz-1
           DO  i = nxl_z, nxr_z
@@ -293,8 +296,11 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
 
 #if defined( __parallel )
 !
 !--    Repeat for all y-levels.
+!$OMP  PARALLEL FIRSTPRIVATE( tri ) PRIVATE ( ar1, j )
+!$OMP  DO
        DO  j = nys_z, nyn_z
           IF ( j <= nnyh )  THEN
@@ -306,4 +312,5 @@
           CALL substi( ar, ar1, tri, j )
        ENDDO
+!$OMP  END PARALLEL
 #else
 !
@@ -527,4 +534,6 @@
 !
 !--    Performing the fft with one of the methods implemented
+!$OMP  PARALLEL PRIVATE ( j, k )
+!$OMP  DO
        DO  k = nzb_x, nzt_x
           DO  j = nys_x, nyn_x
@@ -532,4 +541,5 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
 
     END SUBROUTINE fftxp
@@ -550,4 +560,6 @@
 !
 !--    Performing the fft with one of the methods implemented
+!$OMP  PARALLEL PRIVATE ( j, k )
+!$OMP  DO
        DO  k = 1, nz
           DO  j = 0, ny
@@ -555,4 +567,5 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
 
     END SUBROUTINE fftx
@@ -575,4 +588,6 @@
 !
 !--    Performing the fft with one of the methods implemented
+!$OMP  PARALLEL PRIVATE ( i, k )
+!$OMP  DO
        DO  k = nzb_y, nzt_y
           DO  i = nxl_y, nxr_y
@@ -580,4 +595,5 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
 
     END SUBROUTINE fftyp
@@ -598,4 +614,6 @@
 !
 !--    Performing the fft with one of the methods implemented
+!$OMP  PARALLEL PRIVATE ( i, k )
+!$OMP  DO
        DO  k = 1, nz
           DO  i = 0, nx
@@ -603,4 +621,5 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
 
     END SUBROUTINE ffty
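One detail in the tridiagonal-solver hunk above: tri is FIRSTPRIVATE, so each
thread enters the region with its own copy of the pre-computed matrix
elements, while ar1 is plain PRIVATE scratch that needs no copy-in. A small
compilable sketch of that distinction (all names illustrative):

 PROGRAM firstprivate_demo

    IMPLICIT NONE

    INTEGER ::  j
    REAL    ::  scratch
    REAL, DIMENSION(4) ::  coeffs
    REAL, DIMENSION(8) ::  result

    coeffs = (/ 1.0, 2.0, 3.0, 4.0 /)   ! initialized once, before the region

 !$OMP  PARALLEL FIRSTPRIVATE( coeffs ) PRIVATE ( scratch, j )
 !$OMP  DO
    DO  j = 1, 8
 !
 !--   Each thread reads its own pre-initialized copy of coeffs (like tri);
 !--   scratch is uninitialized per-thread workspace (like ar1)
       scratch   = SUM( coeffs ) * REAL( j )
       result(j) = scratch
    ENDDO
 !$OMP  END PARALLEL

    PRINT*, result                       ! 10, 20, ..., 80

 END PROGRAM firstprivate_demo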
  • palm/trunk/SOURCE/transpose.f90 (r623 → r683)

@@ -4,5 +4,5 @@
 ! Current revisions:
 ! -----------------
-!
+! openMP parallelization of transpositions for 2d-domain-decomposition
 !
 ! Former revisions:
@@ -61,4 +61,6 @@
 !-- Rearrange indices of input array in order to make data to be send
 !-- by MPI contiguous
+!$OMP  PARALLEL PRIVATE ( i, j, k )
+!$OMP  DO
     DO  i = 0, nxa
        DO  k = nzb_x, nzt_xa
@@ -68,4 +70,5 @@
        ENDDO
     ENDDO
+!$OMP  END PARALLEL
 
 !
@@ -80,6 +83,9 @@
 !
 !-- Reorder transposed array
-    m = 0
+!$OMP  PARALLEL PRIVATE ( i, j, k, l, m, ys )
+!$OMP  DO
     DO  l = 0, pdims(2) - 1
+       m  = l * ( nxr_ya - nxl_y + 1 ) * ( nzt_ya - nzb_y + 1 ) * &
+                ( nyn_xa - nys_x + 1 )
        ys = 0 + l * ( nyn_xa - nys_x + 1 )
        DO  i = nxl_y, nxr_ya
@@ -92,4 +98,5 @@
        ENDDO
     ENDDO
+!$OMP  END PARALLEL
 
 #endif
@@ -131,6 +138,8 @@
 !
 !--    Reorder input array for transposition
-       m = 0
+!$OMP  PARALLEL PRIVATE ( i, j, k, l, m, xs )
+!$OMP  DO
        DO  l = 0, pdims(1) - 1
+          m  = l * ( nzt_xa - nzb_x + 1 ) * nnx * ( nyn_xa - nys_x + 1 )
           xs = 0 + l * nnx
           DO  k = nzb_x, nzt_xa
@@ -143,4 +152,5 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
 
 !
@@ -155,4 +165,6 @@
 !
 !--    Reorder transposed array in a way that the z index is in first position
+!$OMP  PARALLEL PRIVATE ( i, j, k )
+!$OMP  DO
        DO  k = 1, nza
          DO  i = nxl, nxra
@@ -162,7 +174,10 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
     ELSE
 !
 !--    Reorder the array in a way that the z index is in first position
+!$OMP  PARALLEL PRIVATE ( i, j, k )
+!$OMP  DO
        DO  i = nxl, nxra
           DO  j = nys, nyna
@@ -172,5 +187,8 @@
           ENDDO
        ENDDO
-
+!$OMP  END PARALLEL
+
+!$OMP  PARALLEL PRIVATE ( i, j, k )
+!$OMP  DO
        DO  k = 1, nza
          DO  i = nxl, nxra
@@ -180,4 +198,5 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
 
     ENDIF
@@ -218,6 +237,9 @@
 !
 !-- Reorder input array for transposition
-    m = 0
+!$OMP  PARALLEL PRIVATE ( i, j, k, l, m, ys )
+!$OMP  DO
     DO  l = 0, pdims(2) - 1
+       m  = l * ( nxr_ya - nxl_y + 1 ) * ( nzt_ya - nzb_y + 1 ) * &
+                ( nyn_xa - nys_x + 1 )
        ys = 0 + l * ( nyn_xa - nys_x + 1 )
        DO  i = nxl_y, nxr_ya
@@ -230,4 +252,5 @@
        ENDDO
     ENDDO
+!$OMP  END PARALLEL
 
 !
@@ -242,4 +265,6 @@
 !
 !-- Reorder transposed array in a way that the x index is in first position
+!$OMP  PARALLEL PRIVATE ( i, j, k )
+!$OMP  DO
     DO  i = 0, nxa
        DO  k = nzb_x, nzt_xa
@@ -249,4 +274,5 @@
        ENDDO
     ENDDO
+!$OMP  END PARALLEL
 
 #endif
@@ -353,4 +379,6 @@
 !-- Rearrange indices of input array in order to make data to be send
 !-- by MPI contiguous
+!$OMP  PARALLEL PRIVATE ( i, j, k )
+!$OMP  DO
     DO  j = 0, nya
        DO  k = nzb_y, nzt_ya
@@ -360,4 +388,5 @@
        ENDDO
     ENDDO
+!$OMP  END PARALLEL
 
 !
@@ -367,4 +396,6 @@
 !-- of the data is necessary and no transposition has to be done.
     IF ( pdims(1) == 1 )  THEN
+!$OMP  PARALLEL PRIVATE ( i, j, k )
+!$OMP  DO
        DO  j = 0, nya
           DO  k = nzb_y, nzt_ya
@@ -374,4 +405,5 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
        RETURN
     ENDIF
@@ -388,6 +420,9 @@
 !
 !-- Reorder transposed array
-    m = 0
+!$OMP  PARALLEL PRIVATE ( i, j, k, l, m, zs )
+!$OMP  DO
     DO  l = 0, pdims(1) - 1
+       m  = l * ( nyn_za - nys_z + 1 ) * ( nzt_ya - nzb_y + 1 ) * &
+                ( nxr_za - nxl_z + 1 )
        zs = 1 + l * ( nzt_ya - nzb_y + 1 )
        DO  j = nys_z, nyn_za
@@ -400,4 +435,5 @@
        ENDDO
     ENDDO
+!$OMP  END PARALLEL
 
 #endif
@@ -435,4 +471,6 @@
 !-- Rearrange indices of input array in order to make data to be send
 !-- by MPI contiguous
+!$OMP  PARALLEL PRIVATE ( i, j, k )
+!$OMP  DO
     DO  k = 1,nza
        DO  i = nxl, nxra
@@ -442,4 +480,5 @@
        ENDDO
     ENDDO
+!$OMP  END PARALLEL
 
 !
@@ -449,4 +488,6 @@
 !-- of the data is necessary and no transposition has to be done.
     IF ( pdims(1) == 1 )  THEN
+!$OMP  PARALLEL PRIVATE ( i, j, k )
+!$OMP  DO
        DO  k = 1, nza
           DO  i = nxl, nxra
@@ -456,4 +497,5 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
        RETURN
     ENDIF
@@ -470,6 +512,8 @@
 !
 !-- Reorder transposed array
-    m = 0
+!$OMP  PARALLEL PRIVATE ( i, j, k, l, m, xs )
+!$OMP  DO
     DO  l = 0, pdims(1) - 1
+       m  = l * ( nzt_xa - nzb_x + 1 ) * nnx * ( nyn_xa - nys_x + 1 )
        xs = 0 + l * nnx
        DO  k = nzb_x, nzt_xa
@@ -482,4 +526,5 @@
        ENDDO
     ENDDO
+!$OMP  END PARALLEL
 
 #endif
@@ -521,6 +566,9 @@
 !
 !--    Reorder input array for transposition
-       m = 0
+!$OMP  PARALLEL PRIVATE ( i, j, k, l, m, zs )
+!$OMP  DO
        DO  l = 0, pdims(1) - 1
+          m  = l * ( nyn_za - nys_z + 1 ) * ( nzt_ya - nzb_y + 1 ) * &
+                   ( nxr_za - nxl_z + 1 )
           zs = 1 + l * ( nzt_ya - nzb_y + 1 )
           DO  j = nys_z, nyn_za
@@ -533,4 +581,5 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
 
 !
@@ -545,4 +594,6 @@
 !
 !--    Reorder transposed array in a way that the y index is in first position
+!$OMP  PARALLEL PRIVATE ( i, j, k )
+!$OMP  DO
        DO  j = 0, nya
           DO  k = nzb_y, nzt_ya
@@ -552,7 +603,10 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
     ELSE
 !
 !--    Reorder the array in a way that the y index is in first position
+!$OMP  PARALLEL PRIVATE ( i, j, k )
+!$OMP  DO
        DO  k = nzb_y, nzt_ya
          DO  j = 0, nya
@@ -562,6 +616,9 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
 !
 !--    Move data to output array
+!$OMP  PARALLEL PRIVATE ( i, j, k )
+!$OMP  DO
        DO  k = nzb_y, nzt_ya
          DO  i = nxl_y, nxr_ya
@@ -571,4 +628,5 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
 
     ENDIF
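The recurring change in transpose.f90 is not only the directives: the block
offset m, previously accumulated across iterations of the l loop (m = 0 set
before the loop), is now computed in closed form from l. This removes the
loop-carried dependence and makes the OMP DO over l valid. A generic,
compilable sketch of that transformation (names illustrative):

 PROGRAM offset_demo

    IMPLICIT NONE

    INTEGER, PARAMETER ::  nblocks = 4, blocksize = 3
    INTEGER ::  l, m, n
    REAL, DIMENSION(nblocks*blocksize) ::  work

 !$OMP  PARALLEL PRIVATE ( l, m, n )
 !$OMP  DO
    DO  l = 0, nblocks-1
 !
 !--   Closed-form offset: independent of all other iterations, so the
 !--   l loop can safely be distributed across threads
       m = l * blocksize
       DO  n = 1, blocksize
          work(m+n) = REAL( l )
       ENDDO
    ENDDO
 !$OMP  END PARALLEL

    PRINT*, work

 END PROGRAM offset_demo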