Changeset 683 for palm/trunk


Timestamp: Feb 9, 2011 2:25:15 PM
Author: raasch
Message:

New:
---

optional exchange of ghost points in synchronous mode via MPI_SENDRECV,
steered by the d3par parameter synchronous_exchange
(cpu_statistics, exchange_horiz, modules, parin)
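The new switch is read from the d3par NAMELIST group of the runtime parameter
file. A minimal sketch of a parameter set that enables it (end_time is only an
illustrative companion parameter; the default is synchronous_exchange =
.FALSE., i.e. MPI_ISEND/MPI_IRECV, as the modules.f90 diff below shows):

 &d3par  end_time             = 3600.0,
         synchronous_exchange = .TRUE.,  /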

OpenMP parallelization of the pressure solver (FFT method) for 2d domain decomposition
(poisfft, transpose)
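All loops touched in poisfft.f90 and transpose.f90 below follow one directive
pattern: a PARALLEL region with explicitly PRIVATE loop indices, an OMP DO on
the outer loop, and a closing END PARALLEL. A self-contained sketch of that
pattern (array name and bounds are illustrative, not PALM variables):

 PROGRAM omp_pattern

    IMPLICIT NONE

    INTEGER, PARAMETER ::  nx = 64, nz = 32
    INTEGER ::  i, k
    REAL, DIMENSION(0:nz,0:nx) ::  f

 !$OMP  PARALLEL PRIVATE ( i, k )
 !$OMP  DO
    DO  k = 0, nz
       DO  i = 0, nx
 !
 !--      Arbitrary work standing in for the FFT / matrix-setup loop bodies
          f(k,i) = REAL( k * i )
       ENDDO
    ENDDO
 !$OMP  END PARALLEL

    PRINT*, 'checksum: ', SUM( f )

 END PROGRAM omp_pattern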

Changed:

Errors:

mpt bugfix for netCDF4 usage (mrun)

Location: palm/trunk
Files: 7 edited

  • palm/trunk/SCRIPTS/mrun (r678 → r683)

@@ -219,4 +219,5 @@
      # 02/02/10 - Siggi  - further adjustments on Tsubame and concerning openMP
      #                     usage
+     # 09/02/10 - Siggi  - mpt bugfix for netCDF4 usage
 
 
@@ -2113,5 +2114,5 @@
 fi
 
-# bugfix for wrong netcdf module
+# bugfix for wrong netcdf module and for netCDF4 usage in case of mpt
 if [[ $host = lcsgib  ||  $host = lcsgih ]]
 then
@@ -2119,4 +2120,9 @@
    then
       export module_calls="$module_calls export LD_LIBRARY_PATH=/sw/dataformats/netcdf/3.6.3-intel/lib:\$LD_LIBRARY_PATH;"
+   fi
+   if [[ $(echo $module_calls | grep -c mpt) != 0 ]]
+   then
+      export module_calls="$module_calls export LD_LIBRARY_PATH=/sw/sgi/mpt/2011-02-07/lib:\$LD_LIBRARY_PATH;"
+      echo "*** module_calls = $module_calls"
    fi
 fi
@@ -3305,4 +3311,6 @@
                      export MPI_TYPE_DEPTH=20
                      echo "*** MPI_TYPE_DEPTH=$MPI_TYPE_DEPTH"
+                     export MPI_GROUP_MAX=64
+                     echo "*** MPI_GROUP_MAX=$MPI_GROUP_MAX"
                      mpiexec_mpt -np $ii   ./a.out  $ROPTS  < runfile_atmos
 
  • palm/trunk/SOURCE/cpu_statistics.f90 (r623 → r683)

@@ -4,5 +4,5 @@
 ! Current revisions:
 ! -----------------
-!
+! output of handling of ghostpoint exchange
 !
 ! Former revisions:
@@ -251,5 +251,5 @@
 
 !
-!--    Output handling of collective operations
+!--    Output of handling of MPI operations
       IF ( collective_wait )  THEN
          WRITE ( 18, 103 )
@@ -257,9 +257,14 @@
          WRITE ( 18, 104 )
       ENDIF
+      IF ( synchronous_exchange )  THEN
+         WRITE ( 18, 105 )
+      ELSE
+         WRITE ( 18, 106 )
+      ENDIF
 
 !
 !--    Empty lines in order to create a gap to the results of the model
 !--    continuation runs
-       WRITE ( 18, 105 )
+       WRITE ( 18, 107 )
 
 !
@@ -288,5 +293,7 @@
 103 FORMAT (/'Barriers are set in front of collective operations')
 104 FORMAT (/'No barriers are set in front of collective operations')
-105 FORMAT (//)
+105 FORMAT (/'Exchange of ghostpoints via MPI_SENDRCV')
+106 FORMAT (/'Exchange of ghostpoints via MPI_ISEND/MPI_IRECV')
+107 FORMAT (//)
 
 END SUBROUTINE cpu_statistics
  • palm/trunk/SOURCE/exchange_horiz.f90 (r668 → r683)

@@ -4,4 +4,5 @@
 ! Current revisions:
 ! -----------------
+! optional synchronous exchange (sendrecv) implemented, code partly reformatted
 !
 ! Former revisions:
@@ -48,5 +49,5 @@
     INTEGER, DIMENSION(MPI_STATUS_SIZE,4) ::  wait_stat
 #endif
-    INTEGER :: i,nbgp_local
+    INTEGER ::  i, nbgp_local
     REAL, DIMENSION(nzb:nzt+1,nys-nbgp_local:nyn+nbgp_local, &
                     nxl-nbgp_local:nxr+nbgp_local) ::  ar
@@ -54,9 +55,14 @@
     CALL cpu_log( log_point_s(2), 'exchange_horiz', 'start' )
 
-    IF ( exchange_mg == .TRUE. ) THEN
-      i = grid_level
+!
+!-- In the Poisson multigrid solver arrays with coarser grids are used.
+!-- Set i appropriately, because the coarser grids have different
+!-- MPI datatypes type_xz, type_yz.
+    IF ( exchange_mg == .TRUE. )  THEN
+       i = grid_level
     ELSE
-      i = 0
+       i = 0
     END IF
+
 #if defined( __parallel )
 
@@ -74,21 +80,37 @@
     ELSE
 
-       req = 0
+       IF ( synchronous_exchange )  THEN
 !
-!--    Send left boundary, receive right one
-       CALL MPI_ISEND(ar(nzb,nys-nbgp_local,nxl),1,type_yz(i),pleft,0,comm2d,&
-                      req(1),ierr)
-       CALL MPI_IRECV(ar(nzb,nys-nbgp_local,nxr+1),1,type_yz(i),pright,0,&
-                     comm2d,req(2),ierr)
+!--       Send left boundary, receive right one (synchronous)
+          CALL MPI_SENDRECV(                                                   &
+                       ar(nzb,nys-nbgp_local,nxl),   1, type_yz(i), pleft,  0, &
+                       ar(nzb,nys-nbgp_local,nxr+1), 1, type_yz(i), pright, 0, &
+                       comm2d, status, ierr )
 !
-!--    Send right boundary, receive left one
+!--       Send right boundary, receive left one (synchronous)
+          CALL MPI_SENDRECV(                                                   &
+            ar(nzb,nys-nbgp_local,nxr+1-nbgp_local), 1, type_yz(i), pright, 1, &
+            ar(nzb,nys-nbgp_local,nxl-nbgp_local),   1, type_yz(i), pleft,  1, &
+                       comm2d, status, ierr )
 
+       ELSE
 
-       CALL MPI_ISEND(ar(nzb,nys-nbgp_local,nxr+1-nbgp_local),1,type_yz(i),pright, 1,  &
-                      comm2d, req(3), ierr )
-       CALL MPI_IRECV(ar(nzb,nys-nbgp_local,nxl-nbgp_local),1,type_yz(i),pleft,1,&
-                      comm2d,req(4), ierr)
+          req = 0
+!
+!--       Send left boundary, receive right one (asynchronous)
+          CALL MPI_ISEND( ar(nzb,nys-nbgp_local,nxl),   1, type_yz(i), pleft,  &
+                          0, comm2d, req(1), ierr )
+          CALL MPI_IRECV( ar(nzb,nys-nbgp_local,nxr+1), 1, type_yz(i), pright, &
+                          0, comm2d, req(2), ierr )
+!
+!--       Send right boundary, receive left one (asynchronous)
+          CALL MPI_ISEND( ar(nzb,nys-nbgp_local,nxr+1-nbgp_local), 1,          &
+                          type_yz(i), pright, 1, comm2d, req(3), ierr )
+          CALL MPI_IRECV( ar(nzb,nys-nbgp_local,nxl-nbgp_local),   1,          &
+                          type_yz(i), pleft,  1, comm2d, req(4), ierr )
 
-       CALL MPI_WAITALL( 4, req, wait_stat, ierr )
+          CALL MPI_WAITALL( 4, req, wait_stat, ierr )
+
+       ENDIF
 
     ENDIF
@@ -106,20 +128,37 @@
     ELSE
 
-       req = 0
+       IF ( synchronous_exchange )  THEN
 !
-!--    Send front boundary, receive rear one
-!--    MPI_ISEND initial send adress changed, type_xz() is sendet nbgp times
+!--       Send front boundary, receive rear one (synchronous)
+          CALL MPI_SENDRECV(                                                   &
+                       ar(nzb,nys,nxl-nbgp_local),   1, type_xz(i), psouth, 0, &
+                       ar(nzb,nyn+1,nxl-nbgp_local), 1, type_xz(i), pnorth, 0, &
+                       comm2d, status, ierr )
+!
+!--       Send rear boundary, receive front one (synchronous)
+          CALL MPI_SENDRECV(                                                   &
+            ar(nzb,nyn-nbgp_local+1,nxl-nbgp_local), 1, type_xz(i), pnorth, 1, &
+            ar(nzb,nys-nbgp_local,nxl-nbgp_local),   1, type_xz(i), psouth, 1, &
+            comm2d, status, ierr )
 
-       CALL MPI_ISEND( ar(nzb,nys,nxl-nbgp_local),1, type_xz(i), psouth, 0, &
-                       comm2d, req(1), ierr )
-       CALL MPI_IRECV( ar(nzb,nyn+1,nxl-nbgp_local),1, type_xz(i), pnorth, 0, &
-                       comm2d, req(2), ierr )
+       ELSE
+
+          req = 0
 !
-!--    Send rear boundary, receive front one
-       CALL MPI_ISEND( ar(nzb,nyn-nbgp_local+1,nxl-nbgp_local),1, type_xz(i), pnorth, 1, &
-                       comm2d, req(3), ierr )
-       CALL MPI_IRECV( ar(nzb,nys-nbgp_local,nxl-nbgp_local),1, type_xz(i), psouth, 1, &
-                       comm2d, req(4), ierr )
-       call MPI_WAITALL( 4, req, wait_stat, ierr )
+!--       Send front boundary, receive rear one (asynchronous)
+          CALL MPI_ISEND( ar(nzb,nys,nxl-nbgp_local),   1, type_xz(i), psouth, &
+                          0, comm2d, req(1), ierr )
+          CALL MPI_IRECV( ar(nzb,nyn+1,nxl-nbgp_local), 1, type_xz(i), pnorth, &
+                          0, comm2d, req(2), ierr )
+!
+!--       Send rear boundary, receive front one (asynchronous)
+          CALL MPI_ISEND( ar(nzb,nyn-nbgp_local+1,nxl-nbgp_local), 1,          &
+                          type_xz(i), pnorth, 1, comm2d, req(3), ierr )
+          CALL MPI_IRECV( ar(nzb,nys-nbgp_local,nxl-nbgp_local),   1,          &
+                          type_xz(i), psouth, 1, comm2d, req(4), ierr )
+
+          CALL MPI_WAITALL( 4, req, wait_stat, ierr )
+
+       ENDIF
 
     ENDIF
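For comparison outside the PALM context, here is a stripped-down, compilable
sketch of the two exchange modes for a 1-d decomposition with a single ghost
cell per side (all names illustrative; PALM itself exchanges full yz/xz slabs
via the derived datatypes type_yz/type_xz):

 PROGRAM halo_demo

    USE mpi

    IMPLICIT NONE

    INTEGER, PARAMETER ::  n = 8              ! interior points per rank
    REAL, DIMENSION(0:n+1) ::  a              ! ghost cells at 0 and n+1
    INTEGER ::  comm1d, ierr, left, myid, npes, right
    INTEGER ::  req(4), stat(MPI_STATUS_SIZE), wait_stat(MPI_STATUS_SIZE,4)
    LOGICAL ::  synchronous_exchange = .TRUE.

    CALL MPI_INIT( ierr )
    CALL MPI_COMM_SIZE( MPI_COMM_WORLD, npes, ierr )
    CALL MPI_CART_CREATE( MPI_COMM_WORLD, 1, (/ npes /), (/ .TRUE. /),        &
                          .TRUE., comm1d, ierr )
    CALL MPI_COMM_RANK( comm1d, myid, ierr )
    CALL MPI_CART_SHIFT( comm1d, 0, 1, left, right, ierr )

    a = REAL( myid )

    IF ( synchronous_exchange )  THEN
 !
 !--   One blocking call per direction; send and receive are paired,
 !--   so the exchange cannot deadlock
       CALL MPI_SENDRECV( a(n),   1, MPI_REAL, right, 0,                      &
                          a(0),   1, MPI_REAL, left,  0, comm1d, stat, ierr )
       CALL MPI_SENDRECV( a(1),   1, MPI_REAL, left,  1,                      &
                          a(n+1), 1, MPI_REAL, right, 1, comm1d, stat, ierr )
    ELSE
 !
 !--   Non-blocking send/receive pairs, completed by one MPI_WAITALL
       req = 0
       CALL MPI_ISEND( a(n),   1, MPI_REAL, right, 0, comm1d, req(1), ierr )
       CALL MPI_IRECV( a(0),   1, MPI_REAL, left,  0, comm1d, req(2), ierr )
       CALL MPI_ISEND( a(1),   1, MPI_REAL, left,  1, comm1d, req(3), ierr )
       CALL MPI_IRECV( a(n+1), 1, MPI_REAL, right, 1, comm1d, req(4), ierr )
       CALL MPI_WAITALL( 4, req, wait_stat, ierr )
    ENDIF

    IF ( myid == 0 )  PRINT*, 'ghost cells on rank 0: ', a(0), a(n+1)

    CALL MPI_FINALIZE( ierr )

 END PROGRAM halo_demo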
  • palm/trunk/SOURCE/modules.f90 (r674 → r683)

@@ -5,4 +5,5 @@
 ! Current revisions:
 ! -----------------
+! +synchronous_exchange
 !
 ! Former revisions:
@@ -1227,5 +1228,6 @@
     INTEGER, DIMENSION(:), ALLOCATABLE ::  ngp_yz, type_xz, type_yz
 
-    LOGICAL ::  collective_wait = .FALSE., reorder = .TRUE.
+    LOGICAL ::  collective_wait = .FALSE., reorder = .TRUE., &
+                synchronous_exchange = .FALSE.
     LOGICAL, DIMENSION(2) ::  cyclic = (/ .TRUE. , .TRUE. /), &
                               remain_dims
  • palm/trunk/SOURCE/parin.f90 (r668 → r683)

@@ -4,4 +4,5 @@
 ! Current revisions:
 ! -----------------
+! +synchronous_exchange in d3par
 !
 ! Former revisions:
@@ -185,7 +186,7 @@
              skip_time_data_output, skip_time_data_output_av, skip_time_dopr, &
              skip_time_do2d_xy, skip_time_do2d_xz, skip_time_do2d_yz, &
-             skip_time_do3d, skip_time_domask, termination_time_needed, &
-             use_prior_plot1d_parameters, z_max_do1d, z_max_do1d_normalized, &
-             z_max_do2d
+             skip_time_do3d, skip_time_domask, synchronous_exchange, &
+             termination_time_needed, use_prior_plot1d_parameters, z_max_do1d, &
+             z_max_do1d_normalized, z_max_do2d
 
 
  • palm/trunk/SOURCE/poisfft.f90 (r668 → r683)

@@ -4,4 +4,5 @@
 ! Current revisions:
 ! -----------------
+! openMP parallelization for 2d-domain-decomposition
 !
 ! Former revisions:
@@ -287,4 +288,6 @@
 !
 !--    Define constant elements of the tridiagonal matrix.
+!$OMP  PARALLEL PRIVATE ( k, i )
+!$OMP  DO
        DO  k = 0, nz-1
           DO  i = nxl_z, nxr_z
@@ -293,8 +296,11 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
 
 #if defined( __parallel )
 !
 !--    Repeat for all y-levels.
+!$OMP  PARALLEL FIRSTPRIVATE( tri ) PRIVATE ( ar1, j )
+!$OMP  DO
        DO  j = nys_z, nyn_z
           IF ( j <= nnyh )  THEN
@@ -306,4 +312,5 @@
           CALL substi( ar, ar1, tri, j )
        ENDDO
+!$OMP  END PARALLEL
 #else
 !
@@ -527,4 +534,6 @@
 !
 !--    Performing the fft with one of the methods implemented
+!$OMP  PARALLEL PRIVATE ( j, k )
+!$OMP  DO
        DO  k = nzb_x, nzt_x
           DO  j = nys_x, nyn_x
@@ -532,4 +541,5 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
 
     END SUBROUTINE fftxp
@@ -550,4 +560,6 @@
 !
 !--    Performing the fft with one of the methods implemented
+!$OMP  PARALLEL PRIVATE ( j, k )
+!$OMP  DO
        DO  k = 1, nz
           DO  j = 0, ny
@@ -555,4 +567,5 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
 
     END SUBROUTINE fftx
@@ -575,4 +588,6 @@
 !
 !--    Performing the fft with one of the methods implemented
+!$OMP  PARALLEL PRIVATE ( i, k )
+!$OMP  DO
        DO  k = nzb_y, nzt_y
           DO  i = nxl_y, nxr_y
@@ -580,4 +595,5 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
 
     END SUBROUTINE fftyp
@@ -598,4 +614,6 @@
 !
 !--    Performing the fft with one of the methods implemented
+!$OMP  PARALLEL PRIVATE ( i, k )
+!$OMP  DO
        DO  k = 1, nz
           DO  i = 0, nx
@@ -603,4 +621,5 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
 
     END SUBROUTINE ffty
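One detail in the tridiagonal-solver hunk above: tri is FIRSTPRIVATE, so each
thread enters the region with its own copy of the pre-computed matrix
elements, while ar1 is plain PRIVATE scratch that needs no copy-in. A small
compilable sketch of that distinction (all names illustrative):

 PROGRAM firstprivate_demo

    IMPLICIT NONE

    INTEGER ::  j
    REAL    ::  scratch
    REAL, DIMENSION(4) ::  coeffs
    REAL, DIMENSION(8) ::  result

    coeffs = (/ 1.0, 2.0, 3.0, 4.0 /)   ! initialized once, before the region

 !$OMP  PARALLEL FIRSTPRIVATE( coeffs ) PRIVATE ( scratch, j )
 !$OMP  DO
    DO  j = 1, 8
 !
 !--   Each thread reads its own pre-initialized copy of coeffs (like tri);
 !--   scratch is uninitialized per-thread workspace (like ar1)
       scratch   = SUM( coeffs ) * REAL( j )
       result(j) = scratch
    ENDDO
 !$OMP  END PARALLEL

    PRINT*, result                       ! 10, 20, ..., 80

 END PROGRAM firstprivate_demo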
  • palm/trunk/SOURCE/transpose.f90 (r623 → r683)

@@ -4,5 +4,5 @@
 ! Current revisions:
 ! -----------------
-!
+! openMP parallelization of transpositions for 2d-domain-decomposition
 !
 ! Former revisions:
@@ -61,4 +61,6 @@
 !-- Rearrange indices of input array in order to make data to be send
 !-- by MPI contiguous
+!$OMP  PARALLEL PRIVATE ( i, j, k )
+!$OMP  DO
     DO  i = 0, nxa
        DO  k = nzb_x, nzt_xa
@@ -68,4 +70,5 @@
        ENDDO
     ENDDO
+!$OMP  END PARALLEL
 
 !
@@ -80,6 +83,9 @@
 !
 !-- Reorder transposed array
-    m = 0
+!$OMP  PARALLEL PRIVATE ( i, j, k, l, m, ys )
+!$OMP  DO
     DO  l = 0, pdims(2) - 1
+       m  = l * ( nxr_ya - nxl_y + 1 ) * ( nzt_ya - nzb_y + 1 ) * &
+                ( nyn_xa - nys_x + 1 )
        ys = 0 + l * ( nyn_xa - nys_x + 1 )
        DO  i = nxl_y, nxr_ya
@@ -92,4 +98,5 @@
        ENDDO
     ENDDO
+!$OMP  END PARALLEL
 
 #endif
@@ -131,6 +138,8 @@
 !
 !--    Reorder input array for transposition
-       m = 0
+!$OMP  PARALLEL PRIVATE ( i, j, k, l, m, xs )
+!$OMP  DO
        DO  l = 0, pdims(1) - 1
+          m  = l * ( nzt_xa - nzb_x + 1 ) * nnx * ( nyn_xa - nys_x + 1 )
           xs = 0 + l * nnx
           DO  k = nzb_x, nzt_xa
@@ -143,4 +152,5 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
 
 !
@@ -155,4 +165,6 @@
 !
 !--    Reorder transposed array in a way that the z index is in first position
+!$OMP  PARALLEL PRIVATE ( i, j, k )
+!$OMP  DO
        DO  k = 1, nza
          DO  i = nxl, nxra
@@ -162,7 +174,10 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
     ELSE
 !
 !--    Reorder the array in a way that the z index is in first position
+!$OMP  PARALLEL PRIVATE ( i, j, k )
+!$OMP  DO
        DO  i = nxl, nxra
           DO  j = nys, nyna
@@ -172,5 +187,8 @@
           ENDDO
        ENDDO
-
+!$OMP  END PARALLEL
+
+!$OMP  PARALLEL PRIVATE ( i, j, k )
+!$OMP  DO
        DO  k = 1, nza
          DO  i = nxl, nxra
@@ -180,4 +198,5 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
 
     ENDIF
@@ -218,6 +237,9 @@
 !
 !-- Reorder input array for transposition
-    m = 0
+!$OMP  PARALLEL PRIVATE ( i, j, k, l, m, ys )
+!$OMP  DO
     DO  l = 0, pdims(2) - 1
+       m  = l * ( nxr_ya - nxl_y + 1 ) * ( nzt_ya - nzb_y + 1 ) * &
+                ( nyn_xa - nys_x + 1 )
        ys = 0 + l * ( nyn_xa - nys_x + 1 )
        DO  i = nxl_y, nxr_ya
@@ -230,4 +252,5 @@
        ENDDO
     ENDDO
+!$OMP  END PARALLEL
 
 !
@@ -242,4 +265,6 @@
 !
 !-- Reorder transposed array in a way that the x index is in first position
+!$OMP  PARALLEL PRIVATE ( i, j, k )
+!$OMP  DO
     DO  i = 0, nxa
        DO  k = nzb_x, nzt_xa
@@ -249,4 +274,5 @@
        ENDDO
     ENDDO
+!$OMP  END PARALLEL
 
 #endif
@@ -353,4 +379,6 @@
 !-- Rearrange indices of input array in order to make data to be send
 !-- by MPI contiguous
+!$OMP  PARALLEL PRIVATE ( i, j, k )
+!$OMP  DO
     DO  j = 0, nya
        DO  k = nzb_y, nzt_ya
@@ -360,4 +388,5 @@
        ENDDO
     ENDDO
+!$OMP  END PARALLEL
 
 !
@@ -367,4 +396,6 @@
 !-- of the data is necessary and no transposition has to be done.
     IF ( pdims(1) == 1 )  THEN
+!$OMP  PARALLEL PRIVATE ( i, j, k )
+!$OMP  DO
        DO  j = 0, nya
           DO  k = nzb_y, nzt_ya
@@ -374,4 +405,5 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
        RETURN
     ENDIF
@@ -388,6 +420,9 @@
 !
 !-- Reorder transposed array
-    m = 0
+!$OMP  PARALLEL PRIVATE ( i, j, k, l, m, zs )
+!$OMP  DO
     DO  l = 0, pdims(1) - 1
+       m  = l * ( nyn_za - nys_z + 1 ) * ( nzt_ya - nzb_y + 1 ) * &
+                ( nxr_za - nxl_z + 1 )
        zs = 1 + l * ( nzt_ya - nzb_y + 1 )
        DO  j = nys_z, nyn_za
@@ -400,4 +435,5 @@
        ENDDO
     ENDDO
+!$OMP  END PARALLEL
 
 #endif
@@ -435,4 +471,6 @@
 !-- Rearrange indices of input array in order to make data to be send
 !-- by MPI contiguous
+!$OMP  PARALLEL PRIVATE ( i, j, k )
+!$OMP  DO
     DO  k = 1,nza
        DO  i = nxl, nxra
@@ -442,4 +480,5 @@
        ENDDO
     ENDDO
+!$OMP  END PARALLEL
 
 !
@@ -449,4 +488,6 @@
 !-- of the data is necessary and no transposition has to be done.
     IF ( pdims(1) == 1 )  THEN
+!$OMP  PARALLEL PRIVATE ( i, j, k )
+!$OMP  DO
        DO  k = 1, nza
           DO  i = nxl, nxra
@@ -456,4 +497,5 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
        RETURN
     ENDIF
@@ -470,6 +512,8 @@
 !
 !-- Reorder transposed array
-    m = 0
+!$OMP  PARALLEL PRIVATE ( i, j, k, l, m, xs )
+!$OMP  DO
     DO  l = 0, pdims(1) - 1
+       m  = l * ( nzt_xa - nzb_x + 1 ) * nnx * ( nyn_xa - nys_x + 1 )
        xs = 0 + l * nnx
        DO  k = nzb_x, nzt_xa
@@ -482,4 +526,5 @@
        ENDDO
     ENDDO
+!$OMP  END PARALLEL
 
 #endif
@@ -521,6 +566,9 @@
 !
 !--    Reorder input array for transposition
-       m = 0
+!$OMP  PARALLEL PRIVATE ( i, j, k, l, m, zs )
+!$OMP  DO
        DO  l = 0, pdims(1) - 1
+          m  = l * ( nyn_za - nys_z + 1 ) * ( nzt_ya - nzb_y + 1 ) * &
+                   ( nxr_za - nxl_z + 1 )
           zs = 1 + l * ( nzt_ya - nzb_y + 1 )
           DO  j = nys_z, nyn_za
@@ -533,4 +581,5 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
 
 !
@@ -545,4 +594,6 @@
 !
 !--    Reorder transposed array in a way that the y index is in first position
+!$OMP  PARALLEL PRIVATE ( i, j, k )
+!$OMP  DO
        DO  j = 0, nya
           DO  k = nzb_y, nzt_ya
@@ -552,7 +603,10 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
     ELSE
 !
 !--    Reorder the array in a way that the y index is in first position
+!$OMP  PARALLEL PRIVATE ( i, j, k )
+!$OMP  DO
        DO  k = nzb_y, nzt_ya
          DO  j = 0, nya
@@ -562,6 +616,9 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
 !
 !--    Move data to output array
+!$OMP  PARALLEL PRIVATE ( i, j, k )
+!$OMP  DO
        DO  k = nzb_y, nzt_ya
          DO  i = nxl_y, nxr_ya
@@ -571,4 +628,5 @@
           ENDDO
        ENDDO
+!$OMP  END PARALLEL
 
     ENDIF
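The recurring change in transpose.f90 is not only the directives: the block
offset m, previously accumulated across iterations of the l loop (m = 0 set
before the loop), is now computed in closed form from l. This removes the
loop-carried dependence and makes the OMP DO over l valid. A generic,
compilable sketch of that transformation (names illustrative):

 PROGRAM offset_demo

    IMPLICIT NONE

    INTEGER, PARAMETER ::  nblocks = 4, blocksize = 3
    INTEGER ::  l, m, n
    REAL, DIMENSION(nblocks*blocksize) ::  work

 !$OMP  PARALLEL PRIVATE ( l, m, n )
 !$OMP  DO
    DO  l = 0, nblocks-1
 !
 !--   Closed-form offset: independent of all other iterations, so the
 !--   l loop can safely be distributed across threads
       m = l * blocksize
       DO  n = 1, blocksize
          work(m+n) = REAL( l )
       ENDDO
    ENDDO
 !$OMP  END PARALLEL

    PRINT*, work

 END PROGRAM offset_demo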