Changeset 696 for palm/trunk


Ignore:
Timestamp:
Mar 18, 2011 7:03:49 AM (14 years ago)
Author:
raasch
Message:

adjustments for openmp usage on ibmkisti (mrun, subjob); OpenMP-bugfixes: work_fftx removed from PRIVATE clauses in fftx_tr_xy and tr_yx_fftx (poisfft); Bugfix: Summation of Wicker-Skamarock scheme fluxes and variances for all threads (flow_statistics)

Location:
palm/trunk
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • palm/trunk/SCRIPTS/mrun

    r693 r696  
    219 219     # 02/02/10 - Siggi  - further adjustments on Tsubame and concerning openMP
    220 220     #                     usage
    221      # 09/02/10 - Siggi  - adjustments for ibmkisti, mpt bugfix for netCDF4
     221     # 09/03/10 - Siggi  - adjustments for ibmkisti, mpt bugfix for netCDF4
    222 222     #                     usage, totalview usage extended
     223     # 17/03/11 - Siggi -  adjustments for openmp usage on ibmkisti
    223 224
    224 225
     
    3131 3132                export LANG=en_US
    3132 3133                export MP_SHARED_MEMORY=yes
    3133                 export MP_SINGLE_THREAD=yes
    3134                 export MEMORY_AFFINITY=MCM
     3134                if [[ $threads_per_task = 1 ]]
     3135                then
     3136                   export MP_SINGLE_THREAD=yes
     3137                   export MEMORY_AFFINITY=MCM
     3138                else
     3139                   export OMP_NUM_THREADS=$threads_per_task
     3140                fi
    3135 3141             fi
    3136 3142
     
    3291 3297             (( ii = $numprocs / $threads_per_task ))
    3292 3298             export OMP_NUM_THREADS=$threads_per_task
    3293 #             echo "OMP_NUM_THREADS=$OMP_NUM_THREADS"
     3299             # echo "*** OMP_NUM_THREADS=$OMP_NUM_THREADS"
    3294 3300             if [[ $threads_per_task != 1 ]]
    3295 3301             then
  • palm/trunk/SCRIPTS/subjob

    r693 r696  
    128 128     # 02/02/11 - Siggi - further asjustments for Tsubame concerning openMP
    129 129     # 06/03/11 - Siggi - adjustments for ibmkisti
     130     # 17/03/11 - Siggi - adjustments for openmp usage on ibmkisti
    130 131
    131132
     
    152 153
    153 154 typeset  -i   cputime=0  memory=0  Memory=0  minuten  resttime  sekunden  stunden
    154  typeset  -i   inumprocs  nodes=0  processes_per_node=0 tasks_per_node=0  threads_per_task=1
     155 typeset  -i   inumprocs  mpi_tasks=0  nodes=0  processes_per_node=0 tasks_per_node=0  threads_per_task=1
    155 156 typeset  -L20 spalte1
    156 157 typeset  -R40 spalte2
     
    548 549 (( processes_per_node = tasks_per_node * threads_per_task ))
    549 550
     551    # Calculate number of MPI tasks
     552 (( mpi_tasks = numprocs / threads_per_task ))
     553
    550 554
    551 555    # HEADER-AUSGABE
     
    638 642       network_to_use="# @ network.MPI = sn_all,shared,US"
    639 643       wall_clock_limit="# @ wall_clock_limit = $timestring"
    640        rset="# @ rset = RSET_MCM_AFFINITY"
    641        mcm_affinity_options="# @ mcm_affinity_options = mcm_mem_pref mcm_sni_none mcm_distribute"
     644       if [[ $threads_per_task = 1 ]]
     645       then
     646          rset="# @ rset = RSET_MCM_AFFINITY"
     647          mcm_affinity_options="# @ mcm_affinity_options = mcm_mem_pref mcm_sni_none mcm_distribute"
     648       fi
    642 649       environment=""
    643 650       use_shell=""
     
    701 708
    702 709          cat >> $job_to_send << %%END%%
    703 # @ total_tasks = $numprocs
     710 # @ total_tasks = $mpi_tasks
    704 711 # @ blocking = unlimited
    705 712 # @ queue
     
    733 740       fi
    734 741
     742    fi
     743
     744       # workaround because of silly job filter on ibmkisti
     745    if [[ $remote_host = ibmkisti  &&  $threads_per_task != 1 ]]
     746    then
     747       echo  "export OMP_NUM_THREADS=$threads_per_task"  >>  $job_to_send
    735 748    fi
    736 749
  • palm/trunk/SOURCE/flow_statistics.f90

    r679 r696  
    4 4 ! Current revisions:
    5 5 ! -----------------
    6 !
     6 ! Bugfix: Summation of Wicker-Skamarock scheme fluxes and variances for all
     7 ! threads
    7 8 !
    8 9 ! Former revisions:
     
    155 156 !--    the local array sums_l() for further computations
    156 157        IF ( ws_scheme_mom )  THEN
     158
    157 159 !       
    158 160 !--       According to the Neumann bc for the horizontal velocity components,
     
    162 164             sums_vs2_ws_l(nzt+1,sr) = sums_vs2_ws_l(nzt,sr)   
    163 165          ENDIF
     166
     167          DO  i = 0, threads_per_task-1
    164 168 !         
    165 !--       Swap the turbulent quantities evaluated in advec_ws.
    166           sums_l(:,13,0) = sums_wsus_ws_l(:,sr)       ! w*u*
    167           sums_l(:,15,0) = sums_wsvs_ws_l(:,sr)       ! w*v*
    168           sums_l(:,30,0) = sums_us2_ws_l(:,sr)        ! u*2
    169           sums_l(:,31,0) = sums_vs2_ws_l(:,sr)        ! v*2
    170           sums_l(:,32,0) = sums_ws2_ws_l(:,sr)        ! w*2
    171           sums_l(:,34,0) = sums_l(:,34,0) + 0.5 *                             &
    172                 (sums_us2_ws_l(:,sr) + sums_vs2_ws_l(:,sr)                    &
    173                 + sums_ws2_ws_l(:,sr))                      ! e*
    174           DO  k = nzb, nzt
    175              sums_l(nzb+5,pr_palm,0) = sums_l(nzb+5,pr_palm,0) + 0.5 * (      &
    176                 sums_us2_ws_l(k,sr) + sums_vs2_ws_l(k,sr) +                   &
    177                 sums_ws2_ws_l(k,sr))
    178           ENDDO
    179        ENDIF
     169 !--          Swap the turbulent quantities evaluated in advec_ws.
     170             sums_l(:,13,i) = sums_wsus_ws_l(:,sr)       ! w*u*
     171             sums_l(:,15,i) = sums_wsvs_ws_l(:,sr)       ! w*v*
     172             sums_l(:,30,i) = sums_us2_ws_l(:,sr)        ! u*2
     173             sums_l(:,31,i) = sums_vs2_ws_l(:,sr)        ! v*2
     174             sums_l(:,32,i) = sums_ws2_ws_l(:,sr)        ! w*2
     175             sums_l(:,34,i) = sums_l(:,34,i) + 0.5 *                        &
     176                              ( sums_us2_ws_l(:,sr) + sums_vs2_ws_l(:,sr) + &
     177                                sums_ws2_ws_l(:,sr) )    ! e*
     178             DO  k = nzb, nzt
     179                sums_l(nzb+5,pr_palm,i) = sums_l(nzb+5,pr_palm,i) + 0.5 * (  &
     180                                                      sums_us2_ws_l(k,sr) +  &
     181                                                      sums_vs2_ws_l(k,sr) +  &
     182                                                      sums_ws2_ws_l(k,sr) )
     183             ENDDO
     184          ENDDO
     185
     186       ENDIF
     187
    180 188        IF ( ws_scheme_sca )  THEN
    181           sums_l(:,17,0) = sums_wspts_ws_l(:,sr)      ! w*pt* from advec_s_ws
    182           IF ( ocean ) sums_l(:,66,0) = sums_wssas_ws_l(:,sr) ! w*sa*
    183           IF ( humidity  .OR.  passive_scalar ) sums_l(:,49,0) =              &
    184                                                    sums_wsqs_ws_l(:,sr) !w*q*
     189
     190          DO  i = 0, threads_per_task-1
     191             sums_l(:,17,i) = sums_wspts_ws_l(:,sr)      ! w*pt* from advec_s_ws
     192             IF ( ocean ) sums_l(:,66,i) = sums_wssas_ws_l(:,sr) ! w*sa*
     193             IF ( humidity .OR. passive_scalar ) sums_l(:,49,i) =              &
     194                                                   sums_wsqs_ws_l(:,sr) !w*q*
     195          ENDDO
     196
    185 197        ENDIF
    186 198 !
  • palm/trunk/SOURCE/poisfft.f90

    r684 r696  
    4 4 ! Current revisions:
    5 5 ! -----------------
    6 !
     6 ! work_fftx removed from PRIVATE clauses in fftx_tr_xy and tr_yx_fftx
    7 7 !
    8 8 ! Former revisions:
     
    1070 1070 !--       Cache optimized code (there might be still a potential for better
    1071 1071 !--       optimization).
    1072 !$OMP     PARALLEL PRIVATE (i,j,k,work_fftx)
     1072 !$OMP     PARALLEL PRIVATE (i,j,k)
    1073 1073 !$OMP     DO
    1074 1074           DO  i = 0, nx
     
    1181 1181 !--       Cache optimized code (there might be still a potential for better
    1182 1182 !--       optimization).
    1183 !$OMP     PARALLEL PRIVATE (i,j,k,work_fftx)
     1183 !$OMP     PARALLEL PRIVATE (i,j,k)
    1184 1184 !$OMP     DO
    1185 1185           DO  j = nys, nyn
     
    1245 1245
    1246 1246       tn = 0           ! Default thread number in case of one thread
    1247 !$OMP  PARALLEL PRIVATE ( i, j, k, m, n, tn, work_ffty, work_triy )
    1248 !$OMP  DO
     1247 !$OMP  PARALLEL DO PRIVATE ( i, j, k, m, n, tn, work_ffty, work_triy )
    1249 1248       DO  i = nxl_y, nxr_y
    12501249
     
    1336 1335
    1337 1336       ENDDO
    1338 !$OMP  END PARALLEL
    1339 1337
    1340 1338       DEALLOCATE( tri )
Note: See TracChangeset for help on using the changeset viewer.