Ignore:
Timestamp:
Jul 17, 2015 11:37:41 AM (9 years ago)
Author:
heinze
Message:

Adjustments for Mistral at DKRZ Hamburg (host identifier: lcbullhh)

File:
1 edited

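Legend:

Unmodified — prefixed with the old (r1610) and the new (r1620) line number run together, e.g. "328326" means old line 328, new line 326
Added — prefixed with the new (r1620) line number only
Removed — prefixed with the old (r1610) line number only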
  • palm/trunk/SCRIPTS/mrun

    r1610 r1620  
    2222# Current revisions:
    2323# ------------------
    24 #
     24# adjustments for Mistral at DKRZ Hamburg (lcbullhh)
    2525#
    2626# Former revisions:
     
    324324 typeset -i  cputime i ii iia iii iio icycle inode ival jobges jobsek last_char_int maxcycle minuten nodes pes remaining_pes sekunden tp1
    325325
    326 
    327 
    328326    # ERROR HANDLING IN CASE OF EXIT
    329327 trap 'rm -rf  $working_directory/tmp_mrun
     
    610608    do_remote=true
    611609    case  $host  in
    612         (ibm|ibmh|ibmkisti|ibmku|ibms|nech|lccrayb|lccrayh|lccrayf|lcflow|lckyoto|unics|lcxe6|lcxt5m|lck|lckiaps|lckordi|lckyuh|lckyut|lcsb)  true;;
     610        (ibm|ibmh|ibmkisti|ibmku|ibms|nech|lcbullhh|lccrayb|lccrayh|lccrayf|lcflow|lckyoto|unics|lcxe6|lcxt5m|lck|lckiaps|lckordi|lckyuh|lckyut|lcsb)  true;;
    613611        (*)  printf "\n"
    614612             printf "\n  +++ sorry: execution of batch jobs on remote host \"$host\""
     
    669667 fi
    670668
    671 
    672669    # CHECK, IF FILE-ARCHIVING HAS FAILED IN PREVIOUS JOB (OF A JOB-CHAIN)
    673670 if [[ -f ~/job_queue/ARCHIVE_ERROR_$fname ]]
     
    697694 if [[ "$read_from_config" = false ]]
    698695 then
    699 
    700696    [[ $silent = false ]]  &&  printf "\n    Reading the configuration file... "
    701697
     
    769765                   do_remote=true
    770766                   case  $host  in
    771                        (ibm|ibmh|ibmkisti|ibmku|ibms|lccrayb|lccrayh|lccrayf|lcflow|lckyoto|nech|unics|lcxe6|lcxt5m|lck|lckiaps|lckordi|lckyuh|lckyut|lcsb)  true;;
     767                       (ibm|ibmh|ibmkisti|ibmku|ibms|lcbullhh|lccrayb|lccrayh|lccrayf|lcflow|lckyoto|nech|unics|lcxe6|lcxt5m|lck|lckiaps|lckordi|lckyuh|lckyut|lcsb)  true;;
    772768                       (*)  printf "\n  +++ sorry: execution of batch jobs on remote host \"$host\""
    773769                            printf "\n      is not available"
     
    887883 else
    888884
    889 
    890885       # EVALUATE THE CONFIGURATION-FILE BY FORTRAN-PROGRAM
    891886    [[ $silent = false ]]  &&  printf "..."
     
    10341029    do_remote=true
    10351030    case  $host  in
    1036         (ibm|ibmh|ibmkisti|ibmku|ibms|lccrayb|lccrayh|lccrayf|lcflow|lckyoto|nech|unics|lcxe6|lcxt5m|lck|lckiaps|lckordi|lckyuh|lckyut|lcsb)  true;;
     1031        (ibm|ibmh|ibmkisti|ibmku|ibms|lcbullhh|lccrayb|lccrayh|lccrayf|lcflow|lckyoto|nech|unics|lcxe6|lcxt5m|lck|lckiaps|lckordi|lckyuh|lckyut|lcsb)  true;;
    10371032        (*)  printf "\n"
    10381033             printf "\n  +++ sorry: execution of batch jobs on remote host \"$host\""
     
    10451040
    10461041
     1042
    10471043    # IN CASE OF PARALLEL EXECUTION, CHECK SOME SPECIFICATIONS CONCERNING PROCESSOR NUMBERS
    10481044 if [[ "$cond1" = parallel  ||  "$cond2" = parallel ]]
     
    11601156        (ibmh)       queue=cluster;;
    11611157        (ibmkisti)   queue=class.32plus;;
     1158        (lcbullhh)   queue=compute;;
    11621159        (lccrayb)    queue=mpp1q;;
    11631160        (lccrayh)    queue=mpp1q;;
     
    21702167 fi
    21712168
    2172 
    21732169    # QUERY FOR CONTINUE (ON LOCAL MACHINES ONLY)
    21742170 if [[ $remotecall = false  &&  $silent = false  &&  $jobmo != BATCH ]]
     
    26342630 rm -rf  $check_sources
    26352631 cd $working_directory
    2636 
    26372632
    26382633    # DETERMINE PATH FOR MAKE DEPOSITORY
     
    28252820             [[ "$check_for_file" = "" ]]  &&  compile_error=true
    28262821             continue   # STATUS=1, IF a.out EXISTS
    2827           elif [[ $localhost = lccrayb  ||  $localhost = lccrayf ||  $localhost = lccrayh ]]
     2822          elif [[ $localhost = lcbullhh  ||  $localhost = lccrayb  ||  $localhost = lccrayf ||  $localhost = lccrayh ]]
    28282823          then
    28292824             make $mopts -f Makefile PROG=a.out  F90=$compiler_name  COPT="$cpp_options"  F90FLAGS="$fopts"  LDFLAGS="$lopts"
     
    34973492                # COPY HOSTFILE FROM SOURCE DIRECTORY OR CREATE IT, IF IT
    34983493                # DOES NOT EXIST
    3499              if [[  $host != lccrayb  &&$host != lccrayf  && $host != lccrayh  &&  $host != lckyuh  &&  $host != lckyut ]]
     3494             if [[  $host != lccrayb  && $host != lccrayf  && $host != lccrayh  &&  $host != lckyuh  &&  $host != lckyut ]]
    35003495             then
    35013496                if [[ -f $hostfile ]]
     
    36663661             echo "mpirun  -np $ii  -hostfile $PBS_NODEFILE ./a.out  $ROPTS"
    36673662             mpirun  -np $ii  -hostfile $PBS_NODEFILE ./a.out  $ROPTS
     3663
     3664          elif [[ $host = lcbullhh ]]
     3665          then
     3666             export OMPI_MCA_pml=cm
     3667             export OMPI_MCA_mtl=mxm
     3668             export OMPI_MCA_coll=^ghc
     3669             export OMPI_MCA_mtl_mxm_np=0
     3670             export MXM_RDMA_PORTS=mlx5_0:1
     3671             export MXM_LOG_LEVEL=ERROR
     3672             export OMP_NUM_THREADS=$threads_per_task
     3673             export KMP_AFFINITY=verbose,granularity=core,compact,1
     3674             export KMP_STACKSIZE=64m
     3675
     3676             srun  --nodes=$nodes --ntasks-per-node=$tasks_per_node ./a.out 
    36683677          else
    36693678             mpprun  -n $numprocs  a.out  $ROPTS
    36703679          fi
    3671        else
     3680      else
    36723681          a.out  $ROPTS
    36733682       fi
     
    38923901                   then
    38933902                      ssh $usern@hlogin1 ". \\$HOME/.profile; cd $TEMPDIR; batch_scp $PORTOPT $cps -b -m -u $return_username $return_address  ${localout[$i]} \"${pathout[$i]}\" ${localhost}_${fname}${endout[$i]}  ${extout[$i]}"
     3903                   elif [[ $localhost = lcbullhh ]]
     3904                   then
     3905                      ssh $usern@mlogin101 ". \\$HOME/.profile; cd $TEMPDIR; batch_scp $PORTOPT $cps -b -m -u $return_username $return_address  ${localout[$i]} \"${pathout[$i]}\" ${localhost}_${fname}${endout[$i]}  ${extout[$i]}"
    38943906                   elif [[ $localhost = lcxe6 ]]
    38953907                   then
     
    44544466          then
    44554467
    4456              if [[ $localhost = lccrayb  || $localhost = lccrayh  ||  $localhost = nech  ||  $localhost = ibmh  ||  $localhost = ibmkisti  ||  $localhost = ibmku  ||  $localhost = ibms  ||  $localhost = lcflow  ||  $localhost = lckyu* || $localhost = lcxe6 ]]
     4468             if [[ $localhost = lcbullhh  ||  $localhost = lccrayb  ||  $localhost = lccrayh  ||  $localhost = nech  ||  $localhost = ibmh  ||  $localhost = ibmkisti  ||  $localhost = ibmku  ||  $localhost = ibms  ||  $localhost = lcflow  ||  $localhost = lckyu*  || $localhost = lcxe6 ]]
    44574469             then
    44584470                echo "*** ssh will be used to initiate restart-runs!"
     
    44644476                   then
    44654477                      ssh $SSH_PORTOPT $usern@136.172.40.15 "ssh $SSH_PORTOPT $return_address -l $return_username \". \\\$HOME/.profile; module load intel-compiler hdf5 netcdf; PATH=\\\$PATH:$LOCAL_MRUN_PATH;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc\" "
     4478                   elif [[ $localhost = lcbullhh ]]
     4479                   then
     4480                      ssh $SSH_PORTOPT $usern@mlogin101 "ssh $SSH_PORTOPT $return_address -l $return_username \". \\\$HOME/.profile; module load intel-compiler hdf5 netcdf; PATH=\\\$PATH:$LOCAL_MRUN_PATH;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc\" "
    44664481                   elif [[ $localhost = lccrayb ]]
    44674482                   then
     
    45824597 else
    45834598
    4584 
    45854599       # PREPARING ACTIONS,
    45864600       # IF A BATCH-JOB IS TO BE GENERATED AND STARTED ON A REMOTE-MACHINE GERECHNET
     
    46964710       #              THIS MAY CREATE A QUITE LARGE JOB-FILE, WHICH CAN CAUSE PROBLEMS WITH SOME
    46974711       #              QUEUEING-SYSTEMS
    4698     if [[ $host = ibmkisti  ||  $host = lccrayb  ||  $host = lccrayf  ||  $host = lccrayh ]]
     4712    if [[ $host = ibmkisti  ||  $host = lcbullhh  ||  $host = lccrayb  ||  $host = lccrayf  ||  $host = lccrayh ]]
    46994713    then
    47004714
     
    47114725       if [[ $host = $localhost ]]
    47124726       then
    4713 
    47144727             # DUE TO UNKNOWN REASONS, COPY WITH cp COMMAND CREATES CORRUPT
    47154728             # FILES ON CRAY XC30 SYSTEMS (CSC HELSINKI), rsync IS USED INSTEAD
     
    47274740             echo  "localdir=\`pwd\`"                            >>  $jobfile
    47284741             echo  "ssh $SSH_PORTOPT $remote_username@gaiad \"cd \$localdir; scp $PORTOPT -r  $return_username@$return_address:$working_directory/SOURCES_FOR_RUN_$fname .\" "  >>  $jobfile
     4742         elif [[ $host = lcbullhh ]]
     4743          then
     4744             echo  "localdir=\`pwd\`"                            >>  $jobfile
     4745             echo  "ssh $SSH_PORTOPT $remote_username@mlogin101 \"cd \$localdir; scp $PORTOPT -r  $return_username@$return_address:$working_directory/SOURCES_FOR_RUN_$fname .\" "  >>  $jobfile
     4746
    47294747          elif [[ $host = lccrayb ]]
    47304748          then
Note: See TracChangeset for help on using the changeset viewer.