Ignore:
Timestamp:
Sep 24, 2014 2:06:57 PM (10 years ago)
Author:
maronga
Message:

New flag files allow forcing unscheduled termination/restarts of batch jobs; progress output is produced for batch runs; small adjustments for lcxe6 and lccrayh/lccrayb

File:
1 edited

Legend:

Unmodified
Added
Removed
  • palm/trunk/SCRIPTS/mrun

    r1443 r1468  
    2222# Current revisions:
    2323# ------------------
    24 #
     24# Typo removed (addres->address)
     25# Bugfix: returning files to IMUK via ssh did not work for lccrayh and lccrayb
     26# Added support for restart runs (remote) for lcxe6
    2527#
    2628# Former revisions:
     
    257259 if [[ `hostname` = rte10 ]]
    258260 then
    259     return_addres=133.5.185.60
    260     echo "+++ WARNING: fixed return_addres = $return_addres is used !!!!!"
     261    return_address=133.5.185.60
     262    echo "+++ WARNING: fixed return_address = $return_address is used !!!!!"
    261263 elif [[ `hostname` = climate0 ]]
    262264 then
    263     return_addres=165.132.26.68
    264     echo "+++ WARNING: fixed return_addres = $return_addres is used !!!!!"
     265    return_address=165.132.26.68
     266    echo "+++ WARNING: fixed return_address = $return_address is used !!!!!"
    265267 elif [[ `hostname` = urban00 ]]
    266268 then
    267     return_addres=147.46.30.151
    268     echo "+++ WARNING: fixed return_addres = $return_addres is used !!!!!"
     269    return_address=147.46.30.151
     270    echo "+++ WARNING: fixed return_address = $return_address is used !!!!!"
    269271 else
    270     return_addres=$(nslookup `hostname` 2>&1 | grep "Address:" | tail -1 | awk '{print $2}')
     272    return_address=$(nslookup `hostname` 2>&1 | grep "Address:" | tail -1 | awk '{print $2}')
    271273 fi
    272274 return_password=""
     
    409411       (q)   queue=$OPTARG; mc="$mc -q$OPTARG";;
    410412       (r)   run_mode=$OPTARG; mc="$mc -r'$OPTARG'";;
    411        (R)   remotecall=true;return_addres=$OPTARG; mc="$mc -R$OPTARG";;
     413       (R)   remotecall=true;return_address=$OPTARG; mc="$mc -R$OPTARG";;
    412414       (s)   source_list=$OPTARG; mc="$mc -s'$OPTARG'";;
    413415       (S)   read_from_config=false; mc="$mc -S";;
     
    670672
    671673    # READ AND EVALUATE THE CONFIGURATION-FILE FROM WITHIN THIS SHELLSCRIPT
    672     # (OPTION -S). THE DEFAULT IS USING THE ROUTINE interpret_config
     674    # (OPTION -S). THE DEFAULT IS USING THE ROUTINE <<<<
    673675 if [[ "$read_from_config" = false ]]
    674676 then
     
    34073409                echo "fname=$fname"                      >>  mpi_exec_shell
    34083410                echo "localhost=$localhost"              >>  mpi_exec_shell
    3409                 echo "return_addres=$return_addres"      >>  mpi_exec_shell
     3411                echo "return_address=$return_address"    >>  mpi_exec_shell
    34103412                echo "return_username=$return_username"  >>  mpi_exec_shell
    34113413                echo "tasks_per_node=$tasks_per_node"    >>  mpi_exec_shell
     
    34193421                echo "export fname"                      >>  mpi_exec_shell
    34203422                echo "export localhost"                  >>  mpi_exec_shell
    3421                 echo "export return_addres"              >>  mpi_exec_shell
     3423                echo "export return_address"             >>  mpi_exec_shell
    34223424                echo "export return_username"            >>  mpi_exec_shell
    34233425                echo "export tasks_per_node"             >>  mpi_exec_shell
     
    37943796                   printf "\n              ${pathout[$i]}/${localhost}_${fname}${endout[$i]}$cst"
    37953797                   printf "\n              or higher cycle\n"
    3796                    echo "batch_scp $PORTOPT $cps -b -m -u $return_username $return_addres  $file_to_transfer \"${pathout[$i]}\" ${localhost}_${fname}${endout[$i]}  ${extout[$i]}"  >>  transfer_${localout[$i]}
     3798                   echo "batch_scp $PORTOPT $cps -b -m -u $return_username $return_address  $file_to_transfer \"${pathout[$i]}\" ${localhost}_${fname}${endout[$i]}  ${extout[$i]}"  >>  transfer_${localout[$i]}
    37973799
    37983800                   echo "[[ \$? = 0 ]]  &&  rm  $file_to_transfer"  >>  transfer_${localout[$i]}
     
    38183820                   if [[ $localhost = lccrayb ]]
    38193821                   then
    3820                       ssh $usern@blogin1 "cd $TEMPDIR; batch_scp $PORTOPT $cps -b -m -u $return_username $return_addres  ${localout[$i]} \"${pathout[$i]}\" ${localhost}_${fname}${endout[$i]}  ${extout[$i]}"
     3822                      ssh $usern@blogin1 ". \\$HOME/.profile; cd $TEMPDIR; batch_scp $PORTOPT $cps -b -m -u $return_username $return_address  ${localout[$i]} \"${pathout[$i]}\" ${localhost}_${fname}${endout[$i]}  ${extout[$i]}"
    38213823                   elif [[ $localhost = lccrayh ]]
    38223824                   then
    3823                       ssh $usern@hlogin1 "cd $TEMPDIR; batch_scp $PORTOPT $cps -b -m -u $return_username $return_addres  ${localout[$i]} \"${pathout[$i]}\" ${localhost}_${fname}${endout[$i]}  ${extout[$i]}"
     3825                      ssh $usern@hlogin1 ". \\$HOME/.profile; cd $TEMPDIR; batch_scp $PORTOPT $cps -b -m -u $return_username $return_address  ${localout[$i]} \"${pathout[$i]}\" ${localhost}_${fname}${endout[$i]}  ${extout[$i]}"
     3826                   elif [[ $localhost = lcxe6 ]]
     3827                   then
     3828                      ssh $usern@hexagon ". \\$HOME/.profile; cd $TEMPDIR; batch_scp $PORTOPT $cps -b -m -u $return_username $return_address  ${localout[$i]} \"${pathout[$i]}\" ${localhost}_${fname}${endout[$i]}  ${extout[$i]}"
    38243829                   else
    3825                       batch_scp $PORTOPT $cps -b -m -u $return_username $return_addres  ${localout[$i]} "${pathout[$i]}" ${localhost}_${fname}${endout[$i]}  ${extout[$i]}
     3830                      batch_scp $PORTOPT $cps -b -m -u $return_username $return_address  ${localout[$i]} "${pathout[$i]}" ${localhost}_${fname}${endout[$i]}  ${extout[$i]}
    38263831                   fi
    38273832                   [[ $? != 0 ]]  &&  transfer_failed=true
     
    38743879                   printf "\n              ${pathout[$i]}/${localhost}_${fname}${endout[$i]}"
    38753880                   printf "\n              or higher cycle\n"
    3876                    echo "batch_scp $PORTOPT -A -b -m -u $return_username $return_addres  $file_to_transfer \"${pathout[$i]}\" ${localhost}_${fname}${endout[$i]}  ${extout[$i]}"  >>  transfer_${localout[$i]}
     3881                   echo "batch_scp $PORTOPT -A -b -m -u $return_username $return_address  $file_to_transfer \"${pathout[$i]}\" ${localhost}_${fname}${endout[$i]}  ${extout[$i]}"  >>  transfer_${localout[$i]}
    38773882
    38783883                   echo "[[ \$? = 0 ]]  &&  rm  $file_to_transfer"  >>  transfer_${localout[$i]}
     
    38983903                   if [[ $localhost = lccrayb ]]
    38993904                   then
    3900                       ssh $usern@blogin1 "cd $TEMPDIR; batch_scp $PORTOPT -A -b -m -u $return_username $return_addres  ${localout[$i]} \"${pathout[$i]}\" ${localhost}_${fname}${endout[$i]}  ${extout[$i]}"
     3905                      ssh $usern@blogin1 ". \\$HOME/.profile; cd $TEMPDIR; batch_scp $PORTOPT -A -b -m -u $return_username $return_address  ${localout[$i]} \"${pathout[$i]}\" ${localhost}_${fname}${endout[$i]}  ${extout[$i]}"
    39013906                   elif [[ $localhost = lccrayh ]]
    39023907                   then
    3903                       ssh $usern@hlogin1 "cd $TEMPDIR; batch_scp $PORTOPT -A -b -m -u $return_username $return_addres  ${localout[$i]} \"${pathout[$i]}\" ${localhost}_${fname}${endout[$i]}  ${extout[$i]}"
     3908                      ssh $usern@hlogin1 ". \\$HOME/.profile; cd $TEMPDIR; batch_scp $PORTOPT -A -b -m -u $return_username $return_address  ${localout[$i]} \"${pathout[$i]}\" ${localhost}_${fname}${endout[$i]}  ${extout[$i]}"
     3909                   elif [[ $localhost = lcxe6 ]]
     3910                   then
     3911                      ssh $usern@hexagon ". \\$HOME/.profile; cd $TEMPDIR; batch_scp $PORTOPT -A -b -m -u $return_username $return_address  ${localout[$i]} \"${pathout[$i]}\" ${localhost}_${fname}${endout[$i]}  ${extout[$i]}"
    39043912                   else
    3905                       batch_scp $PORTOPT -A -b -m -u $return_username  $return_addres ${localout[$i]} "${pathout[$i]}" ${localhost}_${fname}${endout[$i]}  ${extout[$i]}
     3913                      batch_scp $PORTOPT -A -b -m -u $return_username  $return_address ${localout[$i]} "${pathout[$i]}" ${localhost}_${fname}${endout[$i]}  ${extout[$i]}
    39063914                   fi
    39073915                   [[ $? != 0 ]]  &&  transfer_failed=true
     
    43724380
    43734381             # START THE RESTART-JOB
    4374           printf "\n\n  *** initiating restart-run on \"$return_addres\" using command:\n"
     4382          printf "\n\n  *** initiating restart-run on \"$return_address\" using command:\n"
    43754383          echo "      $mc"
    43764384          printf "\n$dashes\n"
     
    43784386          then
    43794387
    4380              if [[ $localhost = lccrayb  || $localhost = lccrayh  ||  $localhost = nech  ||  $localhost = ibmh  ||  $localhost = ibmkisti  ||  $localhost = ibmku  ||  $localhost = ibms  ||  $localhost = lcflow  ||  $localhost = lckyu* ]]
     4388             if [[ $localhost = lccrayb  || $localhost = lccrayh  ||  $localhost = nech  ||  $localhost = ibmh  ||  $localhost = ibmkisti  ||  $localhost = ibmku  ||  $localhost = ibms  ||  $localhost = lcflow  ||  $localhost = lckyu* || $localhost = lcxe6 ]]
    43814389             then
    43824390                echo "*** ssh will be used to initiate restart-runs!"
    4383                 echo "    return_addres=\"$return_addres\" "
     4391                echo "    return_address=\"$return_address\" "
    43844392                echo "    return_username=\"$return_username\" "
    4385                 if [[ $(echo $return_addres | grep -c "130.75.105") = 1 ]]
     4393                if [[ $(echo $return_address | grep -c "130.75.105") = 1 ]]
    43864394                then
    43874395                   if [[ $localhost = ibmh ]]
    43884396                   then
    4389                       ssh $SSH_PORTOPT $usern@136.172.40.15 "ssh $SSH_PORTOPT $return_addres -l $return_username \"PATH=\\\$PATH:$LOCAL_MRUN_PATH;. /muksoft_i/packages/intel/composer_xe_2013_sp1.2.144/bin/compilervars.sh intel64;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc\" "
     4397                      ssh $SSH_PORTOPT $usern@136.172.40.15 "ssh $SSH_PORTOPT $return_address -l $return_username \". \\\$HOME/.profile; module load intel-compiler hdf5 netcdf; PATH=\\\$PATH:$LOCAL_MRUN_PATH;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc\" "
    43904398                   elif [[ $localhost = lccrayb ]]
    43914399                   then
    4392                       ssh $usern@blogin1 "ssh $SSH_PORTOPT $return_addres -l $return_username \"PATH=\\\$PATH:$LOCAL_MRUN_PATH;. /muksoft_i/packages/intel/composer_xe_2013_sp1.2.144/bin/compilervars.sh intel64;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc\" "
     4400                      ssh $usern@blogin1 "ssh $SSH_PORTOPT $return_address -l $return_username \". \\\$HOME/.profile; module load intel-compiler hdf5 netcdf; PATH=\\\$PATH:$LOCAL_MRUN_PATH; export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc\" "
    43934401                   elif [[ $localhost = lccrayh ]]
    43944402                   then
    4395                       ssh $usern@hlogin1 "ssh $SSH_PORTOPT $return_addres -l $return_username \"PATH=\\\$PATH:$LOCAL_MRUN_PATH;. /muksoft_i/packages/intel/composer_xe_2013_sp1.2.144/bin/compilervars.sh intel64;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc\" "
    4396 
     4403                      ssh $usern@hlogin1 "ssh $SSH_PORTOPT $return_address -l $return_username \". \\\$HOME/.profile; module load intel-compiler hdf5 netcdf; PATH=\\\$PATH:$LOCAL_MRUN_PATH; export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc\" "
     4404                   elif [[ $localhost = lcxe6 ]]
     4405                   then
     4406                      ssh $usern@hexagon "ssh $SSH_PORTOPT $return_address -l $return_username \". \\\$HOME/.profile; module load intel-compiler hdf5 netcdf; PATH=\\\$PATH:$LOCAL_MRUN_PATH; export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc\" "
    43974407                   else
    4398                       ssh $SSH_PORTOPT $return_addres -l $return_username "PATH=\$PATH:$LOCAL_MRUN_PATH;. /muksoft_i/packages/intel/composer_xe_2013_sp1.2.144/bin/compilervars.sh intel64;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc "
     4408                      ssh $SSH_PORTOPT $return_address -l $return_username ". \\\$HOME/.profile; module load intel-compiler hdf5 netcdf; PATH=\$PATH:$LOCAL_MRUN_PATH;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc "
    43994409                   fi
    44004410                else
    44014411                   if [[ $localhost = ibmkisti ]]
    44024412                   then
    4403                       ssh $SSH_PORTOPT $usern@gaiad "ssh $SSH_PORTOPT $return_addres -l $return_username \"PATH=\\\$PATH:$LOCAL_MRUN_PATH;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc\" "
     4413                      ssh $SSH_PORTOPT $usern@gaiad "ssh $SSH_PORTOPT $return_address -l $return_username \"PATH=\\\$PATH:$LOCAL_MRUN_PATH;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc\" "
    44044414                   elif [[ $localhost = lcflow ]]
    44054415                   then
    4406                       /usr/bin/ssh $SSH_PORTOPT $return_addres -l $return_username "PATH=\$PATH:$LOCAL_MRUN_PATH;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc "
     4416                      /usr/bin/ssh $SSH_PORTOPT $return_address -l $return_username "PATH=\$PATH:$LOCAL_MRUN_PATH;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc "
    44074417                   elif [[ $localhost = lccrayb ]]
    44084418                   then
    4409                       ssh $usern@blogin1 "ssh $SSH_PORTOPT $return_addres -l $return_username \"PATH=\\\$PATH:$LOCAL_MRUN_PATH;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc\" "
     4419                      ssh $usern@blogin1 "ssh $SSH_PORTOPT $return_address -l $return_username \"PATH=\\\$PATH:$LOCAL_MRUN_PATH;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc\" "
    44104420                   elif [[ $localhost = lccrayh ]]
    44114421                   then
    4412                       ssh $usern@hlogin1 "ssh $SSH_PORTOPT $return_addres -l $return_username \"PATH=\\\$PATH:$LOCAL_MRUN_PATH;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc\" "
     4422                      ssh $usern@hlogin1 "ssh $SSH_PORTOPT $return_address -l $return_username \"PATH=\\\$PATH:$LOCAL_MRUN_PATH;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc\" "
    44134423                   else
    4414                       ssh $SSH_PORTOPT $return_addres -l $return_username "PATH=\$PATH:$LOCAL_MRUN_PATH;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc "
     4424                      ssh $SSH_PORTOPT $return_address -l $return_username "PATH=\$PATH:$LOCAL_MRUN_PATH;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc "
    44154425                   fi
    44164426                fi
     
    45094519
    45104520       # BUILD THE MRUN-COMMAND TO BE CALLED IN THE BATCH-JOB ON THE REMOTE-MACHINE
    4511     mrun_com="$mrun_script_name -a $afname -c $config_file -d $fname -h $host -H $fromhost -m $memory -t $cpumax -q $queue -R $return_addres -U $return_username -u $remote_username"
     4521    mrun_com="$mrun_script_name -a $afname -c $config_file -d $fname -h $host -H $fromhost -m $memory -t $cpumax -q $queue -R $return_address -U $return_username -u $remote_username"
    45124522    [[ "$cpp_opts" != "" ]]       &&  mrun_com=${mrun_com}" -D \"$cpp_opts\""
    45134523    [[ "$global_revision" != "" ]]  &&  mrun_com=${mrun_com}" -G \"$global_revision\""
     
    46484658                # ON KISTI'S IBM FIREWALL IS ONLY OPENED ON INTERACTIVE NODE
    46494659             echo  "localdir=\`pwd\`"                            >>  $jobfile
    4650              echo  "ssh $SSH_PORTOPT $remote_username@gaiad \"cd \$localdir; scp $PORTOPT -r  $return_username@$return_addres:$working_directory/SOURCES_FOR_RUN_$fname .\" "  >>  $jobfile
     4660             echo  "ssh $SSH_PORTOPT $remote_username@gaiad \"cd \$localdir; scp $PORTOPT -r  $return_username@$return_address:$working_directory/SOURCES_FOR_RUN_$fname .\" "  >>  $jobfile
    46514661          elif [[ $host = lccrayb ]]
    46524662          then
    46534663             echo  "localdir=\`pwd\`"                            >>  $jobfile
    4654              echo  "ssh $SSH_PORTOPT $remote_username@blogin1 \"cd \$localdir; scp $PORTOPT -r  $return_username@$return_addres:$working_directory/SOURCES_FOR_RUN_$fname .\" "  >>  $jobfile
     4664             echo  "ssh $SSH_PORTOPT $remote_username@blogin1 \"cd \$localdir; scp $PORTOPT -r  $return_username@$return_address:$working_directory/SOURCES_FOR_RUN_$fname .\" "  >>  $jobfile
    46554665          elif [[ $host = lccrayh ]]
    46564666          then
    46574667             echo  "localdir=\`pwd\`"                            >>  $jobfile
    4658              echo  "ssh $SSH_PORTOPT $remote_username@hlogin1 \"cd \$localdir; scp $PORTOPT -r  $return_username@$return_addres:$working_directory/SOURCES_FOR_RUN_$fname .\" "  >>  $jobfile
     4668             echo  "ssh $SSH_PORTOPT $remote_username@hlogin1 \"cd \$localdir; scp $PORTOPT -r  $return_username@$return_address:$working_directory/SOURCES_FOR_RUN_$fname .\" "  >>  $jobfile
    46594669
    46604670          else
    4661              echo  "scp $PORTOPT -r $return_username@$return_addres:$working_directory/SOURCES_FOR_RUN_$fname ."  >>  $jobfile
     4671             echo  "scp $PORTOPT -r $return_username@$return_address:$working_directory/SOURCES_FOR_RUN_$fname ."  >>  $jobfile
    46624672          fi
    46634673       fi
     
    47574767             echo  "%END%"                                    >>  $jobfile
    47584768          else
    4759              echo  "batch_scp $PORTOPT -b -o -g -s -u $return_username $return_addres ${remotepathin[$i]} \"${pathin[$i]}\" ${frelin[$i]}" >>  $jobfile
     4769             echo  "batch_scp $PORTOPT -b -o -g -s -u $return_username $return_address ${remotepathin[$i]} \"${pathin[$i]}\" ${frelin[$i]}" >>  $jobfile
    47604770          fi
    47614771
Note: See TracChangeset for help on using the changeset viewer.