Changeset 1620
- Timestamp: Jul 17, 2015 11:37:41 AM (10 years ago)
- Location: palm/trunk/SCRIPTS
- Files: 1 added, 3 edited
Legend:
- Unmodified
- Added
- Removed
-
palm/trunk/SCRIPTS/mbuild
r1614 r1620 22 22 # Current revisions: 23 23 # ------------------ 24 # 24 # adjustments for Mistral at DKRZ Hamburg (lcbullhh) 25 25 # 26 26 # Former revisions: … … 516 516 # DETERMINE IP-ADDRES OF THE REMOTE-HOST 517 517 case $remote_host in 518 (lcbullhh) remote_address=136.172.50.13;; 518 519 (lccrayb) remote_address=130.73.233.1;; 519 520 (lccrayh) remote_address=130.75.4.1;; -
palm/trunk/SCRIPTS/mrun
r1610 r1620 22 22 # Current revisions: 23 23 # ------------------ 24 # 24 # adjustments for Mistral at DKRZ Hamburg (lcbullhh) 25 25 # 26 26 # Former revisions: … … 324 324 typeset -i cputime i ii iia iii iio icycle inode ival jobges jobsek last_char_int maxcycle minuten nodes pes remaining_pes sekunden tp1 325 325 326 327 328 326 # ERROR HANDLING IN CASE OF EXIT 329 327 trap 'rm -rf $working_directory/tmp_mrun … … 610 608 do_remote=true 611 609 case $host in 612 (ibm|ibmh|ibmkisti|ibmku|ibms|nech|lc crayb|lccrayh|lccrayf|lcflow|lckyoto|unics|lcxe6|lcxt5m|lck|lckiaps|lckordi|lckyuh|lckyut|lcsb) true;;610 (ibm|ibmh|ibmkisti|ibmku|ibms|nech|lcbullhh|lccrayb|lccrayh|lccrayf|lcflow|lckyoto|unics|lcxe6|lcxt5m|lck|lckiaps|lckordi|lckyuh|lckyut|lcsb) true;; 613 611 (*) printf "\n" 614 612 printf "\n +++ sorry: execution of batch jobs on remote host \"$host\"" … … 669 667 fi 670 668 671 672 669 # CHECK, IF FILE-ARCHIVING HAS FAILED IN PREVIOUS JOB (OF A JOB-CHAIN) 673 670 if [[ -f ~/job_queue/ARCHIVE_ERROR_$fname ]] … … 697 694 if [[ "$read_from_config" = false ]] 698 695 then 699 700 696 [[ $silent = false ]] && printf "\n Reading the configuration file... " 701 697 … … 769 765 do_remote=true 770 766 case $host in 771 (ibm|ibmh|ibmkisti|ibmku|ibms|lc crayb|lccrayh|lccrayf|lcflow|lckyoto|nech|unics|lcxe6|lcxt5m|lck|lckiaps|lckordi|lckyuh|lckyut|lcsb) true;;767 (ibm|ibmh|ibmkisti|ibmku|ibms|lcbullhh|lccrayb|lccrayh|lccrayf|lcflow|lckyoto|nech|unics|lcxe6|lcxt5m|lck|lckiaps|lckordi|lckyuh|lckyut|lcsb) true;; 772 768 (*) printf "\n +++ sorry: execution of batch jobs on remote host \"$host\"" 773 769 printf "\n is not available" … … 887 883 else 888 884 889 890 885 # EVALUATE THE CONFIGURATION-FILE BY FORTRAN-PROGRAM 891 886 [[ $silent = false ]] && printf "..." 
… … 1034 1029 do_remote=true 1035 1030 case $host in 1036 (ibm|ibmh|ibmkisti|ibmku|ibms|lc crayb|lccrayh|lccrayf|lcflow|lckyoto|nech|unics|lcxe6|lcxt5m|lck|lckiaps|lckordi|lckyuh|lckyut|lcsb) true;;1031 (ibm|ibmh|ibmkisti|ibmku|ibms|lcbullhh|lccrayb|lccrayh|lccrayf|lcflow|lckyoto|nech|unics|lcxe6|lcxt5m|lck|lckiaps|lckordi|lckyuh|lckyut|lcsb) true;; 1037 1032 (*) printf "\n" 1038 1033 printf "\n +++ sorry: execution of batch jobs on remote host \"$host\"" … … 1045 1040 1046 1041 1042 1047 1043 # IN CASE OF PARALLEL EXECUTION, CHECK SOME SPECIFICATIONS CONCERNING PROCESSOR NUMBERS 1048 1044 if [[ "$cond1" = parallel || "$cond2" = parallel ]] … … 1160 1156 (ibmh) queue=cluster;; 1161 1157 (ibmkisti) queue=class.32plus;; 1158 (lcbullhh) queue=compute;; 1162 1159 (lccrayb) queue=mpp1q;; 1163 1160 (lccrayh) queue=mpp1q;; … … 2170 2167 fi 2171 2168 2172 2173 2169 # QUERY FOR CONTINUE (ON LOCAL MACHINES ONLY) 2174 2170 if [[ $remotecall = false && $silent = false && $jobmo != BATCH ]] … … 2634 2630 rm -rf $check_sources 2635 2631 cd $working_directory 2636 2637 2632 2638 2633 # DETERMINE PATH FOR MAKE DEPOSITORY … … 2825 2820 [[ "$check_for_file" = "" ]] && compile_error=true 2826 2821 continue # STATUS=1, IF a.out EXISTS 2827 elif [[ $localhost = lc crayb || $localhost = lccrayf || $localhost = lccrayh ]]2822 elif [[ $localhost = lcbullhh || $localhost = lccrayb || $localhost = lccrayf || $localhost = lccrayh ]] 2828 2823 then 2829 2824 make $mopts -f Makefile PROG=a.out F90=$compiler_name COPT="$cpp_options" F90FLAGS="$fopts" LDFLAGS="$lopts" … … 3497 3492 # COPY HOSTFILE FROM SOURCE DIRECTORY OR CREATE IT, IF IT 3498 3493 # DOES NOT EXIST 3499 if [[ $host != lccrayb && $host != lccrayf && $host != lccrayh && $host != lckyuh && $host != lckyut ]]3494 if [[ $host != lccrayb && $host != lccrayf && $host != lccrayh && $host != lckyuh && $host != lckyut ]] 3500 3495 then 3501 3496 if [[ -f $hostfile ]] … … 3666 3661 echo "mpirun -np $ii -hostfile $PBS_NODEFILE ./a.out 
$ROPTS" 3667 3662 mpirun -np $ii -hostfile $PBS_NODEFILE ./a.out $ROPTS 3663 3664 elif [[ $host = lcbullhh ]] 3665 then 3666 export OMPI_MCA_pml=cm 3667 export OMPI_MCA_mtl=mxm 3668 export OMPI_MCA_coll=^ghc 3669 export OMPI_MCA_mtl_mxm_np=0 3670 export MXM_RDMA_PORTS=mlx5_0:1 3671 export MXM_LOG_LEVEL=ERROR 3672 export OMP_NUM_THREADS=$threads_per_task 3673 export KMP_AFFINITY=verbose,granularity=core,compact,1 3674 export KMP_STACKSIZE=64m 3675 3676 srun --nodes=$nodes --ntasks-per-node=$tasks_per_node ./a.out 3668 3677 else 3669 3678 mpprun -n $numprocs a.out $ROPTS 3670 3679 fi 3671 3680 else 3672 3681 a.out $ROPTS 3673 3682 fi … … 3892 3901 then 3893 3902 ssh $usern@hlogin1 ". \\$HOME/.profile; cd $TEMPDIR; batch_scp $PORTOPT $cps -b -m -u $return_username $return_address ${localout[$i]} \"${pathout[$i]}\" ${localhost}_${fname}${endout[$i]} ${extout[$i]}" 3903 elif [[ $localhost = lcbullhh ]] 3904 then 3905 ssh $usern@mlogin101 ". \\$HOME/.profile; cd $TEMPDIR; batch_scp $PORTOPT $cps -b -m -u $return_username $return_address ${localout[$i]} \"${pathout[$i]}\" ${localhost}_${fname}${endout[$i]} ${extout[$i]}" 3894 3906 elif [[ $localhost = lcxe6 ]] 3895 3907 then … … 4454 4466 then 4455 4467 4456 if [[ $localhost = lc crayb || $localhost = lccrayh || $localhost = nech || $localhost = ibmh || $localhost = ibmkisti || $localhost = ibmku || $localhost = ibms || $localhost = lcflow || $localhost = lckyu* ||$localhost = lcxe6 ]]4468 if [[ $localhost = lcbullhh || $localhost = lccrayb || $localhost = lccrayh || $localhost = nech || $localhost = ibmh || $localhost = ibmkisti || $localhost = ibmku || $localhost = ibms || $localhost = lcflow || $localhost = lckyu* || $localhost = lcxe6 ]] 4457 4469 then 4458 4470 echo "*** ssh will be used to initiate restart-runs!" … … 4464 4476 then 4465 4477 ssh $SSH_PORTOPT $usern@136.172.40.15 "ssh $SSH_PORTOPT $return_address -l $return_username \". 
\\\$HOME/.profile; module load intel-compiler hdf5 netcdf; PATH=\\\$PATH:$LOCAL_MRUN_PATH;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc\" " 4478 elif [[ $localhost = lcbullhh ]] 4479 then 4480 ssh $SSH_PORTOPT $usern@mlogin101 "ssh $SSH_PORTOPT $return_address -l $return_username \". \\\$HOME/.profile; module load intel-compiler hdf5 netcdf; PATH=\\\$PATH:$LOCAL_MRUN_PATH;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc\" " 4466 4481 elif [[ $localhost = lccrayb ]] 4467 4482 then … … 4582 4597 else 4583 4598 4584 4585 4599 # PREPARING ACTIONS, 4586 4600 # IF A BATCH-JOB IS TO BE GENERATED AND STARTED ON A REMOTE-MACHINE GERECHNET … … 4696 4710 # THIS MAY CREATE A QUITE LARGE JOB-FILE, WHICH CAN CAUSE PROBLEMS WITH SOME 4697 4711 # QUEUEING-SYSTEMS 4698 if [[ $host = ibmkisti || $host = lc crayb || $host = lccrayf || $host = lccrayh ]]4712 if [[ $host = ibmkisti || $host = lcbullhh || $host = lccrayb || $host = lccrayf || $host = lccrayh ]] 4699 4713 then 4700 4714 … … 4711 4725 if [[ $host = $localhost ]] 4712 4726 then 4713 4714 4727 # DUE TO UNKNOWN REASONS, COPY WITH cp COMMAND CREATES CORRUPT 4715 4728 # FILES ON CRAY XC30 SYSTEMS (CSC HELSINKI), rsync IS USED INSTEAD … … 4727 4740 echo "localdir=\`pwd\`" >> $jobfile 4728 4741 echo "ssh $SSH_PORTOPT $remote_username@gaiad \"cd \$localdir; scp $PORTOPT -r $return_username@$return_address:$working_directory/SOURCES_FOR_RUN_$fname .\" " >> $jobfile 4742 elif [[ $host = lcbullhh ]] 4743 then 4744 echo "localdir=\`pwd\`" >> $jobfile 4745 echo "ssh $SSH_PORTOPT $remote_username@mlogin101 \"cd \$localdir; scp $PORTOPT -r $return_username@$return_address:$working_directory/SOURCES_FOR_RUN_$fname .\" " >> $jobfile 4746 4729 4747 elif [[ $host = lccrayb ]] 4730 4748 then -
palm/trunk/SCRIPTS/subjob
r1576 r1620 23 23 # Current revisions: 24 24 # ------------------ 25 # 25 # adjustments for Mistral at DKRZ Hamburg (lcbullhh) 26 26 # 27 27 # Former revisions: … … 174 174 175 175 176 177 178 176 # DETERMINE NAME OF LOCAL HOST 179 177 local_host=$(hostname) 180 181 182 178 183 179 # SET HOST-SPECIFIC VARIABLES VEREINBAREN (CHECK, IF LOCAL HOST … … 223 219 (meller) local_address=134.106.74.155; local_host=lcfor;; 224 220 (meteo-login*) local_address=193.166.211.144;local_host=lcxt5m;; 221 (mlogin1*|m1*) local_address=136.172.50.13; local_host=lcbullhh;; 225 222 (hexagon*) local_address=129.177.20.113; local_host=lcxe6;; 226 223 (nobel*) local_address=150.183.5.101; local_host=ibms;; … … 367 364 (ibmku) queue=s4; remote_address=133.5.4.129; submcom=/usr/local/bin/llsubmit;; 368 365 (ibms) queue=p_normal; remote_address=150.183.5.101; submcom=/usr/lpp/LoadL/full/bin/llsubmit;; 366 (lcbullhh) queue=compute; remote_address=136.172.50.13; submcom=/usr/bin/sbatch;; 369 367 (lccrayb) queue=mpp1testq; remote_address=130.73.233.1; submcom=/opt/moab/default/bin/msub;; 370 368 (lccrayh) queue=mpp1testq; remote_address=130.75.4.1; submcom=/opt/moab/default/bin/msub;; … … 413 411 (ibms) case $ndq in 414 412 (express|normal|p_express|p_normal|p_normal_1.3|p_normal_1.7|grand) error=false;; 413 (*) error=true;; 414 esac;; 415 (lcbullhh) case $ndq in 416 (compute|shared) error=false;; 415 417 (*) error=true;; 416 418 esac;; … … 617 619 fi 618 620 619 620 621 621 # GENERATE RANDOM IDENTIFIER, AND DETERMINE THE JOBNAME ON THE TARGET HOST 622 622 identifier=$RANDOM … … 630 630 remote_dayfile=/dev/null 631 631 fi 632 633 632 634 633 … … 780 779 %%END%% 781 780 781 elif [[ $remote_host = lcbullhh ]] 782 then 783 if [[ $numprocs != 0 ]] 784 then 785 cat > $job_to_send << %%END%% 786 #!/bin/bash -l 787 #SBATCH -J $job_name 788 #SBATCH -t $timestring 789 #SBATCH -N $nodes 790 #SBATCH --ntasks-per-node=$processes_per_node 791 #SBATCH -p $queue 792 #SBATCH -o $remote_dayfile 793 #SBATCH -e 
$remote_dayfile 794 #SBATCH -A $project_account 795 796 $init_cmds 797 $module_calls 798 799 %%END%% 800 801 else 802 cat > $job_to_send << %%END%% 803 #!/bin/bash -l 804 #SBATCH -J $job_name 805 #SBATCH -t $timestring 806 #SBATCH -l ncpus=1 807 #SBATCH -l pmem=${memory}mb 808 #SBATCH -m abe 809 #SBATCH -o $remote_dayfile 810 #SBATCH -e $remote_dayfile 811 #SBATCH -A $project_account 812 813 $init_cmds 814 $module_calls 815 816 %%END%% 817 818 fi 819 782 820 elif [[ $remote_host = lccrayb || $remote_host = lccrayh ]] 783 821 then … … 1196 1234 echo "trap '" >> $job_to_send 1197 1235 echo "set +vx" >> $job_to_send 1198 if [[ $(echo $remote_host | cut -c1-3) = ibm || $remote_host = lc crayb || $remote_host = lccrayh || $(echo $remote_host | cut -c1-3) = nec || $remote_host = lcflow || $remote_host = lckiaps || $remote_host = lckyu* || $remote_host = lcxe6 ]]1236 if [[ $(echo $remote_host | cut -c1-3) = ibm || $remote_host = lcbullhh || $remote_host = lccrayb || $remote_host = lccrayh || $(echo $remote_host | cut -c1-3) = nec || $remote_host = lcflow || $remote_host = lckiaps || $remote_host = lckyu* || $remote_host = lcxe6 ]] 1199 1237 then 1200 1238 if [[ $remote_host = ibmh ]] … … 1210 1248 then 1211 1249 return_queue=p_normal 1250 elif [[ $remote_host = lcbullhh ]] 1251 then 1252 return_queue=shared 1212 1253 elif [[ $remote_host = lccrayb || $remote_host = lccrayh ]] 1213 1254 then … … 1276 1317 echo "[[ \"\$for_subjob_to_do\" != \"\" ]] && eval \$for_subjob_to_do" >> $job_to_send 1277 1318 echo "%%END%%" >> $job_to_send 1319 1320 elif [[ $remote_host = lcbullhh ]] 1321 then 1322 echo "cat > scpjob.$identifier << %%END%%" >> $job_to_send 1323 echo "#!/bin/bash" >> $job_to_send 1324 echo "#SBATCH --job-name=job_protocol_transfer" >> $job_to_send 1325 echo "#SBATCH -t 00:20:00" >> $job_to_send 1326 echo "#SBATCH -N 1" >> $job_to_send 1327 echo "#SBATCH -n 1" >> $job_to_send 1328 echo "#SBATCH -o \$HOME/job_queue/last_job_transfer_protocol" >> $job_to_send 1329 echo 
"#SBATCH -o $remote_dayfile" >> $job_to_send 1330 echo "#SBATCH -e $remote_dayfile" >> $job_to_send 1331 echo "#SBATCH -A $project_account" >> $job_to_send 1332 echo "#SBATCH -p $return_queue" >> $job_to_send 1333 echo " " >> $job_to_send 1334 echo "set -x" >> $job_to_send 1335 echo "batch_scp $PORTOPT -d -w 10 -u $local_user $local_address ${job_catalog}/$remote_dayfile \"$job_catalog\" $local_dayfile" >> $job_to_send 1336 echo "[[ \"\$for_subjob_to_do\" != \"\" ]] && eval \$for_subjob_to_do" >> $job_to_send 1337 echo "%%END%%" >> $job_to_send 1278 1338 1279 1339 elif [[ $remote_host = lckyuh ]] … … 1387 1447 echo "[[ \"\$for_subjob_to_do\" != \"\" ]] && eval \$for_subjob_to_do" >> $job_to_send 1388 1448 echo "%%END%%" >> $job_to_send 1449 1389 1450 fi 1390 1451 … … 1392 1453 then 1393 1454 echo "llsubmit scpjob.$identifier" >> $job_to_send 1455 elif [[ $remote_host = lcbullhh ]] 1456 then 1457 echo "sbatch scpjob.$identifier" >> $job_to_send 1394 1458 elif [[ $remote_host = lccrayb || $remote_host = lccrayh ]] 1395 1459 then … … 1436 1500 1437 1501 1438 1439 1502 # APPEND THE JOB-FILE (CREATE BY mrun) TO THE JOB-DIRECTIVES GENERATED ABOVE 1440 1503 cat $file_to_send >> $job_to_send … … 1525 1588 eval $submcom -q $queue $job_on_remhost 1526 1589 fi 1590 elif [[ $local_host = lcbullhh ]] 1591 then 1592 if [[ $queue = default ]] 1593 then 1594 eval $submcom $job_on_remhost 1595 fi 1527 1596 else 1528 1597 qsub $job_on_remhost … … 1539 1608 fi 1540 1609 1541 1542 1543 1610 # FINAL ACTIONS 1544 1611 if [[ $no_submit = false ]]
Note: See TracChangeset for help on using the changeset viewer.