Changeset 635
- Timestamp: Dec 14, 2010 1:33:02 PM (14 years ago)
- Location: palm/trunk/SCRIPTS
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
palm/trunk/SCRIPTS/mbuild
r622 r635
115 115 # SGI-ICE/lcsgi
116 116 # adjustments for Kyushu Univ. (lcrte, ibmku)
    117 # 14/12/10 - Siggi - adjustments for new Tsubame system at Tokyo
    118 # institute of technology (lctit)
117 119
118 120
… …
471 473 (lcsgih) remote_addres=130.75.4.102;;
472 474 (lck) remote_addres=165.132.26.61;;
473     (lctit) remote_addres=172.17.75.161;;
    475 (lctit) remote_addres=10.1.6.170;;
474 476 (lcxt4) remote_addres=129.177.20.113;;
475 477 (lcxt5m) remote_addres=193.166.211.144;;
-
palm/trunk/SCRIPTS/mrun
r622 r635 213 213 # has now argument threads_per_task 214 214 # adjustments for Kyushu Univ. (lcrte, ibmku) 215 # 14/12/10 - Siggi - adjustments for new Tsubame system at Tokyo 216 # institute of technology (lctit) 215 217 216 218 … … 662 664 do_remote=true 663 665 case $host in 664 (ibm|ibmh|ibmku|ibms|ibmy|nech|necriam|lckyoto|lcsgib|lcsgih| lctit|unics|lcxt4|lcxt5m|lck) true;;666 (ibm|ibmh|ibmku|ibms|ibmy|nech|necriam|lckyoto|lcsgib|lcsgih|unics|lcxt4|lcxt5m|lck) true;; 665 667 (*) printf "\n" 666 668 printf "\n +++ sorry: execution of batch jobs on remote host \"$host\"" … … 839 841 do_remote=true 840 842 case $host in 841 (ibm|ibmh|ibmku|ibms|ibmy|lckyoto|lcsgib|lcsgih| lctit|nech|necriam|unics|lcxt4|lcxt5m|lck) true;;843 (ibm|ibmh|ibmku|ibms|ibmy|lckyoto|lcsgib|lcsgih|nech|necriam|unics|lcxt4|lcxt5m|lck) true;; 842 844 (*) printf "\n +++ sorry: execution of batch jobs on remote host \"$host\"" 843 845 printf "\n is not available" … … 1121 1123 do_remote=true 1122 1124 case $host in 1123 (ibm|ibmh|ibmku|ibms|ibmy|lckyoto|lcsgib|lcsgih| lctit|nech|necriam|unics|lcxt4|lcxt5m|lck) true;;1125 (ibm|ibmh|ibmku|ibms|ibmy|lckyoto|lcsgib|lcsgih|nech|necriam|unics|lcxt4|lcxt5m|lck) true;; 1124 1126 (*) printf "\n" 1125 1127 printf "\n +++ sorry: execution of batch jobs on remote host \"$host\"" … … 1208 1210 then 1209 1211 node_usage=shared 1210 elif [[ $host = lctit ]]1211 then1212 node_usage=RAM64GB1213 1212 elif [[ $(echo $host | cut -c1-5) = lcsgi ]] 1214 1213 then … … 1219 1218 fi 1220 1219 1221 if [[ $node_usage != shared && $node_usage != not_shared && $node_usage != singlejob && "$(echo $node_usage | cut -c1-3)" != "sla" && $node_usage != RAM64GB]]1220 if [[ $node_usage != shared && $node_usage != not_shared && $node_usage != singlejob && "$(echo $node_usage | cut -c1-3)" != "sla" ]] 1222 1221 then 1223 1222 printf "\n" … … 1270 1269 (lcsgib) queue=bigq;; 1271 1270 (lcsgih) queue=bigq;; 1272 (lctit) queue= RAM64GB;;1271 (lctit) queue=S;; 1273 1272 (nech) 
queue=none;; 1274 1273 (necriam) queue=SP;; … … 1967 1966 1968 1967 # JOBMODUS FESTSTELLEN 1969 if [[ "$ENVIRONMENT" = BATCH && $localhost != lctit || "$QUEUE" != interactive && $localhost = lctit]]1968 if [[ "$ENVIRONMENT" = BATCH ]] 1970 1969 then 1971 1970 jobmo=BATCH 1972 1971 else 1973 1972 jobmo=INTERACTIVE 1973 fi 1974 1975 1976 # no interactive runs on lctit 1977 if [[ $host = lctit && $jobmo = INTERACTIVE && $do_batch = false ]] 1978 then 1979 printf "\n +++ no interactive runs allowed on host \"$host\" " 1980 printf "\n please submit batch job using mrun option \"-b\" \n" 1981 locat=normal; exit 1974 1982 fi 1975 1983 … … 2533 2541 continue # ANDERENFALLS IST STATUS=1, FALLS A.OUT VORHANDEN 2534 2542 else 2535 [[ $localhost = lctit ]] && export LM_LICENSE_FILE=27050@tggls2536 2543 [[ $localhost = lcxt4 ]] && . /opt/modules/default/init/ksh 2537 2544 [[ "$module_calls" != "" ]] && eval $module_calls … … 3439 3446 elif [[ $host = lctit ]] 3440 3447 then 3441 export runfile=runfile.$kennung 3442 3443 echo "cd $PWD" > $runfile 3444 echo "export OMP_NUM_THREADS=$OMP_NUM_THREADS" >> $runfile 3445 echo "export cpurest=$cpurest" >> $runfile 3446 echo "export fname=$fname" >> $runfile 3447 echo "export localhost=$localhost" >> $runfile 3448 echo "export return_addres=$return_addres" >> $runfile 3449 echo "export return_username=$return_username" >> $runfile 3450 echo "export tasks_per_node=$tasks_per_node" >> $runfile 3451 echo "export write_binary=$write_binary" >> $runfile 3452 echo "export use_seperate_pe_for_dvrp_output=$use_seperate_pe_for_dvrp_output" >> $runfile 3453 echo "./a.out" >> $runfile 3454 chmod u+x $runfile 3455 3456 if [[ "$QUEUE" = interactive ]] 3457 then 3458 mpirun -np $numprocs a.out $ROPTS 3459 else 3460 (( mem_tsubame = $memory / 1024.0 )) 3461 echo "Memory for Tsubame for each process in GB:", $mem_tsubame 3462 # n1ge -fore -g 1S060156 -mpi $numprocs -mem 4 -N palm -q $queue a.out $ROPTS 3463 # /n1ge/TITECH_GRID/tools/bin/n1ge -fore -g 
$group_number -mpi ${numprocs}:$tasks_per_node -mem $mem_tsubame -N palm -rt $minuten -q $queue a.out $ROPTS 3464 n1ge -fore -g $group_number -mpi ${numprocs}:$tasks_per_node -mem $mem_tsubame -N palm -rt $minuten -q $queue a.out $ROPTS 3465 fi 3466 3467 rm $runfile 3448 mpirun -np $numprocs -hostfile $PBS_NODEFILE ./a.out $ROPTS 3468 3449 else 3469 3450 mpprun -n $numprocs a.out $ROPTS … … 4233 4214 then 4234 4215 4235 if [[ $localhost = lcsgih || $localhost = lcsgib || $localhost = nech || $localhost = ibmh || $localhost = ibmku || $localhost = ibms || $localhost = lctit]]4216 if [[ $localhost = lcsgih || $localhost = lcsgib || $localhost = nech || $localhost = ibmh || $localhost = ibmku || $localhost = ibms ]] 4236 4217 then 4237 4218 echo "*** ssh will be used to initiate restart-runs!" 4238 4219 echo " return_addres=\"$return_addres\" " 4239 4220 echo " return_username=\"$return_username\" " 4240 if [[ $return_addres = 172.20.25.41 ]] 4241 then 4242 # WORKAROUND AUF SCIROCCO AM TIT 4243 print "PATH=\$PATH:$LOCAL_MRUN_PATH;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc " | ssh $return_addres -l $return_username 4244 elif [[ $(echo $return_addres | grep -c "130.75.105") = 1 ]] 4221 if [[ $(echo $return_addres | grep -c "130.75.105") = 1 ]] 4245 4222 then 4246 4223 ssh $return_addres -l $return_username "PATH=\$PATH:$LOCAL_MRUN_PATH;export LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/muksoft/packages/intel/Compiler/11.0/083/lib/intel64/;export PALM_BIN=$LOCAL_MRUN_PATH;cd $LOCAL_PWD; $mc " … … 4460 4437 cat ${PALM_BIN}/$mrun_script_name >> $jobfile 4461 4438 echo "%END%" >> $jobfile 4462 if [[ $host = lctit ]]4463 then4464 echo "sed 's/bin\/ksh/home2\/usr5\/mkanda\/pub\/ksh/' < $mrun_script_name > mrun_new" >> $jobfile4465 echo "mv mrun_new $mrun_script_name" >> $jobfile4466 fi4467 4439 echo "chmod u+x $mrun_script_name" >> $jobfile 4468 4440 echo "execute_mrun=true" >> $jobfile -
palm/trunk/SCRIPTS/subjob
r622 r635 124 124 # SGI-ICE/lcsgi 125 125 # adjustments for Kyushu Univ. (lcrte, ibmku) 126 # 14/12/10 - Siggi - adjustments for new Tsubame system at Tokyo 127 # institute of technology (lctit) 126 128 127 129 … … 236 238 (r1*) local_addres=130.75.4.102; local_host=lcsgih;; 237 239 (r2*) local_addres=130.73.232.102; local_host=lcsgib;; 238 (t gg*) local_addres=172.17.75.161;local_host=lctit;;240 (t2a*) local_addres=10.1.6.165; local_host=lctit;; 239 241 (vorias) local_addres=172.20.25.43; local_host=lcmuk;; 240 242 (*.cc.kyushu-u.ac.jp) local_addres=133.5.4.129; local_host=ibmku;; … … 367 369 (lcsgih) queue=smallq; remote_addres=130.75.4.101; submcom=/opt/moab/bin/msub;; 368 370 (lck) remote_addres=165.132.26.61; submcom=/usr/torque/bin/qsub;; 369 (lctit) queue= lctit; remote_addres=172.17.75.161; submcom=/n1ge/TITECH_GRID/tools/bin/n1ge;;371 (lctit) queue=S; remote_addres=10.1.6.165; submcom=/opt/pbs/tools/bin/t2sub;; 370 372 (lcxt4) remote_addres=129.177.20.113; submcom=/opt/torque/2.4.9-snap.201005191035/bin/qsub;; 371 373 (lcxt5m) remote_addres=193.166.211.144; submcom=/opt/pbs/10.1.0.91350/bin/qsub;; … … 415 417 esac;; 416 418 (lctit) case $ndq in 417 ( novice|sla1|sla2|sla3|RAM64GB|RAM128GB)error=false;;419 (G|L128|L256|L512H|S|S96|V) error=false;; 418 420 (*) error=true;; 419 421 esac;; … … 513 515 (( Memory = memory / 1000 )) 514 516 fi 517 elif [[ $remote_host = lctit ]] 518 then 519 (( Memory = memory * tasks_per_node / 1000 )) 515 520 fi 516 521 … … 1031 1036 then 1032 1037 cat > $job_to_send << %%END%% 1033 #!/bin/bash 1034 #$ -S /bin/bash 1035 cd $job_catalog 1036 export PATH=$PALM_BIN:\$PATH 1037 echo \$PATH 1038 #!/bin/ksh 1038 1039 1039 1040 %%END%% 1040 1041 1041 1042 # OPTIONEN FUER SUBMIT-KOMMANDO ZUSAMMENSTELLEN 1042 # submcom="$submcom -N $job_name -sgeout $remote_dayfile -q default" 1043 submcom="$submcom -g $group_number -N $job_name -rt 4320 -sgeout $remote_dayfile -q $node_usage " 1043 submcom="$submcom -W group_list=$group_number -N 
$job_name -l walltime=$timestring -l select=$nodes:ncpus=$processes_per_node:mpiprocs=$tasks_per_node:mem=${Memory}gb -l place=scatter -o $remote_dayfile -j oe -q $queue " 1044 1044 1045 1045 else … … 1066 1066 echo "trap '" >> $job_to_send 1067 1067 echo "set +vx" >> $job_to_send 1068 if [[ $(echo $remote_host | cut -c1-3) = ibm || $(echo $remote_host | cut -c1-5) = lcsgi || $(echo $remote_host | cut -c1-3) = nec || $remote_host = lctit]]1068 if [[ $(echo $remote_host | cut -c1-3) = ibm || $(echo $remote_host | cut -c1-5) = lcsgi || $(echo $remote_host | cut -c1-3) = nec ]] 1069 1069 then 1070 1070 if [[ $remote_host = ibmh ]] … … 1083 1083 then 1084 1084 return_queue=serialq 1085 elif [[ $remote_host = lctit ]]1086 then1087 return_queue=sla31088 1085 elif [[ $remote_host = necriam ]] 1089 1086 then … … 1169 1166 echo "[[ \"\$for_subjob_to_do\" != \"\" ]] && eval \$for_subjob_to_do" >> $job_to_send 1170 1167 echo "%%END%%" >> $job_to_send 1171 1172 elif [[ $remote_host = lctit ]]1173 then1174 echo "cat > scpjob.$kennung << %%END%%" >> $job_to_send1175 echo "set -x" >> $job_to_send1176 echo "PATH=\$PATH:$job_catalog/../pub" >> $job_to_send1177 echo "cd $job_catalog" >> $job_to_send1178 echo "batch_scp -d -w 10 -u $local_user $local_addres $remote_dayfile \"$job_catalog\" $local_dayfile" >> $job_to_send1179 echo "[[ \"\$for_subjob_to_do\" != \"\" ]] && eval \$for_subjob_to_do" >> $job_to_send1180 echo "%%END%%" >> $job_to_send1181 1168 1182 1169 else … … 1213 1200 echo "chmod u+x scpjob.$kennung" >> $job_to_send 1214 1201 echo "msub scpjob.$kennung" >> $job_to_send 1215 elif [[ $remote_host = lctit ]]1216 then1217 echo "chmod u+x scpjob.$kennung" >> $job_to_send1218 echo "rm -rf ${job_catalog}/last_job_transfer_protocol" >> $job_to_send1219 echo "n1ge -N jobtransfer -q $return_queue -sgeout ${job_catalog}/last_job_transfer_protocol scpjob.$kennung" >> $job_to_send1220 echo "sleep 10" >> $job_to_send1221 1202 elif [[ $remote_host = t3eb || $remote_host = t3eh || 
$remote_host = t3ej2 || $remote_host = t3ej5 ]] 1222 1203 then … … 1255 1236 echo "exit" >> $job_to_send 1256 1237 fi 1238 1239 # remove job file 1257 1240 if [[ $remote_host = lctit || $remote_host = ibmku ]] 1258 1241 then … … 1322 1305 then 1323 1306 [[ $verify = true ]] && printf "\n >>> submitting job using \"qsub\"...\n" 1324 if [[ $remote_host != lctit ]] 1325 then 1326 if [[ $(echo $remote_host | cut -c1-5) = lcsgi && $prio = true ]] 1327 then 1328 printf "\n >>> submit with HLRN qos-feature hiprio...\n" 1329 ssh $remote_addres -l $remote_user "cd $job_catalog; $submcom -l qos=hiprio $job_on_remhost; rm $job_on_remhost" 1330 elif [[ $remote_host = ibmku ]] 1331 then 1332 ssh $remote_addres -l $remote_user "cd $job_catalog; $submcom $job_on_remhost" 1333 else 1334 ssh $remote_addres -l $remote_user "cd $job_catalog; $submcom $job_on_remhost; rm $job_on_remhost" 1335 fi 1307 1308 if [[ $(echo $remote_host | cut -c1-5) = lcsgi && $prio = true ]] 1309 then 1310 printf "\n >>> submit with HLRN qos-feature hiprio...\n" 1311 ssh $remote_addres -l $remote_user "cd $job_catalog; $submcom -l qos=hiprio $job_on_remhost; rm $job_on_remhost" 1312 elif [[ $remote_host = ibmku ]] 1313 then 1314 ssh $remote_addres -l $remote_user "cd $job_catalog; $submcom $job_on_remhost" 1336 1315 else 1337 # TIT ERLAUBT NUR DIE AUSFUEHRUNG GANZ BESTIMMTER KOMMANDOS 1338 # MIT SSH, DESHALB AUFRUF PER PIPE 1339 # UEBERGANGSWEISE CHECK, OB N1GE ENVIRONMENT WIRKLICH VERFUEGBAR 1340 print "cd $job_catalog; chmod u+x $job_on_remhost" | ssh $remote_addres -l $remote_user > /dev/null 2>&1 1341 echo "first try" > send_protocol 1342 while [[ $(cat send_protocol | grep -c "Forwarding to N1GE") = 0 ]] 1343 do 1344 if [[ $(cat send_protocol | grep -c "first try") = 1 ]] 1345 then 1346 printf "\n trying to submit job to TIT cluster..." 1347 else 1348 printf "\n +++ failed ... trying to submit again ..." 
1349 fi 1350 print "cd $job_catalog; $submcom $job_on_remhost" | ssh $remote_addres -l $remote_user > send_protocol 2>&1 1351 cat send_protocol 1352 done 1353 sleep 10 1354 print "cd $job_catalog; rm $job_on_remhost" | ssh $remote_addres -l $remote_user > /dev/null 2>&1 1355 echo "$submcom $job_on_remhost" 1356 rm send_protocol 1316 ssh $remote_addres -l $remote_user "cd $job_catalog; $submcom $job_on_remhost; rm $job_on_remhost" 1357 1317 fi 1358 1318 … … 1365 1325 elif [[ $local_host = lcfimm || $local_host = lctit || $localhost = lcxt4 || $localhost = lck ]] 1366 1326 then 1327 chmod u+x $job_on_remhost 1328 echo "$submcom $job_on_remhost" 1367 1329 eval $submcom $job_on_remhost 1368 echo "$submcom $job_on_remhost"1369 chmod u+x $job_on_remhost1370 1371 1330 elif [[ $local_host = nech ]] 1372 1331 then
Note: See TracChangeset for help on using the changeset viewer.