Changeset 2420 for palm/trunk/SCRIPTS/palmrun
- Timestamp: Sep 7, 2017 6:20:11 AM (7 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
palm/trunk/SCRIPTS/palmrun
r2410 r2420 59 59 compiler_name_ser="" 60 60 compiler_options="" 61 config_file=""62 61 cores="" 63 62 cores_atmos=0 … … 80 79 fname=test 81 80 global_revision="" 82 host ="default"81 host_identifier="default" 83 82 hostfile="" 84 83 hp="" … … 157 156 158 157 159 # CHECK IF THE PATH FOR THE PALM BINARIES (SCRIPTS+UTILITY-PROGRAMS) HAS160 # BEEN SET161 if [[ "$PALM_BIN" = "" ]]162 then163 printf "\n +++ environment variable PALM_BIN has not been set"164 printf "\n please set it to the directory where the PALM scripts are located"165 locat=palm_bin; exit166 fi167 export PATH=$PALM_BIN:$PATH168 169 170 171 158 # READ SHELLSCRIPT-OPTIONS AND REBUILD THE PALMRUN-COMMAND STRING (prc), 172 159 # WHICH WILL BE USED TO START RESTART-JOBS 173 while getopts :a:bB c:Cd:FG:h:jkm:M:O:q:r:R:s:t:T:u:U:vw:xX:yY:Z option160 while getopts :a:bBCd:FG:h:jkm:M:O:q:r:R:s:t:T:u:U:vw:xX:yY:Z option 174 161 do 175 162 case $option in … … 177 164 (b) create_batch_job=true; prc="$prc -b";; 178 165 (B) delete_temporary_catalog=false; prc="$prc -B";; 179 (c) config_file=$OPTARG; prc="$prc -c$OPTARG";;180 166 (C) restart_run=true; prc="$prc -C";; 181 167 (d) fname=$OPTARG; prc="$prc -d$OPTARG";; 182 168 (F) create_jobfile_only=true;; 183 169 (G) global_revision=$OPTARG; prc="$prc -G'$OPTARG'";; 184 (h) host =$OPTARG; prc="$prc -h$OPTARG";;170 (h) host_identifier=$OPTARG; prc="$prc -h$OPTARG";; 185 171 (j) running_in_batch_mode=true;; 186 172 (k) keep_data_from_previous_run=true; prc="$prc -k";; … … 223 209 printf "\n -b batch-job on local machine ---" 224 210 printf "\n -B do not delete temporary directory at end ---" 225 printf "\n -c configuration file .palm.config.default"226 211 printf "\n -d base name of files attached to program test" 227 212 printf "\n -F create batch job file only ---" 228 printf "\n -h execution host\"default\" "213 printf "\n -h host identifier \"default\" " 229 214 printf "\n -k keep data from previous run" 230 215 printf "\n -m memory demand in MB (batch-jobs) 0 MB" 
… … 268 253 269 254 # BUILD THE CONFIGURATION-FILE NAME 270 if [[ "$config_file" = "" ]] 271 then 272 config_file=.palm.config.$host 273 fi 255 config_file=.palm.config.$host_identifier 274 256 275 257 … … 462 444 # BATCH MODE AND ... 463 445 running_on_remote=true 464 host_name=$remote_hostname465 446 466 447 else … … 474 455 475 456 fi 476 host_name=$local_hostname477 457 running_on_remote=false 478 458 fi … … 1213 1193 fi 1214 1194 done 1195 cpumax=$cputime 1215 1196 1216 1197 # CHECK THE MEMORY DEMAND … … 1234 1215 while [[ -z $remote_username ]] 1235 1216 do 1236 printf "\n +++ username on remote host \"$remote_ip\" is undefined"1217 printf "\n +++ username on remote host with IP \"$remote_ip\" is undefined" 1237 1218 printf "\n >>> Please type username:" 1238 1219 printf "\n >>> " … … 1243 1224 else 1244 1225 1245 cputime=10000000 # NO LIMT FOR INTERACTIVE RUNS 1246 1247 fi 1248 cpumax=$cputime 1226 if [[ $running_in_batch_mode = false ]] 1227 then 1228 cputime=10000000 # NO LIMT FOR INTERACTIVE RUNS 1229 cpumax=$cputime 1230 else 1231 cputime=$cpumax 1232 fi 1233 1234 fi 1249 1235 1250 1236 … … 1267 1253 if [[ $create_remote_batch_job = true ]] 1268 1254 then 1269 column1="execution on:"; column2="$ remote_hostname(username: $remote_username)"1255 column1="execution on:"; column2="$host_identifier (username: $remote_username)" 1270 1256 else 1271 1257 if [[ $running_on_remote = true ]] 1272 1258 then 1273 column1="execution on:"; column2="$ remote_hostname"1259 column1="execution on:"; column2="$host_identifier (IP:$remote_ip)" 1274 1260 else 1275 column1="execution on:"; column2="$ local_hostname"1261 column1="execution on:"; column2="$host_identifier (IP:$local_ip)" 1276 1262 fi 1277 1263 fi … … 1525 1511 fi 1526 1512 1527 palmbuild -v -h $host -d $fname 1528 rm -rf SOURCES_FOR_RUN_$fname 1513 palmbuild -v -h $host_identifier -d $fname 1529 1514 1530 1515 if [[ $? 
!= 0 ]] … … 1534 1519 printf "\n +++ error while creating executable and/or other sources" 1535 1520 locat=execution 1521 rm -rf SOURCES_FOR_RUN_$fname 1536 1522 exit 1537 1523 … … 1539 1525 1540 1526 printf "\n$dashes\n *** executable and other sources created\n" 1527 rm -rf SOURCES_FOR_RUN_$fname 1541 1528 1542 1529 fi … … 1766 1753 # (FILE ENVPAR WILL BE READ BY PALM) 1767 1754 cat > ENVPAR << EOF 1768 &envpar run_identifier = '$fname', host = '$host ',1755 &envpar run_identifier = '$fname', host = '$host_identifier', 1769 1756 write_binary = .${write_binary}., tasks_per_node = $tasks_per_node, 1770 1757 maximum_parallel_io_streams = $maximum_parallel_io_streams, … … 2034 2021 transfer_failed=false 2035 2022 printf "\n >>> OUTPUT: ${localout[$i]}$catalog_string $append_string by SCP to" 2036 printf "\n ${pathout[$i]}/${ remote_hostname}_${fname}${endout[$i]}$catalog_string\n"2023 printf "\n ${pathout[$i]}/${host_identifier}_${fname}${endout[$i]}$catalog_string\n" 2037 2024 2038 2025 # TRANSFER VIA SCP 2039 2026 if [[ "$remote_loginnode" != "" ]] 2040 2027 then 2041 echo "cd $TEMPDIR; ${fast_io_catalog}/SOURCES_FOR_RUN_${fname}/batch_scp $PORTOPT $catalog_option $append_option -b -m $usecycle_option -u $local_username $return_address ${localout[$i]} \"${pathout[$i]}\" ${ remote_hostname}_${fname}${endout[$i]} ${extout[$i]}" | ssh -q $remote_username@$remote_loginnode2028 echo "cd $TEMPDIR; ${fast_io_catalog}/SOURCES_FOR_RUN_${fname}/batch_scp $PORTOPT $catalog_option $append_option -b -m $usecycle_option -u $local_username $return_address ${localout[$i]} \"${pathout[$i]}\" ${host_identifier}_${fname}${endout[$i]} ${extout[$i]}" | ssh -q $remote_username@$remote_loginnode 2042 2029 else 2043 batch_scp $PORTOPT $catalog_option $append_option -b -m $usecycle_option -u $local_username $return_address ${localout[$i]} "${pathout[$i]}" ${ remote_hostname}_${fname}${endout[$i]} ${extout[$i]}2030 batch_scp $PORTOPT $catalog_option $append_option -b -m $usecycle_option -u 
$local_username $return_address ${localout[$i]} "${pathout[$i]}" ${host_identifier}_${fname}${endout[$i]} ${extout[$i]} 2044 2031 fi 2045 2032 [[ $? != 0 ]] && transfer_failed=true … … 2050 2037 then 2051 2038 printf " +++ transfer failed. Trying to save a copy on this host under:\n" 2052 printf " ${pathout[$i]}/${ remote_hostname}_${fname}${endout[$i]}_$run_id\n"2039 printf " ${pathout[$i]}/${host_identifier}_${fname}${endout[$i]}_$run_id\n" 2053 2040 2054 2041 # FIRST CHECK, IF DIRECTORY EXISTS, AND CREATE IT, IF NECESSARY … … 2060 2047 mkdir -p $local_catalog 2061 2048 fi 2062 eval cp ${localout[$i]} ${pathout[$i]}/${ remote_hostname}_${fname}${endout[$i]}_$run_id2049 eval cp ${localout[$i]} ${pathout[$i]}/${host_identifier}_${fname}${endout[$i]}_$run_id 2063 2050 transfer_problems=true 2064 2051 fi … … 2221 2208 if [[ "$remote_loginnode" != "" ]] 2222 2209 then 2223 echo "echo \" PATH=\\\$PATH:$LOCAL_PALMRUN_PATH; export PALM_BIN=$LOCAL_PALMRUN_PATH;cd $LOCAL_PWD; $prc\" | ssh -q $SSH_PORTOPT $local_username@$return_address " | ssh -q $remote_username@$remote_loginnode2210 echo "echo \" PATH=\\\$PATH:$LOCAL_PALMRUN_PATH; cd $LOCAL_PWD; $prc\" | ssh -q $SSH_PORTOPT $local_username@$return_address " | ssh -q $remote_username@$remote_loginnode 2224 2211 else 2225 echo \" PATH=\\\$PATH:$LOCAL_PALMRUN_PATH; export PALM_BIN=$LOCAL_PALMRUN_PATH;cd $LOCAL_PWD; $prc\" | ssh -q $SSH_PORTOPT $local_username@$return_address2212 echo \" PATH=\\\$PATH:$LOCAL_PALMRUN_PATH; cd $LOCAL_PWD; $prc\" | ssh -q $SSH_PORTOPT $local_username@$return_address 2226 2213 fi 2227 2214 … … 2268 2255 2269 2256 # BUILD THE PALMRUN-COMMAND TO BE CALLED IN THE BATCH-JOB 2270 palmrun_com="$palmrun_script_name - c $config_file -d $fname -h $host-m $memory -t $cpumax -q $queue -r $run_id -U $local_username"2257 palmrun_com="$palmrun_script_name -d $fname -h $host_identifier -m $memory -t $cpumax -q $queue -r $run_id -U $local_username" 2271 2258 [[ "$activation_string_list" != "" ]] && 
palmrun_com=${palmrun_com}" -a \"$activation_string_list\"" 2272 2259 [[ "$global_revision" != "" ]] && palmrun_com=${palmrun_com}" -G \"$global_revision\"" … … 2284 2271 then 2285 2272 palmrun_com=${palmrun_com}" -j -u $remote_username -R $local_ip" 2286 printf "\n *** PALMRUN-command on remote host:\n $palmrun_com \n" 2273 if [[ $do_trace = true ]] 2274 then 2275 printf "\n *** PALMRUN-command on remote host:\n $palmrun_com \n" 2276 fi 2287 2277 elif [[ $create_batch_job = true ]] 2288 2278 then 2289 2279 palmrun_com=${palmrun_com}" -j" 2290 printf "\n *** PALMRUN-command on local host:\n $palmrun_com \n" 2280 if [[ $do_trace = true ]] 2281 then 2282 printf "\n *** PALMRUN-command on local host:\n $palmrun_com \n" 2283 fi 2291 2284 fi 2292 2285 … … 2294 2287 # DETERMINE THE FULL PATHS FOR THE JOB PROTOCOL FILES ON THE LOCAL AND 2295 2288 # REMOTE HOST 2296 job_protocol_file_local=${local_jobcatalog}/${host }_${job_id}2289 job_protocol_file_local=${local_jobcatalog}/${host_identifier}_${job_id} 2297 2290 job_protocol_file=$job_protocol_file_local 2298 2291 if [[ $create_remote_batch_job = true ]] 2299 2292 then 2300 job_protocol_file_remote=${remote_jobcatalog}/${host }_${job_id}2293 job_protocol_file_remote=${remote_jobcatalog}/${host_identifier}_${job_id} 2301 2294 job_protocol_file=$job_protocol_file_remote 2302 2295 job_transfer_protocol_file=${remote_jobcatalog}/last_job_transfer_protocol … … 2314 2307 do 2315 2308 (( i = i + 1 )) 2316 line=`echo "${batch_directive[$i]}" | sed 's/{{JOB_ID}}/$job_id/g' | sed 's/{{JOBFILE}}/$job_protocol_file/g' | sed 's/{{CPU_HOURS}}/$cpu_hours/g' | sed 's/{{CPU_MINUTES}}/$cpu_minutes/g' | sed 's/{{CPU_SECONDS}}/$cpu_seconds/g' | sed 's/{{NODES}}/$nodes/g' | sed 's/{{CORES}}/$cores/g' | sed 's/{{TASKS_PER_NODE}}/$tasks_per_node/g' | sed 's/{{ LOCAL_HOSTNAME}}/${local_hostname}/g' | sed 's/{{HOST}}/${host}/g' | sed 's/{{FNAME}}/$fname/g' | sed 's/{{QUEUE}}/$queue/g' | sed 's/{{MEMORY}}/$memory/g'`2309 line=`echo 
"${batch_directive[$i]}" | sed 's/{{JOB_ID}}/$job_id/g' | sed 's/{{JOBFILE}}/$job_protocol_file/g' | sed 's/{{CPU_HOURS}}/$cpu_hours/g' | sed 's/{{CPU_MINUTES}}/$cpu_minutes/g' | sed 's/{{CPU_SECONDS}}/$cpu_seconds/g' | sed 's/{{NODES}}/$nodes/g' | sed 's/{{CORES}}/$cores/g' | sed 's/{{TASKS_PER_NODE}}/$tasks_per_node/g' | sed 's/{{HOST_IDENTIFIER}}/${host_identifier}/g' | sed 's/{{FNAME}}/$fname/g' | sed 's/{{QUEUE}}/$queue/g' | sed 's/{{MEMORY}}/$memory/g'` 2317 2310 eval line=\"$line\" 2318 2311 echo "$line" >> $jobfile … … 2336 2329 do 2337 2330 (( i = i + 1 )) 2338 line=`echo "${batch_directive_transfer[$i]}" | sed 's/{{JOB_ID}}/$job_id/g' | sed 's/{{JOBFILE}}/$job_protocol_file/g' | sed 's/{{JOB_TRANSFER_PROTOCOL_FILE}}/$job_transfer_protocol_file/g' | sed 's/{{CPU_HOURS}}/$cpu_hours/g' | sed 's/{{CPU_MINUTES}}/$cpu_minutes/g' | sed 's/{{CPU_SECONDS}}/$cpu_seconds/g' | sed 's/{{NODES}}/$nodes/g' | sed 's/{{TASKS_PER_NODE}}/$tasks_per_node/g' | sed 's/{{ LOCAL_HOSTNAME}}/${local_hostname}/g' | sed 's/{{HOST}}/${host}/g' | sed 's/{{FNAME}}/$fname/g'`2331 line=`echo "${batch_directive_transfer[$i]}" | sed 's/{{JOB_ID}}/$job_id/g' | sed 's/{{JOBFILE}}/$job_protocol_file/g' | sed 's/{{JOB_TRANSFER_PROTOCOL_FILE}}/$job_transfer_protocol_file/g' | sed 's/{{CPU_HOURS}}/$cpu_hours/g' | sed 's/{{CPU_MINUTES}}/$cpu_minutes/g' | sed 's/{{CPU_SECONDS}}/$cpu_seconds/g' | sed 's/{{NODES}}/$nodes/g' | sed 's/{{TASKS_PER_NODE}}/$tasks_per_node/g' | sed 's/{{HOST_IDENTIFIER}}/${host_identifier}/g' | sed 's/{{FNAME}}/$fname/g'` 2339 2332 eval line=\"$line\" 2340 2333 echo "$line" >> $jobfile … … 2343 2336 2344 2337 echo "set -x" >> $jobfile 2345 echo "${fast_io_catalog}/SOURCES_FOR_RUN_${fname}/batch_scp $PORTOPT -d -w 10 -u $local_username $local_ip $job_protocol_file_remote \"$local_jobcatalog\" ${host }_${fname}" >> $jobfile2338 echo "${fast_io_catalog}/SOURCES_FOR_RUN_${fname}/batch_scp $PORTOPT -d -w 10 -u $local_username $local_ip $job_protocol_file_remote 
\"$local_jobcatalog\" ${host_identifier}_${fname}" >> $jobfile 2346 2339 echo "%%END%%" >> $jobfile 2347 2340 echo "echo \" *** submitting job for transfering the job protocol file to $local_ip\" " >> $jobfile … … 2408 2401 echo " echo \" \" " >> $jobfile 2409 2402 echo " echo \"+++ file ${remotepathin[$i]} could not be created\" " >> $jobfile 2410 echo " echo \" please check, if directory exists on $host !\" " >> $jobfile2403 echo " echo \" please check, if directory exists on $host_identifier!\" " >> $jobfile 2411 2404 echo " echo \"+++ PALMRUN will not be continued\" " >> $jobfile 2412 2405 echo " execute_palmrun=false" >> $jobfile … … 2424 2417 2425 2418 # PROVIDE THE PATH OF THE LOCAL PALMRUN-SCRIPT FOR THE SAME REASON 2426 echo "LOCAL_PALMRUN_PATH=$ PALM_BIN">> $jobfile2419 echo "LOCAL_PALMRUN_PATH=${source_path}/../SCRIPTS" >> $jobfile 2427 2420 echo "export LOCAL_PALMRUN_PATH" >> $jobfile 2428 2421 … … 2441 2434 then 2442 2435 2443 echo " *** transfer of job to remote host via scp" 2444 echo " scp $ssh_key $PORTOPT $jobfile ${remote_username}@${remote_ip}:${remote_jobcatalog}/${host}_${job_id}" 2445 scp $ssh_key $PORTOPT $jobfile ${remote_username}@${remote_ip}:${remote_jobcatalog}/${host}_${job_id} 2446 2436 echo " " 2437 echo " *** transfer of job to remote host via scp" 2438 if [[ $do_trace = true ]] 2439 then 2440 echo " scp $ssh_key $PORTOPT $jobfile ${remote_username}@${remote_ip}:${remote_jobcatalog}/${host_identifier}_${job_id}" 2441 fi 2442 scp $ssh_key $PORTOPT $jobfile ${remote_username}@${remote_ip}:${remote_jobcatalog}/${host_identifier}_${job_id} 2443 2444 echo " " 2445 echo " *** submit the job (output of submit command, e.g. 
the job-id, may follow)" 2446 if [[ $do_trace = true ]] 2447 then 2448 echo " cd $remote_jobcatalog; $submit_command ${host_identifier}_${job_id}; rm ${host_identifier}_${job_id} | ssh -q $ssh_key $SSH_PORTOPT ${remote_username}@${remote_ip} 2>&1" 2449 fi 2450 echo "cd $remote_jobcatalog; $submit_command ${host_identifier}_${job_id}; rm ${host_identifier}_${job_id}" | ssh -q $ssh_key $SSH_PORTOPT ${remote_username}@${remote_ip} 2>&1 2451 2452 elif [[ $create_batch_job = true ]] 2453 then 2454 2455 eval local_jobcatalog=$local_jobcatalog 2456 cp $jobfile ${local_jobcatalog}/${host_identifier}_${job_id} 2457 cd $local_jobcatalog 2447 2458 echo " " 2448 2459 echo " *** submit the job" 2449 echo " cd $remote_jobcatalog; $submit_command ${host}_${job_id}; rm ${host}_${job_id} | ssh -q $ssh_key $SSH_PORTOPT ${remote_username}@${remote_ip} 2>&1" 2450 echo "cd $remote_jobcatalog; $submit_command ${host}_${job_id}; rm ${host}_${job_id}" | ssh -q $ssh_key $SSH_PORTOPT ${remote_username}@${remote_ip} 2>&1 2451 2452 elif [[ $create_batch_job = true ]] 2453 then 2454 2455 eval local_jobcatalog=$local_jobcatalog 2456 cp $jobfile ${local_jobcatalog}/${host}_${job_id} 2457 cd $local_jobcatalog 2458 echo " " 2459 echo "--- submit the job" 2460 echo "$submit_command ${host}_${job_id}" 2461 $submit_command ${host}_${job_id} 2462 rm ${host}_${job_id} 2460 if [[ $do_trace = true ]] 2461 then 2462 echo "$submit_command ${host_identifier}_${job_id}" 2463 fi 2464 $submit_command ${host_identifier}_${job_id} 2465 rm ${host_identifier}_${job_id} 2463 2466 cd - > /dev/null 2464 2467
Note: See TracChangeset for help on using the changeset viewer.