Changeset 1446 for palm/trunk/SCRIPTS/hlrn_watchdog
- Timestamp: Aug 7, 2014 10:08:56 AM (10 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
palm/trunk/SCRIPTS/hlrn_watchdog
Columns: line number in r1310, line number in r1446, marker (' ' unmodified, '+' added, '-' removed), line text. Rows of "…" mark elided unchanged lines.

r1310 r1446
    1     1   #!/bin/ksh
    2       -
    3     2   #--------------------------------------------------------------------------------#
    4     3   # This file is part of PALM.
    …     …
   20    19   # Current revisions:
   21    20   # -----------------
   22       - #
         21 + # Adapted for HLRN III. Added windows if no runs are queued.
         22 + #
   23    23   # Former revisions:
   24    24   # -----------------
    …     …
   41    41   #------------------------------------------------------------------------------!
   42    42
         43 + check_hannover=false
         44 + check_berlin=true
         45 + debug=true
         46 + PALM_BIN="/home/maronga/palm/tmp_version/trunk/SCRIPTS"
         47 +
   43    48   cd $PALM_BIN
   44    49
   45       - gate_h="hicegate.hlrn.de"
   46       - gate_b="bicegate.hlrn.de"
         50 + gate_h="hlogin.hlrn.de"
         51 + gate_b="blogin.hlrn.de"
   47    52   update_frequency=600
   48    53
   49    54   # trap strg+c
   50       - trap 'killall kdialog; exit' 2
         55 + trap 'kill -9 $infoPID > /dev/null; kill -9 $dialogPID > /dev/null; exit' 2
   51    56
   52    57
    …     …
   61    66   kill -9 $killid > /dev/null
   62    67   killall kdialog
         68 +
         69 + if [[ -f .watchdog_report.x ]] then
         70 + rm .watchdog_report.x
         71 + fi
         72 + if [[ -f .watchdog_status.x ]] then
         73 + rm .watchdog_status.x
         74 + fi
   63    75   echo "*** hlrn_watchdog stopped."
   64    76   else
    …     …
   73    85   echo "+++ hlrn_watchdog is already running."
   74    86   else
   75       - nohup ./hlrn_watchdog $2 1> /dev/null 2> /dev/null &
         87 + if [[ $debug = true ]] then
         88 + ./hlrn_watchdog $2 &
         89 + else
         90 + nohup ./hlrn_watchdog $2 1> /dev/null 2> /dev/null &
         91 + fi
   76    92   echo "\n*** hlrn_watchdog starting..."
   77    93   fi
   78    94   exit
   79    95   else
   80       -
   81    96   # login via ssh and collect information in .watchdog_report.x
   82    97   while true
   83    98   do
   84       - ssh $gate_h -l $1 "showq | egrep \"($1)\"" > .watchdog_report.x
         99 + touch .watchdog_report.x
        100 + if [[ $check_hannover == true ]] then
        101 + ssh $gate_h -l $1 "showq | egrep \"($1)\"" > .watchdog_report.x
        102 + fi
        103 + if [[ $check_berlin == true ]] then
   85   104   ssh $gate_b -l $1 "showq | egrep \"($1)\"" >> .watchdog_report.x
        105 + fi
   86   106
   87   107   i=0
    …     …
  104   124   done
  105   125   rm .watchdog_report.x
        126 + touch .watchdog_status.x
  106   127
  107   128   # check for terminated jobs and status changes
    …     …
  138   159   done
  139   160
        161 + # check whether any jobs are queued
        162 + touch .watchdog_report.x
        163 + file_size=`ls -l .watchdog_report.x | tr -s " " | cut -d " " -f 5`
        164 + if [[ $file_size == 0 ]] then
        165 + printf "No jobs queued.\n" >> .watchdog_report.x
        166 + fi
        167 +
  140   168   # get estimated starting time for all idle jobs and write watchdog output in .watchdog_report.x
  141   169   while (( $j < $i ))
    …     …
  162   190
  163   191   # kill all windows
  164       - killall kdialog
        192 + if [[ "$infoPID" -ne "" ]] then
        193 + kill -9 $infoPID > /dev/null
        194 + unset $infoPID
        195 + fi
        196 + if [[ "$dialogPID" -ne "" ]] then
        197 + kill -9 $dialogPID > /dev/null
        198 + unset $dialogPID
        199 + fi
  165   200
  166   201   timestamp=`date`
  167   202
  168   203   # create window and show information
  169       - kdialog --textbox .watchdog_report.x 550 150 --title "HLRN watchdog (last update: $timestamp)" &
        204 + kdialog --textbox .watchdog_report.x 550 150 --title "HLRN watchdog (last update: $timestamp)" & dialogPID=$!
  170   205
  171   206   # in case of status changes and terminated jobs, inform the user
  172   207   if [[ $info != "" ]]
  173   208   then
  174       - kdialog --msgbox "$info" --title "HLRN Job Information" &
        209 + kdialog --msgbox "$info" --title "HLRN Job Information" & infoPID=$!
  175   210   fi
  176   211
Note: See TracChangeset for help on using the changeset viewer.