Changeset 2416


Ignore:
Timestamp:
Sep 6, 2017 2:28:14 PM (7 years ago)
Author:
maronga
Message:

watchdog adapted for palmrun

Location:
palm/trunk/SCRIPTS
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • palm/trunk/SCRIPTS/palm_wd

    r1754 r2416  
    2525# -----------------
    2626# $Id$
     27# Adapted for palmrun
     28#
     29# 1754 2016-02-22 13:50:22Z maronga
    2730#
    2831# 1753 2016-02-22 13:49:49Z maronga
     
    662665        # Return internal jobname
    663666        jobname = self.table.item(self.table.currentRow(),6).text()
     667        jobrealname = self.table.item(self.table.currentRow(),0).text()     
    664668 
    665669        # Check description of job in order to login on the correct host
     
    700704    def ShowDetails(self):
    701705
    702         jobname = self.table.item(self.table.currentRow(),6).text()
     706        jobname = self.table.item(self.table.currentRow(),6).text() 
    703707        descr   = self.table.item(self.table.currentRow(),1).text()
    704708        for h in range(0,len(description)):
     
    725729        # Return internal jobname
    726730        jobname = self.table.item(self.table.currentRow(),6).text()
    727  
     731        jobrealname = self.table.item(self.table.currentRow(),0).text()   
    728732        # Check description of job in order to login on the correct host
    729733        descr = self.table.item(self.table.currentRow(),1).text()
     
    733737                user = username[h]
    734738
    735         ssh = sub.Popen(["ssh", "%s" % host, "/sw/tools/python/2.7.6/generic/bin/python palm_wdd stop " + user + " " + jobname],
     739        ssh = sub.Popen(["ssh", "%s" % host, "/sw/tools/python/2.7.6/generic/bin/python palm_wdd stop " + jobrealname],
    736740                           shell=False,
    737741                           stdout=sub.PIPE,
     
    756760        # Return internal jobname
    757761        jobname = self.table.item(self.table.currentRow(),6).text()
    758  
     762        jobrealname = self.table.item(self.table.currentRow(),0).text()   
     763         
    759764        # Check description of job in order to login on the correct host
    760765        descr = self.table.item(self.table.currentRow(),1).text()
     
    764769                user = username[h]
    765770
    766         ssh = sub.Popen(["ssh", "%s" % host, "/sw/tools/python/2.7.6/generic/bin/python palm_wdd restart " + user + " " + jobname],
     771        ssh = sub.Popen(["ssh", "%s" % host, "/sw/tools/python/2.7.6/generic/bin/python palm_wdd restart " + jobrealname],
    767772                           shell=False,
    768773                           stdout=sub.PIPE,
     
    796801                user = username[h]
    797802
    798         ssh = sub.Popen(["ssh", "%s" % host, "/sw/tools/python/2.7.6/generic/bin/python palm_wdd parin " + user + " " + jobname],
     803        ssh = sub.Popen(["ssh", "%s" % host, "/sw/tools/python/2.7.6/generic/bin/python palm_wdd parin " + jobrealname],
    799804                           shell=False,
    800805                           stdout=sub.PIPE,
     
    829834                user = username[h]
    830835
    831         ssh = sub.Popen(["ssh", "%s" % host, "/sw/tools/python/2.7.6/generic/bin/python palm_wdd rc " + user + " " + jobname],
     836        ssh = sub.Popen(["ssh", "%s" % host, "/sw/tools/python/2.7.6/generic/bin/python palm_wdd rc " + jobrealname],
    832837                           shell=False,
    833838                           stdout=sub.PIPE,
  • palm/trunk/SCRIPTS/palm_wd_files/.wdd.config.hlrnIII

    r1618 r2416  
    55[Settings]
    66readqueue="showq | egrep"
    7 tmpdir="/gfs1/tmp/"
     7tmpdir="/gfs1/work/"
    88canceljob="canceljob"
    99checkjob="checkjob"
  • palm/trunk/SCRIPTS/palm_wdd

    r2062 r2416  
    2020# Current revisions:
    2121# -----------------
    22 #
     22# 
    2323#
    2424# Former revisions:
    2525# -----------------
    2626# $Id$
    27 #
    28 # Bugfix: return proper error messages when .wdd.config is not found and for
    29 # wrong actions
     27# Adapted for palmrun
     28#
     29# 1619 2015-07-13 06:53:19Z maronga
    3030#
    3131# 1618 2015-07-13 06:52:15Z maronga
     
    6565import ConfigParser
    6666import os
     67import pwd
    6768from subprocess import check_output
    6869import sys
    69 import socket
     70
    7071
    7172# Read configuration file
    7273# First check if the configuration file exists
    7374if ( os.path.exists('.wdd.config') == False ):
    74     sys.stderr.write("No configuration file .wdd.config found on remote host %s." % socket.gethostname())
     75    print "Error. No configuration file .wdd.config found."
    7576    raise SystemExit     
    7677
     
    8990action   = str(sys.argv[1])
    9091data     = str(sys.argv[2])
    91 if ( len(sys.argv) > 3 ):
    92    data2 =  str(sys.argv[3])
    93 
    94 
    95 cmd_readqueue = cmd_readqueue + " " + data
    96 cmd_tmpdir    = cmd_tmpdir + data
     92
     93cmd_readqueue = cmd_readqueue + " " + pwd.getpwuid( os.getuid() )[ 0 ]
     94cmd_tmpdir    = cmd_tmpdir + pwd.getpwuid( os.getuid() )[ 0 ]
    9795
    9896# reading queuing system
     
    137135            out = check_output(cmd_progress, shell=True, stderr=devnull)
    138136            progress_lines = out.splitlines()
    139             job_progress = progress_lines[1].split(" ")[1]
     137            job_progress = progress_lines[1].split(" ")[3]
    140138            out = None
    141139         except:
     
    192190
    193191
    194 def DoStopNow(username,jobid):
    195 
    196    # collect progress information
    197    cmd_dostop = "touch " + cmd_tmpdir + "/" + username + "." + jobid.partition(".")[2] + "/DO_STOP_NOW"
     192def DoStopNow(jobid):
     193
     194   # collect progress information
     195   cmd_dostop = "touch " + cmd_tmpdir + "/" + jobid + "/DO_STOP_NOW"
    198196   try:
    199197      devnull = open(os.devnull, 'w')
     
    204202      return return_message
    205203
    206 def DoRestartNow(username,jobid):
    207 
    208    # collect progress information
    209    cmd_dorestart = "touch " + cmd_tmpdir + "/" + username + "." + jobid.partition(".")[2] + "/DO_RESTART_NOW"
     204def DoRestartNow(jobid):
     205
     206   # collect progress information
     207   cmd_dorestart = "touch " + cmd_tmpdir + "/" + jobid + "/DO_RESTART_NOW"
    210208   try:
    211209      devnull = open(os.devnull, 'w')
     
    216214      return return_message
    217215
    218 def GetPARIN(username,jobid):
    219 
    220    # collect progress information
    221    cmd_dorestart = "cat " + cmd_tmpdir + "/" + username + "." + jobid.partition(".")[2] + "/PARIN"
     216def GetPARIN(jobid):
     217
     218   # collect progress information
     219   cmd_dorestart = "cat " + cmd_tmpdir + "/" + jobid + "/PARIN"
    222220   try:
    223221      devnull = open(os.devnull, 'w')
     
    226224      out = None
    227225   except:
    228       return_message = "Action failed." 
     226      return_message = "Action failed."   + "cat " + cmd_tmpdir + "/" + jobid + "/PARIN"
    229227     
    230228   return return_message
    231229
    232 def GetRC(username,jobid):
    233 
    234    # collect progress information
    235    cmd_dorestart = "cat " + cmd_tmpdir + "/" + username + "." + jobid.partition(".")[2] + "/RUN_CONTROL"
     230def GetRC(jobid):
     231
     232   # collect progress information
     233   cmd_dorestart = "cat " + cmd_tmpdir + "/" + jobid + "/RUN_CONTROL"
    236234   try:
    237235      devnull = open(os.devnull, 'w')
     
    240238      out = None
    241239   except:
    242       return_message = "Action failed." 
     240      return_message = "Action failed."
    243241     
    244242   return return_message
     
    254252   print GetStartTime(data)
    255253elif ( action == "stop"):
    256    print DoStopNow(data,data2)
     254   print DoStopNow(data)
    257255elif ( action == "restart"):
    258    print DoRestartNow(data,data2)
     256   print DoRestartNow(data)
    259257elif ( action == "parin"):
    260    print GetPARIN(data,data2)
     258   print GetPARIN(data)
    261259elif ( action == "rc"):
    262    print GetRC(data,data2)
     260   print GetRC(data)
    263261else:
    264    sys.stderr.write("Unknown action on remote host %s." % hostname)
    265 
     262   print "Error. Action " + action + " unknown."
     263
Note: See TracChangeset for help on using the changeset viewer.